Gitlab Community Edition Instance

Commit 39a595ac authored by mhellka
Browse files

Simplify NioPool garbage collector.

The GC now simply scans the directory and identifies index and binary
files to keep, based on last modification time alone. It no longer walks
the revision history, so missing revisions won't be a problem.
The "Keep N revisions" feature was removed because it makes no sense: a user
might create 100 revisions a second, or none in a year. Revisions should
only exist to allow MVCC, not to serve as a 'history' of changes that one
might want to preserve. If such a history is required, GC must be disabled
completely.
parent bca37abf
......@@ -16,40 +16,35 @@ import de.gwdg.cdstar.pool.PoolError;
import de.gwdg.cdstar.pool.nio.json.JsonIndex;
import de.gwdg.cdstar.pool.nio.json.JsonResource;
/**
* Perform garbage collection or 'trimming' on a single storage pool object.
*/
public class GCTask implements Runnable {
private static final Logger log = LoggerFactory.getLogger(GCTask.class);
private final NioPool pool;
private final String id;
private String headRev;
private final Instant keepNewerThan;
private final int keepRevisions;
private final Set<Path> keep = new HashSet<>();
private final boolean dryRun;
private long collectedBytes = 0;
private int collectedFiles = 0;
private SymlinkLock lock;
private JsonIndex head;
private final Set<Path> keepFiles = new HashSet<>();
private final Set<Path> allFiles = new HashSet<>();
private int collectedFiles;
private long collectedBytes;
/**
*
* @param pool The pool in which the object is stored.
* @param id id of the object to collect garbage from.
* @param dryRun if true, do not actually delete any files.
* @param keepAfter keep the revision visible at the given time, and all
* newer revisions.
* @param keepMinRevisions keep at least this many revisions.
* @param pool The pool in which the object is stored.
* @param id id of the object to collect garbage from.
* @param dryRun if true, do not actually delete any files.
* @param keepNewerThan Keep revisions newer than this point in time. If null,
* do not keep revisions based on time.
*/
public GCTask(NioPool pool, String id, boolean dryRun, Instant keepAfter, int keepMinRevisions) {
public GCTask(NioPool pool, String id, boolean dryRun, Instant keepNewerThan) {
this.pool = pool;
this.id = id;
keepNewerThan = keepAfter;
keepRevisions = keepMinRevisions;
this.keepNewerThan = keepNewerThan;
this.dryRun = dryRun;
}
......@@ -61,11 +56,14 @@ public class GCTask implements Runnable {
try {
log.debug("GC started: {} (path:{})", id, pool.getObjectPath(id));
headRev = pool.load(id, null).rev;
lock = pool.lockObject(id, headRev);
head = pool.load(id, null);
lock = pool.lockObject(id, head.rev);
performGC();
// Check head revision after lock was created
if (!head.rev.equals(pool.load(head.id, null).rev))
throw new PoolError.Conflict("Head revision changed");
performGC();
log.info("GC completed: {} (files:{}, bytes:{})", id, collectedFiles, collectedBytes);
} catch (final PoolError e) {
throw e;
......@@ -86,90 +84,78 @@ public class GCTask implements Runnable {
}
/**
* Walk the history, starting from HEAD and remember all encountered revisions
* and blobs as live.
* Scan the directory and collect all revisions that are older than
* {@link #keepNewerThan}, not pinned and not HEAD. Also collect resource files
* that are not referenced from any surviving revision.
*/
private void performGC() throws IOException {
if (!keep.isEmpty() || lock == null)
throw new IllegalStateException();
JsonIndex idx = pool.load(id, headRev);
int revisionsFound = 0;
while (true) {
// Keep index file
keep.add(normalize(pool.getObjectFile(idx.id, idx.rev)));
revisionsFound++;
// Keep BLOBs not marked as external.
if (idx.resources != null) {
for (final JsonResource r : idx.resources) {
if (r.size == 0)
continue;
if (NioResource.isExternal(r))
continue;
final Path blobFile = normalize(pool.getResourceBlob(id, r));
if (!Files.exists(blobFile))
throw new PoolError("Missing resource file (non-external): " + blobFile);
keep.add(blobFile);
}
}
// TODO: Add "protected" revisions and their blobs
// Stop if we reached the end of history
if (idx.parent == null)
break;
if (keepRevisions <= revisionsFound
&& (keepNewerThan == null || !Instant.ofEpochMilli(idx.mtime).isAfter(keepNewerThan)))
break;
try {
idx = pool.load(idx.id, idx.parent);
} catch (final PoolError.NotFound e) {
log.warn("A previous GC run removed revisions this GC run would not have removed.");
break;
}
}
if (keep.isEmpty() || lock == null)
throw new IllegalStateException();
try (DirectoryStream<Path> ds = Files.newDirectoryStream(pool.getObjectPath(id))) {
for (Path file : ds) {
file = normalize(file);
if (keep.contains(file)) {
log.debug("Keep {}", file);
continue;
}
if (!allFiles.add(file))
continue; // already seen
final String fname = file.getFileName().toString();
if (fname.endsWith(NioPool.EXT_BLOB) || fname.endsWith(NioPool.EXT_REVISION)) {
tryCollect(file);
if (fname.endsWith(NioPool.EXT_BLOB)) {
// Depends on revisions if kept or not
} else if (fname.endsWith(NioPool.EXT_REVISION)) {
inspectRev(file);
} else if (fname.equals(NioPool.FILENAME_HEAD) || fname.equals(NioPool.FILENAME_NEXT)) {
// Never collect these
keepFiles.add(file);
} else if (fname.endsWith(".tmp") || fname.endsWith(".temp")) {
// Never collect temporary files
keepFiles.add(file);
} else {
// Never collect unknown files
log.warn("Unrecognized file: {}", file);
keepFiles.add(file);
}
}
}
for (final Path file : allFiles) {
if (keepFiles.contains(file)) {
log.debug("Keeping: {}", file);
continue;
}
final long size = Files.size(file);
collectedFiles += 1;
collectedBytes += size;
if (dryRun) {
log.info("Collecting (dry run): {} ({} bytes)", file, size);
} else {
log.info("Collecting: {} ({} bytes)", file, size);
Files.delete(file);
}
}
}
private void tryCollect(Path toDelete) throws IOException {
final long size = Files.size(toDelete);
collectedFiles += 1;
collectedBytes += size;
private void inspectRev(Path file) throws IOException {
final JsonIndex idx = pool.loadFromDisk(file);
boolean keep = false;
// Keep head
keep = keep || idx.rev.equals(head.rev);
// Keep revisions not older than removeBefore
keep = keep || (keepNewerThan != null && Instant.ofEpochMilli(idx.mtime).isAfter(keepNewerThan));
if (!keep)
return;
if (dryRun) {
log.info("Collecting (dry run): {} ({} bytes)", toDelete, size);
} else {
log.info("Collecting: {} ({} bytes)", toDelete, size);
Files.delete(toDelete);
keepFiles.add(file);
if (idx.resources != null) {
for (final JsonResource r : idx.resources) {
if (r.size == 0)
continue;
if (NioResource.isExternal(r))
continue;
keepFiles.add(normalize(pool.getResourceBlob(id, r)));
}
}
}
......
......@@ -265,13 +265,13 @@ public class NioPool implements StoragePool {
@Override
public long trimObject(String objectId) throws NotFound {
Instant keepAfter = Instant.now().minus(Duration.ofDays(1));
Instant keepNewerThan = Instant.now().minus(Duration.ofDays(1));
for (final NioSession s : scopes.values()) {
if (keepAfter.isAfter(s.tx.getStarted()))
keepAfter = s.tx.getStarted().minusSeconds(10);
if (keepNewerThan.isAfter(s.tx.getStarted()))
keepNewerThan = s.tx.getStarted().minusSeconds(10);
}
final GCTask gc = new GCTask(this, objectId, false, keepAfter, 1);
final GCTask gc = new GCTask(this, objectId, false, keepNewerThan);
gc.run();
return gc.getCollectedBytes();
}
......
......@@ -56,7 +56,7 @@ public class GCTest {
}
private GCTask gc(final String id) {
final GCTask gc = new GCTask(pool, id, false, null, 0);
final GCTask gc = new GCTask(pool, id, false, null);
gc.run();
return gc;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment