Skip to main content
aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java')
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java150
1 files changed, 122 insertions, 28 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
index 6f760caea1..d3803024a5 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
@@ -43,9 +43,12 @@
package org.eclipse.jgit.internal.storage.dfs;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_REST;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_TXN;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.RECEIVE;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
@@ -53,8 +56,11 @@ import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
+import java.util.EnumSet;
+import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -63,6 +69,7 @@ import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
@@ -77,6 +84,7 @@ import org.eclipse.jgit.lib.RefDatabase;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
+import org.eclipse.jgit.util.SystemReader;
import org.eclipse.jgit.util.io.CountingOutputStream;
/** Repack and garbage collect a repository. */
@@ -95,7 +103,8 @@ public class DfsGarbageCollector {
private PackConfig packConfig;
- // See pack(), below, for how these two variables interact.
+ // See packIsCoalesceableGarbage(), below, for how these two variables
+ // interact.
private long coalesceGarbageLimit = 50 << 20;
private long garbageTtlMillis = TimeUnit.DAYS.toMillis(1);
@@ -118,9 +127,9 @@ public class DfsGarbageCollector {
repo = repository;
refdb = repo.getRefDatabase();
objdb = repo.getObjectDatabase();
- newPackDesc = new ArrayList<DfsPackDescription>(4);
- newPackStats = new ArrayList<PackStatistics>(4);
- newPackObj = new ArrayList<ObjectIdSet>(4);
+ newPackDesc = new ArrayList<>(4);
+ newPackStats = new ArrayList<>(4);
+ newPackObj = new ArrayList<>(4);
packConfig = new PackConfig(repo);
packConfig.setIndexVersion(2);
@@ -223,14 +232,8 @@ public class DfsGarbageCollector {
if (packConfig.getIndexVersion() != 2)
throw new IllegalStateException(
JGitText.get().supportOnlyPackIndexVersion2);
- if (garbageTtlMillis > 0) {
- // We disable coalescing because the coalescing step will keep
- // refreshing the UNREACHABLE_GARBAGE pack and we wouldn't
- // actually prune anything.
- coalesceGarbageLimit = 0;
- }
- startTimeMillis = System.currentTimeMillis();
+ startTimeMillis = SystemReader.getInstance().getCurrentTime();
ctx = (DfsReader) objdb.newReader();
try {
refdb.refresh();
@@ -246,14 +249,14 @@ public class DfsGarbageCollector {
return true;
}
- allHeads = new HashSet<ObjectId>();
- nonHeads = new HashSet<ObjectId>();
- txnHeads = new HashSet<ObjectId>();
- tagTargets = new HashSet<ObjectId>();
+ allHeads = new HashSet<>();
+ nonHeads = new HashSet<>();
+ txnHeads = new HashSet<>();
+ tagTargets = new HashSet<>();
for (Ref ref : refsBefore) {
if (ref.isSymbolic() || ref.getObjectId() == null)
continue;
- if (isHead(ref))
+ if (isHead(ref) || isTag(ref))
allHeads.add(ref.getObjectId());
else if (RefTreeNames.isRefTree(refdb, ref.getName()))
txnHeads.add(ref.getObjectId());
@@ -301,18 +304,18 @@ public class DfsGarbageCollector {
private void readPacksBefore() throws IOException {
DfsPackFile[] packs = objdb.getPacks();
- packsBefore = new ArrayList<DfsPackFile>(packs.length);
- expiredGarbagePacks = new ArrayList<DfsPackFile>(packs.length);
+ packsBefore = new ArrayList<>(packs.length);
+ expiredGarbagePacks = new ArrayList<>(packs.length);
long mostRecentGC = mostRecentGC(packs);
- long now = System.currentTimeMillis();
+ long now = SystemReader.getInstance().getCurrentTime();
for (DfsPackFile p : packs) {
DfsPackDescription d = p.getPackDescription();
if (d.getPackSource() != UNREACHABLE_GARBAGE) {
packsBefore.add(p);
} else if (packIsExpiredGarbage(d, mostRecentGC, now)) {
expiredGarbagePacks.add(p);
- } else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) {
+ } else if (packIsCoalesceableGarbage(d, now)) {
packsBefore.add(p);
}
}
@@ -355,6 +358,68 @@ public class DfsGarbageCollector {
&& now - d.getLastModified() >= garbageTtlMillis;
}
+ private boolean packIsCoalesceableGarbage(DfsPackDescription d, long now) {
+ // An UNREACHABLE_GARBAGE pack can be coalesced if its size is less than
+ // the coalesceGarbageLimit and either garbageTtl is zero or if the pack
+ // is created in a close time interval (on a single calendar day when
+ // the garbageTtl is more than one day or one third of the garbageTtl).
+ //
+ // When the garbageTtl is more than 24 hours, garbage packs that are
+ // created within a single calendar day are coalesced together. This
+ // would make the effective ttl of the garbage pack as garbageTtl+23:59
+ // and limit the number of garbage to a maximum number of
+ // garbageTtl_in_days + 1 (assuming all of them are less than the size
+ // of coalesceGarbageLimit).
+ //
+ // When the garbageTtl is less than or equal to 24 hours, garbage packs
+ // that are created within a one third of garbageTtl are coalesced
+ // together. This would make the effective ttl of the garbage packs as
+ // garbageTtl + (garbageTtl / 3) and would limit the number of garbage
+ // packs to a maximum number of 4 (assuming all of them are less than
+ // the size of coalesceGarbageLimit).
+
+ if (d.getPackSource() != UNREACHABLE_GARBAGE
+ || d.getFileSize(PackExt.PACK) >= coalesceGarbageLimit) {
+ return false;
+ }
+
+ if (garbageTtlMillis == 0) {
+ return true;
+ }
+
+ long lastModified = d.getLastModified();
+ long dayStartLastModified = dayStartInMillis(lastModified);
+ long dayStartToday = dayStartInMillis(now);
+
+ if (dayStartLastModified != dayStartToday) {
+ return false; // this pack is not created today.
+ }
+
+ if (garbageTtlMillis > TimeUnit.DAYS.toMillis(1)) {
+ return true; // ttl is more than one day and pack is created today.
+ }
+
+ long timeInterval = garbageTtlMillis / 3;
+ if (timeInterval == 0) {
+ return false; // ttl is too small, don't try to coalesce.
+ }
+
+ long modifiedTimeSlot = (lastModified - dayStartLastModified) / timeInterval;
+ long presentTimeSlot = (now - dayStartToday) / timeInterval;
+ return modifiedTimeSlot == presentTimeSlot;
+ }
+
+ private static long dayStartInMillis(long timeInMillis) {
+ Calendar cal = new GregorianCalendar(
+ SystemReader.getInstance().getTimeZone());
+ cal.setTimeInMillis(timeInMillis);
+ cal.set(Calendar.HOUR_OF_DAY, 0);
+ cal.set(Calendar.MINUTE, 0);
+ cal.set(Calendar.SECOND, 0);
+ cal.set(Calendar.MILLISECOND, 0);
+ return cal.getTimeInMillis();
+ }
+
/** @return all of the source packs that fed into this compaction. */
public List<DfsPackDescription> getSourcePacks() {
return toPrune();
@@ -372,7 +437,7 @@ public class DfsGarbageCollector {
private List<DfsPackDescription> toPrune() {
int cnt = packsBefore.size();
- List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
+ List<DfsPackDescription> all = new ArrayList<>(cnt);
for (DfsPackFile pack : packsBefore) {
all.add(pack.getPackDescription());
}
@@ -390,7 +455,8 @@ public class DfsGarbageCollector {
pw.setTagTargets(tagTargets);
pw.preparePack(pm, allHeads, PackWriter.NONE);
if (0 < pw.getObjectCount())
- writePack(GC, pw, pm);
+ writePack(GC, pw, pm,
+ estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC));
}
}
@@ -403,7 +469,8 @@ public class DfsGarbageCollector {
pw.excludeObjects(packedObjs);
pw.preparePack(pm, nonHeads, allHeads);
if (0 < pw.getObjectCount())
- writePack(GC_REST, pw, pm);
+ writePack(GC_REST, pw, pm,
+ estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC_REST));
}
}
@@ -416,7 +483,7 @@ public class DfsGarbageCollector {
pw.excludeObjects(packedObjs);
pw.preparePack(pm, txnHeads, PackWriter.NONE);
if (0 < pw.getObjectCount())
- writePack(GC_TXN, pw, pm);
+ writePack(GC_TXN, pw, pm, 0 /* unknown pack size */);
}
}
@@ -432,21 +499,29 @@ public class DfsGarbageCollector {
pw.setDeltaBaseAsOffset(true);
pw.setReuseDeltaCommits(true);
pm.beginTask(JGitText.get().findingGarbage, objectsBefore());
+ long estimatedPackSize = 12 + 20; // header and trailer sizes.
for (DfsPackFile oldPack : packsBefore) {
PackIndex oldIdx = oldPack.getPackIndex(ctx);
+ PackReverseIndex oldRevIdx = oldPack.getReverseIdx(ctx);
+ long maxOffset = oldPack.getPackDescription().getFileSize(PACK)
+ - 20; // pack size - trailer size.
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
if (pool.lookupOrNull(id) != null || anyPackHas(id))
continue;
- int type = oldPack.getObjectType(ctx, ent.getOffset());
+ long offset = ent.getOffset();
+ int type = oldPack.getObjectType(ctx, offset);
pw.addObject(pool.lookupAny(id, type));
+ long objSize = oldRevIdx.findNextOffset(offset, maxOffset)
+ - offset;
+ estimatedPackSize += objSize;
}
}
pm.endTask();
if (0 < pw.getObjectCount())
- writePack(UNREACHABLE_GARBAGE, pw, pm);
+ writePack(UNREACHABLE_GARBAGE, pw, pm, estimatedPackSize);
}
}
@@ -461,6 +536,10 @@ public class DfsGarbageCollector {
return ref.getName().startsWith(Constants.R_HEADS);
}
+ private static boolean isTag(Ref ref) {
+ return ref.getName().startsWith(Constants.R_TAGS);
+ }
+
private int objectsBefore() {
int cnt = 0;
for (DfsPackFile p : packsBefore)
@@ -475,9 +554,24 @@ public class DfsGarbageCollector {
return pw;
}
+ private long estimateGcPackSize(PackSource first, PackSource... rest) {
+ EnumSet<PackSource> sourceSet = EnumSet.of(first, rest);
+ // Every pack file contains 12 bytes of header and 20 bytes of trailer.
+ // Include the final pack file header and trailer size here and ignore
+ // the same from individual pack files.
+ long size = 32;
+ for (DfsPackDescription pack : getSourcePacks()) {
+ if (sourceSet.contains(pack.getPackSource())) {
+ size += pack.getFileSize(PACK) - 32;
+ }
+ }
+ return size;
+ }
+
private DfsPackDescription writePack(PackSource source, PackWriter pw,
- ProgressMonitor pm) throws IOException {
- DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
+ ProgressMonitor pm, long estimatedPackSize) throws IOException {
+ DfsPackDescription pack = repo.getObjectDatabase().newPack(source,
+ estimatedPackSize);
newPackDesc.add(pack);
try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {

Back to the top