Skip to main content
aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Redstone2020-01-31 19:14:42 +0000
committerMatthias Sohn2020-02-02 13:44:22 +0000
commit2a134e1541ff49f640061db2c3ea89ab3305c7a6 (patch)
tree92fb22adc4193d545ae714635ca9c06cb18fad06
parenteaff0a2eba33c6191ef0e156c7e7e6899efe281c (diff)
downloadjgit-2a134e1541ff49f640061db2c3ea89ab3305c7a6.tar.gz
jgit-2a134e1541ff49f640061db2c3ea89ab3305c7a6.tar.xz
jgit-2a134e1541ff49f640061db2c3ea89ab3305c7a6.zip
AmazonS3: Speed up fetch, try recent packs first
When fetching remote objects, WalkFetchConnection searches remote packs in the order provided by WalkRemoteObjectDatabase:getPackNames. Previously, for TransportAmazonS3, the packs were in no particular order. This resulted in potential many extra calls to get pack idx files. This change modifies TransportAmazonS3 and AmazonS3 so that getPackNames returns a list sorted with the most recently modified packs first. In the case of fetching recent changes to a repo, this dramatically reduces the number of packs searched and speeds up fetch. Note: WalkRemoteObjectDatabase::getPackNames does not specify the order of the returned names. Testing: did "mvn clean install" in root dir and all tests passed. And manually constructed some S3 repos and using jgit.sh confirmed that the freshest pack was checked first. Change-Id: I3b968fee825e793be55566e28c2d69d0cbe53807 Signed-off-by: Joshua Redstone <redstone@gmail.com> Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/transport/AmazonS3.java55
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/transport/TransportAmazonS3.java8
2 files changed, 54 insertions, 9 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/AmazonS3.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/AmazonS3.java
index 1e7f3b1f00..9210ec1721 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/AmazonS3.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/AmazonS3.java
@@ -33,6 +33,7 @@ import java.text.MessageFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
@@ -44,6 +45,8 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TimeZone;
import java.util.TreeMap;
+import java.util.stream.Collectors;
+import java.time.Instant;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
@@ -288,6 +291,8 @@ public class AmazonS3 {
* <p>
* This method is primarily meant for obtaining a "recursive directory
* listing" rooted under the specified bucket and prefix location.
+ * It returns the keys sorted in reverse order of LastModified time
+ * (freshest keys first).
*
* @param bucket
* name of the bucket whose objects should be listed.
@@ -311,7 +316,10 @@ public class AmazonS3 {
do {
lp.list();
} while (lp.truncated);
- return lp.entries;
+
+ Comparator<KeyInfo> comparator = Comparator.comparingLong(KeyInfo::getLastModifiedSecs);
+ return lp.entries.stream().sorted(comparator.reversed())
+ .map(KeyInfo::getName).collect(Collectors.toList());
}
/**
@@ -620,8 +628,26 @@ public class AmazonS3 {
return p;
}
+ /**
+ * KeyInfo enables sorting of keys by lastModified time
+ */
+ private static final class KeyInfo {
+ private final String name;
+ private final long lastModifiedSecs;
+ public KeyInfo(String aname, long lsecs) {
+ name = aname;
+ lastModifiedSecs = lsecs;
+ }
+ public String getName() {
+ return name;
+ }
+ public long getLastModifiedSecs() {
+ return lastModifiedSecs;
+ }
+ }
+
private final class ListParser extends DefaultHandler {
- final List<String> entries = new ArrayList<>();
+ final List<KeyInfo> entries = new ArrayList<>();
private final String bucket;
@@ -630,6 +656,8 @@ public class AmazonS3 {
boolean truncated;
private StringBuilder data;
+ private String keyName;
+ private Instant keyLastModified;
ListParser(String bn, String p) {
bucket = bn;
@@ -641,7 +669,7 @@ public class AmazonS3 {
if (prefix.length() > 0)
args.put("prefix", prefix); //$NON-NLS-1$
if (!entries.isEmpty())
- args.put("marker", prefix + entries.get(entries.size() - 1)); //$NON-NLS-1$
+ args.put("marker", prefix + entries.get(entries.size() - 1).getName()); //$NON-NLS-1$
for (int curAttempt = 0; curAttempt < maxAttempts; curAttempt++) {
final HttpURLConnection c = open("GET", bucket, "", args); //$NON-NLS-1$ //$NON-NLS-2$
@@ -650,6 +678,8 @@ public class AmazonS3 {
case HttpURLConnection.HTTP_OK:
truncated = false;
data = null;
+ keyName = null;
+ keyLastModified = null;
final XMLReader xr;
try {
@@ -683,8 +713,13 @@ public class AmazonS3 {
public void startElement(final String uri, final String name,
final String qName, final Attributes attributes)
throws SAXException {
- if ("Key".equals(name) || "IsTruncated".equals(name)) //$NON-NLS-1$ //$NON-NLS-2$
+ if ("Key".equals(name) || "IsTruncated".equals(name) || "LastModified".equals(name)) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
data = new StringBuilder();
+ }
+ if ("Contents".equals(name)) { //$NON-NLS-1$
+ keyName = null;
+ keyLastModified = null;
+ }
}
@Override
@@ -704,10 +739,16 @@ public class AmazonS3 {
@Override
public void endElement(final String uri, final String name,
final String qName) throws SAXException {
- if ("Key".equals(name)) //$NON-NLS-1$
- entries.add(data.toString().substring(prefix.length()));
- else if ("IsTruncated".equals(name)) //$NON-NLS-1$
+ if ("Key".equals(name)) { //$NON-NLS-1$
+ keyName = data.toString().substring(prefix.length());
+ } else if ("IsTruncated".equals(name)) { //$NON-NLS-1$
truncated = StringUtils.equalsIgnoreCase("true", data.toString()); //$NON-NLS-1$
+ } else if ("LastModified".equals(name)) { //$NON-NLS-1$
+ keyLastModified = Instant.parse(data.toString());
+ } else if ("Contents".equals(name)) { //$NON-NLS-1$
+ entries.add(new KeyInfo(keyName, keyLastModified.getEpochSecond()));
+ }
+
data = null;
}
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransportAmazonS3.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransportAmazonS3.java
index 94f36d2851..784f566159 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransportAmazonS3.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/TransportAmazonS3.java
@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@@ -241,11 +242,14 @@ public class TransportAmazonS3 extends HttpTransport implements WalkTransport {
@Override
Collection<String> getPackNames() throws IOException {
+ // s3.list returns most recently modified packs first.
+ // These are the packs most likely to contain missing refs.
+ final List<String> packList = s3.list(bucket, resolveKey("pack")); //$NON-NLS-1$
final HashSet<String> have = new HashSet<>();
- have.addAll(s3.list(bucket, resolveKey("pack"))); //$NON-NLS-1$
+ have.addAll(packList);
final Collection<String> packs = new ArrayList<>();
- for (String n : have) {
+ for (String n : packList) {
if (!n.startsWith("pack-") || !n.endsWith(".pack")) //$NON-NLS-1$ //$NON-NLS-2$
continue;

Back to the top