diff options
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsBlockCache.java')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsBlockCache.java | 561 |
1 files changed, 561 insertions, 0 deletions
/*
 * Copyright (C) 2008-2011, Google Inc.
 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.storage.dfs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.concurrent.locks.ReentrantLock;

import org.eclipse.jgit.JGitText;

/**
 * Caches slices of a {@link DfsPackFile} in memory for faster read access.
 * <p>
 * The DfsBlockCache serves as a Java based "buffer cache", loading segments of
 * a DfsPackFile into the JVM heap prior to use. As JGit often wants to do reads
 * of only tiny slices of a file, the DfsBlockCache tries to smooth out these
 * tiny reads into larger block-sized IO operations.
 * <p>
 * Whenever a cache miss occurs, loading is invoked by exactly one thread for
 * the given <code>(DfsPackKey,position)</code> key tuple. This is ensured by an
 * array of locks, with the tuple hashed to a lock instance.
 * <p>
 * It's too expensive during object access to be accurate with a least recently
 * used (LRU) algorithm. Strictly ordering every read is a lot of overhead that
 * typically doesn't yield a corresponding benefit to the application. This
 * cache implements a clock replacement algorithm, giving each block one chance
 * to have been accessed during a sweep of the cache to save itself from
 * eviction.
 * <p>
 * Entities created by the cache are held under hard references, preventing the
 * Java VM from clearing anything. Blocks are discarded by the replacement
 * algorithm when adding a new block would cause the cache to exceed its
 * configured maximum size.
 * <p>
 * The key tuple is passed through to methods as a pair of parameters rather
 * than as a single Object, thus reducing the transient memory allocations of
 * callers. It is more efficient to avoid the allocation, as we can't be 100%
 * sure that a JIT would be able to stack-allocate a key tuple.
 * <p>
 * The internal hash table does not expand at runtime, instead it is fixed in
 * size at cache creation time. The internal lock table used to gate load
 * invocations is also fixed in size.
 */
public final class DfsBlockCache {
	/** Singleton instance; replaced wholesale by {@link #reconfigure(DfsBlockCacheConfig)}. */
	private static volatile DfsBlockCache cache;

	static {
		reconfigure(new DfsBlockCacheConfig());
	}

	/**
	 * Modify the configuration of the window cache.
	 * <p>
	 * The new configuration is applied immediately. If the new limits are
	 * smaller than what is currently cached, older entries will be purged
	 * as soon as possible to allow the cache to meet the new limit.
	 *
	 * @param cfg
	 *            the new window cache configuration.
	 * @throws IllegalArgumentException
	 *             the cache configuration contains one or more invalid
	 *             settings, usually too low of a limit.
	 */
	public static void reconfigure(DfsBlockCacheConfig cfg) {
		// Construct the replacement cache before publishing it, so readers of
		// the volatile field always observe a fully initialized instance.
		DfsBlockCache nc = new DfsBlockCache(cfg);
		DfsBlockCache oc = cache;
		cache = nc;

		// Stop the old cache's read-ahead pool; its queued tasks will drain.
		if (oc != null && oc.readAheadService != null)
			oc.readAheadService.shutdown();
	}

	/** @return the currently active DfsBlockCache. */
	public static DfsBlockCache getInstance() {
		return cache;
	}

	/** Number of entries in {@link #table}. */
	private final int tableSize;

	/** Hash bucket directory; entries are chained below. */
	private final AtomicReferenceArray<HashEntry> table;

	/** Locks to prevent concurrent loads for same (PackFile,position). */
	private final ReentrantLock[] loadLocks;

	/** Maximum number of bytes the cache should hold. */
	private final long maxBytes;

	/**
	 * Suggested block size to read from pack files in.
	 * <p>
	 * If a pack file does not have a native block size, this size will be used.
	 * <p>
	 * If a pack file has a native size, a whole multiple of the native size
	 * will be used until it matches this size.
	 */
	private final int blockSize;

	/** As {@link #blockSize} is a power of 2, bits to shift for a / blockSize. */
	private final int blockSizeShift;

	/** Number of bytes to read-ahead from current read position. */
	private final int readAheadLimit;

	/** Thread pool to handle optimistic read-ahead. */
	private final ThreadPoolExecutor readAheadService;

	/** Cache of pack files, indexed by description. */
	private final Map<DfsPackDescription, DfsPackFile> packCache;

	/** Number of times a block was found in the cache. */
	private final AtomicLong statHit;

	/** Number of times a block was not found, and had to be loaded. */
	private final AtomicLong statMiss;

	/** Number of blocks evicted due to cache being full. */
	private volatile long statEvict;

	/** Protects the clock and its related data. */
	private final ReentrantLock clockLock;

	/**
	 * Current position of the clock. The clock is a circular singly linked
	 * list of {@link Ref} objects; this field points at the most recently
	 * inserted entry and its {@code next} wraps back around the ring.
	 */
	private Ref clockHand;

	/** Number of bytes currently loaded in the cache. */
	private volatile long liveBytes;

	/**
	 * Initialize the cache from the supplied configuration.
	 *
	 * @param cfg
	 *            sizing and read-ahead settings for the new cache.
	 * @throws IllegalArgumentException
	 *             the configured table size, block size or block limit is
	 *             invalid (see {@link #tableSize(DfsBlockCacheConfig)}).
	 */
	private DfsBlockCache(final DfsBlockCacheConfig cfg) {
		tableSize = tableSize(cfg);
		if (tableSize < 1)
			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);

		table = new AtomicReferenceArray<HashEntry>(tableSize);
		// 32 region locks; fair mode so a loading thread is not starved by
		// a stream of later arrivals hashing to the same lock.
		loadLocks = new ReentrantLock[32];
		for (int i = 0; i < loadLocks.length; i++)
			loadLocks[i] = new ReentrantLock(true /* fair */);

		// NOTE(review): `eb` (clamped to 4..64, capped at tableSize) is
		// computed but never read afterwards — looks like dead code left
		// over from an eviction-batch sizing scheme; confirm and remove.
		int eb = (int) (tableSize * .1);
		if (64 < eb)
			eb = 64;
		else if (eb < 4)
			eb = 4;
		if (tableSize < eb)
			eb = tableSize;

		maxBytes = cfg.getBlockLimit();
		blockSize = cfg.getBlockSize();
		// Valid because tableSize() rejects non-positive sizes and blockSize
		// is presumably a power of two — TODO confirm DfsBlockCacheConfig
		// enforces the power-of-two constraint.
		blockSizeShift = Integer.numberOfTrailingZeros(blockSize);

		clockLock = new ReentrantLock(true /* fair */);
		// Sentinel ref with no value; makes the clock ring never empty and
		// lets reserveSpace() walk without null checks.
		clockHand = new Ref<Object>(null, -1, 0, null);
		clockHand.next = clockHand;

		readAheadLimit = cfg.getReadAheadLimit();
		readAheadService = cfg.getReadAheadService();

		packCache = new HashMap<DfsPackDescription, DfsPackFile>();

		statHit = new AtomicLong();
		statMiss = new AtomicLong();
	}

	/** @return total number of bytes in the cache. */
	public long getCurrentSize() {
		return liveBytes;
	}

	/** @return 0..100, defining how full the cache is. */
	public long getFillPercentage() {
		return getCurrentSize() * 100 / maxBytes;
	}

	/** @return 0..100, defining number of cache hits. */
	public long getHitRatio() {
		long hits = statHit.get();
		long miss = statMiss.get();
		long total = hits + miss;
		if (total == 0)
			return 0;
		return hits * 100 / total;
	}

	/** @return number of evictions performed due to cache being full. */
	public long getEvictions() {
		return statEvict;
	}

	/**
	 * Get (or lazily create) the DfsPackFile for a pack description.
	 * <p>
	 * An existing entry that has been marked invalid is discarded and
	 * replaced by a fresh DfsPackFile instance.
	 *
	 * @param dsc
	 *            description of the pack being requested.
	 * @param key
	 *            cache key to associate with a newly created pack, or null
	 *            to allocate a new DfsPackKey on demand.
	 * @return the cached (possibly newly created) pack file.
	 */
	DfsPackFile getOrCreate(DfsPackDescription dsc, DfsPackKey key) {
		// TODO This table grows without bound. It needs to clean up
		// entries that aren't in cache anymore, and aren't being used
		// by a live DfsObjDatabase reference.
		synchronized (packCache) {
			DfsPackFile pack = packCache.get(dsc);
			if (pack != null && pack.invalid()) {
				packCache.remove(dsc);
				pack = null;
			}
			if (pack == null) {
				if (key == null)
					key = new DfsPackKey();
				pack = new DfsPackFile(this, dsc, key);
				packCache.put(dsc, pack);
			}
			return pack;
		}
	}

	/**
	 * Combine a pack's hash with a block-aligned offset into one hash code.
	 *
	 * @param packHash
	 *            precomputed hash of the pack's key.
	 * @param off
	 *            byte offset within the pack; only the block index
	 *            (off / blockSize) contributes.
	 * @return hash code for the (pack, block) tuple.
	 */
	private int hash(int packHash, long off) {
		return packHash + (int) (off >>> blockSizeShift);
	}

	/** @return configured size, in bytes, of blocks read into this cache. */
	int getBlockSize() {
		return blockSize;
	}

	/**
	 * Compute the hash table size implied by the configuration.
	 * <p>
	 * Sized at 2.5 buckets per maximum-capacity block (5/2 * limit/blockSize),
	 * clamped to Integer.MAX_VALUE, to keep chains short.
	 *
	 * @param cfg
	 *            configuration supplying block size and byte limit.
	 * @return number of hash buckets to allocate.
	 * @throws IllegalArgumentException
	 *             the block size is not positive, or the byte limit is
	 *             smaller than a single block.
	 */
	private static int tableSize(final DfsBlockCacheConfig cfg) {
		final int wsz = cfg.getBlockSize();
		final long limit = cfg.getBlockLimit();
		if (wsz <= 0)
			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
		if (limit < wsz)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
		return (int) Math.min(5 * (limit / wsz) / 2, Integer.MAX_VALUE);
	}

	/**
	 * Lookup a cached object, creating and loading it if it doesn't exist.
	 *
	 * @param pack
	 *            the pack that "contains" the cached object.
	 * @param position
	 *            offset within <code>pack</code> of the object.
	 * @param ctx
	 *            current thread's reader.
	 * @return the object reference.
	 * @throws IOException
	 *             the reference was not in the cache and could not be loaded.
	 */
	DfsBlock getOrLoad(DfsPackFile pack, long position, DfsReader ctx)
			throws IOException {
		final long requestedPosition = position;
		position = pack.alignToBlock(position);

		DfsPackKey key = pack.key;
		int slot = slot(key, position);
		HashEntry e1 = table.get(slot);
		DfsBlock v = scan(e1, key, position);
		if (v != null)
			return v;

		// Reserve space optimistically before taking the region lock, so a
		// concurrent eviction can run while we wait for the lock.
		reserveSpace(blockSize);
		ReentrantLock regionLock = lockFor(key, position);
		regionLock.lock();
		try {
			// Double-check: another thread may have loaded the block while
			// we were blocked on the region lock.
			HashEntry e2 = table.get(slot);
			if (e2 != e1) {
				v = scan(e2, key, position);
				if (v != null) {
					creditSpace(blockSize);
					return v;
				}
			}

			statMiss.incrementAndGet();
			boolean credit = true;
			try {
				v = pack.readOneBlock(position, ctx);
				credit = false;
			} finally {
				// Return the reservation if the read threw.
				if (credit)
					creditSpace(blockSize);
			}
			if (position != v.start) {
				// The file discovered its blockSize and adjusted.
				position = v.start;
				slot = slot(key, position);
				e2 = table.get(slot);
			}

			Ref<DfsBlock> ref = new Ref<DfsBlock>(key, position, v.size(), v);
			ref.hot = true;
			// CAS-loop: publish the new chain head, retrying if another
			// thread mutated this bucket concurrently.
			for (;;) {
				HashEntry n = new HashEntry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
			// Correct the reservation: we reserved blockSize but the actual
			// block may be smaller (or larger) than that.
			addToClock(ref, blockSize - v.size());
		} finally {
			regionLock.unlock();
		}

		// If the block size changed from the default, it is possible the block
		// that was loaded is the wrong block for the requested position.
		if (v.contains(pack.key, requestedPosition))
			return v;
		return getOrLoad(pack, requestedPosition, ctx);
	}

	/**
	 * Charge {@code reserve} bytes against the cache, evicting cold entries
	 * via the clock algorithm until the total fits under {@link #maxBytes}.
	 * <p>
	 * Hot entries get their hot bit cleared and one more sweep to survive;
	 * cold entries are unlinked from the ring and their value dropped. The
	 * sweep stops early if the hand laps the ring ({@code prev == hand}).
	 *
	 * @param reserve
	 *            number of bytes about to be added to the cache.
	 */
	@SuppressWarnings("unchecked")
	private void reserveSpace(int reserve) {
		// NOTE(review): lock()/unlock() without try/finally here (and in
		// creditSpace/addToClock) — an exception in between would leave
		// clockLock held. Nothing in the body obviously throws; confirm.
		clockLock.lock();
		long live = liveBytes + reserve;
		if (maxBytes < live) {
			Ref prev = clockHand;
			Ref hand = clockHand.next;
			do {
				if (hand.hot) {
					// Value was recently touched. Clear
					// hot and give it another chance.
					hand.hot = false;
					prev = hand;
					hand = hand.next;
					continue;
				} else if (prev == hand)
					break;

				// No recent access since last scan, kill
				// value and remove from clock.
				Ref dead = hand;
				hand = hand.next;
				prev.next = hand;
				dead.next = null;
				dead.value = null;
				live -= dead.size;
				statEvict++;
			} while (maxBytes < live);
			clockHand = prev;
		}
		liveBytes = live;
		clockLock.unlock();
	}

	/**
	 * Return {@code credit} bytes of a prior {@link #reserveSpace(int)}
	 * reservation that will not be used (e.g. another thread loaded the
	 * block first, or the load failed).
	 *
	 * @param credit
	 *            number of reserved bytes to give back.
	 */
	private void creditSpace(int credit) {
		clockLock.lock();
		liveBytes -= credit;
		clockLock.unlock();
	}

	/**
	 * Insert a newly cached ref into the clock ring at the hand position.
	 *
	 * @param ref
	 *            entry to link into the ring; becomes the new clock hand.
	 * @param credit
	 *            bytes over-reserved for this entry, returned to the pool
	 *            (0 when the reservation was exact).
	 */
	private void addToClock(Ref ref, int credit) {
		clockLock.lock();
		if (credit != 0)
			liveBytes -= credit;
		Ref ptr = clockHand;
		ref.next = ptr.next;
		ptr.next = ref;
		clockHand = ref;
		clockLock.unlock();
	}

	/**
	 * Cache a fully loaded block under its pack and starting offset.
	 *
	 * @param v
	 *            block to cache.
	 */
	void put(DfsBlock v) {
		put(v.pack, v.start, v.size(), v);
	}

	/**
	 * Cache an arbitrary value under a (pack, position) tuple, unless an
	 * equivalent live entry already exists.
	 *
	 * @param <T>
	 *            type of the cached value.
	 * @param key
	 *            pack the value belongs to.
	 * @param pos
	 *            offset the value represents within the pack.
	 * @param size
	 *            number of bytes to charge against the cache for this value.
	 * @param v
	 *            the value to cache.
	 * @return the cache entry now holding the value; may be a pre-existing
	 *         entry rather than one wrapping {@code v}.
	 */
	<T> Ref<T> put(DfsPackKey key, long pos, int size, T v) {
		int slot = slot(key, pos);
		HashEntry e1 = table.get(slot);
		Ref<T> ref = scanRef(e1, key, pos);
		if (ref != null)
			return ref;

		reserveSpace(size);
		ReentrantLock regionLock = lockFor(key, pos);
		regionLock.lock();
		try {
			// Double-check under the region lock, same as getOrLoad().
			HashEntry e2 = table.get(slot);
			if (e2 != e1) {
				ref = scanRef(e2, key, pos);
				if (ref != null) {
					creditSpace(size);
					return ref;
				}
			}

			ref = new Ref<T>(key, pos, size, v);
			ref.hot = true;
			for (;;) {
				HashEntry n = new HashEntry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
			addToClock(ref, 0);
		} finally {
			regionLock.unlock();
		}
		return ref;
	}

	/**
	 * Check whether a live value is cached for the given tuple.
	 * <p>
	 * Note: a hit here marks the entry hot and counts in the statistics,
	 * as it reuses {@link #scan(HashEntry, DfsPackKey, long)}.
	 *
	 * @param key
	 *            pack to look up.
	 * @param position
	 *            offset within the pack.
	 * @return true if a live cached value exists for the tuple.
	 */
	boolean contains(DfsPackKey key, long position) {
		return scan(table.get(slot(key, position)), key, position) != null;
	}

	/**
	 * Fetch a cached value without loading on miss.
	 *
	 * @param <T>
	 *            expected type of the cached value; the unchecked cast is
	 *            the caller's responsibility.
	 * @param key
	 *            pack to look up.
	 * @param position
	 *            offset within the pack.
	 * @return cached value, or null on miss (miss is recorded in stats).
	 */
	@SuppressWarnings("unchecked")
	<T> T get(DfsPackKey key, long position) {
		T val = (T) scan(table.get(slot(key, position)), key, position);
		if (val == null)
			statMiss.incrementAndGet();
		return val;
	}

	/**
	 * Schedule asynchronous read-ahead of blocks following the current
	 * position, skipping blocks already cached.
	 *
	 * @param rc
	 *            channel the read-ahead task will read from.
	 * @param key
	 *            pack the blocks belong to.
	 * @param size
	 *            block size to read in.
	 * @param pos
	 *            position to begin read-ahead at.
	 * @param len
	 *            total length of the underlying file; read-ahead stops here.
	 * @param ctx
	 *            current thread's reader; notified of the scheduled blocks.
	 * @return true if a read-ahead task was submitted; false if read-ahead
	 *         is disabled or every candidate block was already cached.
	 */
	boolean readAhead(ReadableChannel rc, DfsPackKey key, int size, long pos,
			long len, DfsReader ctx) {
		if (!ctx.wantReadAhead() || readAheadLimit <= 0 || readAheadService == null)
			return false;

		int cap = readAheadLimit / size;
		long readAheadEnd = pos + readAheadLimit;
		List<ReadAheadTask.BlockFuture> blocks = new ArrayList<ReadAheadTask.BlockFuture>(cap);
		while (pos < readAheadEnd && pos < len) {
			long end = Math.min(pos + size, len);
			if (!contains(key, pos))
				blocks.add(new ReadAheadTask.BlockFuture(key, pos, end));
			pos = end;
		}
		if (blocks.isEmpty())
			return false;

		ReadAheadTask task = new ReadAheadTask(this, rc, blocks);
		ReadAheadTask.TaskFuture t = new ReadAheadTask.TaskFuture(task);
		for (ReadAheadTask.BlockFuture b : blocks)
			b.setTask(t);
		readAheadService.execute(t);
		ctx.startedReadAhead(blocks);
		return true;
	}

	/**
	 * Walk a bucket chain looking for a live value for the tuple.
	 * <p>
	 * On a hit the entry's hot bit is set (via {@link Ref#get()}) and the
	 * hit counter incremented; an evicted entry (null value) is a miss.
	 *
	 * @param n
	 *            head of the bucket chain, may be null.
	 * @param pack
	 *            pack to match (by identity).
	 * @param position
	 *            offset to match.
	 * @return the live value, or null if absent or already evicted.
	 */
	@SuppressWarnings("unchecked")
	private <T> T scan(HashEntry n, DfsPackKey pack, long position) {
		for (; n != null; n = n.next) {
			Ref<T> r = n.ref;
			if (r.pack != pack || r.position != position)
				continue;
			T v = r.get();
			if (v == null)
				return null;
			statHit.incrementAndGet();
			return v;
		}
		return null;
	}

	/**
	 * Like {@link #scan(HashEntry, DfsPackKey, long)} but returns the Ref
	 * wrapper itself, without touching hit statistics directly (the
	 * {@code r.get()} call still marks the entry hot).
	 *
	 * @param n
	 *            head of the bucket chain, may be null.
	 * @param pack
	 *            pack to match (by identity).
	 * @param position
	 *            offset to match.
	 * @return the live Ref, or null if absent or already evicted.
	 */
	@SuppressWarnings("unchecked")
	private <T> Ref<T> scanRef(HashEntry n, DfsPackKey pack, long position) {
		for (; n != null; n = n.next) {
			Ref<T> r = n.ref;
			if (r.pack == pack && r.position == position)
				return r.get() != null ? r : null;
		}
		return null;
	}

	/**
	 * Drop a pack from the pack-description cache. Its data blocks remain
	 * in {@link #table} until evicted by the clock.
	 *
	 * @param pack
	 *            pack to remove.
	 */
	void remove(DfsPackFile pack) {
		synchronized (packCache) {
			packCache.remove(pack.getPackDescription());
		}
	}

	/**
	 * Map a (pack, position) tuple to a bucket index in {@link #table}.
	 * The {@code >>> 1} discards the sign bit so the modulus is never
	 * applied to a negative hash.
	 */
	private int slot(DfsPackKey pack, long position) {
		return (hash(pack.hash, position) >>> 1) % tableSize;
	}

	/** Map a (pack, position) tuple to its load-serialization lock. */
	private ReentrantLock lockFor(DfsPackKey pack, long position) {
		return loadLocks[(hash(pack.hash, position) >>> 1) % loadLocks.length];
	}

	/**
	 * Produce a copy of a bucket chain with evicted entries removed.
	 * <p>
	 * An entry is dead when its ref's {@code next} is null, i.e. the clock
	 * unlinked it in {@link #reserveSpace(int)}. Returns the original chain
	 * unchanged (same object) when nothing needed removal, so the caller's
	 * CAS still sees the expected head.
	 *
	 * @param top
	 *            head of the chain to clean, may be null.
	 * @return cleaned chain; null if all entries were dead.
	 */
	private static HashEntry clean(HashEntry top) {
		while (top != null && top.ref.next == null)
			top = top.next;
		if (top == null)
			return null;
		HashEntry n = clean(top.next);
		return n == top.next ? top : new HashEntry(n, top.ref);
	}

	/** Immutable link in a hash bucket chain; chains are copied, never mutated. */
	private static final class HashEntry {
		/** Next entry in the hash table's chain list. */
		final HashEntry next;

		/** The referenced object. */
		final Ref ref;

		HashEntry(HashEntry n, Ref r) {
			next = n;
			ref = r;
		}
	}

	/**
	 * A cached value plus its clock-ring bookkeeping.
	 * <p>
	 * {@code value == null} means evicted; {@code next == null} means
	 * unlinked from the clock ring (used by {@link #clean(HashEntry)} to
	 * detect dead hash entries).
	 */
	static final class Ref<T> {
		final DfsPackKey pack;
		final long position;
		final int size;
		volatile T value;
		/** Next entry in the clock ring; guarded by clockLock once linked. */
		Ref next;
		/** Clock hot bit: set on access, cleared by one sweep before eviction. */
		volatile boolean hot;

		Ref(DfsPackKey pack, long position, int size, T v) {
			this.pack = pack;
			this.position = position;
			this.size = size;
			this.value = v;
		}

		T get() {
			T v = value;
			if (v != null)
				hot = true;
			return v;
		}
	}
}