Skip to main content
aboutsummaryrefslogtreecommitdiffstats
blob: b2544707be776e43d15f2fad3e0746a793116141 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
/*
 * Copyright (C) 2008-2009, Google Inc.
 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.storage.file;

import java.io.IOException;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.concurrent.locks.ReentrantLock;

import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.storage.file.WindowCacheConfig;

/**
 * Caches slices of a {@link org.eclipse.jgit.internal.storage.file.PackFile} in
 * memory for faster read access.
 * <p>
 * The WindowCache serves as a Java based "buffer cache", loading segments of a
 * PackFile into the JVM heap prior to use. As JGit often wants to do reads of
 * only tiny slices of a file, the WindowCache tries to smooth out these tiny
 * reads into larger block-sized IO operations.
 * <p>
 * Whenever a cache miss occurs, {@link #load(PackFile, long)} is invoked by
 * exactly one thread for the given <code>(PackFile,position)</code> key tuple.
 * This is ensured by an array of locks, with the tuple hashed to a lock
 * instance.
 * <p>
 * During a miss, older entries are evicted from the cache so long as
 * {@link #isFull()} returns true.
 * <p>
 * Its too expensive during object access to be 100% accurate with a least
 * recently used (LRU) algorithm. Strictly ordering every read is a lot of
 * overhead that typically doesn't yield a corresponding benefit to the
 * application.
 * <p>
 * This cache implements a loose LRU policy by randomly picking a window
 * comprised of roughly 10% of the cache, and evicting the oldest accessed entry
 * within that window.
 * <p>
 * Entities created by the cache are held under SoftReferences, permitting the
 * Java runtime's garbage collector to evict entries when heap memory gets low.
 * Most JREs implement a loose least recently used algorithm for this eviction.
 * <p>
 * The internal hash table does not expand at runtime, instead it is fixed in
 * size at cache creation time. The internal lock table used to gate load
 * invocations is also fixed in size.
 * <p>
 * The key tuple is passed through to methods as a pair of parameters rather
 * than as a single Object, thus reducing the transient memory allocations of
 * callers. It is more efficient to avoid the allocation, as we can't be 100%
 * sure that a JIT would be able to stack-allocate a key tuple.
 * <p>
 * This cache has an implementation rule such that:
 * <ul>
 * <li>{@link #load(PackFile, long)} is invoked by at most one thread at a time
 * for a given <code>(PackFile,position)</code> tuple.</li>
 * <li>For every <code>load()</code> invocation there is exactly one
 * {@link #createRef(PackFile, long, ByteWindow)} invocation to wrap a
 * SoftReference around the cached entity.</li>
 * <li>For every Reference created by <code>createRef()</code> there will be
 * exactly one call to {@link #clear(Ref)} to cleanup any resources associated
 * with the (now expired) cached entity.</li>
 * </ul>
 * <p>
 * Therefore, it is safe to perform resource accounting increments during the
 * {@link #load(PackFile, long)} or
 * {@link #createRef(PackFile, long, ByteWindow)} methods, and matching
 * decrements during {@link #clear(Ref)}. Implementors may need to override
 * {@link #createRef(PackFile, long, ByteWindow)} in order to embed additional
 * accounting information into an implementation specific
 * {@link org.eclipse.jgit.internal.storage.file.WindowCache.Ref} subclass, as
 * the cached entity may have already been evicted by the JRE's garbage
 * collector.
 * <p>
 * To maintain higher concurrency workloads, during eviction only one thread
 * performs the eviction work, while other threads can continue to insert new
 * objects in parallel. This means that the cache can be temporarily over limit,
 * especially if the nominated eviction thread is being starved relative to the
 * other threads.
 */
public class WindowCache {
	private static final int bits(int newSize) {
		if (newSize < 4096)
			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
		if (Integer.bitCount(newSize) != 1)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBePowerOf2);
		return Integer.numberOfTrailingZeros(newSize);
	}

	private static final Random rng = new Random();

	private static volatile WindowCache cache;

	private static volatile int streamFileThreshold;

	static {
		reconfigure(new WindowCacheConfig());
	}

	/**
	 * Modify the configuration of the window cache.
	 * <p>
	 * The new configuration is applied immediately. If the new limits are
	 * smaller than what is currently cached, older entries will be purged
	 * as soon as possible to allow the cache to meet the new limit.
	 *
	 * @deprecated use {@code cfg.install()} to avoid internal reference.
	 * @param cfg
	 *            the new window cache configuration.
	 * @throws java.lang.IllegalArgumentException
	 *             the cache configuration contains one or more invalid
	 *             settings, usually too low of a limit.
	 */
	@Deprecated
	public static void reconfigure(WindowCacheConfig cfg) {
		final WindowCache nc = new WindowCache(cfg);
		final WindowCache oc = cache;
		if (oc != null)
			oc.removeAll();
		cache = nc;
		streamFileThreshold = cfg.getStreamFileThreshold();
		DeltaBaseCache.reconfigure(cfg);
	}

	static int getStreamFileThreshold() {
		return streamFileThreshold;
	}

	/**
	 * @return the cached instance.
	 */
	public static WindowCache getInstance() {
		return cache;
	}

	static final ByteWindow get(PackFile pack, long offset)
			throws IOException {
		final WindowCache c = cache;
		final ByteWindow r = c.getOrLoad(pack, c.toStart(offset));
		if (c != cache) {
			// The cache was reconfigured while we were using the old one
			// to load this window. The window is still valid, but our
			// cache may think its still live. Ensure the window is removed
			// from the old cache so resources can be released.
			//
			c.removeAll();
		}
		return r;
	}

	static final void purge(PackFile pack) {
		cache.removeAll(pack);
	}

	/** ReferenceQueue to cleanup released and garbage collected windows. */
	private final ReferenceQueue<ByteWindow> queue;

	/** Number of entries in {@link #table}. */
	private final int tableSize;

	/** Access clock for loose LRU. */
	private final AtomicLong clock;

	/** Hash bucket directory; entries are chained below. */
	private final AtomicReferenceArray<Entry> table;

	/** Locks to prevent concurrent loads for same (PackFile,position). */
	private final Lock[] locks;

	/** Lock to elect the eviction thread after a load occurs. */
	private final ReentrantLock evictLock;

	/** Number of {@link #table} buckets to scan for an eviction window. */
	private final int evictBatch;

	private final int maxFiles;

	private final long maxBytes;

	private final boolean mmap;

	private final int windowSizeShift;

	private final int windowSize;

	private final AtomicInteger openFiles;

	private final AtomicLong openBytes;

	private WindowCache(WindowCacheConfig cfg) {
		tableSize = tableSize(cfg);
		final int lockCount = lockCount(cfg);
		if (tableSize < 1)
			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);
		if (lockCount < 1)
			throw new IllegalArgumentException(JGitText.get().lockCountMustBeGreaterOrEqual1);

		queue = new ReferenceQueue<>();
		clock = new AtomicLong(1);
		table = new AtomicReferenceArray<>(tableSize);
		locks = new Lock[lockCount];
		for (int i = 0; i < locks.length; i++)
			locks[i] = new Lock();
		evictLock = new ReentrantLock();

		int eb = (int) (tableSize * .1);
		if (64 < eb)
			eb = 64;
		else if (eb < 4)
			eb = 4;
		if (tableSize < eb)
			eb = tableSize;
		evictBatch = eb;

		maxFiles = cfg.getPackedGitOpenFiles();
		maxBytes = cfg.getPackedGitLimit();
		mmap = cfg.isPackedGitMMAP();
		windowSizeShift = bits(cfg.getPackedGitWindowSize());
		windowSize = 1 << windowSizeShift;

		openFiles = new AtomicInteger();
		openBytes = new AtomicLong();

		if (maxFiles < 1)
			throw new IllegalArgumentException(JGitText.get().openFilesMustBeAtLeast1);
		if (maxBytes < windowSize)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
	}

	/**
	 * @return the number of open files.
	 */
	public int getOpenFiles() {
		return openFiles.get();
	}

	/**
	 * @return the number of open bytes.
	 */
	public long getOpenBytes() {
		return openBytes.get();
	}

	private int hash(int packHash, long off) {
		return packHash + (int) (off >>> windowSizeShift);
	}

	private ByteWindow load(PackFile pack, long offset)
			throws IOException {
		if (pack.beginWindowCache())
			openFiles.incrementAndGet();
		try {
			if (mmap)
				return pack.mmap(offset, windowSize);
			return pack.read(offset, windowSize);
		} catch (IOException | RuntimeException | Error e) {
			close(pack);
			throw e;
		}
	}

	private Ref createRef(PackFile p, long o, ByteWindow v) {
		final Ref ref = new Ref(p, o, v, queue);
		openBytes.addAndGet(ref.size);
		return ref;
	}

	private void clear(Ref ref) {
		openBytes.addAndGet(-ref.size);
		close(ref.pack);
	}

	private void close(PackFile pack) {
		if (pack.endWindowCache())
			openFiles.decrementAndGet();
	}

	private boolean isFull() {
		return maxFiles < openFiles.get() || maxBytes < openBytes.get();
	}

	private long toStart(long offset) {
		return (offset >>> windowSizeShift) << windowSizeShift;
	}

	private static int tableSize(WindowCacheConfig cfg) {
		final int wsz = cfg.getPackedGitWindowSize();
		final long limit = cfg.getPackedGitLimit();
		if (wsz <= 0)
			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
		if (limit < wsz)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
		return (int) Math.min(5 * (limit / wsz) / 2, 2000000000);
	}

	private static int lockCount(WindowCacheConfig cfg) {
		return Math.max(cfg.getPackedGitOpenFiles(), 32);
	}

	/**
	 * Lookup a cached object, creating and loading it if it doesn't exist.
	 *
	 * @param pack
	 *            the pack that "contains" the cached object.
	 * @param position
	 *            offset within <code>pack</code> of the object.
	 * @return the object reference.
	 * @throws IOException
	 *             the object reference was not in the cache and could not be
	 *             obtained by {@link #load(PackFile, long)}.
	 */
	private ByteWindow getOrLoad(PackFile pack, long position)
			throws IOException {
		final int slot = slot(pack, position);
		final Entry e1 = table.get(slot);
		ByteWindow v = scan(e1, pack, position);
		if (v != null)
			return v;

		synchronized (lock(pack, position)) {
			Entry e2 = table.get(slot);
			if (e2 != e1) {
				v = scan(e2, pack, position);
				if (v != null)
					return v;
			}

			v = load(pack, position);
			final Ref ref = createRef(pack, position, v);
			hit(ref);
			for (;;) {
				final Entry n = new Entry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
		}

		if (evictLock.tryLock()) {
			try {
				gc();
				evict();
			} finally {
				evictLock.unlock();
			}
		}

		return v;
	}

	private ByteWindow scan(Entry n, PackFile pack, long position) {
		for (; n != null; n = n.next) {
			final Ref r = n.ref;
			if (r.pack == pack && r.position == position) {
				final ByteWindow v = r.get();
				if (v != null) {
					hit(r);
					return v;
				}
				n.kill();
				break;
			}
		}
		return null;
	}

	private void hit(Ref r) {
		// We don't need to be 100% accurate here. Its sufficient that at least
		// one thread performs the increment. Any other concurrent access at
		// exactly the same time can simply use the same clock value.
		//
		// Consequently we attempt the set, but we don't try to recover should
		// it fail. This is why we don't use getAndIncrement() here.
		//
		final long c = clock.get();
		clock.compareAndSet(c, c + 1);
		r.lastAccess = c;
	}

	private void evict() {
		while (isFull()) {
			int ptr = rng.nextInt(tableSize);
			Entry old = null;
			int slot = 0;
			for (int b = evictBatch - 1; b >= 0; b--, ptr++) {
				if (tableSize <= ptr)
					ptr = 0;
				for (Entry e = table.get(ptr); e != null; e = e.next) {
					if (e.dead)
						continue;
					if (old == null || e.ref.lastAccess < old.ref.lastAccess) {
						old = e;
						slot = ptr;
					}
				}
			}
			if (old != null) {
				old.kill();
				gc();
				final Entry e1 = table.get(slot);
				table.compareAndSet(slot, e1, clean(e1));
			}
		}
	}

	/**
	 * Clear every entry from the cache.
	 * <p>
	 * This is a last-ditch effort to clear out the cache, such as before it
	 * gets replaced by another cache that is configured differently. This
	 * method tries to force every cached entry through {@link #clear(Ref)} to
	 * ensure that resources are correctly accounted for and cleaned up by the
	 * subclass. A concurrent reader loading entries while this method is
	 * running may cause resource accounting failures.
	 */
	private void removeAll() {
		for (int s = 0; s < tableSize; s++) {
			Entry e1;
			do {
				e1 = table.get(s);
				for (Entry e = e1; e != null; e = e.next)
					e.kill();
			} while (!table.compareAndSet(s, e1, null));
		}
		gc();
	}

	/**
	 * Clear all entries related to a single file.
	 * <p>
	 * Typically this method is invoked during {@link PackFile#close()}, when we
	 * know the pack is never going to be useful to us again (for example, it no
	 * longer exists on disk). A concurrent reader loading an entry from this
	 * same pack may cause the pack to become stuck in the cache anyway.
	 *
	 * @param pack
	 *            the file to purge all entries of.
	 */
	private void removeAll(PackFile pack) {
		for (int s = 0; s < tableSize; s++) {
			final Entry e1 = table.get(s);
			boolean hasDead = false;
			for (Entry e = e1; e != null; e = e.next) {
				if (e.ref.pack == pack) {
					e.kill();
					hasDead = true;
				} else if (e.dead)
					hasDead = true;
			}
			if (hasDead)
				table.compareAndSet(s, e1, clean(e1));
		}
		gc();
	}

	private void gc() {
		Ref r;
		while ((r = (Ref) queue.poll()) != null) {
			clear(r);

			final int s = slot(r.pack, r.position);
			final Entry e1 = table.get(s);
			for (Entry n = e1; n != null; n = n.next) {
				if (n.ref == r) {
					n.dead = true;
					table.compareAndSet(s, e1, clean(e1));
					break;
				}
			}
		}
	}

	private int slot(PackFile pack, long position) {
		return (hash(pack.hash, position) >>> 1) % tableSize;
	}

	private Lock lock(PackFile pack, long position) {
		return locks[(hash(pack.hash, position) >>> 1) % locks.length];
	}

	private static Entry clean(Entry top) {
		while (top != null && top.dead) {
			top.ref.enqueue();
			top = top.next;
		}
		if (top == null)
			return null;
		final Entry n = clean(top.next);
		return n == top.next ? top : new Entry(n, top.ref);
	}

	private static class Entry {
		/** Next entry in the hash table's chain list. */
		final Entry next;

		/** The referenced object. */
		final Ref ref;

		/**
		 * Marked true when ref.get() returns null and the ref is dead.
		 * <p>
		 * A true here indicates that the ref is no longer accessible, and that
		 * we therefore need to eventually purge this Entry object out of the
		 * bucket's chain.
		 */
		volatile boolean dead;

		Entry(Entry n, Ref r) {
			next = n;
			ref = r;
		}

		final void kill() {
			dead = true;
			ref.enqueue();
		}
	}

	/** A soft reference wrapped around a cached object. */
	private static class Ref extends SoftReference<ByteWindow> {
		final PackFile pack;

		final long position;

		final int size;

		long lastAccess;

		protected Ref(final PackFile pack, final long position,
				final ByteWindow v, final ReferenceQueue<ByteWindow> queue) {
			super(v, queue);
			this.pack = pack;
			this.position = position;
			this.size = v.size();
		}
	}

	private static final class Lock {
		// Used only for its implicit monitor.
	}
}

Back to the top