Skip to main content
summaryrefslogtreecommitdiffstats
blob: bc2039c56b12ff998b46badfde314880392f0f52 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
/*
 * Copyright (C) 2019 Google LLC and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.storage.file;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.LockFailedException;
import org.eclipse.jgit.internal.storage.io.BlockSource;
import org.eclipse.jgit.internal.storage.reftable.MergedReftable;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.internal.storage.reftable.ReftableReader;
import org.eclipse.jgit.internal.storage.reftable.ReftableWriter;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.util.FileUtils;

/**
 * A mutable stack of reftables on local filesystem storage. Not thread-safe.
 * This is an AutoCloseable because this object owns the file handles to the
 * open reftables.
 */
public class FileReftableStack implements AutoCloseable {
	private static class StackEntry {

		String name;

		ReftableReader reftableReader;
	}

	private MergedReftable mergedReftable;

	private List<StackEntry> stack;

	private long lastNextUpdateIndex;

	private final File stackPath;

	private final File reftableDir;

	private final Runnable onChange;

	private final Supplier<Config> configSupplier;

	// Used for stats & testing.
	static class CompactionStats {

		long tables;

		long bytes;

		int attempted;

		int failed;

		long refCount;

		long logCount;

		CompactionStats() {
			tables = 0;
			bytes = 0;
			attempted = 0;
			failed = 0;
			logCount = 0;
			refCount = 0;
		}
	}

	private final CompactionStats stats;

	/**
	 * Creates a stack corresponding to the list of reftables in the argument
	 *
	 * @param stackPath
	 *            the filename for the stack.
	 * @param reftableDir
	 *            the dir holding the tables.
	 * @param onChange
	 *            hook to call if we notice a new write
	 * @param configSupplier
	 *            Config supplier
	 * @throws IOException
	 *             on I/O problems
	 */
	public FileReftableStack(File stackPath, File reftableDir,
			@Nullable Runnable onChange, Supplier<Config> configSupplier)
			throws IOException {
		this.stackPath = stackPath;
		this.reftableDir = reftableDir;
		this.stack = new ArrayList<>();
		this.configSupplier = configSupplier;
		this.onChange = onChange;

		// skip event notification
		lastNextUpdateIndex = 0;
		reload();

		stats = new CompactionStats();
	}

	CompactionStats getStats() {
		return stats;
	}

	/** Thrown if the update indices in the stack are not monotonic */
	public static class ReftableNumbersNotIncreasingException
			extends RuntimeException {
		private static final long serialVersionUID = 1L;

		String name;

		long lastMax;

		long min;

		ReftableNumbersNotIncreasingException(String name, long lastMax,
				long min) {
			this.name = name;
			this.lastMax = lastMax;
			this.min = min;
		}

		@SuppressWarnings({ "nls", "boxing" })
		@Override
		public String toString() {
			return String.format(
					"ReftableNumbersNotIncreasingException %s: min %d, lastMax %d",
					name, min, lastMax);
		}
	}

	/**
	 * Reloads the stack, potentially reusing opened reftableReaders.
	 *
	 * @param names
	 *            holds the names of the tables to load.
	 * @throws FileNotFoundException
	 *             load must be retried.
	 * @throws IOException
	 *             on other IO errors.
	 */
	private void reloadOnce(List<String> names)
			throws IOException, FileNotFoundException {
		Map<String, ReftableReader> current = stack.stream()
				.collect(Collectors.toMap(e -> e.name, e -> e.reftableReader));

		List<ReftableReader> newTables = new ArrayList<>();
		List<StackEntry> newStack = new ArrayList<>(stack.size() + 1);
		try {
			ReftableReader last = null;
			for (String name : names) {
				StackEntry entry = new StackEntry();
				entry.name = name;

				ReftableReader t = null;
				if (current.containsKey(name)) {
					t = current.remove(name);
				} else {
					File subtable = new File(reftableDir, name);
					FileInputStream is;

					is = new FileInputStream(subtable);

					t = new ReftableReader(BlockSource.from(is));
					newTables.add(t);
				}

				if (last != null) {
					// TODO: move this to MergedReftable
					if (last.maxUpdateIndex() >= t.minUpdateIndex()) {
						throw new ReftableNumbersNotIncreasingException(name,
								last.maxUpdateIndex(), t.minUpdateIndex());
					}
				}
				last = t;

				entry.reftableReader = t;
				newStack.add(entry);
			}
			// survived without exceptions: swap in new stack, and close
			// dangling tables.
			stack = newStack;
			newTables.clear();

			current.values().forEach(r -> {
				try {
					r.close();
				} catch (IOException e) {
					throw new AssertionError(e);
				}
			});
		} finally {
			newTables.forEach(t -> {
				try {
					t.close();
				} catch (IOException ioe) {
					// reader close should not generate errors.
					throw new AssertionError(ioe);
				}
			});
		}
	}

	void reload() throws IOException {
		// Try for 2.5 seconds.
		long deadline = System.currentTimeMillis() + 2500;
		// A successful reftable transaction is 2 atomic file writes
		// (open, write, close, rename), which a fast Linux system should be
		// able to do in about ~200us. So 1 ms should be ample time.
		long min = 1;
		long max = 1000;
		long delay = 0;
		boolean success = false;

		// Don't check deadline for the first 3 retries, so we can step with a
		// debugger without worrying about deadlines.
		int tries = 0;
		while (tries < 3 || System.currentTimeMillis() < deadline) {
			List<String> names = readTableNames();
			tries++;
			try {
				reloadOnce(names);
				success = true;
				break;
			} catch (FileNotFoundException e) {
				List<String> changed = readTableNames();
				if (changed.equals(names)) {
					throw e;
				}
			}

			delay = FileUtils.delay(delay, min, max);
			try {
				Thread.sleep(delay);
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
				throw new RuntimeException(e);
			}
		}

		if (!success) {
			throw new LockFailedException(stackPath);
		}

		mergedReftable = new MergedReftable(stack.stream()
				.map(x -> x.reftableReader).collect(Collectors.toList()));
		long curr = nextUpdateIndex();
		if (lastNextUpdateIndex > 0 && lastNextUpdateIndex != curr
				&& onChange != null) {
			onChange.run();
		}
		lastNextUpdateIndex = curr;
	}

	/**
	 * @return the merged reftable
	 */
	public MergedReftable getMergedReftable() {
		return mergedReftable;
	}

	/**
	 * Writer is a callable that writes data to a reftable under construction.
	 * It should set the min/max update index, and then write refs and/or logs.
	 * It should not call finish() on the writer.
	 */
	public interface Writer {
		/**
		 * Write data to reftable
		 *
		 * @param w
		 *            writer to use
		 * @throws IOException
		 */
		void call(ReftableWriter w) throws IOException;
	}

	private List<String> readTableNames() throws IOException {
		List<String> names = new ArrayList<>(stack.size() + 1);

		try (BufferedReader br = new BufferedReader(
				new InputStreamReader(new FileInputStream(stackPath), UTF_8))) {
			String line;
			while ((line = br.readLine()) != null) {
				if (!line.isEmpty()) {
					names.add(line);
				}
			}
		} catch (FileNotFoundException e) {
			// file isn't there: empty repository.
		}
		return names;
	}

	/**
	 * @return true if the on-disk file corresponds to the in-memory data.
	 * @throws IOException
	 *             on IO problem
	 */
	boolean isUpToDate() throws IOException {
		// We could use FileSnapshot to avoid reading the file, but the file is
		// small so it's probably a minor optimization.
		try {
			List<String> names = readTableNames();
			if (names.size() != stack.size()) {
				return false;
			}
			for (int i = 0; i < names.size(); i++) {
				if (!names.get(i).equals(stack.get(i).name)) {
					return false;
				}
			}
		} catch (FileNotFoundException e) {
			return stack.isEmpty();
		}
		return true;
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	public void close() {
		for (StackEntry entry : stack) {
			try {
				entry.reftableReader.close();
			} catch (Exception e) {
				// we are reading; this should never fail.
				throw new AssertionError(e);
			}
		}
	}

	private long nextUpdateIndex() throws IOException {
		return stack.size() > 0
				? stack.get(stack.size() - 1).reftableReader.maxUpdateIndex()
						+ 1
				: 1;
	}

	private String filename(long low, long high) {
		return String.format("%012x-%012x", //$NON-NLS-1$
				Long.valueOf(low), Long.valueOf(high));
	}

	/**
	 * Tries to add a new reftable to the stack. Returns true if it succeeded,
	 * or false if there was a lock failure, due to races with other processes.
	 * This is package private so FileReftableDatabase can call into here.
	 *
	 * @param w
	 *            writer to write data to a reftable under construction
	 * @return true if the transaction was successful.
	 * @throws IOException
	 *             on I/O problems
	 */
	@SuppressWarnings("nls")
	public boolean addReftable(Writer w) throws IOException {
		LockFile lock = new LockFile(stackPath);
		try {
			if (!lock.lockForAppend()) {
				return false;
			}
			if (!isUpToDate()) {
				return false;
			}

			String fn = filename(nextUpdateIndex(), nextUpdateIndex());

			File tmpTable = File.createTempFile(fn + "_", ".ref",
					stackPath.getParentFile());

			ReftableWriter.Stats s;
			try (FileOutputStream fos = new FileOutputStream(tmpTable)) {
				ReftableWriter rw = new ReftableWriter(reftableConfig(), fos);
				w.call(rw);
				rw.finish();
				s = rw.getStats();
			}

			if (s.minUpdateIndex() < nextUpdateIndex()) {
				return false;
			}

			// The spec says to name log-only files with .log, which is somewhat
			// pointless given compaction, but we do so anyway.
			fn += s.refCount() > 0 ? ".ref" : ".log";
			File dest = new File(reftableDir, fn);

			FileUtils.rename(tmpTable, dest, StandardCopyOption.ATOMIC_MOVE);
			lock.write((fn + "\n").getBytes(UTF_8));
			if (!lock.commit()) {
				FileUtils.delete(dest);
				return false;
			}

			reload();

			autoCompact();
		} finally {
			lock.unlock();
		}
		return true;
	}

	private ReftableConfig reftableConfig() {
		return new ReftableConfig(configSupplier.get());
	}

	/**
	 * Write the reftable for the given range into a temp file.
	 *
	 * @param first
	 *            index of first stack entry to be written
	 * @param last
	 *            index of last stack entry to be written
	 * @return the file holding the replacement table.
	 * @throws IOException
	 *             on I/O problem
	 */
	private File compactLocked(int first, int last) throws IOException {
		String fn = filename(first, last);

		File tmpTable = File.createTempFile(fn + "_", ".ref", //$NON-NLS-1$//$NON-NLS-2$
				stackPath.getParentFile());
		try (FileOutputStream fos = new FileOutputStream(tmpTable)) {
			ReftableCompactor c = new ReftableCompactor(fos)
					.setConfig(reftableConfig())
					.setIncludeDeletes(first > 0);

			List<ReftableReader> compactMe = new ArrayList<>();
			long totalBytes = 0;
			for (int i = first; i <= last; i++) {
				compactMe.add(stack.get(i).reftableReader);
				totalBytes += stack.get(i).reftableReader.size();
			}
			c.addAll(compactMe);

			c.compact();

			// Even though the compaction did not definitely succeed, we keep
			// tally here as we've expended the effort.
			stats.bytes += totalBytes;
			stats.tables += first - last + 1;
			stats.attempted++;
			stats.refCount += c.getStats().refCount();
			stats.logCount += c.getStats().logCount();
		}

		return tmpTable;
	}

	/**
	 * Compacts a range of the stack, following the file locking protocol
	 * documented in the spec.
	 *
	 * @param first
	 *            index of first stack entry to be considered in compaction
	 * @param last
	 *            index of last stack entry to be considered in compaction
	 * @return true if a compaction was successfully applied.
	 * @throws IOException
	 *             on I/O problem
	 */
	boolean compactRange(int first, int last) throws IOException {
		if (first >= last) {
			return true;
		}
		LockFile lock = new LockFile(stackPath);

		File tmpTable = null;
		List<LockFile> subtableLocks = new ArrayList<>();

		try {
			if (!lock.lock()) {
				return false;
			}
			if (!isUpToDate()) {
				return false;
			}

			List<File> deleteOnSuccess = new ArrayList<>();
			for (int i = first; i <= last; i++) {
				File f = new File(reftableDir, stack.get(i).name);
				LockFile lf = new LockFile(f);
				if (!lf.lock()) {
					return false;
				}
				subtableLocks.add(lf);
				deleteOnSuccess.add(f);
			}

			lock.unlock();
			lock = null;

			tmpTable = compactLocked(first, last);

			lock = new LockFile(stackPath);
			if (!lock.lock()) {
				return false;
			}
			if (!isUpToDate()) {
				return false;
			}

			String fn = filename(
					stack.get(first).reftableReader.minUpdateIndex(),
					stack.get(last).reftableReader.maxUpdateIndex());

			// The spec suggests to use .log for log-only tables, and collect
			// all log entries in a single file at the bottom of the stack. That would
			// require supporting overlapping ranges for the different tables. For the
			// sake of simplicity, we simply ignore this and always produce a log +
			// ref combined table.
			fn += ".ref"; //$NON-NLS-1$
			File dest = new File(reftableDir, fn);

			FileUtils.rename(tmpTable, dest, StandardCopyOption.ATOMIC_MOVE);
			tmpTable = null;

			StringBuilder sb = new StringBuilder();

			for (int i = 0; i < first; i++) {
				sb.append(stack.get(i).name + "\n"); //$NON-NLS-1$
			}
			sb.append(fn + "\n"); //$NON-NLS-1$
			for (int i = last + 1; i < stack.size(); i++) {
				sb.append(stack.get(i).name + "\n"); //$NON-NLS-1$
			}

			lock.write(sb.toString().getBytes(UTF_8));
			if (!lock.commit()) {
				dest.delete();
				return false;
			}

			for (File f : deleteOnSuccess) {
				Files.delete(f.toPath());
			}

			reload();
			return true;
		} finally {
			if (tmpTable != null) {
				tmpTable.delete();
			}
			for (LockFile lf : subtableLocks) {
				lf.unlock();
			}
			if (lock != null) {
				lock.unlock();
			}
		}
	}

	/**
	 * Calculate an approximate log2.
	 *
	 * @param sz
	 * @return log2
	 */
	static int log(long sz) {
		long base = 2;
		if (sz <= 0) {
			throw new IllegalArgumentException("log2 negative"); //$NON-NLS-1$
		}
		int l = 0;
		while (sz > 0) {
			l++;
			sz /= base;
		}

		return l - 1;
	}

	/**
	 * A segment is a consecutive list of reftables of the same approximate
	 * size.
	 */
	static class Segment {
		// the approximate log_2 of the size.
		int log;

		// The total bytes in this segment
		long bytes;

		int start;

		int end; // exclusive.

		int size() {
			return end - start;
		}

		Segment(int start, int end, int log, long bytes) {
			this.log = log;
			this.start = start;
			this.end = end;
			this.bytes = bytes;
		}

		Segment() {
			this(0, 0, 0, 0);
		}

		@Override
		public int hashCode() {
			return 0; // appease error-prone
		}

		@Override
		public boolean equals(Object other) {
			Segment o = (Segment) other;
			return o.bytes == bytes && o.log == log && o.start == start
					&& o.end == end;
		}

		@SuppressWarnings("boxing")
		@Override
		public String toString() {
			return String.format("{ [%d,%d) l=%d sz=%d }", start, end, log, //$NON-NLS-1$
					bytes);
		}
	}

	static List<Segment> segmentSizes(long[] sizes) {
		List<Segment> segments = new ArrayList<>();
		Segment cur = new Segment();
		for (int i = 0; i < sizes.length; i++) {
			int l = log(sizes[i]);
			if (l != cur.log && cur.bytes > 0) {
				segments.add(cur);
				cur = new Segment();
				cur.start = i;
				cur.log = l;
			}

			cur.log = l;
			cur.end = i + 1;
			cur.bytes += sizes[i];
		}
		segments.add(cur);
		return segments;
	}

	private static Optional<Segment> autoCompactCandidate(long[] sizes) {
		if (sizes.length == 0) {
			return Optional.empty();
		}

		// The cost of compaction is proportional to the size, and we want to
		// avoid frequent large compactions. We do this by playing the game 2048
		// here: first compact together the smallest tables if there are more
		// than one. Then try to see if the result will be big enough to match
		// up with next up.

		List<Segment> segments = segmentSizes(sizes);
		segments = segments.stream().filter(s -> s.size() > 1)
				.collect(Collectors.toList());
		if (segments.isEmpty()) {
			return Optional.empty();
		}

		Optional<Segment> optMinSeg = segments.stream()
				.min(Comparator.comparing(s -> Integer.valueOf(s.log)));
		// Input is non-empty, so always present.
		Segment smallCollected = optMinSeg.get();
		while (smallCollected.start > 0) {
			int prev = smallCollected.start - 1;
			long prevSize = sizes[prev];
			if (log(smallCollected.bytes) < log(prevSize)) {
				break;
			}
			smallCollected.start = prev;
			smallCollected.bytes += prevSize;
		}

		return Optional.of(smallCollected);
	}

	/**
	 * Heuristically tries to compact the stack if the stack has a suitable
	 * shape.
	 *
	 * @throws IOException
	 */
	private void autoCompact() throws IOException {
		Optional<Segment> cand = autoCompactCandidate(tableSizes());
		if (cand.isPresent()) {
			if (!compactRange(cand.get().start, cand.get().end - 1)) {
				stats.failed++;
			}
		}
	}

	// 68b footer, 24b header = 92.
	private static long OVERHEAD = 91;

	private long[] tableSizes() throws IOException {
		long[] sizes = new long[stack.size()];
		for (int i = 0; i < stack.size(); i++) {
			// If we don't subtract the overhead, the file size isn't
			// proportional to the number of entries. This will cause us to
			// compact too often, which is expensive.
			sizes[i] = stack.get(i).reftableReader.size() - OVERHEAD;
		}
		return sizes;
	}

	void compactFully() throws IOException {
		if (!compactRange(0, stack.size() - 1)) {
			stats.failed++;
		}
	}
}

Back to the top