Skip to main content
aboutsummaryrefslogtreecommitdiffstats
blob: 69abe96b638aec756625a6001b01f9de227a57bf (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
/*******************************************************************************
 * Copyright (c) 2000, 2007 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package org.eclipse.jdt.internal.core.index;

import java.io.*;

import org.eclipse.jdt.core.compiler.CharOperation;
import org.eclipse.jdt.core.search.*;
import org.eclipse.jdt.internal.core.util.*;
import org.eclipse.jdt.internal.compiler.util.HashtableOfIntValues;
import org.eclipse.jdt.internal.compiler.util.HashtableOfObject;
import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable;
import org.eclipse.jdt.internal.compiler.util.SimpleSet;
import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray;

public class DiskIndex {

File indexFile;

private int headerInfoOffset;
private int numberOfChunks;
private int sizeOfLastChunk;
private int[] chunkOffsets;
private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names
private int startOfCategoryTables;
private HashtableOfIntValues categoryOffsets, categoryEnds;

private int cacheUserCount;
private String[][] cachedChunks; // decompressed chunks of document names
private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet
private char[] cachedCategoryName;

private static final int DEFAULT_BUFFER_SIZE = 2048;
private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE;
private byte[] streamBuffer;
private int bufferIndex, bufferEnd; // used when reading from the file into the streamBuffer
private int streamEnd; // used when writing data from the streamBuffer to the file
char separator = Index.DEFAULT_SEPARATOR;

public static final String SIGNATURE= "INDEX VERSION 1.125"; //$NON-NLS-1$
private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray();
public static boolean DEBUG = false;

private static final int RE_INDEXED = -1;
private static final int DELETED = -2;

private static final int CHUNK_SIZE = 100;

private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20);

static class IntList {

int size;
int[] elements;

IntList(int[] elements) {
	this.elements = elements;
	this.size = elements.length;
}
void add(int newElement) {
	if (this.size == this.elements.length) {
		int newSize = this.size * 3;
		if (newSize < 7) newSize = 7;
		System.arraycopy(this.elements, 0, this.elements = new int[newSize], 0, this.size);
	}
	this.elements[this.size++] = newElement;
}
int[] asArray() {
	int[] result = new int[this.size];
	System.arraycopy(this.elements, 0, result, 0, this.size);
	return result;
}	
}


DiskIndex(String fileName) {
	if (fileName == null)
		throw new java.lang.IllegalArgumentException();
	this.indexFile = new File(fileName);

	// clear cached items
	this.headerInfoOffset = -1;
	this.numberOfChunks = -1;
	this.sizeOfLastChunk = -1;
	this.chunkOffsets = null;
	this.documentReferenceSize = -1;
	this.cacheUserCount = -1;
	this.cachedChunks = null;
	this.categoryTables = null;
	this.cachedCategoryName = null;
	this.categoryOffsets = null;
	this.categoryEnds = null;
}
SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException {
	// must skip over documents which have been added/changed/deleted in the memory index
	String[] docNames = readAllDocumentNames();
	SimpleSet results = new SimpleSet(docNames.length);
	if (substring == null) {
		if (memoryIndex == null) {
			for (int i = 0, l = docNames.length; i < l; i++)
				results.add(docNames[i]);
		} else {
			SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
			for (int i = 0, l = docNames.length; i < l; i++) {
				String docName = docNames[i];
				if (!docsToRefs.containsKey(docName))
					results.add(docName);
			}
		}
	} else {
		if (memoryIndex == null) {
			for (int i = 0, l = docNames.length; i < l; i++)
				if (docNames[i].startsWith(substring, 0))
					results.add(docNames[i]);
		} else {
			SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
			for (int i = 0, l = docNames.length; i < l; i++) {
				String docName = docNames[i];
				if (docName.startsWith(substring, 0) && !docsToRefs.containsKey(docName))
					results.add(docName);
			}
		}
	}
	return results;
}
private HashtableOfObject addQueryResult(HashtableOfObject results, char[] word, HashtableOfObject wordsToDocNumbers, MemoryIndex memoryIndex) throws IOException {
	// must skip over documents which have been added/changed/deleted in the memory index
	if (results == null)
		results = new HashtableOfObject(13);
	EntryResult result = (EntryResult) results.get(word);
	if (memoryIndex == null) {
		if (result == null)
			results.put(word, new EntryResult(word, wordsToDocNumbers));
		else
			result.addDocumentTable(wordsToDocNumbers);
	} else {
		SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
		if (result == null) result = new EntryResult(word, null);
		int[] docNumbers = readDocumentNumbers(wordsToDocNumbers.get(word));
		for (int i = 0, l = docNumbers.length; i < l; i++) {
			String docName = readDocumentName(docNumbers[i]);
			if (!docsToRefs.containsKey(docName))
				result.addDocumentName(docName);
		}
		if (!result.isEmpty())
			results.put(word, result);
	}
	return results;
}
HashtableOfObject addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex) throws IOException {
	// assumes sender has called startQuery() & will call stopQuery() when finished
	if (this.categoryOffsets == null) return null; // file is empty

	HashtableOfObject results = null; // initialized if needed
	if (key == null) {
		for (int i = 0, l = categories.length; i < l; i++) {
			HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], true); // cache if key is null since its a definite match
			if (wordsToDocNumbers != null) {
				char[][] words = wordsToDocNumbers.keyTable;
				if (results == null)
					results = new HashtableOfObject(wordsToDocNumbers.elementSize);
				for (int j = 0, m = words.length; j < m; j++)
					if (words[j] != null)
						results = addQueryResult(results, words[j], wordsToDocNumbers, memoryIndex);
			}
		}
		if (results != null && this.cachedChunks == null)
			cacheDocumentNames();
	} else {
		switch (matchRule) {
			case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE:
				for (int i = 0, l = categories.length; i < l; i++) {
					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
					if (wordsToDocNumbers != null && wordsToDocNumbers.containsKey(key))
						results = addQueryResult(results, key, wordsToDocNumbers, memoryIndex);
				}
				break;
			case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE:
				for (int i = 0, l = categories.length; i < l; i++) {
					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
					if (wordsToDocNumbers != null) {
						char[][] words = wordsToDocNumbers.keyTable;
						for (int j = 0, m = words.length; j < m; j++) {
							char[] word = words[j];
							if (word != null && key[0] == word[0] && CharOperation.prefixEquals(key, word))
								results = addQueryResult(results, word, wordsToDocNumbers, memoryIndex);
						}
					}
				}
				break;
			default:
				for (int i = 0, l = categories.length; i < l; i++) {
					HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
					if (wordsToDocNumbers != null) {
						char[][] words = wordsToDocNumbers.keyTable;
						for (int j = 0, m = words.length; j < m; j++) {
							char[] word = words[j];
							if (word != null && Index.isMatch(key, word, matchRule))
								results = addQueryResult(results, word, wordsToDocNumbers, memoryIndex);
						}
					}
				}
		}
	}

	if (results == null) return null;
	return results;
}
private void cacheDocumentNames() throws IOException {
	// will need all document names so get them now
	this.cachedChunks = new String[this.numberOfChunks][];
	FileInputStream stream = new FileInputStream(this.indexFile);
	try {
		if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1;
		int offset = this.chunkOffsets[0];
		stream.skip(offset);
		this.streamBuffer = new byte[BUFFER_READ_SIZE];
		this.bufferIndex = 0;
		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
		for (int i = 0; i < this.numberOfChunks; i++) {
			int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE;
			readChunk(this.cachedChunks[i] = new String[size], stream, 0, size);
		}
	} catch (IOException e) {
		this.cachedChunks = null;
		throw e;
	} finally {
		stream.close();
		this.streamBuffer = null;
		BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
	}
}
private String[] computeDocumentNames(String[] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) {
	int onDiskLength = onDiskNames.length;
	Object[] docNames = memoryIndex.docsToReferences.keyTable;
	Object[] referenceTables = memoryIndex.docsToReferences.valueTable;
	if (onDiskLength == 0) {
		// disk index was empty, so add every indexed document
		for (int i = 0, l = referenceTables.length; i < l; i++)
			if (referenceTables[i] != null)
				indexedDocuments.put(docNames[i], null); // remember each new document

		String[] newDocNames = new String[indexedDocuments.elementSize];
		int count = 0;
		Object[] added = indexedDocuments.keyTable;
		for (int i = 0, l = added.length; i < l; i++)
			if (added[i] != null)
				newDocNames[count++] = (String) added[i];
		Util.sort(newDocNames);
		for (int i = 0, l = newDocNames.length; i < l; i++)
			indexedDocuments.put(newDocNames[i], new Integer(i));
		return newDocNames;
	}

	// initialize positions as if each document will remain in the same position
	for (int i = 0; i < onDiskLength; i++)
		positions[i] = i;

	// find out if the memory index has any new or deleted documents, if not then the names & positions are the same
	int numDeletedDocNames = 0;
	int numReindexedDocNames = 0;
	nextPath : for (int i = 0, l = docNames.length; i < l; i++) {
		String docName = (String) docNames[i];
		if (docName != null) {
			for (int j = 0; j < onDiskLength; j++) {
				if (docName.equals(onDiskNames[j])) {
					if (referenceTables[i] == null) {
						positions[j] = DELETED;
						numDeletedDocNames++;
					} else {
						positions[j] = RE_INDEXED;
						numReindexedDocNames++;
					}
					continue nextPath;
				}
			}
			if (referenceTables[i] != null)
				indexedDocuments.put(docName, null); // remember each new document, skip deleted documents which were never saved
		}
	}

	String[] newDocNames = onDiskNames;
	if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) {
		// some new documents have been added or some old ones deleted
		newDocNames = new String[onDiskLength + indexedDocuments.elementSize - numDeletedDocNames];
		int count = 0;
		for (int i = 0; i < onDiskLength; i++)
			if (positions[i] >= RE_INDEXED)
				newDocNames[count++] = onDiskNames[i]; // keep each unchanged document
		Object[] added = indexedDocuments.keyTable;
		for (int i = 0, l = added.length; i < l; i++)
			if (added[i] != null)
				newDocNames[count++] = (String) added[i]; // add each new document
		Util.sort(newDocNames);
		for (int i = 0, l = newDocNames.length; i < l; i++)
			if (indexedDocuments.containsKey(newDocNames[i]))
				indexedDocuments.put(newDocNames[i], new Integer(i)); // remember the position for each new document
	}

	// need to be able to look up an old position (ref# from a ref[]) and map it to its new position
	// if its old position == DELETED then its forgotton
	// if its old position == ReINDEXED then its also forgotten but its new position is needed to map references
	int count = -1;
	for (int i = 0; i < onDiskLength;) {
		switch(positions[i]) {
			case DELETED :
				i++; // skip over deleted... references are forgotten
				break;
			case RE_INDEXED :
				String newName = newDocNames[++count];
				if (newName.equals(onDiskNames[i])) {
					indexedDocuments.put(newName, new Integer(count)); // the reindexed docName that was at position i is now at position count
					i++;
				}
				break;
			default :
				if (newDocNames[++count].equals(onDiskNames[i]))
					positions[i++] = count; // the unchanged docName that was at position i is now at position count
		}
	}
	return newDocNames;
}
private void copyQueryResults(HashtableOfObject categoryToWords, int newPosition) {
	char[][] categoryNames = categoryToWords.keyTable;
	Object[] wordSets = categoryToWords.valueTable;
	for (int i = 0, l = categoryNames.length; i < l; i++) {
		char[] categoryName = categoryNames[i];
		if (categoryName != null) {
			SimpleWordSet wordSet = (SimpleWordSet) wordSets[i];
			HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
			if (wordsToDocs == null)
				this.categoryTables.put(categoryName, wordsToDocs = new HashtableOfObject(wordSet.elementSize));

			char[][] words = wordSet.words;
			for (int j = 0, m = words.length; j < m; j++) {
				char[] word = words[j];
				if (word != null) {
					Object o = wordsToDocs.get(word);
					if (o == null) {
						wordsToDocs.put(word, new int[] {newPosition});
					} else if (o instanceof IntList) {
						((IntList) o).add(newPosition);
					} else {
						IntList list = new IntList((int[]) o);
						list.add(newPosition);
						wordsToDocs.put(word, list);
					}
				}
			}
		}
	}
}
void initialize(boolean reuseExistingFile) throws IOException {
	if (this.indexFile.exists()) {
		if (reuseExistingFile) {
			FileInputStream stream = new FileInputStream(this.indexFile);
			this.streamBuffer = new byte[BUFFER_READ_SIZE];
			this.bufferIndex = 0;
			this.bufferEnd = stream.read(this.streamBuffer, 0, 128);
			try {
				char[] signature = readStreamChars(stream);
				if (!CharOperation.equals(signature, SIGNATURE_CHARS)) {
					throw new IOException(Messages.exception_wrongFormat);
				}
				this.headerInfoOffset = readStreamInt(stream);
				if (this.headerInfoOffset > 0) { // file is empty if its not set
					stream.skip(this.headerInfoOffset - this.bufferEnd); // assume that the header info offset is over current buffer end
					this.bufferIndex = 0;
					this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
					readHeaderInfo(stream);
				}
			} finally {
				stream.close();
			}
			return;
		}
		if (!this.indexFile.delete()) {
			if (DEBUG)
				System.out.println("initialize - Failed to delete index " + this.indexFile); //$NON-NLS-1$
			throw new IOException("Failed to delete index " + this.indexFile); //$NON-NLS-1$
		}
	}
	if (this.indexFile.createNewFile()) {
		FileOutputStream stream = new FileOutputStream(this.indexFile, false);
		try {
			this.streamBuffer = new byte[BUFFER_READ_SIZE];
			this.bufferIndex = 0;
			writeStreamChars(stream, SIGNATURE_CHARS);
			writeStreamInt(stream, -1); // file is empty
			// write the buffer to the stream
			if (this.bufferIndex > 0) {
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
		} finally {
			stream.close();
		}
	} else {
		if (DEBUG)
			System.out.println("initialize - Failed to create new index " + this.indexFile); //$NON-NLS-1$
		throw new IOException("Failed to create new index " + this.indexFile); //$NON-NLS-1$
	}
}
private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException {
	if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file
		if (DEBUG)
			System.out.println("initializeFrom - Failed to delete temp index " + this.indexFile); //$NON-NLS-1$
	} else if (!newIndexFile.createNewFile()) {
		if (DEBUG)
			System.out.println("initializeFrom - Failed to create temp index " + this.indexFile); //$NON-NLS-1$
		throw new IOException("Failed to create temp index " + this.indexFile); //$NON-NLS-1$
	}

	int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize;
	this.categoryOffsets = new HashtableOfIntValues(size);
	this.categoryEnds = new HashtableOfIntValues(size);
	this.categoryTables = new HashtableOfObject(size);
	this.separator = diskIndex.separator;
}
private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
	// at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults()
	char[][] oldNames = onDisk.categoryOffsets.keyTable;
	for (int i = 0, l = oldNames.length; i < l; i++) {
		char[] oldName = oldNames[i];
		if (oldName != null && !this.categoryTables.containsKey(oldName))
			this.categoryTables.put(oldName, null);
	}

	char[][] categoryNames = this.categoryTables.keyTable;
	for (int i = 0, l = categoryNames.length; i < l; i++)
		if (categoryNames[i] != null)
			mergeCategory(categoryNames[i], onDisk, positions, stream);
	this.categoryTables = null;
}
private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
	HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
	if (wordsToDocs == null)
		wordsToDocs = new HashtableOfObject(3);

	HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(categoryName, true);
	if (oldWordsToDocs != null) {
		char[][] oldWords = oldWordsToDocs.keyTable;
		Object[] oldArrayOffsets = oldWordsToDocs.valueTable;
		nextWord: for (int i = 0, l = oldWords.length; i < l; i++) {
			char[] oldWord = oldWords[i];
			if (oldWord != null) {
				int[] oldDocNumbers = (int[]) oldArrayOffsets[i];
				int length = oldDocNumbers.length;
				int[] mappedNumbers = new int[length];
				int count = 0;
				for (int j = 0; j < length; j++) {
					int pos = positions[oldDocNumbers[j]];
					if (pos > RE_INDEXED) // forget any reference to a document which was deleted or re_indexed
						mappedNumbers[count++] = pos;
				}
				if (count < length) {
					if (count == 0) continue nextWord; // skip words which no longer have any references
					System.arraycopy(mappedNumbers, 0, mappedNumbers = new int[count], 0, count);
				}

				Object o = wordsToDocs.get(oldWord);
				if (o == null) {
					wordsToDocs.put(oldWord, mappedNumbers);
				} else {
					IntList list = null;
					if (o instanceof IntList) {
						list = (IntList) o;
					} else {
						list = new IntList((int[]) o);
						wordsToDocs.put(oldWord, list);
					}
					for (int j = 0; j < count; j++)
						list.add(mappedNumbers[j]);
				}
			}
		}
		onDisk.categoryTables.put(categoryName, null); // flush cached table
	}
	writeCategoryTable(categoryName, wordsToDocs, stream);
}
DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException {
 	// assume write lock is held
	// compute & write out new docNames
	String[] docNames = readAllDocumentNames();
	int previousLength = docNames.length;
	int[] positions = new int[previousLength]; // keeps track of the position of each document in the new sorted docNames
	SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); // for each new/changed document in the memoryIndex
	docNames = computeDocumentNames(docNames, positions, indexedDocuments, memoryIndex);
	if (docNames.length == 0) {
		if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved

		// index is now empty since all the saved documents were removed
		DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath());
		newDiskIndex.initialize(false);
		return newDiskIndex;
	}

	DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath() + ".tmp"); //$NON-NLS-1$
	try {
		newDiskIndex.initializeFrom(this, newDiskIndex.indexFile);
		FileOutputStream stream = new FileOutputStream(newDiskIndex.indexFile, false);
		int offsetToHeader = -1;
		try {
			newDiskIndex.writeAllDocumentNames(docNames, stream);
			docNames = null; // free up the space

			// add each new/changed doc to empty category tables using its new position #
			if (indexedDocuments.elementSize > 0) {
				Object[] names = indexedDocuments.keyTable;
				Object[] integerPositions = indexedDocuments.valueTable;
				for (int i = 0, l = names.length; i < l; i++)
					if (names[i] != null)
						newDiskIndex.copyQueryResults(
							(HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer) integerPositions[i]).intValue());
			}
			indexedDocuments = null; // free up the space

			// merge each category table with the new ones & write them out
			if (previousLength == 0)
				newDiskIndex.writeCategories(stream);
			else
				newDiskIndex.mergeCategories(this, positions, stream);
			offsetToHeader = newDiskIndex.streamEnd;
			newDiskIndex.writeHeaderInfo(stream);
			positions = null; // free up the space
		} finally {
			stream.close();
			this.streamBuffer = null;
		}
		newDiskIndex.writeOffsetToHeader(offsetToHeader);

		// rename file by deleting previous index file & renaming temp one
		if (this.indexFile.exists() && !this.indexFile.delete()) {
			if (DEBUG)
				System.out.println("mergeWith - Failed to delete " + this.indexFile); //$NON-NLS-1$
			throw new IOException("Failed to delete index file " + this.indexFile); //$NON-NLS-1$
		}
		if (!newDiskIndex.indexFile.renameTo(this.indexFile)) {
			if (DEBUG)
				System.out.println("mergeWith - Failed to rename " + this.indexFile); //$NON-NLS-1$
			throw new IOException("Failed to rename index file " + this.indexFile); //$NON-NLS-1$
		}
	} catch (IOException e) {
		if (newDiskIndex.indexFile.exists() && !newDiskIndex.indexFile.delete())
			if (DEBUG)
				System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexFile); //$NON-NLS-1$
		throw e;
	}

	newDiskIndex.indexFile = this.indexFile;
	return newDiskIndex;
}
private synchronized String[] readAllDocumentNames() throws IOException {
	if (this.numberOfChunks <= 0)
		return CharOperation.NO_STRINGS;

	FileInputStream stream = new FileInputStream(this.indexFile);
	try {
		int offset = this.chunkOffsets[0];
		stream.skip(offset);
		this.streamBuffer = new byte[BUFFER_READ_SIZE];
		this.bufferIndex = 0;
		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
		int lastIndex = this.numberOfChunks - 1;
		String[] docNames = new String[lastIndex * CHUNK_SIZE + sizeOfLastChunk];
		for (int i = 0; i < this.numberOfChunks; i++)
			readChunk(docNames, stream, i * CHUNK_SIZE, i < lastIndex ? CHUNK_SIZE : sizeOfLastChunk);
		return docNames;
	} finally {
		stream.close();
		this.streamBuffer = null;
	}
}
private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException {
	// result will be null if categoryName is unknown
	int offset = this.categoryOffsets.get(categoryName);
	if (offset == HashtableOfIntValues.NO_VALUE) {
		return null;
	}

	if (this.categoryTables == null) {
		this.categoryTables = new HashtableOfObject(3);
	} else {
		HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables.get(categoryName);
		if (cachedTable != null) {
			if (readDocNumbers) { // must cache remaining document number arrays
				Object[] arrayOffsets = cachedTable.valueTable;
				for (int i = 0, l = arrayOffsets.length; i < l; i++)
					if (arrayOffsets[i] instanceof Integer)
						arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]);
			}
			return cachedTable;
		}
	}

	FileInputStream stream = new FileInputStream(this.indexFile);
	HashtableOfObject categoryTable = null;
	char[][] matchingWords = null;
	int count = 0;
	int firstOffset = -1;
	this.streamBuffer = new byte[BUFFER_READ_SIZE];
	try {
		stream.skip(offset);
		this.bufferIndex = 0;
		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
		int size = readStreamInt(stream);
		try {
			if (size < 0) { // DEBUG
				System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
				System.err.println("file = "+this.indexFile); //$NON-NLS-1$
				System.err.println("offset = "+offset); //$NON-NLS-1$
				System.err.println("size = "+size); //$NON-NLS-1$
				System.err.println("--------------------   END   --------------------"); //$NON-NLS-1$
			}
			categoryTable = new HashtableOfObject(size);
		} catch (OutOfMemoryError oom) {
			// DEBUG
			oom.printStackTrace();
			System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
			System.err.println("file = "+this.indexFile); //$NON-NLS-1$
			System.err.println("offset = "+offset); //$NON-NLS-1$
			System.err.println("size = "+size); //$NON-NLS-1$
			System.err.println("--------------------   END   --------------------"); //$NON-NLS-1$
			throw oom;
		}
		int largeArraySize = 256;
		for (int i = 0; i < size; i++) {
			char[] word = readStreamChars(stream);
			int arrayOffset = readStreamInt(stream);
			// if arrayOffset is:
			//		<= 0 then the array size == 1 with the value -> -arrayOffset
			//		> 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately
			//		256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
			if (arrayOffset <= 0) {
				categoryTable.put(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber
			} else if (arrayOffset < largeArraySize) {
				categoryTable.put(word, readStreamDocumentArray(stream, arrayOffset)); // read in-lined array providing size
			} else {
				arrayOffset = readStreamInt(stream); // read actual offset
				if (readDocNumbers) {
					if (matchingWords == null)
						matchingWords = new char[size][];
					if (count == 0)
						firstOffset = arrayOffset;
					matchingWords[count++] = word;
				}
				categoryTable.put(word, new Integer(arrayOffset)); // offset to array in the file
			}
		}
		this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable);
		// cache the table as long as its not too big
		// in practice, some tables can be greater than 500K when they contain more than 10K elements
		this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null;
	} catch (IOException ioe) {
		this.streamBuffer = null;
		throw ioe;
	} finally {
		stream.close();
	}

	if (matchingWords != null && count > 0) {
		stream = new FileInputStream(this.indexFile);
		try {
			stream.skip(firstOffset);
			this.bufferIndex = 0;
			this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
			for (int i = 0; i < count; i++) { // each array follows the previous one
				categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream)));
			}
		} catch (IOException ioe) {
			this.streamBuffer = null;
			throw ioe;
		} finally {
			stream.close();
		}
	}
	this.streamBuffer = null;
	return categoryTable;
}
private void readChunk(String[] docNames, FileInputStream stream, int index, int size) throws IOException {
	String current = new String(readStreamChars(stream));
	docNames[index++] = current;
	for (int i = 1; i < size; i++) {
		if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
			readStreamBuffer(stream);
		int start = streamBuffer[this.bufferIndex++] & 0xFF;
		int end = streamBuffer[this.bufferIndex++] & 0xFF;
		String next  = new String(readStreamChars(stream));
		if (start > 0) {
			if (end > 0) {
				int length = current.length();
				next = current.substring(0, start) + next + current.substring(length - end, length);
			} else {
				next = current.substring(0, start) + next;
			}
		} else if (end > 0) {
			int length = current.length();
			next = next + current.substring(length - end, length);
		}
		docNames[index++] = next;
		current = next;
	}
}
synchronized String readDocumentName(int docNumber) throws IOException {
	if (this.cachedChunks == null)
		this.cachedChunks = new String[this.numberOfChunks][];

	int chunkNumber = docNumber / CHUNK_SIZE;
	String[] chunk = this.cachedChunks[chunkNumber];
	if (chunk == null) {
		boolean isLastChunk = chunkNumber == this.numberOfChunks - 1;
		int start = this.chunkOffsets[chunkNumber];
		int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start;
		if (numberOfBytes < 0)
			throw new IllegalArgumentException();
		this.streamBuffer = new byte[numberOfBytes];
		this.bufferIndex = 0;
		FileInputStream file = new FileInputStream(this.indexFile);
		try {
			file.skip(start);
			if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes)
				throw new IOException();
		} catch (IOException ioe) {
			this.streamBuffer = null;
			throw ioe;
		} finally {
			file.close();
		}
		int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE;
		chunk = new String[numberOfNames];
		try {
			readChunk(chunk, null, 0, numberOfNames);
		} catch (IOException ioe) {
			this.streamBuffer = null;
			throw ioe;
		}
		this.cachedChunks[chunkNumber] = chunk;
	}
	this.streamBuffer = null;
	return chunk[docNumber - (chunkNumber * CHUNK_SIZE)];
}
synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException {
	// arrayOffset is either a cached array of docNumbers or an Integer offset in the file
	if (arrayOffset instanceof int[])
		return (int[]) arrayOffset;

	FileInputStream stream = new FileInputStream(this.indexFile);
	try {
		int offset = ((Integer) arrayOffset).intValue();
		stream.skip(offset);
		this.streamBuffer = new byte[BUFFER_READ_SIZE];
		this.bufferIndex = 0;
		this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
		return readStreamDocumentArray(stream, readStreamInt(stream));
	} finally {
		stream.close();
		this.streamBuffer = null;
	}
}
private void readHeaderInfo(FileInputStream stream) throws IOException {

	// must be same order as writeHeaderInfo()
	this.numberOfChunks = readStreamInt(stream);
	this.sizeOfLastChunk = this.streamBuffer[this.bufferIndex++] & 0xFF;
	this.documentReferenceSize = this.streamBuffer[this.bufferIndex++] & 0xFF;
	this.separator = (char) (this.streamBuffer[this.bufferIndex++] & 0xFF);

	this.chunkOffsets = new int[this.numberOfChunks];
	for (int i = 0; i < this.numberOfChunks; i++)
		this.chunkOffsets[i] = readStreamInt(stream);

	this.startOfCategoryTables = readStreamInt(stream);

	int size = readStreamInt(stream);
	this.categoryOffsets = new HashtableOfIntValues(size);
	this.categoryEnds = new HashtableOfIntValues(size);
	char[] previousCategory = null;
	int offset = -1;
	for (int i = 0; i < size; i++) {
		char[] categoryName = INTERNED_CATEGORY_NAMES.get(readStreamChars(stream));
		offset = readStreamInt(stream);
		this.categoryOffsets.put(categoryName, offset); // cache offset to category table
		if (previousCategory != null) {
			this.categoryEnds.put(previousCategory, offset); // cache end of the category table
		}
		previousCategory = categoryName;
	}
	if (previousCategory != null) {
		this.categoryEnds.put(previousCategory, this.headerInfoOffset); // cache end of the category table
	}
	this.categoryTables = new HashtableOfObject(3);
}
synchronized void startQuery() {
	this.cacheUserCount++;
}
synchronized void stopQuery() {
	if (--this.cacheUserCount < 0) {
		// clear cached items
		this.cacheUserCount = -1;
		this.cachedChunks = null;
		if (this.categoryTables != null) {
			if (this.cachedCategoryName == null) {
				this.categoryTables = null;
			} else if (this.categoryTables.elementSize > 1) {
				HashtableOfObject newTables = new HashtableOfObject(3);
				newTables.put(this.cachedCategoryName, this.categoryTables.get(this.cachedCategoryName));
				this.categoryTables = newTables;
			}
		}
	}
}
private void readStreamBuffer(FileInputStream stream) throws IOException {
	// if we're about to read a known amount at the end of the existing buffer, but it does not completely fit
	// so we need to shift the remaining bytes to be read, and fill the buffer from the stream
	if (this.bufferEnd < this.streamBuffer.length)
		return; // we're at the end of the stream - nothing left to read

	int bytesInBuffer = this.bufferEnd - this.bufferIndex;
	if (bytesInBuffer > 0)
		System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer);
	this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex);
	this.bufferIndex = 0;
}
/**
 * Reads in a string from the specified data input stream. The 
 * string has been encoded using a modified UTF-8 format. 
 * <p>
 * The first two bytes are read as an unsigned short.
 * This value gives the number of following bytes that are in the encoded string,
 * not the length of the resulting string. The following bytes are then 
 * interpreted as bytes encoding characters in the UTF-8 format 
 * and are converted into characters. 
 * <p>
 * This method blocks until all the bytes are read, the end of the 
 * stream is detected, or an exception is thrown. 
 *
 * @param      stream   a data input stream.
 * @return     UTF decoded string as a char array
 * @exception  EOFException if this end of data input is reached while reading it.
 * @exception  IOException if an I/O error occurs while reading data input.
 * @exception  UTFDataFormatException  if the bytes do not represent a
 *               valid UTF-8 encoding of a Unicode string.
 */
private char[] readStreamChars(FileInputStream stream) throws IOException {
	// read chars array length
	if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
		readStreamBuffer(stream);
	int length = (streamBuffer[this.bufferIndex++] & 0xFF) << 8;
	length += this.streamBuffer[this.bufferIndex++] & 0xFF;

	// fill the chars from bytes buffer
	char[] word = new char[length];
	int i = 0;
	while (i < length) {
		// how many characters can be decoded without refilling the buffer?
		int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3);
		// all the characters must already be in the buffer if we're at the end of the stream
		if (charsInBuffer > length || this.bufferEnd != this.streamBuffer.length || stream == null)
			charsInBuffer = length;
		while (i < charsInBuffer) {
			byte b = this.streamBuffer[this.bufferIndex++];
			switch (b & 0xF0) {
				case 0x00 :
				case 0x10 :
				case 0x20 :
				case 0x30 :
				case 0x40 :
				case 0x50 :
				case 0x60 :
				case 0x70 :
					word[i++]= (char) b;
					break;
				case 0xC0 :
				case 0xD0 :
					char next = (char) this.streamBuffer[this.bufferIndex++];
					if ((next & 0xC0) != 0x80) {
						throw new UTFDataFormatException();
					}
					char ch = (char) ((b & 0x1F) << 6);
					ch |= next & 0x3F;
					word[i++] = ch;
					break;
				case 0xE0 :
					char first = (char) this.streamBuffer[this.bufferIndex++];
					char second = (char) this.streamBuffer[this.bufferIndex++];
					if ((first & second & 0xC0) != 0x80) {
						throw new UTFDataFormatException();
					}
					ch = (char) ((b & 0x0F) << 12);
					ch |= ((first& 0x3F) << 6);
					ch |= second & 0x3F;
					word[i++] = ch;
					break;
				default:
					throw new UTFDataFormatException();
			}
		}
		if (i < length && stream != null)
			readStreamBuffer(stream);
	}
	return word;
}
private int[] readStreamDocumentArray(FileInputStream stream, int arraySize) throws IOException {
	int[] indexes = new int[arraySize];
	if (arraySize == 0) return indexes;

	int i = 0;
	switch (this.documentReferenceSize) {
		case 1 :
			while (i < arraySize) {
				// how many bytes without refilling the buffer?
				int bytesInBuffer = i + this.bufferEnd - this.bufferIndex;
				if (bytesInBuffer > arraySize)
					bytesInBuffer = arraySize;
				while (i < bytesInBuffer) {
					indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF;
				}
				if (i < arraySize && stream != null)
					readStreamBuffer(stream);
			}
			break;
		case 2 :
			while (i < arraySize) {
				// how many shorts without refilling the buffer?
				int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2);
				if (shortsInBuffer > arraySize)
					shortsInBuffer = arraySize;
				while (i < shortsInBuffer) {
					int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
					indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
				}
				if (i < arraySize && stream != null)
					readStreamBuffer(stream);
			}
			break;
		default :
			while (i < arraySize) {
				indexes[i++] = readStreamInt(stream);
			}
			break;
	}
	return indexes;
}
private int readStreamInt(FileInputStream stream) throws IOException {
	if (this.bufferIndex + 4 >= this.bufferEnd) {
		readStreamBuffer(stream);
	}
	int val = (streamBuffer[this.bufferIndex++] & 0xFF) << 24;
	val += (streamBuffer[this.bufferIndex++] & 0xFF) << 16;
	val += (streamBuffer[this.bufferIndex++] & 0xFF) << 8;
	return val + (streamBuffer[this.bufferIndex++] & 0xFF);
}
private void writeAllDocumentNames(String[] sortedDocNames, FileOutputStream stream) throws IOException {
	if (sortedDocNames.length == 0)
		throw new IllegalArgumentException();

	// assume the file was just created by initializeFrom()
	this.streamBuffer = new byte[BUFFER_WRITE_SIZE];
	this.bufferIndex = 0;
	this.streamEnd = 0;

	// in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names
	writeStreamChars(stream, SIGNATURE_CHARS);
	this.headerInfoOffset = this.streamEnd;
	writeStreamInt(stream, -1); // will overwrite with correct value later

	int size = sortedDocNames.length;
	this.numberOfChunks = (size / CHUNK_SIZE) + 1;
	this.sizeOfLastChunk = size % CHUNK_SIZE;
	if (this.sizeOfLastChunk == 0) {
		this.numberOfChunks--;
		this.sizeOfLastChunk = CHUNK_SIZE;
	}
	this.documentReferenceSize = size <= 0x7F ? 1 : (size <= 0x7FFF ? 2 : 4); // number of bytes used to encode a reference

	this.chunkOffsets = new int[this.numberOfChunks];
	int lastIndex = this.numberOfChunks - 1;
	for (int i = 0; i < this.numberOfChunks; i++) {
		this.chunkOffsets[i] = this.streamEnd;

		int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE;
		int chunkIndex = i * CHUNK_SIZE;
		String current = sortedDocNames[chunkIndex];
		writeStreamChars(stream, current.toCharArray());
		for (int j = 1; j < chunkSize; j++) {
			String next = sortedDocNames[chunkIndex + j];
			int len1 = current.length();
			int len2 = next.length();
			int max = len1 < len2 ? len1 : len2;
			int start = 0; // number of identical characters at the beginning (also the index of first character that is different)
			while (current.charAt(start) == next.charAt(start)) {
				start++;
				if (max == start) break; // current is 'abba', next is 'abbab'
			}
			if (start > 255) start = 255;

			int end = 0; // number of identical characters at the end
			while (current.charAt(--len1) == next.charAt(--len2)) {
				end++;
				if (len2 == start) break; // current is 'abbba', next is 'abba'
				if (len1 == 0) break; // current is 'xabc', next is 'xyabc'
			}
			if (end > 255) end = 255;
			if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
			this.streamBuffer[this.bufferIndex++] = (byte) start;
			this.streamBuffer[this.bufferIndex++] = (byte) end;
			this.streamEnd += 2;

			int last = next.length() - end;
			writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR));
			current = next;
		}
	}
	this.startOfCategoryTables = this.streamEnd + 1;
}
private void writeCategories(FileOutputStream stream) throws IOException {
	char[][] categoryNames = this.categoryTables.keyTable;
	Object[] tables = this.categoryTables.valueTable;
	for (int i = 0, l = categoryNames.length; i < l; i++)
		if (categoryNames[i] != null)
			writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream);
	this.categoryTables = null;
}
private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException {
	// the format of a category table is as follows:
	// any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered)
	// then the number of word->int[] pairs in the table is written
	// for each word -> int[] pair, the word is written followed by:
	//		an int <= 0 if the array size == 1
	//		an int > 1 & < 256 for the size of the array if its > 1 & < 256, the document array follows immediately
	//		256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)

	int largeArraySize = 256;
	Object[] values = wordsToDocs.valueTable;
	for (int i = 0, l = values.length; i < l; i++) {
		Object o = values[i];
		if (o != null) {
			if (o instanceof IntList)
				o = values[i] = ((IntList) values[i]).asArray();
			int[] documentNumbers = (int[]) o;
			if (documentNumbers.length >= largeArraySize) {
				values[i] = new Integer(this.streamEnd);
				writeDocumentNumbers(documentNumbers, stream);
			}
		}
	}

	this.categoryOffsets.put(categoryName, this.streamEnd); // remember the offset to the start of the table
	this.categoryTables.put(categoryName, null); // flush cached table
	writeStreamInt(stream, wordsToDocs.elementSize);
	char[][] words = wordsToDocs.keyTable;
	for (int i = 0, l = words.length; i < l; i++) {
		Object o = values[i];
		if (o != null) {
			writeStreamChars(stream, words[i]);
			if (o instanceof int[]) {
				int[] documentNumbers = (int[]) o;
				if (documentNumbers.length == 1)
					writeStreamInt(stream, -documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero)
				else
					writeDocumentNumbers(documentNumbers, stream);
			} else {
				writeStreamInt(stream, largeArraySize); // mark to identify that an offset follows
				writeStreamInt(stream, ((Integer) o).intValue()); // offset in the file of the array of document numbers
			}
		}
	}
}
private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException {
	// must store length as a positive int to detect in-lined array of 1 element
	int length = documentNumbers.length;
	writeStreamInt(stream, length);
	Util.sort(documentNumbers);
	int start = 0;
	switch (this.documentReferenceSize) {
		case 1 :
			while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) {
				// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
				int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex;
				for (int i=0; i < bytesLeft; i++) {
					this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
				}
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
			while (start < length) {
				this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
			}
			this.streamEnd += length;
			break;
		case 2 :
			while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) {
				// when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
				int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2;
				for (int i=0; i < shortsLeft; i++) {
					this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
					this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
				}
				stream.write(this.streamBuffer, 0, this.bufferIndex);
				this.bufferIndex = 0;
			}
			while (start < length) {
				this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
				this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
			}
			this.streamEnd += length * 2;
			break;
		default :
			while (start < length) {
				writeStreamInt(stream, documentNumbers[start++]);
			}
			break;
	}
}
private void writeHeaderInfo(FileOutputStream stream) throws IOException {
	writeStreamInt(stream, this.numberOfChunks);
	if ((this.bufferIndex + 3) >= BUFFER_WRITE_SIZE)  {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
	this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk;
	this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize;
	this.streamBuffer[this.bufferIndex++] = (byte) this.separator;
	this.streamEnd += 3;

	// apend the file with chunk offsets
	for (int i = 0; i < this.numberOfChunks; i++) {
		writeStreamInt(stream, this.chunkOffsets[i]);
	}

	writeStreamInt(stream, this.startOfCategoryTables);

	// append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table
	writeStreamInt(stream, this.categoryOffsets.elementSize);
	char[][] categoryNames = this.categoryOffsets.keyTable;
	int[] offsets = this.categoryOffsets.valueTable;
	for (int i = 0, l = categoryNames.length; i < l; i++) {
		if (categoryNames[i] != null) {
			writeStreamChars(stream, categoryNames[i]);
			writeStreamInt(stream, offsets[i]);
		}
	}
	// ensure buffer is written to the stream
	if (this.bufferIndex > 0) {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
}
private void writeOffsetToHeader(int offsetToHeader) throws IOException {
	if (offsetToHeader > 0) {
		RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); //$NON-NLS-1$
		try {
			file.seek(this.headerInfoOffset); // offset to position in header
			file.writeInt(offsetToHeader);
			this.headerInfoOffset = offsetToHeader; // update to reflect the correct offset
		} finally {
			file.close();
		}
	}
}
/**
 * Writes a string to the given output stream using UTF-8 
 * encoding in a machine-independent manner. 
 * <p>
 * First, two bytes of the array are giving the number of bytes to 
 * follow. This value is the number of bytes actually written out, 
 * not the length of the string. Following the length, each character 
 * of the string is put in the bytes array, in sequence, using the UTF-8
 * encoding for the character.
 * </p>
 * <p>
 * Then the entire byte array is written to the output stream
 * using {@link OutputStream#write(byte[], int, int)} method.
 * </p>
 *
 * @param array char array to be written.
 * @exception  IOException  if an I/O error occurs while writting
 * 	the bytes array to the stream.
 */
private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException {
	if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
	int length = array.length;
	this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); // store chars array length instead of bytes
	this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); // this will allow to read it faster
	this.streamEnd += 2;

	// we're assuming that very few char[] are so large that we need to flush the buffer more than once, if at all
	int totalBytesNeeded = length * 3;
	if (totalBytesNeeded <= BUFFER_WRITE_SIZE) {
		if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) {
			// flush the buffer now to make sure there is room for the array
			stream.write(this.streamBuffer, 0, this.bufferIndex);
			this.bufferIndex = 0;
		}
		writeStreamChars(stream, array, 0, length);
	} else {
		int charsPerWrite = BUFFER_WRITE_SIZE / 3;
		int start = 0;
		while (start < length) {
			stream.write(this.streamBuffer, 0, this.bufferIndex);
			this.bufferIndex = 0;
			int charsLeftToWrite = length - start;
			int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite);
			writeStreamChars(stream, array, start, end);
			start = end;
		}
	}
}
private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException {
	// start can NOT be == end
	// must have checked that there is enough room for end - start * 3 bytes in the buffer

	int oldIndex = this.bufferIndex;
	while (start < end) {
		int ch = array[start++];
		if ((ch & 0x007F) == ch) {
			this.streamBuffer[this.bufferIndex++] = (byte) ch;
		} else if ((ch & 0x07FF) == ch) {
			// first two bits are stored in first byte
			byte b = (byte) (ch >> 6);
			b &= 0x1F;
			b |= 0xC0;
			this.streamBuffer[this.bufferIndex++] = b;
			// last six bits are stored in second byte
			b = (byte) (ch & 0x3F);
			b |= 0x80;
			this.streamBuffer[this.bufferIndex++] = b;
		} else {
			// first four bits are stored in first byte
			byte b = (byte) (ch >> 12);
			b &= 0x0F;
			b |= 0xE0;
			this.streamBuffer[this.bufferIndex++] = b;
			// six following bits are stored in second byte
			b = (byte) (ch >> 6);
			b &= 0x3F;
			b |= 0x80;
			this.streamBuffer[this.bufferIndex++] = b;
			// last six bits are stored in third byte
			b = (byte) (ch & 0x3F);
			b |= 0x80;
			this.streamBuffer[this.bufferIndex++] = b;
		}
	}
	this.streamEnd += this.bufferIndex - oldIndex;
}
private void writeStreamInt(FileOutputStream stream, int val) throws IOException {
	if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE)  {
		stream.write(this.streamBuffer, 0, this.bufferIndex);
		this.bufferIndex = 0;
	}
	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24);
	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16);
	this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8);
	this.streamBuffer[this.bufferIndex++] = (byte) val;
	this.streamEnd += 4;
}
}

Back to the top