Young-Seok Kim has submitted this change and it was merged. Change subject: ASTERIXDB-1067: removed Bloomfilters from internal datasets' secondary LSM Btree indexes ......................................................................
ASTERIXDB-1067: removed Bloomfilters from internal datasets' secondary LSM Btree indexes Change-Id: I45ee757641e167dfd7ff73436111ba12b335dc93 Reviewed-on: https://asterix-gerrit.ics.uci.edu/843 Tested-by: Jenkins <[email protected]> Reviewed-by: Ian Maxon <[email protected]> --- M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/ExternalBTree.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTree.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java 9 files changed, 183 insertions(+), 98 deletions(-) Approvals: Ian Maxon: Looks good to me, approved Jenkins: Verified diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/ExternalBTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/ExternalBTree.java index c9b8896..1175130 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/ExternalBTree.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/ExternalBTree.java @@ -88,6 +88,7 @@ private final ITreeIndexFrameFactory interiorFrameFactory; + //TODO remove BloomFilter from external dataset's secondary LSMBTree index public ExternalBTree(ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory insertLeafFrameFactory, ITreeIndexFrameFactory deleteLeafFrameFactory, ILSMIndexFileManager fileManager, TreeIndexFactory<BTree> diskBTreeFactory, TreeIndexFactory<BTree> bulkLoadBTreeFactory, diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTree.java index 13c6949..969828e 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTree.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTree.java @@ -31,7 +31,6 @@ import org.apache.hyracks.data.std.primitive.IntegerPointable; import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference; import org.apache.hyracks.storage.am.bloomfilter.impls.BloomCalculations; -import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter; import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory; import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification; import org.apache.hyracks.storage.am.btree.impls.BTree; @@ -98,6 +97,8 @@ private final boolean needKeyDupCheck; private final int[] btreeFields; + // Primary LSMBTree has a Bloomfilter, but Secondary one doesn't have. + private final boolean hasBloomFilter; public LSMBTree(List<IVirtualBufferCache> virtualBufferCaches, ITreeIndexFrameFactory interiorFrameFactory, ITreeIndexFrameFactory insertLeafFrameFactory, ITreeIndexFrameFactory deleteLeafFrameFactory, @@ -132,6 +133,7 @@ filterFactory); this.needKeyDupCheck = needKeyDupCheck; this.btreeFields = btreeFields; + this.hasBloomFilter = needKeyDupCheck; } // Without memory components @@ -151,6 +153,8 @@ bulkLoadComponentFactory = new LSMBTreeDiskComponentFactory(bulkLoadBTreeFactory, bloomFilterFactory, null); this.needKeyDupCheck = needKeyDupCheck; this.btreeFields = null; + //TODO remove BloomFilter from external dataset's secondary LSMBTree index + this.hasBloomFilter = true; } @Override @@ -211,10 +215,10 @@ List<ILSMComponent> immutableComponents = diskComponents; for (ILSMComponent c : immutableComponents) { LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) c; - BTree btree = component.getBTree(); - BloomFilter bloomFilter = component.getBloomFilter(); - btree.deactivateCloseHandle(); - bloomFilter.deactivate(); + component.getBTree().deactivateCloseHandle(); + if (hasBloomFilter) { + component.getBloomFilter().deactivate(); + } } deallocateMemoryComponents(); isActivated = false; @@ -235,7 +239,9 @@ for (ILSMComponent c : immutableComponents) { LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) c; component.getBTree().destroy(); - component.getBloomFilter().destroy(); + if (hasBloomFilter) { + component.getBloomFilter().destroy(); + } } for (ILSMComponent c : memoryComponents) { LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c; @@ -254,9 +260,13 @@ List<ILSMComponent> immutableComponents = diskComponents; for (ILSMComponent c : immutableComponents) { LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) c; - component.getBloomFilter().deactivate(); + if (hasBloomFilter) { + component.getBloomFilter().deactivate(); + } component.getBTree().deactivate(); - component.getBloomFilter().destroy(); + if (hasBloomFilter) { + component.getBloomFilter().destroy(); + } component.getBTree().destroy(); } immutableComponents.clear(); @@ -425,40 +435,51 @@ NoOpOperationCallback.INSTANCE); RangePredicate nullPred = new RangePredicate(null, null, true, true, null, null); - IIndexCursor countingCursor = ((BTreeAccessor) accessor).createCountingSearchCursor(); - accessor.search(countingCursor, nullPred); long numElements = 0L; - try { - while (countingCursor.hasNext()) { - countingCursor.next(); - ITupleReference countTuple = countingCursor.getTuple(); - numElements = IntegerPointable.getInteger(countTuple.getFieldData(0), countTuple.getFieldStart(0)); - } - } finally { - countingCursor.close(); - } + BloomFilterSpecification bloomFilterSpec = null; + if (hasBloomFilter) { + //count elements in btree for creating Bloomfilter + IIndexCursor countingCursor = ((BTreeAccessor) accessor).createCountingSearchCursor(); + accessor.search(countingCursor, nullPred); - int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements); - BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, - bloomFilterFalsePositiveRate); + try { + while (countingCursor.hasNext()) { + countingCursor.next(); + ITupleReference countTuple = countingCursor.getTuple(); + numElements = IntegerPointable.getInteger(countTuple.getFieldData(0), countTuple.getFieldStart(0)); + } + } finally { + countingCursor.close(); + } + + int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements); + bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, bloomFilterFalsePositiveRate); + } LSMBTreeDiskComponent component = createDiskComponent(componentFactory, flushOp.getBTreeFlushTarget(), flushOp.getBloomFilterFlushTarget(), true); IIndexBulkLoader bulkLoader = component.getBTree().createBulkLoader(1.0f, false, numElements, false, true); - IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numElements, bloomFilterSpec.getNumHashes(), - bloomFilterSpec.getNumBucketsPerElements()); + IIndexBulkLoader builder = null; + if (hasBloomFilter) { + builder = component.getBloomFilter().createBuilder(numElements, bloomFilterSpec.getNumHashes(), + bloomFilterSpec.getNumBucketsPerElements()); + } IIndexCursor scanCursor = accessor.createSearchCursor(false); accessor.search(scanCursor, nullPred); try { while (scanCursor.hasNext()) { scanCursor.next(); - builder.add(scanCursor.getTuple()); + if (hasBloomFilter) { + builder.add(scanCursor.getTuple()); + } bulkLoader.add(scanCursor.getTuple()); } } finally { scanCursor.close(); - builder.end(); + if (hasBloomFilter) { + builder.end(); + } } if (component.getLSMComponentFilter() != null) { @@ -509,30 +530,39 @@ List<ILSMComponent> mergedComponents = mergeOp.getMergingComponents(); long numElements = 0L; - for (int i = 0; i < mergedComponents.size(); ++i) { - numElements += ((LSMBTreeDiskComponent) mergedComponents.get(i)).getBloomFilter().getNumElements(); + BloomFilterSpecification bloomFilterSpec = null; + if (hasBloomFilter) { + //count elements in btree for creating Bloomfilter + for (int i = 0; i < mergedComponents.size(); ++i) { + numElements += ((LSMBTreeDiskComponent) mergedComponents.get(i)).getBloomFilter().getNumElements(); + } + int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements); + bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, bloomFilterFalsePositiveRate); } - - int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements); - BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, - bloomFilterFalsePositiveRate); LSMBTreeDiskComponent mergedComponent = createDiskComponent(componentFactory, mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true); IIndexBulkLoader bulkLoader = mergedComponent.getBTree().createBulkLoader(1.0f, false, numElements, false, true); - IIndexBulkLoader builder = mergedComponent.getBloomFilter().createBuilder(numElements, - bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements()); + IIndexBulkLoader builder = null; + if (hasBloomFilter) { + builder = mergedComponent.getBloomFilter().createBuilder(numElements, bloomFilterSpec.getNumHashes(), + bloomFilterSpec.getNumBucketsPerElements()); + } try { while (cursor.hasNext()) { cursor.next(); ITupleReference frameTuple = cursor.getTuple(); - builder.add(frameTuple); + if (hasBloomFilter) { + builder.add(frameTuple); + } bulkLoader.add(frameTuple); } } finally { cursor.close(); - builder.end(); + if (hasBloomFilter) { + builder.end(); + } } if (mergedComponent.getLSMComponentFilter() != null) { List<ITupleReference> filterTuples = new ArrayList<ITupleReference>(); @@ -551,7 +581,7 @@ protected LSMBTreeDiskComponent createDiskComponent(LSMBTreeDiskComponentFactory factory, FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean createComponent) - throws HyracksDataException, IndexException { + throws HyracksDataException, IndexException { // Create new BTree instance. LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) factory .createLSMComponentInstance(new LSMComponentFileReferences(btreeFileRef, null, bloomFilterFileRef)); @@ -559,7 +589,9 @@ if (!createComponent) { component.getBTree().activate(); } - component.getBloomFilter().activate(); + if (hasBloomFilter) { + component.getBloomFilter().activate(); + } if (component.getLSMComponentFilter() != null && !createComponent) { filterManager.readFilterInfo(component.getLSMComponentFilter(), component.getBTree()); } @@ -587,7 +619,9 @@ // The order of forcing the dirty page to be flushed is critical. The // bloom filter must be always done first. LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) lsmComponent; - markAsValidInternal(component.getBTree().getBufferCache(), component.getBloomFilter()); + if (hasBloomFilter) { + markAsValidInternal(component.getBTree().getBufferCache(), component.getBloomFilter()); + } markAsValidInternal(component.getBTree()); } @@ -615,11 +649,15 @@ bulkLoader = (BTreeBulkLoader) ((LSMBTreeDiskComponent) component).getBTree().createBulkLoader(fillFactor, verifyInput, numElementsHint, false, true); - int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElementsHint); - BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, - bloomFilterFalsePositiveRate); - builder = ((LSMBTreeDiskComponent) component).getBloomFilter().createBuilder(numElementsHint, - bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements()); + if (hasBloomFilter) { + int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElementsHint); + BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, + bloomFilterFalsePositiveRate); + builder = ((LSMBTreeDiskComponent) component).getBloomFilter().createBuilder(numElementsHint, + bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements()); + } else { + builder = null; + } if (filterFields != null) { indexTuple = new PermutingTupleReference(btreeFields); @@ -644,7 +682,9 @@ } bulkLoader.add(t); - builder.add(t); + if (hasBloomFilter) { + builder.add(t); + } if (filterTuple != null) { filterTuple.reset(tuple); @@ -662,21 +702,23 @@ protected void cleanupArtifacts() throws HyracksDataException { if (!cleanedUpArtifacts) { cleanedUpArtifacts = true; - if (!endedBloomFilterLoad) { + if (hasBloomFilter && !endedBloomFilterLoad) { builder.abort(); endedBloomFilterLoad = true; } ((LSMBTreeDiskComponent) component).getBTree().deactivate(); ((LSMBTreeDiskComponent) component).getBTree().destroy(); - ((LSMBTreeDiskComponent) component).getBloomFilter().deactivate(); - ((LSMBTreeDiskComponent) component).getBloomFilter().destroy(); + if (hasBloomFilter) { + ((LSMBTreeDiskComponent) component).getBloomFilter().deactivate(); + ((LSMBTreeDiskComponent) component).getBloomFilter().destroy(); + } } } @Override public void end() throws HyracksDataException, IndexException { if (!cleanedUpArtifacts) { - if (!endedBloomFilterLoad) { + if (hasBloomFilter && !endedBloomFilterLoad) { builder.end(); endedBloomFilterLoad = true; } @@ -711,9 +753,9 @@ public LSMBTreeOpContext createOpContext(IModificationOperationCallback modificationCallback, ISearchOperationCallback searchCallback) { + int numBloomFilterKeyFields = hasBloomFilter ? componentFactory.getBloomFilterKeyFields().length : 0; return new LSMBTreeOpContext(memoryComponents, insertLeafFrameFactory, deleteLeafFrameFactory, - modificationCallback, searchCallback, componentFactory.getBloomFilterKeyFields().length, btreeFields, - filterFields, lsmHarness); + modificationCallback, searchCallback, numBloomFilterKeyFields, btreeFields, filterFields, lsmHarness); } @Override diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java index f553368..c43590b 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java @@ -38,8 +38,10 @@ public void destroy() throws HyracksDataException { btree.deactivate(); btree.destroy(); - bloomFilter.deactivate(); - bloomFilter.destroy(); + if (bloomFilter != null) { + bloomFilter.deactivate(); + bloomFilter.destroy(); + } } public BTree getBTree() { @@ -52,7 +54,8 @@ @Override public long getComponentSize() { - return btree.getFileReference().getFile().length() + bloomFilter.getFileReference().getFile().length(); + return btree.getFileReference().getFile().length() + + (bloomFilter == null ? 0 : bloomFilter.getFileReference().getFile().length()); } @Override diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java index 7d33a79..c46119e 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java @@ -43,10 +43,11 @@ } @Override - public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException, - HyracksDataException { + public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) + throws IndexException, HyracksDataException { return new LSMBTreeDiskComponent(btreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()), - bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()), + bloomFilterFactory == null ? null + : bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()), filterFactory == null ? null : filterFactory.createLSMComponentFilter()); } @@ -56,6 +57,6 @@ } public int[] getBloomFilterKeyFields() { - return bloomFilterFactory.getBloomFilterKeyFields(); + return bloomFilterFactory == null ? null : bloomFilterFactory.getBloomFilterKeyFields(); } } diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java index 2be4e4a..653c451 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFileManager.java @@ -43,11 +43,13 @@ public static final String BTREE_STRING = "b"; private final TreeIndexFactory<? extends ITreeIndex> btreeFactory; + private final boolean hasBloomFilter; public LSMBTreeFileManager(IFileMapProvider fileMapProvider, FileReference file, - TreeIndexFactory<? extends ITreeIndex> btreeFactory) { + TreeIndexFactory<? extends ITreeIndex> btreeFactory, boolean hasBloomFilter) { super(fileMapProvider, file, null); this.btreeFactory = btreeFactory; + this.hasBloomFilter = hasBloomFilter; } @Override @@ -56,7 +58,7 @@ String baseName = baseDir + ts + SPLIT_STRING + ts; // Begin timestamp and end timestamp are identical since it is a flush return new LSMComponentFileReferences(createFlushFile(baseName + SPLIT_STRING + BTREE_STRING), null, - createFlushFile(baseName + SPLIT_STRING + BLOOM_FILTER_STRING)); + hasBloomFilter ? createFlushFile(baseName + SPLIT_STRING + BLOOM_FILTER_STRING) : null); } @Override @@ -68,7 +70,7 @@ String baseName = baseDir + firstTimestampRange[0] + SPLIT_STRING + lastTimestampRange[1]; // Get the range of timestamps by taking the earliest and the latest timestamps return new LSMComponentFileReferences(createMergeFile(baseName + SPLIT_STRING + BTREE_STRING), null, - createMergeFile(baseName + SPLIT_STRING + BLOOM_FILTER_STRING)); + hasBloomFilter ? createMergeFile(baseName + SPLIT_STRING + BLOOM_FILTER_STRING) : null); } private static FilenameFilter btreeFilter = new FilenameFilter() { @@ -96,74 +98,99 @@ int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING); btreeFilesSet.add(cmpFileName.fileName.substring(0, index)); } - validateFiles(btreeFilesSet, allBloomFilterFiles, getCompoundFilter(transactionFilter, bloomFilterFilter), null); - // Sanity check. - if (allBTreeFiles.size() != allBloomFilterFiles.size()) { - throw new HyracksDataException( - "Unequal number of valid BTree and bloom filter files found. Aborting cleanup."); + if (hasBloomFilter) { + validateFiles(btreeFilesSet, allBloomFilterFiles, getCompoundFilter(transactionFilter, bloomFilterFilter), + null); + // Sanity check. + if (allBTreeFiles.size() != allBloomFilterFiles.size()) { + throw new HyracksDataException( + "Unequal number of valid BTree and bloom filter files found. Aborting cleanup."); + } } // Trivial cases. - if (allBTreeFiles.isEmpty() || allBloomFilterFiles.isEmpty()) { + if (allBTreeFiles.isEmpty() || hasBloomFilter && allBloomFilterFiles.isEmpty()) { return validFiles; } - if (allBTreeFiles.size() == 1 && allBloomFilterFiles.size() == 1) { - validFiles.add(new LSMComponentFileReferences(allBTreeFiles.get(0).fileRef, null, allBloomFilterFiles - .get(0).fileRef)); + // Special case: sorting is not required + if (allBTreeFiles.size() == 1 && (!hasBloomFilter || allBloomFilterFiles.size() == 1)) { + validFiles.add(new LSMComponentFileReferences(allBTreeFiles.get(0).fileRef, null, + hasBloomFilter ? allBloomFilterFiles.get(0).fileRef : null)); return validFiles; } // Sorts files names from earliest to latest timestamp. Collections.sort(allBTreeFiles); - Collections.sort(allBloomFilterFiles); + if (hasBloomFilter) { + Collections.sort(allBloomFilterFiles); + } List<ComparableFileName> validComparableBTreeFiles = new ArrayList<ComparableFileName>(); ComparableFileName lastBTree = allBTreeFiles.get(0); validComparableBTreeFiles.add(lastBTree); - List<ComparableFileName> validComparableBloomFilterFiles = new ArrayList<ComparableFileName>(); - ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0); - validComparableBloomFilterFiles.add(lastBloomFilter); + List<ComparableFileName> validComparableBloomFilterFiles = null; + ComparableFileName lastBloomFilter = null; + if (hasBloomFilter) { + validComparableBloomFilterFiles = new ArrayList<ComparableFileName>(); + lastBloomFilter = allBloomFilterFiles.get(0); + validComparableBloomFilterFiles.add(lastBloomFilter); + } + ComparableFileName currentBTree = null; + ComparableFileName currentBloomFilter = null; for (int i = 1; i < allBTreeFiles.size(); i++) { - ComparableFileName currentBTree = allBTreeFiles.get(i); - ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i); + currentBTree = allBTreeFiles.get(i); + if (hasBloomFilter) { + currentBloomFilter = allBloomFilterFiles.get(i); + } // Current start timestamp is greater than last stop timestamp. if (currentBTree.interval[0].compareTo(lastBTree.interval[1]) > 0 - && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0) { + && (!hasBloomFilter || currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0)) { validComparableBTreeFiles.add(currentBTree); - validComparableBloomFilterFiles.add(currentBloomFilter); lastBTree = currentBTree; - lastBloomFilter = currentBloomFilter; + if (hasBloomFilter) { + validComparableBloomFilterFiles.add(currentBloomFilter); + lastBloomFilter = currentBloomFilter; + } } else if (currentBTree.interval[0].compareTo(lastBTree.interval[0]) >= 0 && currentBTree.interval[1].compareTo(lastBTree.interval[1]) <= 0 - && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0 - && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0) { + && (!hasBloomFilter || (currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0 + && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0))) { // Invalid files are completely contained in last interval. File invalidBTreeFile = new File(currentBTree.fullPath); invalidBTreeFile.delete(); - File invalidBloomFilterFile = new File(currentBloomFilter.fullPath); - invalidBloomFilterFile.delete(); + if (hasBloomFilter) { + File invalidBloomFilterFile = new File(currentBloomFilter.fullPath); + invalidBloomFilterFile.delete(); + } } else { // This scenario should not be possible. - throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals."); + throw new HyracksDataException( + "Found LSM files with overlapping but not contained timetamp intervals."); } } // Sort valid files in reverse lexicographical order, such that newer // files come first. Collections.sort(validComparableBTreeFiles, recencyCmp); - Collections.sort(validComparableBloomFilterFiles, recencyCmp); - Iterator<ComparableFileName> btreeFileIter = validComparableBTreeFiles.iterator(); - Iterator<ComparableFileName> bloomFilterFileIter = validComparableBloomFilterFiles.iterator(); + Iterator<ComparableFileName> bloomFilterFileIter = null; + if (hasBloomFilter) { + Collections.sort(validComparableBloomFilterFiles, recencyCmp); + bloomFilterFileIter = validComparableBloomFilterFiles.iterator(); + } + ComparableFileName cmpBTreeFileName = null; + ComparableFileName cmpBloomFilterFileName = null; while (btreeFileIter.hasNext() && bloomFilterFileIter.hasNext()) { - ComparableFileName cmpBTreeFileName = btreeFileIter.next(); - ComparableFileName cmpBloomFilterFileName = bloomFilterFileIter.next(); + cmpBTreeFileName = btreeFileIter.next(); + if (hasBloomFilter) { + cmpBloomFilterFileName = bloomFilterFileIter.next(); + } validFiles.add(new LSMComponentFileReferences(cmpBTreeFileName.fileRef, null, - cmpBloomFilterFileName.fileRef)); + hasBloomFilter ? cmpBloomFilterFileName.fileRef : null)); } return validFiles; diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java index c56293f..a30527d 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java @@ -60,7 +60,9 @@ public Set<IODeviceHandle> getWriteDevices() { Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>(); devs.add(btreeFlushTarget.getDeviceHandle()); - devs.add(bloomFilterFlushTarget.getDeviceHandle()); + if (bloomFilterFlushTarget != null) { + devs.add(bloomFilterFlushTarget.getDeviceHandle()); + } return devs; } diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java index f69a77f..9f659e4 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java @@ -61,7 +61,9 @@ for (ILSMComponent o : mergingComponents) { LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) o; devs.add(component.getBTree().getFileReference().getDeviceHandle()); - devs.add(component.getBloomFilter().getFileReference().getDeviceHandle()); + if (bloomFilterMergeTarget != null) { + devs.add(component.getBloomFilter().getFileReference().getDeviceHandle()); + } } return devs; } @@ -70,7 +72,9 @@ public Set<IODeviceHandle> getWriteDevices() { Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>(); devs.add(btreeMergeTarget.getDeviceHandle()); - devs.add(bloomFilterMergeTarget.getDeviceHandle()); + if (bloomFilterMergeTarget != null) { + devs.add(bloomFilterMergeTarget.getDeviceHandle()); + } return devs; } diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java index 31c9d40..fc8f9b2 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java @@ -79,7 +79,8 @@ this.cmp = null; } - bloomFilterCmp = MultiComparator.create(c.getBTree().getComparatorFactories(), 0, numBloomFilterKeyFields); + bloomFilterCmp = numBloomFilterKeyFields == 0 ? null + : MultiComparator.create(c.getBTree().getComparatorFactories(), 0, numBloomFilterKeyFields); mutableBTrees = new BTree[mutableComponents.size()]; mutableBTreeAccessors = new BTree.BTreeAccessor[mutableComponents.size()]; diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java index 332ea78..16901e9 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java @@ -83,8 +83,8 @@ TreeIndexFactory<BTree> bulkLoadBTreeFactory = new BTreeFactory(diskBufferCache, diskFileMapProvider, freePageManagerFactory, interiorFrameFactory, insertLeafFrameFactory, cmpFactories, typeTraits.length); - BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider, - bloomFilterKeyFields); + BloomFilterFactory bloomFilterFactory = needKeyDupCheck + ? new BloomFilterFactory(diskBufferCache, diskFileMapProvider, bloomFilterKeyFields) : null; LSMComponentFilterFactory filterFactory = null; LSMComponentFilterFrameFactory filterFrameFactory = null; @@ -97,7 +97,9 @@ filterManager = new LSMComponentFilterManager(diskBufferCache, filterFrameFactory); } - ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(diskFileMapProvider, file, diskBTreeFactory); + //Primary LSMBTree index has a BloomFilter. + ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(diskFileMapProvider, file, diskBTreeFactory, + needKeyDupCheck); LSMBTree lsmTree = new LSMBTree(virtualBufferCaches, interiorFrameFactory, insertLeafFrameFactory, deleteLeafFrameFactory, fileNameManager, diskBTreeFactory, bulkLoadBTreeFactory, bloomFilterFactory, @@ -145,7 +147,9 @@ TreeIndexFactory<BTree> transactionBTreeFactory = new BTreeFactory(diskBufferCache, diskFileMapProvider, freePageManagerFactory, interiorFrameFactory, dualLeafFrameFactory, cmpFactories, typeTraits.length); - ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(diskFileMapProvider, file, diskBTreeFactory); + //TODO remove BloomFilter from external dataset's secondary LSMBTree index + ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(diskFileMapProvider, file, diskBTreeFactory, + true); // the disk only index uses an empty ArrayList for virtual buffer caches ExternalBTree lsmTree = new ExternalBTree(interiorFrameFactory, insertLeafFrameFactory, deleteLeafFrameFactory, -- To view, visit https://asterix-gerrit.ics.uci.edu/843 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I45ee757641e167dfd7ff73436111ba12b335dc93 Gerrit-PatchSet: 2 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Young-Seok Kim <[email protected]> Gerrit-Reviewer: Ian Maxon <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Young-Seok Kim <[email protected]>
