From Ritik Raj <[email protected]>: Ritik Raj has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21129?usp=email )
Change subject: bufferCacheAlreadyHit Optimization opp ...................................................................... bufferCacheAlreadyHit Optimization opp Change-Id: I66b54548adf6fada67978d3ec4f7b2e4e91032a8 --- M asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.conf M asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.ftl M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPagePrefetchRequest.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPageReadContext.java 6 files changed, 55 insertions(+), 17 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/29/21129/1 diff --git a/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.conf b/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.conf index 8745d9d..d8b3c59 100644 --- a/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.conf +++ b/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.conf @@ -68,8 +68,12 @@ cloud.storage.scheme=s3 cloud.storage.bucket=cloud-storage-container cloud.storage.region=us-west-2 -cloud.storage.endpoint=http://127.0.0.1:8001 +cloud.storage.endpoint=http://127.0.0.1:49612 cloud.storage.anonymous.auth=true cloud.storage.cache.policy=selective cloud.max.write.requests.per.second=2000 -cloud.max.read.requests.per.second=4000 \ No newline at end of file +cloud.max.read.requests.per.second=4000 +; --- prefetch testing: force sweeper to evict pages to 
cloud --- +cloud.storage.debug.mode.enabled=true +cloud.storage.debug.sweep.threshold.size=1MB +cloud.storage.disk.monitor.interval=5 \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.ftl b/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.ftl index 8271bfd..8a426f9 100644 --- a/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.ftl +++ b/asterixdb/asterix-app/src/test/resources/cc-cloud-storage-main.ftl @@ -72,4 +72,8 @@ cloud.storage.anonymous.auth=true cloud.storage.cache.policy=selective cloud.max.write.requests.per.second=2000 -cloud.max.read.requests.per.second=4000 \ No newline at end of file +cloud.max.read.requests.per.second=4000 +; --- prefetch testing: force sweeper to evict pages to cloud --- +cloud.storage.debug.mode.enabled=true +cloud.storage.debug.sweep.threshold.size=1MB +cloud.storage.disk.monitor.interval=5 \ No newline at end of file diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java index 2beef71..6640c32 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java @@ -131,6 +131,12 @@ // Sort the pairs by offset (i.e., lowest offset first) LongArrays.stableSort(offsetColumnIndexPairs, 0, numberOfPresentColumnsInLeaf, OFFSET_COMPARATOR); + // Evaluate once per leaf: both BitSets are stable for the entire column loop. 
+ // evictableColumns comes from planner.getPlanCopy() (a clone, never == EMPTY when the plan is active) + // cloudOnlyColumns is cleared/populated in onPin() and does not change mid-loop. + boolean noEviction = (evictableColumns == EMPTY || evictableColumns.isEmpty()) + && (cloudOnlyColumns == EMPTY || cloudOnlyColumns.isEmpty()); + int columnOrdinal = 0; for (int i = 0; i < numberOfPresentColumnsInLeaf; i++) { if (offsetColumnIndexPairs[i] == 0) { @@ -158,8 +164,8 @@ // Set column index columnsOrder[columnOrdinal++] = columnIndex; // Compute cloud-only and evictable pages - setCloudOnlyAndEvictablePages(columnIndex, cloudOnlyColumns, evictableColumns, startPageId, - numberOfPages); + setCloudOnlyAndEvictablePages(noEviction, columnIndex, cloudOnlyColumns, evictableColumns, + startPageId, numberOfPages); // A requested column. Keep its pages as requested continue; } @@ -286,9 +292,9 @@ return IntPairUtil.getSecond(pair); } - private void setCloudOnlyAndEvictablePages(int columnIndex, BitSet cloudOnlyColumns, BitSet evictableColumns, - int startPageId, int numberOfPages) { - if (evictableColumns == EMPTY && cloudOnlyColumns == EMPTY) { + private void setCloudOnlyAndEvictablePages(boolean noEviction, int columnIndex, BitSet cloudOnlyColumns, + BitSet evictableColumns, int startPageId, int numberOfPages) { + if (noEviction) { return; } diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java index dcbcfbf..cf0d344 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java +++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java @@ -99,6 +99,10 @@ if (lockTest.isLocked()) { ColumnSweepLockInfo lockedColumns = (ColumnSweepLockInfo) lockTest; lockedColumns.getLockedColumns(cloudOnlyColumns); + // The sweeper has locked some columns for eviction while this query is reading them. + // Those columns will be served from the cloud stream rather than local disk. + // Frequent occurrences indicate contention between the sweeper and active queries. + LOGGER.debug("Sweep lock active on page-zero pin: cloudOnlyColumns={}", cloudOnlyColumns); } } @@ -235,6 +239,9 @@ if (dataPageCount <= 0) { return; } + // Always use range-merging to coalesce adjacent column pages into a single cloud stream. + // Iterating columns one-by-one would open a separate stream per column even when they + // are physically contiguous on disk, multiplying S3 round-trips by the projected column count. 
computeProjectedRanges(columnRanges, mergedPageRanges); mergedPageRanges.pin(columnCtx, bufferCache, fileId, pageZeroId); } diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPagePrefetchRequest.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPagePrefetchRequest.java index 535cc70..51bf4f6 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPagePrefetchRequest.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPagePrefetchRequest.java @@ -220,6 +220,9 @@ private void pinProjected(CloudMegaPageReadContext columnCtx, ColumnRanges columnRanges) throws HyracksDataException { + // Always use range-merging to coalesce adjacent column pages into a single cloud stream. + // Iterating columns one-by-one would open a separate stream per column even when they + // are physically contiguous on disk, multiplying S3 round-trips by the projected column count. 
AbstractPageRangesComputer mergedPageRanges = AbstractPageRangesComputer.create(CloudColumnReadContext.MAX_RANGES_COUNT); CloudColumnReadContext.computeProjectedRanges(columnRanges, mergedPageRanges); diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPageReadContext.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPageReadContext.java index e7d2548..5b6893e 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPageReadContext.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudMegaPageReadContext.java @@ -22,9 +22,7 @@ import static org.apache.hyracks.storage.common.buffercache.context.read.DefaultBufferCacheReadContextProvider.DEFAULT; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.BitSet; -import java.util.List; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.util.ExceptionUtils; @@ -55,11 +53,16 @@ private final ColumnProjectorType operation; private final ColumnRanges columnRanges; private final IPhysicalDrive drive; - private final List<ICachedPage> pinnedPages; + private ICachedPage[] pinnedPages = new ICachedPage[0]; + private int pinnedPageCount = 0; private int numberOfContiguousPages; private int pageCounter; private CloudInputStream gapStream; + // Counts pages inside an open cloud stream range that were already valid in the buffer cache. + // A non-zero value means the range was partially warm: some pages were fetched from cloud, + // others were skipped because they were already cached. 
+ private int skippedInRange; // for debugging int pageZeroId; @@ -68,7 +71,6 @@ this.operation = operation; this.columnRanges = columnRanges; this.drive = drive; - pinnedPages = new ArrayList<>(); } void pin(IBufferCache bufferCache, int fileId, int pageZeroId, int start, int numberOfPages, BitSet requestedPages) @@ -76,6 +78,7 @@ closeStream(); this.numberOfContiguousPages = numberOfPages; pageCounter = 0; + skippedInRange = 0; this.pageZeroId = pageZeroId; doPin(bufferCache, fileId, pageZeroId, start, numberOfPages, requestedPages); } @@ -147,16 +150,23 @@ } void unpinAll(IBufferCache bufferCache) throws HyracksDataException { - for (int i = 0; i < pinnedPages.size(); i++) { - bufferCache.unpin(pinnedPages.get(i), this); + for (int i = 0; i < pinnedPageCount; i++) { + bufferCache.unpin(pinnedPages[i], this); + pinnedPages[i] = null; // Help GC } - pinnedPages.clear(); + pinnedPageCount = 0; } void closeStream() { if (gapStream != null) { + // Log per-range summary: how many pinned pages were served from cloud vs already cached. + // skippedInRange > 0 means the range was a partial warm range -- some pages were + // already valid in the buffer pool when the cloud stream was opened. + LOGGER.debug("Closed cloud stream: pageZeroId={}, pinnedInRange={}, skippedInRange={}", pageZeroId, + pinnedPageCount, skippedInRange); gapStream.close(); gapStream = null; + skippedInRange = 0; } } @@ -203,7 +213,7 @@ threadStats.cloudReadRequest(); - LOGGER.info( + LOGGER.debug( "Cloud stream read for pageId={} starting from pageCounter={} out of " + "numberOfContiguousPages={}. 
pageZeroId={} stream: {}", pageId, pageCounter, numberOfContiguousPages, pageZeroId, gapStream); @@ -229,6 +239,7 @@ } // Ensure the stream starts from the page's offset and also skip the page's content + skippedInRange++; long newOffset = cPage.getCompressedPageOffset() + cPage.getCompressedPageSize(); try { gapStream.skipTo(newOffset); @@ -240,12 +251,15 @@ private void doPin(IBufferCache bufferCache, int fileId, int pageZeroId, int start, int numberOfPages, BitSet requestedPages) throws HyracksDataException { + if (pinnedPages.length < pinnedPageCount + numberOfPages) { + pinnedPages = java.util.Arrays.copyOf(pinnedPages, pinnedPageCount + numberOfPages); + } for (int i = start; i < start + numberOfPages; i++) { try { if (requestedPages == ALL_PAGES || requestedPages.get(i)) { int pageId = pageZeroId + i; long dpid = BufferedFileHandle.getDiskPageId(fileId, pageId); - pinnedPages.add(bufferCache.pin(dpid, this)); + pinnedPages[pinnedPageCount++] = bufferCache.pin(dpid, this); } pageCounter++; } catch (Throwable th) { -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21129?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: newchange Gerrit-Project: asterixdb Gerrit-Branch: lumina Gerrit-Change-Id: I66b54548adf6fada67978d3ec4f7b2e4e91032a8 Gerrit-Change-Number: 21129 Gerrit-PatchSet: 1 Gerrit-Owner: Ritik Raj <[email protected]>
