>From Wail Alkowaileet <wael....@gmail.com>: Wail Alkowaileet has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17975 )
Change subject: [ASTERIXDB-3314][STO] Limit mega leaf node size ...................................................................... [ASTERIXDB-3314][STO] Limit mega leaf node size - user model changes: yes - storage format changes: yes - interface changes: yes Details: - Limit mega leaf node sizes to 10MB by default - Make the max leaf node size configurable - Local resource format change to include the max leaf node size in columnar manager factory Change-Id: I43965026044543698d73b2f4ad5f8b68b7d3fc99 --- M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java M asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm M asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java M asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/DatasetDeclParametersUtil.java M asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataRecordTypes.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/ColumnManagerFactory.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleReaderWriterFactory.java M asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/BTreeResourceFactoryProvider.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleWriter.java M asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/metadata/metadata.001.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm M asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/AbstractColumnTupleReference.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWithMetaWriter.java M asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java M asterixdb/asterix-app/src/test/resources/runtimets/results/column/metadata/metadata.002.adm M asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/tuple/MergeColumnTupleReference.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleReaderWriterFactory.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleReaderWriterFactory.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleReaderWriterFactory.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java 28 files changed, 197 insertions(+), 58 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/75/17975/1 diff --git 
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java index a99fc22..74f5305 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java @@ -889,7 +889,8 @@ StorageProperties storageProperties = metadataProvider.getStorageProperties(); DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getStorageFormat(), - storageProperties.getColumnMaxTupleCount(), storageProperties.getColumnFreeSpaceTolerance()); + storageProperties.getColumnMaxTupleCount(), storageProperties.getColumnFreeSpaceTolerance(), + storageProperties.getColumnMaxLeafNodeSize()); try { //TODO(DB): also check for database existence? diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/metadata/metadata.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/metadata/metadata.001.ddl.sqlpp index 3ba53c8..7cb27a1 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/metadata/metadata.001.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/metadata/metadata.001.ddl.sqlpp @@ -42,7 +42,7 @@ CREATE DATASET ColumnDataset4(ColumnType) PRIMARY KEY id WITH { - "storage-format": {"format" : "column", "free-space-tolerance": 0.11, "max-tuple-count": 10} + "storage-format": {"format" : "column", "free-space-tolerance": 0.11, "max-tuple-count": 10, "max-leaf-node-size": "5MB"} }; CREATE DATASET RowDataset(ColumnType) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm index 716e0b4..1bb6ef9 100644 --- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm @@ -61,6 +61,7 @@ "ssl\.enabled" : false, "storage.buffercache.pagesize" : 32768, "storage.column.free.space.tolerance" : 0.15, + "storage.column.max.leaf.node.size" : 10485760, "storage.column.max.tuple.count" : 15000, "storage.compression.block" : "snappy", "storage.format" : "row", diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm index 93fea8f..c2883ae 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm @@ -61,6 +61,7 @@ "ssl\.enabled" : false, "storage.buffercache.pagesize" : 32768, "storage.column.free.space.tolerance" : 0.15, + "storage.column.max.leaf.node.size" : 10485760, "storage.column.max.tuple.count" : 15000, "storage.compression.block" : "snappy", "storage.format" : "row", diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm index e68b30a..fafd7b5 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm @@ -61,6 +61,7 @@ "ssl\.enabled" : false, "storage.buffercache.pagesize" : 32768, "storage.column.free.space.tolerance" : 0.15, + "storage.column.max.leaf.node.size" 
: 10485760, "storage.column.max.tuple.count" : 15000, "storage.compression.block" : "snappy", "storage.format" : "row", diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/metadata/metadata.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/metadata/metadata.002.adm index 4b0eed3..102b113 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/metadata/metadata.002.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/metadata/metadata.002.adm @@ -1,5 +1,5 @@ -{ "DatasetName": "ColumnDataset1", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 15000, "FreeSpaceTolerance": 0.15 } } -{ "DatasetName": "ColumnDataset2", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 10, "FreeSpaceTolerance": 0.15 } } -{ "DatasetName": "ColumnDataset3", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 15000, "FreeSpaceTolerance": 0.11 } } -{ "DatasetName": "ColumnDataset4", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 10, "FreeSpaceTolerance": 0.11 } } +{ "DatasetName": "ColumnDataset1", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 15000, "FreeSpaceTolerance": 0.15, "MaxLeafNodeSize": 10485760 } } +{ "DatasetName": "ColumnDataset2", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 10, "FreeSpaceTolerance": 0.15, "MaxLeafNodeSize": 10485760 } } +{ "DatasetName": "ColumnDataset3", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 15000, "FreeSpaceTolerance": 0.11, "MaxLeafNodeSize": 10485760 } } +{ "DatasetName": "ColumnDataset4", "DatasetFormat": { "Format": "COLUMN", "MaxTupleCount": 10, "FreeSpaceTolerance": 0.11, "MaxLeafNodeSize": 5242880 } } { "DatasetName": "RowDataset", "DatasetFormat": { "Format": "ROW" } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml index 622d91f..14076d4 100644 --- 
a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml @@ -16413,6 +16413,11 @@ <output-dir compare="Text">analyze-dataset</output-dir> </compilation-unit> </test-case> + <test-case FilePath="column"> + <compilation-unit name="metadata"> + <output-dir compare="Text">metadata</output-dir> + </compilation-unit> + </test-case> </test-group> <test-group name="copy-from"> <test-case FilePath="copy-from"> diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/ColumnManagerFactory.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/ColumnManagerFactory.java index 359c3e1..cfd5143 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/ColumnManagerFactory.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/ColumnManagerFactory.java @@ -46,12 +46,14 @@ private final int pageSize; private final int maxTupleCount; private final double tolerance; + private final int maxLeafNodeSize; public ColumnManagerFactory(ARecordType datasetType, ARecordType metaType, List<List<String>> primaryKeys, - List<Integer> keySourceIndicator, int pageSize, int maxTupleCount, double tolerance) { + List<Integer> keySourceIndicator, int pageSize, int maxTupleCount, double tolerance, int maxLeafNodeSize) { this.pageSize = pageSize; this.maxTupleCount = maxTupleCount; this.tolerance = tolerance; + this.maxLeafNodeSize = maxLeafNodeSize; this.datasetType = datasetType; if (containsSplitKeys(keySourceIndicator)) { @@ -70,17 +72,17 @@ @Override public AbstractColumnTupleReaderWriterFactory getLoadColumnTupleReaderWriterFactory() { - return new LoadColumnTupleReaderWriterFactory(pageSize, maxTupleCount, tolerance); + return new LoadColumnTupleReaderWriterFactory(pageSize, maxTupleCount, tolerance, maxLeafNodeSize); } @Override public AbstractColumnTupleReaderWriterFactory getFlushColumnTupleReaderWriterFactory() { - return new 
FlushColumnTupleReaderWriterFactory(pageSize, maxTupleCount, tolerance); + return new FlushColumnTupleReaderWriterFactory(pageSize, maxTupleCount, tolerance, maxLeafNodeSize); } @Override public AbstractColumnTupleReaderWriterFactory createMergeColumnTupleReaderWriterFactory() { - return new MergeColumnTupleReaderWriterFactory(pageSize, maxTupleCount, tolerance); + return new MergeColumnTupleReaderWriterFactory(pageSize, maxTupleCount, tolerance, maxLeafNodeSize); } @Override @@ -94,6 +96,7 @@ json.put("pageSize", pageSize); json.put("maxTupleCount", maxTupleCount); json.put("tolerance", tolerance); + json.put("maxLeafNodeSize", maxLeafNodeSize); ArrayNode primaryKeysArray = json.putArray("primaryKeys"); for (List<String> primaryKey : primaryKeys) { @@ -121,7 +124,8 @@ int pageSize = json.get("pageSize").asInt(); int maxTupleCount = json.get("maxTupleCount").asInt(); - float tolerance = (float) json.get("tolerance").asDouble(); + double tolerance = json.get("tolerance").asDouble(); + int maxLeafNodeSize = json.get("maxLeafNodeSize").asInt(); List<List<String>> primaryKeys = new ArrayList<>(); ArrayNode primaryKeysNode = (ArrayNode) json.get("primaryKeys"); @@ -141,7 +145,7 @@ } return new ColumnManagerFactory(datasetType, metaType, primaryKeys, keySourceIndicator, pageSize, maxTupleCount, - tolerance); + tolerance, maxLeafNodeSize); } private static boolean containsSplitKeys(List<Integer> keySourceIndicator) { diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java index d1e439d..d40e00c 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java @@ -43,11 +43,26 @@ private final ObjectSchemaNode root; private 
AbstractSchemaNestedNode currentParent; private int primaryKeysLength; + /** + * Hack-alert! This tracks the total length of all strings (as they're not as encodable as numerics). + * The total length can be used by {@link FlushColumnTupleWriter} to stop writing tuples to the current mega + * leaf node to avoid having a single column that spans megabytes of pages. + */ + private int stringLengths; public ColumnTransformer(FlushColumnMetadata columnMetadata, ObjectSchemaNode root) { this.columnMetadata = columnMetadata; this.root = root; nonTaggedValue = new VoidPointable(); + stringLengths = 0; + } + + public int getStringLengths() { + return stringLengths; + } + + public void resetStringLengths() { + stringLengths = 0; } /** @@ -153,6 +168,8 @@ } if (node.isPrimaryKey()) { primaryKeysLength += writer.getEstimatedSize(); + } else if (node.getTypeTag() == ATypeTag.STRING) { + stringLengths += pointable.getLength(); } columnMetadata.exitNode(arg); return null; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleReaderWriterFactory.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleReaderWriterFactory.java index 7107333..ae3559d 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleReaderWriterFactory.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleReaderWriterFactory.java @@ -28,8 +28,9 @@ public class FlushColumnTupleReaderWriterFactory extends AbstractColumnTupleReaderWriterFactory { private static final long serialVersionUID = -9197679192729634493L; - public FlushColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance) { - super(pageSize, maxNumberOfTuples, tolerance); + public FlushColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance, + int maxLeafNodeSize) { + 
super(pageSize, maxNumberOfTuples, tolerance, maxLeafNodeSize); } @Override @@ -37,9 +38,11 @@ FlushColumnMetadata flushColumnMetadata = (FlushColumnMetadata) columnMetadata; if (flushColumnMetadata.getMetaType() == null) { //no meta - return new FlushColumnTupleWriter(flushColumnMetadata, pageSize, maxNumberOfTuples, tolerance); + return new FlushColumnTupleWriter(flushColumnMetadata, pageSize, maxNumberOfTuples, tolerance, + maxLeafNodeSize); } - return new FlushColumnTupleWithMetaWriter(flushColumnMetadata, pageSize, maxNumberOfTuples, tolerance); + return new FlushColumnTupleWithMetaWriter(flushColumnMetadata, pageSize, maxNumberOfTuples, tolerance, + maxLeafNodeSize); } @Override diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWithMetaWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWithMetaWriter.java index 9b2b7b8..b51b395 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWithMetaWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWithMetaWriter.java @@ -28,8 +28,8 @@ private final RecordLazyVisitablePointable metaPointable; public FlushColumnTupleWithMetaWriter(FlushColumnMetadata columnMetadata, int pageSize, int maxNumberOfTuples, - double tolerance) { - super(columnMetadata, pageSize, maxNumberOfTuples, tolerance); + double tolerance, int maxLeafNodeSize) { + super(columnMetadata, pageSize, maxNumberOfTuples, tolerance, maxLeafNodeSize); metaColumnTransformer = new ColumnTransformer(columnMetadata, columnMetadata.getMetaRoot()); metaPointable = new TypedRecordLazyVisitablePointable(columnMetadata.getMetaType()); } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java index c5c1753..41cad49 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java @@ -40,16 +40,18 @@ private final RecordLazyVisitablePointable pointable; private final int maxNumberOfTuples; private final IColumnValuesWriter[] primaryKeyWriters; + private final int maxLeafNodeSize; protected int primaryKeysEstimatedSize; public FlushColumnTupleWriter(FlushColumnMetadata columnMetadata, int pageSize, int maxNumberOfTuples, - double tolerance) { + double tolerance, int maxLeafNodeSize) { this.columnMetadata = columnMetadata; transformer = new ColumnTransformer(columnMetadata, columnMetadata.getRoot()); finalizer = new BatchFinalizerVisitor(columnMetadata); writer = new ColumnBatchWriter(columnMetadata.getMultiPageOpRef(), pageSize, tolerance); this.maxNumberOfTuples = maxNumberOfTuples; + this.maxLeafNodeSize = maxLeafNodeSize; pointable = new TypedRecordLazyVisitablePointable(columnMetadata.getDatasetType()); int numberOfPrimaryKeys = columnMetadata.getNumberOfPrimaryKeys(); @@ -87,8 +89,16 @@ return primaryKeysEstimatedSize + filterSize; } + /** + * TODO a better approach should be adopted + * + * @return the configured max number of tuples or zero if strings exceeded the maximum size + */ @Override public final int getMaxNumberOfTuples() { + if (transformer.getStringLengths() >= maxLeafNodeSize) { + return 0; + } return maxNumberOfTuples; } @@ -113,6 +123,7 @@ @Override public final int flush(ByteBuffer pageZero) throws HyracksDataException { writer.setPageZeroBuffer(pageZero, getNumberOfColumns(), columnMetadata.getNumberOfPrimaryKeys()); + transformer.resetStringLengths(); return finalizer.finalizeBatch(writer, columnMetadata); } diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleReaderWriterFactory.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleReaderWriterFactory.java index 5ac41fa..dec2ec3 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleReaderWriterFactory.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleReaderWriterFactory.java @@ -26,12 +26,14 @@ public class LoadColumnTupleReaderWriterFactory extends FlushColumnTupleReaderWriterFactory { private static final long serialVersionUID = -7583574057314353873L; - public LoadColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance) { - super(pageSize, maxNumberOfTuples, tolerance); + public LoadColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance, + int maxLeafNodeSize) { + super(pageSize, maxNumberOfTuples, tolerance, maxLeafNodeSize); } @Override public AbstractColumnTupleWriter createColumnWriter(IColumnMetadata columnMetadata) { - return new LoadColumnTupleWriter((FlushColumnMetadata) columnMetadata, pageSize, maxNumberOfTuples, tolerance); + return new LoadColumnTupleWriter((FlushColumnMetadata) columnMetadata, pageSize, maxNumberOfTuples, tolerance, + maxLeafNodeSize); } } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleWriter.java index 6b840df..e47b210 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/load/LoadColumnTupleWriter.java @@ -25,8 +25,8 @@ public class LoadColumnTupleWriter extends FlushColumnTupleWriter { 
public LoadColumnTupleWriter(FlushColumnMetadata columnMetadata, int pageSize, int maxNumberOfTuples, - double tolerance) { - super(columnMetadata, pageSize, maxNumberOfTuples, tolerance); + double tolerance, int maxLeafNodeSize) { + super(columnMetadata, pageSize, maxNumberOfTuples, tolerance, maxLeafNodeSize); } @Override diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleReaderWriterFactory.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleReaderWriterFactory.java index 3d89933..ae1c8d2 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleReaderWriterFactory.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleReaderWriterFactory.java @@ -28,14 +28,15 @@ public class MergeColumnTupleReaderWriterFactory extends AbstractColumnTupleReaderWriterFactory { private static final long serialVersionUID = -2131401304338796428L; - public MergeColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance) { - super(pageSize, maxNumberOfTuples, tolerance); + public MergeColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance, + int maxMegaLeafNodeSize) { + super(pageSize, maxNumberOfTuples, tolerance, maxMegaLeafNodeSize); } @Override public AbstractColumnTupleWriter createColumnWriter(IColumnMetadata columnMetadata) { MergeColumnWriteMetadata mergeWriteMetadata = (MergeColumnWriteMetadata) columnMetadata; - return new MergeColumnTupleWriter(mergeWriteMetadata, pageSize, maxNumberOfTuples, tolerance); + return new MergeColumnTupleWriter(mergeWriteMetadata, pageSize, maxNumberOfTuples, tolerance, maxLeafNodeSize); } @Override diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java index 0df4aca..d3c102a 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java @@ -40,6 +40,7 @@ public class MergeColumnTupleWriter extends AbstractColumnTupleWriter { private final MergeColumnWriteMetadata columnMetadata; + private final int maxLeafNodeSize; private final MergeColumnTupleReference[] componentsTuples; private final RunLengthIntArray writtenComponents; @@ -51,20 +52,24 @@ private int numberOfAntiMatter; public MergeColumnTupleWriter(MergeColumnWriteMetadata columnMetadata, int pageSize, int maxNumberOfTuples, - double tolerance) { + double tolerance, int maxLeafNodeSize) { this.columnMetadata = columnMetadata; + this.maxLeafNodeSize = maxLeafNodeSize; List<IColumnTupleIterator> componentsTuplesList = columnMetadata.getComponentsTuples(); this.componentsTuples = new MergeColumnTupleReference[componentsTuplesList.size()]; + int totalLength = 0; + int totalNumberOfTuples = 0; for (int i = 0; i < componentsTuplesList.size(); i++) { MergeColumnTupleReference mergeTuple = (MergeColumnTupleReference) componentsTuplesList.get(i); this.componentsTuples[i] = mergeTuple; mergeTuple.registerEndOfPageCallBack(this::writeAllColumns); + totalNumberOfTuples += mergeTuple.getTupleCount(); + totalLength += mergeTuple.getMergingLength(); } + this.maxNumberOfTuples = getMaxNumberOfTuples(maxNumberOfTuples, totalNumberOfTuples, totalLength); this.writtenComponents = new RunLengthIntArray(); - this.maxNumberOfTuples = maxNumberOfTuples; writer = new ColumnBatchWriter(columnMetadata.getMultiPageOpRef(), pageSize, tolerance); writtenComponents.reset(); - primaryKeyWriters = new IColumnValuesWriter[columnMetadata.getNumberOfPrimaryKeys()]; for (int i = 0; i < 
primaryKeyWriters.length; i++) { primaryKeyWriters[i] = columnMetadata.getWriter(i); @@ -108,7 +113,6 @@ @Override public void writeTuple(ITupleReference tuple) throws HyracksDataException { MergeColumnTupleReference columnTuple = (MergeColumnTupleReference) tuple; - // +1 to avoid having -0, where the '-' is an antimatter indicator int componentIndex = columnTuple.getComponentIndex(); int skipCount = columnTuple.getAndResetSkipCount(); if (skipCount > 0) { @@ -226,4 +230,13 @@ private static int clearAntimatterIndicator(int componentIndex) { return -componentIndex - 1; } + + private int getMaxNumberOfTuples(int maxNumberOfTuples, int totalNumberOfTuples, int totalLength) { + int numberOfTuplesUsingMaxSize = Integer.MAX_VALUE; + if (totalLength > maxLeafNodeSize && totalNumberOfTuples > 0) { + int bytesPerTuple = (int) Math.ceil(totalLength / (double) totalNumberOfTuples); + numberOfTuplesUsingMaxSize = maxLeafNodeSize / bytesPerTuple; + } + return Math.min(maxNumberOfTuples, numberOfTuplesUsingMaxSize); + } } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/tuple/MergeColumnTupleReference.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/tuple/MergeColumnTupleReference.java index 7657009..33126e8 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/tuple/MergeColumnTupleReference.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/tuple/MergeColumnTupleReference.java @@ -38,6 +38,7 @@ private final IColumnValuesReader[] columnReaders; private int skipCount; private IEndOfPageCallBack endOfPageCallBack; + private int mergingLength; public MergeColumnTupleReference(int componentIndex, ColumnBTreeReadLeafFrame frame, MergeColumnReadMetadata columnMetadata, IColumnReadMultiPageOp multiPageOp) { @@ -64,6 +65,7 @@ pageZero.position(pageZero.position() + numberOfColumns * AbstractColumnFilterWriter.FILTER_SIZE); // skip count is always start from zero as no "search" is 
conducted during a merge this.skipCount = 0; + mergingLength = 0; return true; } @@ -79,6 +81,7 @@ columnStream.reset(buffersProvider); IColumnValuesReader reader = columnReaders[ordinal]; reader.reset(columnStream, numberOfTuples); + mergingLength += buffersProvider.getLength(); } @Override @@ -116,6 +119,10 @@ this.endOfPageCallBack = endOfPageCallBack; } + public int getMergingLength() { + return mergingLength; + } + private static IEndOfPageCallBack createNoOpCallBack() { return columnTuple -> { if (!columnTuple.isEmpty()) { @@ -124,5 +131,4 @@ } }; } - } diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java index 36ebab9..6b52eb7 100644 --- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java +++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java @@ -62,6 +62,7 @@ import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter; import org.apache.hyracks.storage.am.lsm.btree.column.api.AbstractColumnTupleWriter; import org.apache.hyracks.storage.am.lsm.btree.column.api.IColumnWriteMultiPageOp; +import org.apache.hyracks.util.StorageUtil; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -83,6 +84,10 @@ * Cap the maximum number of tuples stored per AMAX page */ public static final int MAX_NUMBER_OF_TUPLES = 100; + /** + * Max size of the mega leaf node + */ + public static final int MAX_LEAF_NODE_SIZE = StorageUtil.getIntSizeInBytes(512, StorageUtil.StorageUnit.KILOBYTE); /* *************************************** * Test static instances @@ -145,8 +150,8 @@ protected List<DummyPage> transform(int fileId, FlushColumnMetadata columnMetadata, List<IValueReference> records, int numberOfTuplesToWrite) throws IOException { IColumnWriteMultiPageOp multiPageOp = 
columnMetadata.getMultiPageOpRef().getValue(); - FlushColumnTupleWriter writer = - new FlushColumnTupleWriter(columnMetadata, PAGE_SIZE, MAX_NUMBER_OF_TUPLES, TOLERANCE); + FlushColumnTupleWriter writer = new FlushColumnTupleWriter(columnMetadata, PAGE_SIZE, MAX_NUMBER_OF_TUPLES, + TOLERANCE, MAX_LEAF_NODE_SIZE); try { return writeTuples(fileId, writer, records, numberOfTuplesToWrite, multiPageOp); diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java index 2e22a30..a1ca571 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java @@ -66,6 +66,7 @@ STORAGE_GLOBAL_CLEANUP_TIMEOUT(POSITIVE_INTEGER, (int) TimeUnit.MINUTES.toSeconds(10)), STORAGE_COLUMN_MAX_TUPLE_COUNT(NONNEGATIVE_INTEGER, 15000), STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15d), + STORAGE_COLUMN_MAX_LEAF_NODE_SIZE(INTEGER_BYTE_UNIT, StorageUtil.getIntSizeInBytes(10, MEGABYTE)), STORAGE_FORMAT(STRING, "row"), STORAGE_PARTITIONING(STRING, "dynamic"), STORAGE_PARTITIONS_COUNT(INTEGER, 8); @@ -91,6 +92,7 @@ case STORAGE_FORMAT: case STORAGE_COLUMN_MAX_TUPLE_COUNT: case STORAGE_COLUMN_FREE_SPACE_TOLERANCE: + case STORAGE_COLUMN_MAX_LEAF_NODE_SIZE: return Section.COMMON; default: return Section.NC; @@ -147,6 +149,8 @@ case STORAGE_COLUMN_FREE_SPACE_TOLERANCE: return "The percentage of the maximum tolerable empty space for a physical mega leaf page (e.g.," + " 0.15 means a physical page with 15% or less empty space is tolerable)"; + case STORAGE_COLUMN_MAX_LEAF_NODE_SIZE: + return "The maximum mega leaf node to write during flush and merge operations (default: 10MB)"; case STORAGE_FORMAT: return "The default storage format (either row or column)"; case STORAGE_PARTITIONING: @@ -300,6 +304,10 @@ return 
accessor.getDouble(Option.STORAGE_COLUMN_FREE_SPACE_TOLERANCE); } + public int getColumnMaxLeafNodeSize() { + return accessor.getInt(Option.STORAGE_COLUMN_MAX_LEAF_NODE_SIZE); + } + public String getStorageFormat() { return accessor.getString(Option.STORAGE_FORMAT); } diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java index e4afd6b..a8d3113 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java @@ -37,6 +37,7 @@ import org.apache.asterix.object.base.AdmObjectNode; import org.apache.asterix.object.base.IAdmNode; import org.apache.asterix.runtime.compression.CompressionManager; +import org.apache.hyracks.util.StorageUtil; public class DatasetDecl extends AbstractStatement { @@ -153,7 +154,7 @@ } public DatasetFormatInfo getDatasetFormatInfo(String defaultFormat, int defaultMaxTupleCount, - double defaultFreeSpaceTolerance) { + double defaultFreeSpaceTolerance, int defaultMaxLeafNodeSize) { if (datasetType != DatasetType.INTERNAL) { return DatasetFormatInfo.SYSTEM_DEFAULT; } @@ -172,8 +173,12 @@ double freeSpaceTolerance = datasetFormatNode.getOptionalDouble( DatasetDeclParametersUtil.DATASET_FORMAT_FREE_SPACE_TOLERANCE_PARAMETER_NAME, defaultFreeSpaceTolerance); + String maxLeafNodeSizeString = + datasetFormatNode.getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FREE_MAX_LEAF_NODE_SIZE); + int maxLeafNodeSize = maxLeafNodeSizeString == null ? 
defaultMaxLeafNodeSize + : (int) StorageUtil.getByteValue(maxLeafNodeSizeString); - return new DatasetFormatInfo(datasetFormat, maxTupleCount, freeSpaceTolerance); + return new DatasetFormatInfo(datasetFormat, maxTupleCount, freeSpaceTolerance, maxLeafNodeSize); } public Map<String, String> getHints() { diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/DatasetDeclParametersUtil.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/DatasetDeclParametersUtil.java index b884a80..7bb6f11 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/DatasetDeclParametersUtil.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/DatasetDeclParametersUtil.java @@ -71,6 +71,7 @@ public static final String DATASET_FORMAT_FORMAT_PARAMETER_NAME = "format"; public static final String DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME = "max-tuple-count"; public static final String DATASET_FORMAT_FREE_SPACE_TOLERANCE_PARAMETER_NAME = "free-space-tolerance"; + public static final String DATASET_FORMAT_FREE_MAX_LEAF_NODE_SIZE = "max-leaf-node-size"; /* *********************************************** * Private members @@ -179,10 +180,12 @@ } private static ARecordType getDatasetFormatType() { - final String[] formatFieldNames = { DATASET_FORMAT_FORMAT_PARAMETER_NAME, - DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME, DATASET_FORMAT_FREE_SPACE_TOLERANCE_PARAMETER_NAME }; + final String[] formatFieldNames = + { DATASET_FORMAT_FORMAT_PARAMETER_NAME, DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME, + DATASET_FORMAT_FREE_SPACE_TOLERANCE_PARAMETER_NAME, DATASET_FORMAT_FREE_MAX_LEAF_NODE_SIZE }; final IAType[] formatFieldTypes = { BuiltinType.ASTRING, AUnionType.createUnknownableType(BuiltinType.AINT64), - AUnionType.createUnknownableType(BuiltinType.ADOUBLE) }; + AUnionType.createUnknownableType(BuiltinType.ADOUBLE), + 
AUnionType.createUnknownableType(BuiltinType.ASTRING) }; return new ARecordType(DATASET_FORMAT_PARAMETER_NAME, formatFieldNames, formatFieldTypes, false); } } diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataRecordTypes.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataRecordTypes.java index cecf3d3..c35c03a 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataRecordTypes.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataRecordTypes.java @@ -188,6 +188,7 @@ public static final String DATASET_ARECORD_DATASET_FORMAT_FORMAT_FIELD_NAME = "Format"; public static final String DATASET_ARECORD_DATASET_MAX_TUPLE_COUNT_FIELD_NAME = "MaxTupleCount"; public static final String DATASET_ARECORD_DATASET_FREE_SPACE_TOLERANCE_FIELD_NAME = "FreeSpaceTolerance"; + public static final String DATASET_ARECORD_DATASET_MAX_LEAF_NODE_SIZE_FIELD_NAME = "MaxLeafNodeSize"; //------------------------------------------ Field ------------------------------------------// public static final int FIELD_ARECORD_FIELDNAME_FIELD_INDEX = 0; diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java index 23b90cd..4e1deb9 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java @@ -32,15 +32,17 @@ private final DatasetFormat format; private final int maxTupleCount; private final double freeSpaceTolerance; + private final int maxLeafNodeSize; private DatasetFormatInfo() { - this(DatasetFormat.ROW, -1, 0.0d); + this(DatasetFormat.ROW, -1, 0.0d, 0); } - public DatasetFormatInfo(DatasetFormat format, int 
maxTupleCount, double freeSpaceTolerance) { + public DatasetFormatInfo(DatasetFormat format, int maxTupleCount, double freeSpaceTolerance, int maxLeafNodeSize) { this.format = format; this.maxTupleCount = maxTupleCount; this.freeSpaceTolerance = freeSpaceTolerance; + this.maxLeafNodeSize = maxLeafNodeSize; } public DatasetFormat getFormat() { @@ -55,6 +57,10 @@ return freeSpaceTolerance; } + public int getMaxLeafNodeSize() { + return maxLeafNodeSize; + } + @Override public String toString() { return "(format:" + format + ", maxTupleCount:" + maxTupleCount + ')'; diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/BTreeResourceFactoryProvider.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/BTreeResourceFactoryProvider.java index ab4b585..e3c15db 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/BTreeResourceFactoryProvider.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/BTreeResourceFactoryProvider.java @@ -121,7 +121,8 @@ IColumnManagerFactory columnManagerFactory = new ColumnManagerFactory(recordType, metaType, dataset.getPrimaryKeys(), keySourceIndicator, mdProvider.getStorageProperties().getBufferCachePageSize(), - datasetFormatInfo.getMaxTupleCount(), datasetFormatInfo.getFreeSpaceTolerance()); + datasetFormatInfo.getMaxTupleCount(), datasetFormatInfo.getFreeSpaceTolerance(), + datasetFormatInfo.getMaxLeafNodeSize()); return new LSMColumnBTreeLocalResourceFactory(storageManager, typeTraits, cmpFactories, filterTypeTraits, filterCmpFactories, filterFields, opTrackerFactory, ioOpCallbackFactory, pageWriteCallbackFactory, metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java index 83c0d5b..7613dd3 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java @@ -500,8 +500,14 @@ ADouble freeSpaceToleranceDouble = (ADouble) datasetFormatRecord.getValueByPos(freeSpaceToleranceIndex); double freeSpaceTolerance = freeSpaceToleranceDouble.getDoubleValue(); + // MaxLeafNodeSize + int maxLeafNodeSizeIndex = datasetFormatType + .getFieldIndex(MetadataRecordTypes.DATASET_ARECORD_DATASET_MAX_LEAF_NODE_SIZE_FIELD_NAME); + AInt64 maxLeafNodeSizeInt = (AInt64) datasetFormatRecord.getValueByPos(maxLeafNodeSizeIndex); + int maxLeafNodeSize = (int) maxLeafNodeSizeInt.getLongValue(); + // Columnar - return new DatasetFormatInfo(format, maxTupleCount, freeSpaceTolerance); + return new DatasetFormatInfo(format, maxTupleCount, freeSpaceTolerance, maxLeafNodeSize); } @Override @@ -772,6 +778,7 @@ // Columnar settings if (info.getFormat() == DatasetConfig.DatasetFormat.COLUMN) { + // Max tuple count fieldName.reset(); aString.setValue(MetadataRecordTypes.DATASET_ARECORD_DATASET_MAX_TUPLE_COUNT_FIELD_NAME); stringSerde.serialize(aString, fieldName.getDataOutput()); @@ -780,6 +787,7 @@ int64Serde.serialize(aInt64, fieldValue.getDataOutput()); datasetFormatObject.addField(fieldName, fieldValue); + // free space tolerance fieldName.reset(); aString.setValue(MetadataRecordTypes.DATASET_ARECORD_DATASET_FREE_SPACE_TOLERANCE_FIELD_NAME); stringSerde.serialize(aString, fieldName.getDataOutput()); @@ -787,6 +795,16 @@ aDouble.setValue(info.getFreeSpaceTolerance()); doubleSerde.serialize(aDouble, fieldValue.getDataOutput()); datasetFormatObject.addField(fieldName, fieldValue); + + // max leaf node size + fieldName.reset(); +
aString.setValue(MetadataRecordTypes.DATASET_ARECORD_DATASET_MAX_LEAF_NODE_SIZE_FIELD_NAME); + stringSerde.serialize(aString, fieldName.getDataOutput()); + fieldValue.reset(); + aInt64.setValue(info.getMaxLeafNodeSize()); + int64Serde.serialize(aInt64, fieldValue.getDataOutput()); + datasetFormatObject.addField(fieldName, fieldValue); + } fieldName.reset(); diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleReaderWriterFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleReaderWriterFactory.java index 79d902c..6ba04ba 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleReaderWriterFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleReaderWriterFactory.java @@ -31,6 +31,7 @@ protected final int pageSize; protected final int maxNumberOfTuples; protected final double tolerance; + protected final int maxLeafNodeSize; /** * Tuple reader/writer factory @@ -38,11 +39,14 @@ * @param pageSize {@link IBufferCache} page size * @param maxNumberOfTuples maximum number of tuples stored per a mega leaf page * @param tolerance percentage of tolerated empty space + * @param maxLeafNodeSize the maximum size a mega leaf node can occupy */ - protected AbstractColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance) { + protected AbstractColumnTupleReaderWriterFactory(int pageSize, int maxNumberOfTuples, double tolerance, + int maxLeafNodeSize) { this.pageSize = pageSize; this.maxNumberOfTuples = maxNumberOfTuples; this.tolerance = tolerance; + this.maxLeafNodeSize = maxLeafNodeSize; } /** diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java index 3e72584..51e8c09 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java @@ -56,6 +56,7 @@ private int numberOfPagesInCurrentLeafNode; private int maxNumberOfPagesForAColumn; private int maxNumberOfPagesInALeafNode; + private int maxTupleCount; public ColumnBTreeBulkloader(float fillFactor, boolean verifyInput, IPageWriteCallback callback, ITreeIndex index, ITreeIndexFrame leafFrame) throws HyracksDataException { @@ -74,6 +75,7 @@ maxNumberOfPagesForAColumn = 0; maxNumberOfPagesInALeafNode = 0; numberOfLeafNodes = 1; + maxTupleCount = 0; } @Override @@ -140,7 +142,7 @@ //Where Page0 and columns pages will be written super.end(); - log("Finished"); + log("Finished", numberOfTempConfiscatedPages); } @Override @@ -172,16 +174,18 @@ write(c); } + // For logging + maxNumberOfPagesInALeafNode = Math.max(maxNumberOfPagesInALeafNode, numberOfPagesInCurrentLeafNode); + maxTupleCount = Math.max(maxTupleCount, tupleCount); + // Starts with 1 for page0 + numberOfPagesInCurrentLeafNode = 1; + numberOfLeafNodes++; + + // Clear for next page pagesToWrite.clear(); splitKey.setRightPage(leafFrontier.pageId); setLowKey = true; tupleCount = 0; - - // For logging - maxNumberOfPagesInALeafNode = Math.max(maxNumberOfPagesInALeafNode, numberOfPagesInCurrentLeafNode); - // Starts with 1 for page0 - numberOfPagesInCurrentLeafNode = 1; - numberOfLeafNodes++; } @Override @@ -219,7 
+223,7 @@ super.abort(); // For logging - log("Aborted"); + log("Aborted", tempConfiscatedPages.size()); } private void setSplitKey(ISplitKey splitKey, ITupleReference tuple) { @@ -228,16 +232,15 @@ tupleWriter.writeTupleFields(tuple, 0, cmp.getKeyFieldCount(), splitKey.getBuffer().array(), 0); } - private void log(String status) { + private void log(String status, int numberOfTempConfiscatedPages) { if (!LOGGER.isDebugEnabled()) { return; } - int numberOfTempConfiscatedPages = tempConfiscatedPages.size(); LOGGER.debug( - "{} columnar bulkloader used leafNodes: {}, tempPagesAllocated: {}, maxPagesPerColumn: {}, and maxLeafNodePages: {}", - status, numberOfLeafNodes, numberOfTempConfiscatedPages, maxNumberOfPagesForAColumn, - maxNumberOfPagesInALeafNode); + "{} columnar bulkloader wrote maximum {} and last {} and used leafNodes: {}, tempPagesAllocated: {}, maxPagesPerColumn: {}, and maxLeafNodePages: {}", + status, maxTupleCount, tupleCount, numberOfLeafNodes, numberOfTempConfiscatedPages, + maxNumberOfPagesForAColumn, maxNumberOfPagesInALeafNode); } /* diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/AbstractColumnTupleReference.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/AbstractColumnTupleReference.java index 3923025..99dabfc 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/AbstractColumnTupleReference.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/AbstractColumnTupleReference.java @@ -209,7 +209,7 @@ protected abstract void onNext() throws HyracksDataException; - protected final int getTupleCount() { + public final int getTupleCount() { return 
frame.getTupleCount(); } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17975 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I43965026044543698d73b2f4ad5f8b68b7d3fc99 Gerrit-Change-Number: 17975 Gerrit-PatchSet: 1 Gerrit-Owner: Wail Alkowaileet <wael....@gmail.com> Gerrit-MessageType: newchange