This is an automated email from the ASF dual-hosted git repository. russellspitzer pushed a commit to branch ValidateSplitPlanning in repository https://gitbox.apache.org/repos/asf/iceberg.git
commit b1822bad6d90641f88498bd591d2765ebd099401 Author: Russell_Spitzer <[email protected]> AuthorDate: Tue Jun 29 15:37:33 2021 -0500 Core: Validate planTasks and splitFiles in TableScanUtil Previously these values were not validated which could lead to a huge runtime issues or OOMs. To prevent errors we validate the parameters for these methods and prevent these issues from occuring. --- .../org/apache/iceberg/util/TableScanUtil.java | 7 +++++ .../java/org/apache/iceberg/TestSplitPlanning.java | 33 ++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java b/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java index a9c60df..4a1ad5a 100644 --- a/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/TableScanUtil.java @@ -24,6 +24,7 @@ import org.apache.iceberg.BaseCombinedScanTask; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.FluentIterable; public class TableScanUtil { @@ -40,6 +41,8 @@ public class TableScanUtil { } public static CloseableIterable<FileScanTask> splitFiles(CloseableIterable<FileScanTask> tasks, long splitSize) { + Preconditions.checkArgument(splitSize > 0, "Invalid split size (negative or 0): %s", splitSize); + Iterable<FileScanTask> splitTasks = FluentIterable .from(tasks) .transformAndConcat(input -> input.split(splitSize)); @@ -49,6 +52,10 @@ public class TableScanUtil { public static CloseableIterable<CombinedScanTask> planTasks(CloseableIterable<FileScanTask> splitFiles, long splitSize, int lookback, long openFileCost) { + Preconditions.checkArgument(splitSize > 0, "Invalid split size (negative or 0): %s", splitSize); + Preconditions.checkArgument(lookback > 0, "Invalid split planning lookback (negative or 0): %s", lookback); + Preconditions.checkArgument(openFileCost >= 0, "Invalid file open cost (negative): %s", openFileCost); + Function<FileScanTask, Long> weightFunc = file -> Math.max(file.length(), openFileCost); return CloseableIterable.transform( diff --git a/core/src/test/java/org/apache/iceberg/TestSplitPlanning.java b/core/src/test/java/org/apache/iceberg/TestSplitPlanning.java index c5b9ac8..f211ac9 100644 --- a/core/src/test/java/org/apache/iceberg/TestSplitPlanning.java +++ b/core/src/test/java/org/apache/iceberg/TestSplitPlanning.java @@ -173,6 +173,39 @@ public class TestSplitPlanning extends TableTestBase { Assert.assertEquals(4, Iterables.size(scan.planTasks())); } + @Test + public void testSplitPlanningWithNegativeValues() { + AssertHelpers.assertThrows( + "User provided split size should be validated", + IllegalArgumentException.class, + "Invalid split size (negative or 0): -10", + () -> { + table.newScan() + .option(TableProperties.SPLIT_SIZE, String.valueOf(-10)) + .planTasks(); + }); + + AssertHelpers.assertThrows( + "User provided split planning lookback should be validated", + IllegalArgumentException.class, + "Invalid split planning lookback (negative or 0): -10", + () -> { + table.newScan() + .option(TableProperties.SPLIT_LOOKBACK, String.valueOf(-10)) + .planTasks(); + }); + + AssertHelpers.assertThrows( + "User provided split open file cost should be validated", + IllegalArgumentException.class, + "Invalid file open cost (negative): -10", + () -> { + table.newScan() + .option(TableProperties.SPLIT_OPEN_FILE_COST, String.valueOf(-10)) + .planTasks(); + }); + } + private void appendFiles(Iterable<DataFile> files) { AppendFiles appendFiles = table.newAppend(); files.forEach(appendFiles::appendFile);
