This is an automated email from the ASF dual-hosted git repository.
singhpk234 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 23dc32e5f1 REST: Implement Batch Scan for RESTTableScan (#14776)
23dc32e5f1 is described below
commit 23dc32e5f1130c9d79d71cbf0be5b85162d8976c
Author: Prashant Singh <[email protected]>
AuthorDate: Wed Dec 10 23:20:46 2025 -0800
REST: Implement Batch Scan for RESTTableScan (#14776)
Co-authored-by: Prashant Singh <[email protected]>
---
.../java/org/apache/iceberg/BatchScanAdapter.java | 4 +--
.../java/org/apache/iceberg/rest/RESTTable.java | 7 +++++
.../apache/iceberg/rest/TestRESTScanPlanning.java | 34 +++++++++++++++++-----
3 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/BatchScanAdapter.java b/api/src/main/java/org/apache/iceberg/BatchScanAdapter.java
index d8c5dda885..f723e04d0c 100644
--- a/api/src/main/java/org/apache/iceberg/BatchScanAdapter.java
+++ b/api/src/main/java/org/apache/iceberg/BatchScanAdapter.java
@@ -25,11 +25,11 @@ import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.metrics.MetricsReporter;
/** An adapter that allows using {@link TableScan} as {@link BatchScan}. */
-class BatchScanAdapter implements BatchScan {
+public class BatchScanAdapter implements BatchScan {
private final TableScan scan;
- BatchScanAdapter(TableScan scan) {
+ public BatchScanAdapter(TableScan scan) {
this.scan = scan;
}
diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTTable.java b/core/src/main/java/org/apache/iceberg/rest/RESTTable.java
index 5565122aaa..052e143270 100644
--- a/core/src/main/java/org/apache/iceberg/rest/RESTTable.java
+++ b/core/src/main/java/org/apache/iceberg/rest/RESTTable.java
@@ -22,6 +22,8 @@ import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.iceberg.BaseTable;
+import org.apache.iceberg.BatchScan;
+import org.apache.iceberg.BatchScanAdapter;
import org.apache.iceberg.ImmutableTableScanContext;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableScan;
@@ -67,4 +69,9 @@ class RESTTable extends BaseTable {
resourcePaths,
supportedEndpoints);
}
+
+ @Override
+ public BatchScan newBatchScan() {
+ return new BatchScanAdapter(newScan());
+ }
}
diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java
index ba13d1e3c1..f84197b0f1 100644
--- a/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java
+++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTScanPlanning.java
@@ -30,7 +30,6 @@ import static org.apache.iceberg.TestBase.SCHEMA;
import static org.apache.iceberg.TestBase.SPEC;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import static org.assertj.core.api.Assumptions.assumeThat;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -58,6 +57,7 @@ import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Scan;
+import org.apache.iceberg.ScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.catalog.Namespace;
@@ -348,6 +348,25 @@ public class TestRESTScanPlanning {
}
}
+ @ParameterizedTest
+ @EnumSource(PlanningMode.class)
+ void scanPlanningWithBatchScan(
+ Function<TestPlanningBehavior.Builder, TestPlanningBehavior.Builder> planMode)
+ throws IOException {
+ configurePlanningBehavior(planMode);
+ Table table = restTableFor(scanPlanningCatalog(), "batch_scan_table");
+ setParserContext(table);
+
+ // Verify actual data file is returned with correct count
+ try (CloseableIterable<ScanTask> iterable = table.newBatchScan().planFiles()) {
+ List<ScanTask> tasks = Lists.newArrayList(iterable);
+
+ assertThat(tasks).hasSize(1);
+ assertThat(tasks.get(0).asFileScanTask().file().location()).isEqualTo(FILE_A.location());
+ assertThat(tasks.get(0).asFileScanTask().deletes()).isEmpty();
+ }
+ }
+
@Test
public void nestedPlanTaskPagination() throws IOException {
// Configure: synchronous planning with very small pages (creates nested plan task structure)
@@ -409,11 +428,7 @@ public class TestRESTScanPlanning {
@ParameterizedTest
@EnumSource(MetadataTableType.class)
- public void metadataTablesWithRemotePlanning(MetadataTableType type) throws IOException {
- assumeThat(type)
- .as("POSITION_DELETES table does not implement newScan() method")
- .isNotEqualTo(MetadataTableType.POSITION_DELETES);
-
+ public void metadataTablesWithRemotePlanning(MetadataTableType type) {
configurePlanningBehavior(TestPlanningBehavior.Builder::synchronous);
RESTTable table = restTableFor(scanPlanningCatalog(), "metadata_tables_test");
table.newAppend().appendFile(FILE_B).commit();
@@ -424,7 +439,12 @@ public class TestRESTScanPlanning {
// tasks, this test just verifies that metadata tables can be scanned with RESTTable.
Table metadataTableInstance = MetadataTableUtils.createMetadataTableInstance(table, type);
assertThat(metadataTableInstance).isNotNull();
- assertThat(metadataTableInstance.newScan().planFiles()).isNotEmpty();
+ if (type.equals(MetadataTableType.POSITION_DELETES)) {
+ // Position deletes table only uses batch scan
+ assertThat(metadataTableInstance.newBatchScan().planFiles()).isNotEmpty();
+ } else {
+ assertThat(metadataTableInstance.newScan().planFiles()).isNotEmpty();
+ }
}
@ParameterizedTest