This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 4fdec90a85 API, Core: Add scan planning metrics for scanned/skipped
delete manifests (#5792)
4fdec90a85 is described below
commit 4fdec90a852e417b10d9707f07b21963e4f01dff
Author: Eduard Tudenhöfner <[email protected]>
AuthorDate: Thu Sep 29 23:19:02 2022 +0200
API, Core: Add scan planning metrics for scanned/skipped delete manifests
(#5792)
---
.../java/org/apache/iceberg/metrics/ScanReport.java | 20 ++++++++++++++++++++
.../java/org/apache/iceberg/DeleteFileIndex.java | 13 +++++++++----
.../iceberg/metrics/ScanMetricsResultParser.java | 15 +++++++++++++++
.../iceberg/TestScanPlanningAndReporting.java | 21 +++++++++++++++++++--
.../metrics/TestScanMetricsResultParser.java | 14 ++++++++++++++
.../iceberg/metrics/TestScanReportParser.java | 14 ++++++++++++++
6 files changed, 91 insertions(+), 6 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
b/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
index 304821199d..631306b681 100644
--- a/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
+++ b/api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
@@ -131,6 +131,12 @@ public interface ScanReport {
@Nullable
CounterResult skippedDeleteFiles();
+ @Nullable
+ CounterResult scannedDeleteManifests();
+
+ @Nullable
+ CounterResult skippedDeleteManifests();
+
static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics:
null");
return ImmutableScanMetricsResult.builder()
@@ -146,6 +152,8 @@ public interface ScanReport {
CounterResult.fromCounter(scanMetrics.totalDeleteFileSizeInBytes()))
.skippedDataFiles(CounterResult.fromCounter(scanMetrics.skippedDataFiles()))
.skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
+
.scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
+
.skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
.build();
}
}
@@ -157,11 +165,13 @@ public interface ScanReport {
public static final String RESULT_DATA_FILES = "result-data-files";
public static final String RESULT_DELETE_FILES = "result-delete-files";
public static final String SCANNED_DATA_MANIFESTS =
"scanned-data-manifests";
+ public static final String SCANNED_DELETE_MANIFESTS =
"scanned-delete-manifests";
public static final String TOTAL_DATA_MANIFESTS = "total-data-manifests";
public static final String TOTAL_DELETE_MANIFESTS =
"total-delete-manifests";
public static final String TOTAL_FILE_SIZE_IN_BYTES =
"total-file-size-in-bytes";
public static final String TOTAL_DELETE_FILE_SIZE_IN_BYTES =
"total-delete-file-size-in-bytes";
public static final String SKIPPED_DATA_MANIFESTS =
"skipped-data-manifests";
+ public static final String SKIPPED_DELETE_MANIFESTS =
"skipped-delete-manifests";
public static final String SKIPPED_DATA_FILES = "skipped-data-files";
public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";
@@ -226,6 +236,16 @@ public interface ScanReport {
return metricsContext().counter(SKIPPED_DELETE_FILES,
MetricsContext.Unit.COUNT);
}
+ @Value.Derived
+ public Counter scannedDeleteManifests() {
+ return metricsContext().counter(SCANNED_DELETE_MANIFESTS,
MetricsContext.Unit.COUNT);
+ }
+
+ @Value.Derived
+ public Counter skippedDeleteManifests() {
+ return metricsContext().counter(SKIPPED_DELETE_MANIFESTS,
MetricsContext.Unit.COUNT);
+ }
+
public static ScanMetrics of(MetricsContext metricsContext) {
return
ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
}
diff --git a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
index 0746ea26d0..6871feaa8a 100644
--- a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
+++ b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
@@ -529,16 +529,21 @@ class DeleteFileIndex {
caseSensitive);
});
- Iterable<ManifestFile> matchingManifests =
+ CloseableIterable<ManifestFile> closeableDeleteManifests =
+ CloseableIterable.withNoopClose(deleteManifests);
+ CloseableIterable<ManifestFile> matchingManifests =
evalCache == null
- ? deleteManifests
- : Iterables.filter(
- deleteManifests,
+ ? closeableDeleteManifests
+ : CloseableIterable.filter(
+ scanMetrics.skippedDeleteManifests(),
+ closeableDeleteManifests,
manifest ->
manifest.content() == ManifestContent.DELETES
&& (manifest.hasAddedFiles() ||
manifest.hasExistingFiles())
&&
evalCache.get(manifest.partitionSpecId()).eval(manifest));
+ matchingManifests =
+ CloseableIterable.count(scanMetrics.scannedDeleteManifests(),
matchingManifests);
return Iterables.transform(
matchingManifests,
manifest ->
diff --git
a/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
b/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
index 470de5c714..7ff12b7e24 100644
--- a/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
+++ b/core/src/main/java/org/apache/iceberg/metrics/ScanMetricsResultParser.java
@@ -37,6 +37,7 @@ class ScanMetricsResultParser {
return JsonUtil.generate(gen -> toJson(metrics, gen), pretty);
}
+ @SuppressWarnings("checkstyle:CyclomaticComplexity")
static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws
IOException {
Preconditions.checkArgument(null != metrics, "Invalid scan metrics: null");
@@ -97,6 +98,16 @@ class ScanMetricsResultParser {
CounterResultParser.toJson(metrics.skippedDeleteFiles(), gen);
}
+ if (null != metrics.scannedDeleteManifests()) {
+ gen.writeFieldName(ScanMetrics.SCANNED_DELETE_MANIFESTS);
+ CounterResultParser.toJson(metrics.scannedDeleteManifests(), gen);
+ }
+
+ if (null != metrics.skippedDeleteManifests()) {
+ gen.writeFieldName(ScanMetrics.SKIPPED_DELETE_MANIFESTS);
+ CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
+ }
+
gen.writeEndObject();
}
@@ -127,6 +138,10 @@ class ScanMetricsResultParser {
CounterResultParser.fromJson(ScanMetrics.TOTAL_DELETE_FILE_SIZE_IN_BYTES, json))
.skippedDataFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DATA_FILES,
json))
.skippedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_FILES,
json))
+ .scannedDeleteManifests(
+ CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS,
json))
+ .skippedDeleteManifests(
+ CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS,
json))
.build();
}
}
diff --git
a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
index eefe7fc128..3ee1b35eef 100644
--- a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
+++ b/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java
@@ -67,11 +67,15 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(2);
+ assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
+ assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
+ assertThat(result.skippedDataFiles().value()).isEqualTo(0);
+ assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
// we should hit only a single data manifest and only a single data file
try (CloseableIterable<FileScanTask> fileScanTasks =
@@ -88,11 +92,15 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
+ assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
+ assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
+ assertThat(result.skippedDataFiles().value()).isEqualTo(0);
+ assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}
@Test
@@ -124,11 +132,15 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(2);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
+ assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
+ assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
+ assertThat(result.skippedDataFiles().value()).isEqualTo(0);
+ assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}
@Test
@@ -157,7 +169,9 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
+ assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
+ assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
@@ -172,6 +186,7 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(),
formatVersion, reporter);
table.newAppend().appendFile(FILE_A).appendFile(FILE_D).commit();
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_D2_DELETES).commit();
+
table.newRowDelta().addDeletes(FILE_B_DELETES).addDeletes(FILE_C2_DELETES).commit();
TableScan tableScan = table.newScan();
try (CloseableIterable<FileScanTask> fileScanTasks =
@@ -182,7 +197,7 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
ScanReport scanReport = reporter.lastReport();
assertThat(scanReport).isNotNull();
assertThat(scanReport.tableName()).isEqualTo(tableName);
- assertThat(scanReport.snapshotId()).isEqualTo(2L);
+ assertThat(scanReport.snapshotId()).isEqualTo(3L);
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(result.totalPlanningDuration().totalDuration()).isGreaterThan(Duration.ZERO);
assertThat(result.resultDataFiles().value()).isEqualTo(1);
@@ -190,9 +205,11 @@ public class TestScanPlanningAndReporting extends
TableTestBase {
assertThat(result.skippedDataFiles().value()).isEqualTo(1);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(1);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
+ assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
+ assertThat(result.skippedDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
- assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
+ assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
}
diff --git
a/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
b/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
index 00489cb0d3..ffc0a6baf8 100644
---
a/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
+++
b/core/src/test/java/org/apache/iceberg/metrics/TestScanMetricsResultParser.java
@@ -176,6 +176,8 @@ public class TestScanMetricsResultParser {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
+ scanMetrics.scannedDeleteManifests().increment(3L);
+ scanMetrics.skippedDeleteManifests().increment(3L);
ScanMetricsResult scanMetricsResult =
ScanMetricsResult.fromScanMetrics(scanMetrics);
Assertions.assertThat(
@@ -191,6 +193,8 @@ public class TestScanMetricsResultParser {
+
"\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+
"\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+
"\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ +
"\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ +
"\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra\": \"value\",\"extra2\":23}"))
.isEqualTo(scanMetricsResult);
}
@@ -230,6 +234,8 @@ public class TestScanMetricsResultParser {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
+ scanMetrics.scannedDeleteManifests().increment(3L);
+ scanMetrics.skippedDeleteManifests().increment(3L);
ScanMetricsResult scanMetricsResult =
ScanMetricsResult.fromScanMetrics(scanMetrics);
@@ -279,6 +285,14 @@ public class TestScanMetricsResultParser {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ + " },\n"
+ + " \"scanned-delete-manifests\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 3\n"
+ + " },\n"
+ + " \"skipped-delete-manifests\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 3\n"
+ " }\n"
+ "}";
diff --git
a/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
b/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
index 4666ec7068..fffcdf64ad 100644
--- a/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
+++ b/core/src/test/java/org/apache/iceberg/metrics/TestScanReportParser.java
@@ -82,6 +82,8 @@ public class TestScanReportParser {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
+ scanMetrics.scannedDeleteManifests().increment(3L);
+ scanMetrics.skippedDeleteManifests().increment(3L);
String tableName = "roundTripTableName";
Schema projection =
@@ -110,6 +112,8 @@ public class TestScanReportParser {
+
"\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+
"\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+
"\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ +
"\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ +
"\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra-metric\":\"extra-val\"},"
+ "\"extra\":\"extraVal\"}"))
.usingRecursiveComparison()
@@ -168,6 +172,8 @@ public class TestScanReportParser {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
+ scanMetrics.scannedDeleteManifests().increment(3L);
+ scanMetrics.skippedDeleteManifests().increment(3L);
String tableName = "roundTripTableName";
Schema projection =
@@ -242,6 +248,14 @@ public class TestScanReportParser {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ + " },\n"
+ + " \"scanned-delete-manifests\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 3\n"
+ + " },\n"
+ + " \"skipped-delete-manifests\" : {\n"
+ + " \"unit\" : \"count\",\n"
+ + " \"value\" : 3\n"
+ " }\n"
+ " }\n"
+ "}";