singhpk234 commented on code in PR #5063:
URL: https://github.com/apache/iceberg/pull/5063#discussion_r904987799
##########
spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMetadataTables.java:
##########
@@ -319,6 +321,58 @@ public void testAllFilesPartitioned() throws Exception {
TestHelpers.assertEqualsSafe(filesTableSchema.asStruct(), expectedFiles,
actualFiles);
}
+ @Test
+ public void testMetadataLogMetatable() throws Exception {
+ // Create table and insert data
+ sql("CREATE TABLE %s (id bigint, data string) " +
+ "USING iceberg " +
+ "PARTITIONED BY (data) " +
+ "TBLPROPERTIES" +
+ "('format-version'='2', 'write.delete.mode'='merge-on-read')",
tableName);
+
+ List<SimpleRecord> recordsA = Lists.newArrayList(
+ new SimpleRecord(1, "a"),
+ new SimpleRecord(2, "a")
+ );
+ spark.createDataset(recordsA, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ List<SimpleRecord> recordsB = Lists.newArrayList(
+ new SimpleRecord(1, "b"),
+ new SimpleRecord(2, "b")
+ );
+ spark.createDataset(recordsB, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ Table table = Spark3Util.loadIcebergTable(spark, tableName);
+ Long currentSnapshotId = table.currentSnapshot().snapshotId();
+
+ // Check metadataLog table
+ List<Object[]> metadataLogs = sql("SELECT * FROM %s.metadata_log",
tableName);
+ Assert.assertEquals("metadataLog table should return 3 rows", 3,
metadataLogs.size());
+
+ // test filtering
+ List<Object[]> metadataLogWithFilters =
+ sql("SELECT * FROM %s.metadata_log WHERE latest_snapshot_id = %s",
tableName, currentSnapshotId);
+ Assert.assertEquals("metadataLog table should return 1 row", 1,
metadataLogWithFilters.size());
+ Assert.assertEquals("timestampMillis should match currentSnapshot",
+ table.currentSnapshot().timestampMillis() * 1000,
metadataLogWithFilters.get(0)[0]);
+
+ if (((HasTableOperations) table).operations() instanceof
HiveTableOperations) {
+ Assert.assertEquals("file should match current metadata location",
Review Comment:
> Will this test ever not be HiveTableOperations
Yes, it can: the operations may be HadoopTableOperations, because we also have a test with
[HadoopCatalog](https://github.com/apache/iceberg/blob/master/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java#L39-L42).
> Usually tests use Assume for these kind of things, but maybe this check is
not even necessary here if we control the environment
Agreed. I modified the unit test to pick the metadata location from
tableMetadata directly.
##########
spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMetadataTables.java:
##########
@@ -319,6 +321,58 @@ public void testAllFilesPartitioned() throws Exception {
TestHelpers.assertEqualsSafe(filesTableSchema.asStruct(), expectedFiles,
actualFiles);
}
+ @Test
+ public void testMetadataLogMetatable() throws Exception {
+ // Create table and insert data
+ sql("CREATE TABLE %s (id bigint, data string) " +
+ "USING iceberg " +
+ "PARTITIONED BY (data) " +
+ "TBLPROPERTIES" +
+ "('format-version'='2', 'write.delete.mode'='merge-on-read')",
tableName);
+
+ List<SimpleRecord> recordsA = Lists.newArrayList(
+ new SimpleRecord(1, "a"),
+ new SimpleRecord(2, "a")
+ );
+ spark.createDataset(recordsA, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ List<SimpleRecord> recordsB = Lists.newArrayList(
+ new SimpleRecord(1, "b"),
+ new SimpleRecord(2, "b")
+ );
+ spark.createDataset(recordsB, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ Table table = Spark3Util.loadIcebergTable(spark, tableName);
+ Long currentSnapshotId = table.currentSnapshot().snapshotId();
+
+ // Check metadataLog table
+ List<Object[]> metadataLogs = sql("SELECT * FROM %s.metadata_log",
tableName);
+ Assert.assertEquals("metadataLog table should return 3 rows", 3,
metadataLogs.size());
+
+ // test filtering
+ List<Object[]> metadataLogWithFilters =
+ sql("SELECT * FROM %s.metadata_log WHERE latest_snapshot_id = %s",
tableName, currentSnapshotId);
+ Assert.assertEquals("metadataLog table should return 1 row", 1,
metadataLogWithFilters.size());
+ Assert.assertEquals("timestampMillis should match currentSnapshot",
+ table.currentSnapshot().timestampMillis() * 1000,
metadataLogWithFilters.get(0)[0]);
+
+ if (((HasTableOperations) table).operations() instanceof
HiveTableOperations) {
+ Assert.assertEquals("file should match current metadata location",
Review Comment:
> Will this test ever not be HiveTableOperations
Yes, it can: the operations may be HadoopTableOperations, because we also have a test with
[HadoopCatalog](https://github.com/apache/iceberg/blob/master/spark/v3.0/spark/src/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java#L39-L42).
> Usually tests use Assume for these kind of things, but maybe this check is
not even necessary here if we control the environment
Agreed. I modified the unit test to pick the metadata location from
tableMetadata directly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]