szehon-ho commented on code in PR #4456:
URL: https://github.com/apache/iceberg/pull/4456#discussion_r845518169
##########
hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java:
##########
@@ -468,4 +478,83 @@ public void testUUIDinTableProperties() throws Exception {
catalog.dropTable(tableIdentifier);
}
}
+
+ @Test
+ public void testSnapshotStatsTableProperties() throws Exception {
+ Schema schema = new Schema(
+ required(1, "id", Types.IntegerType.get(), "unique ID"),
+ required(2, "data", Types.StringType.get())
+ );
+ TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl");
+ String location = temp.newFolder("tbl").toString();
+
+ try {
+ catalog.buildTable(tableIdentifier, schema)
+ .withLocation(location)
+ .create();
+
+ String tableName = tableIdentifier.name();
+ org.apache.hadoop.hive.metastore.api.Table hmsTable =
+ metastoreClient.getTable(tableIdentifier.namespace().level(0),
tableName);
+
+ // check whether parameters are in expected state
+ Map<String, String> parameters = hmsTable.getParameters();
+ Assert.assertEquals("0", parameters.get(TableProperties.SNAPSHOT_COUNT));
+
Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY));
+ Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_ID));
+
Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP));
+
+ // create a snapshot
+ Table icebergTable = catalog.loadTable(tableIdentifier);
+ String fileName = UUID.randomUUID().toString();
+ DataFile file = DataFiles.builder(icebergTable.spec())
+ .withPath(FileFormat.PARQUET.addExtension(fileName))
+ .withRecordCount(2)
+ .withFileSizeInBytes(0)
+ .build();
+ icebergTable.newFastAppend().appendFile(file).commit();
+
+ // check whether parameters are in expected state
+ hmsTable =
metastoreClient.getTable(tableIdentifier.namespace().level(0), tableName);
+ parameters = hmsTable.getParameters();
+ Assert.assertEquals("1", parameters.get(TableProperties.SNAPSHOT_COUNT));
+ String summary =
JsonUtil.mapper().writeValueAsString(icebergTable.currentSnapshot().summary());
+ Assert.assertEquals(summary,
parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY));
+ long snapshotId = icebergTable.currentSnapshot().snapshotId();
+ Assert.assertEquals(String.valueOf(snapshotId),
parameters.get(TableProperties.CURRENT_SNAPSHOT_ID));
+
Assert.assertEquals(String.valueOf(icebergTable.currentSnapshot().timestampMillis()),
+ parameters.get(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP));
+
+ } finally {
+ catalog.dropTable(tableIdentifier);
+ }
+ }
+
+ @Test
+ public void testSetSnapshotSummary() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set("iceberg.hive.table.parameter.size.max", "4000");
+ HiveTableOperations spyOps = spy(new HiveTableOperations(conf, null, null,
catalog.name(), DB_NAME, "tbl"));
+ Snapshot snapshot = mock(Snapshot.class);
+ Map<String, String> summary = Maps.newHashMap();
+ when(snapshot.summary()).thenReturn(summary);
+
+ // create a snapshot summary whose json string size is less than the limit
+ for (int i = 0; i < 100; i++) {
+ summary.put(String.valueOf(i), "value");
+ }
+ Assert.assertTrue(JsonUtil.mapper().writeValueAsString(summary).length() <
4000);
+ Map<String, String> parameter = Maps.newHashMap();
+ spyOps.setSnapshotSummary(parameter, snapshot);
+ Assert.assertEquals("The snapshot summary must be in parameters", 1,
parameter.size());
+
+ // create a snapshot summary whose json string size exceeds the limit
+ for (int i = 0; i < 1000; i++) {
+ summary.put(String.valueOf(i), "value");
+ }
+ long summarySize = JsonUtil.mapper().writeValueAsString(summary).length();
+ Assert.assertTrue(summarySize > 4000 && summarySize < 32672);
Review Comment:
Hm, I meant: can't we just do
```
Assert.assertTrue("Summary size should be greater than limit", summarySize > 4000);
```
To me the test is checking whether you can persist something beyond 4000
chars, right? Whether the size is below or above 32672 chars should no longer
matter now that we changed the limit to 4000, right?
##########
hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java:
##########
@@ -90,6 +92,8 @@
private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS =
"iceberg.hive.lock-check-max-wait-ms";
private static final String HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES =
"iceberg.hive.metadata-refresh-max-retries";
private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS =
"iceberg.hive.table-level-lock-evict-ms";
+ private static final String HIVE_TABLE_PARAMETER_SIZE_MAX =
"iceberg.hive.table.parameter.size.max";
+ private static final long HIVE_TABLE_PARAMETER_SIZE_MAX_DEFAULT = 32672;
Review Comment:
Thanks. Nit: how about "iceberg.hive.max.table.parameter.size"?
Also, regarding the comment: it does depend somewhat on the backend, but Hive
imposed a global limit of 4000 regardless of the database so that it works
across different databases. So maybe something like "For Hive versions below
2.3, table parameter values should be capped at 4000 characters, see
https://issues.apache.org/jira/browse/HIVE-12274"?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]