wombatu-kun commented on code in PR #19052:
URL: https://github.com/apache/hudi/pull/19052#discussion_r3463916148
##########
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestFlinkWriteClients.java:
##########
@@ -121,6 +122,17 @@ void
testUserConfiguredGlobalRecordIndexMinFileGroupCountIsNotOverridden() {
assertEquals(12, writeConfig.getGlobalRecordLevelIndexMinFileGroupCount());
}
+ @Test
+ void testUserConfiguredMigrationCommitArchivalBatchSizeIsPropagated() {
+ // A raw hoodie.* property set on the Flink configuration must be propagated to the write config
+ // (and therefore reach the upgrade handler that reads it during the v7 -> v8 LSM timeline migration).
+
conf.setString(HoodieArchivalConfig.MIGRATION_COMMITS_ARCHIVAL_BATCH_SIZE.key(),
"123");
+ HoodieWriteConfig writeConfig =
FlinkWriteClients.getHoodieClientConfig(conf, false, false);
+ assertEquals(123, writeConfig.getMigrationCommitArchivalBatchSize());
+ // The regular archival batch size must stay independent at its own default.
+ assertEquals(10, writeConfig.getCommitArchivalBatchSize());
Review Comment:
+1
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/SevenToEightUpgradeHandler.java:
##########
@@ -257,23 +257,25 @@ static void upgradeToLSMTimeline(HoodieTable table,
HoodieEngineContext engineCo
LegacyArchivedMetaEntryReader reader = new
LegacyArchivedMetaEntryReader(table.getMetaClient());
StoragePath archivePath = new
StoragePath(table.getMetaClient().getMetaPath(), "timeline/history");
LSMTimelineWriter lsmTimelineWriter =
LSMTimelineWriter.getInstance(config, table, Option.of(archivePath));
- int batchSize = config.getCommitArchivalBatchSize();
+ // Use a dedicated, larger batch size for the one-time migration to minimize the number of parquet
+ // files created on remote storage. Each write() call involves multiple remote storage operations
+ // (exists check, parquet write, manifest update); using the regular archival batch size (default 10)
Review Comment:
+1
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]