1fanwang commented on code in PR #18721:
URL: https://github.com/apache/hudi/pull/18721#discussion_r3243083660
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java:
##########
@@ -3862,6 +3863,19 @@ private void validate() {
checkArgument(lookbackCommits >= 0,
String.format("%s must be non-negative, but was %d",
ROLLING_METADATA_TIMELINE_LOOKBACK_COMMITS.key(),
lookbackCommits));
+
+ // If the user has configured an event-time field but not enabled
watermark tracking,
+ // event-time metadata will be silently absent from commit metadata.
Surface a hint
+ // so the user can opt into tracking via TRACK_EVENT_TIME_WATERMARK.
+ String eventTimeFieldName =
writeConfig.getString(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY);
+ if (!StringUtils.isNullOrEmpty(eventTimeFieldName)
+ && !writeConfig.getBooleanOrDefault(TRACK_EVENT_TIME_WATERMARK)) {
+ log.warn("{}={} is configured but {}={}; event-time watermark metadata
will not be tracked. "
+ + "Set {}=true to record event-time watermark in commit
metadata.",
+ HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY,
eventTimeFieldName,
+ TRACK_EVENT_TIME_WATERMARK.key(),
writeConfig.getBooleanOrDefault(TRACK_EVENT_TIME_WATERMARK),
+ TRACK_EVENT_TIME_WATERMARK.key());
+ }
Review Comment:
Done in c3d3f45 — extracted to `validateEventTimeConfigs()` called at the
end of `validate()`.
##########
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java:
##########
@@ -908,4 +917,69 @@ public void
testUpdatesStrategyNotOverriddenWhenExplicitlySet() {
.build();
assertEquals(customStrategy,
writeConfig.getClusteringUpdatesStrategyClass());
}
+
+ @Test
+ public void testWarnsWhenEventTimeFieldSetWithoutWatermarkTracking() {
+ Properties props = new Properties();
+ props.setProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY,
"ts");
+ // hoodie.write.track.event.time.watermark is not set; default is false.
+ List<LogEvent> events = captureWarnLogsFromBuild(props);
+ boolean foundHint = events.stream()
+ .filter(e -> e.getLevel() == Level.WARN)
+ .map(e -> e.getMessage().getFormattedMessage())
+ .anyMatch(msg ->
msg.contains(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY)
+ &&
msg.contains(HoodieWriteConfig.TRACK_EVENT_TIME_WATERMARK.key()));
+ assertTrue(foundHint,
+ "Expected warning hint about TRACK_EVENT_TIME_WATERMARK when
event-time field is set without tracking");
+ }
+
+ @Test
+ public void testNoWarnWhenEventTimeFieldSetAndWatermarkTrackingEnabled() {
+ Properties props = new Properties();
+ props.setProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY,
"ts");
+ props.setProperty(HoodieWriteConfig.TRACK_EVENT_TIME_WATERMARK.key(),
"true");
+ List<LogEvent> events = captureWarnLogsFromBuild(props);
+ boolean foundHint = events.stream()
+ .filter(e -> e.getLevel() == Level.WARN)
+ .map(e -> e.getMessage().getFormattedMessage())
+ .anyMatch(msg ->
msg.contains(HoodieWriteConfig.TRACK_EVENT_TIME_WATERMARK.key())
+ && msg.contains("event-time watermark metadata will not be
tracked"));
+ assertFalse(foundHint,
+ "Expected no warning when both event-time field and watermark tracking
are configured");
+ }
+
+ @Test
+ public void testNoWarnWhenEventTimeFieldNotSet() {
+ Properties props = new Properties();
+ // No event-time field configured.
+ List<LogEvent> events = captureWarnLogsFromBuild(props);
+ boolean foundHint = events.stream()
+ .filter(e -> e.getLevel() == Level.WARN)
+ .map(e -> e.getMessage().getFormattedMessage())
+ .anyMatch(msg ->
msg.contains(HoodieWriteConfig.TRACK_EVENT_TIME_WATERMARK.key())
+ && msg.contains("event-time watermark metadata will not be
tracked"));
+ assertFalse(foundHint,
+ "Expected no warning when event-time field is not configured");
+ }
Review Comment:
Done in c3d3f45 — dropped the two negative-case tests
(`testNoWarnWhenEventTimeFieldSetAndWatermarkTrackingEnabled`,
`testNoWarnWhenEventTimeFieldNotSet`) and inlined the
`captureWarnLogsFromBuild` helper into the one remaining test. Net: 88 added
lines down to 30.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]