debabhishek53 commented on code in PR #4171:
URL: https://github.com/apache/gobblin/pull/4171#discussion_r2904029523
##########
gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergSource.java:
##########
@@ -286,62 +335,112 @@ private List<IcebergTable.FilePathWithPartition>
discoverPartitionFilePaths(Sour
log.info("Resolved {} placeholder to current date: {}",
CURRENT_DATE_PLACEHOLDER, dateValue);
}
- // Apply lookback period for date partitions
- // lookbackDays=1 (default) means copy only the specified date
- // lookbackDays=3 means copy specified date + 2 previous days (total 3
days)
- int lookbackDays = state.getPropAsInt(ICEBERG_LOOKBACK_DAYS,
DEFAULT_LOOKBACK_DAYS);
- List<String> values = Lists.newArrayList();
+ // Resolve hour (0-23) — shared by both daily and hourly lookback paths.
+ int hour = 0;
+ if (state.contains(ICEBERG_PARTITION_HOUR)) {
+ hour = state.getPropAsInt(ICEBERG_PARTITION_HOUR, 0);
+ Preconditions.checkArgument(hour >= 0 && hour <= 23,
+ String.format("iceberg.partition.hour must be between 0 and 23, got:
%d", hour));
+ }
- if (lookbackDays >= 1) {
- log.info("Applying lookback period of {} days for date partition column
'{}': {}", lookbackDays, datePartitionColumn, dateValue);
+ // Resolve the DateTimeFormatter used to render each partition value.
+ // resolvePartitionFormatter normalises both the new
iceberg.partition.value.format path
+ // and the legacy iceberg.hourly.partition.enabled path into a single
formatter.
+ DateTimeFormatter partitionFormatter = resolvePartitionFormatter(state,
hour);
- // Check if hourly partitioning is enabled
- boolean isHourlyPartition =
state.getPropAsBoolean(ICEBERG_HOURLY_PARTITION_ENABLED,
DEFAULT_HOURLY_PARTITION_ENABLED);
+ // Parse the input date — always expected in canonical yyyy-MM-dd form.
+ LocalDate startDate;
+ try {
+ startDate = LocalDate.parse(dateValue);
+ } catch (java.time.format.DateTimeParseException e) {
+ String errorMsg = String.format(
+ "Invalid date format for '%s': '%s'. Expected format: yyyy-MM-dd.
Error: %s",
+ ICEBERG_FILTER_DATE, dateValue, e.getMessage());
+ log.error(errorMsg);
+ throw new IllegalArgumentException(errorMsg, e);
+ }
- // Parse the date in yyyy-MM-dd format
- LocalDate start;
- try {
- start = LocalDate.parse(dateValue);
- } catch (java.time.format.DateTimeParseException e) {
- String errorMsg = String.format(
- "Invalid date format for '%s': '%s'. Expected format: yyyy-MM-dd.
Error: %s",
- ICEBERG_FILTER_DATE, dateValue, e.getMessage());
- log.error(errorMsg);
- throw new IllegalArgumentException(errorMsg, e);
- }
+ // Combine resolved date + hour into a single LocalDateTime for the
generator.
+ LocalDateTime startDateTime = startDate.atTime(hour, 0);
+
+ // Delegate partition value list + OR expression to
IcebergPartitionFilterGenerator.
+ // When iceberg.lookback.hours > 0 it takes precedence over
iceberg.lookback.days.
+ int lookbackHours = state.getPropAsInt(ICEBERG_LOOKBACK_HOURS,
DEFAULT_LOOKBACK_HOURS);
+ int lookbackDays = state.getPropAsInt(ICEBERG_LOOKBACK_DAYS,
DEFAULT_LOOKBACK_DAYS);
Review Comment:
This is a expected behavior and it's by design as mentioned in the
comment's we want to gracefully fallback to lookup by days incase the value is
missing or < 0
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]