debabhishek53 commented on code in PR #4171:
URL: https://github.com/apache/gobblin/pull/4171#discussion_r2904029523


##########
gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergSource.java:
##########
@@ -286,62 +335,112 @@ private List<IcebergTable.FilePathWithPartition> 
discoverPartitionFilePaths(Sour
       log.info("Resolved {} placeholder to current date: {}", 
CURRENT_DATE_PLACEHOLDER, dateValue);
     }
 
-    // Apply lookback period for date partitions
-    // lookbackDays=1 (default) means copy only the specified date
-    // lookbackDays=3 means copy specified date + 2 previous days (total 3 
days)
-    int lookbackDays = state.getPropAsInt(ICEBERG_LOOKBACK_DAYS, 
DEFAULT_LOOKBACK_DAYS);
-    List<String> values = Lists.newArrayList();
+    // Resolve hour (0-23) — shared by both daily and hourly lookback paths.
+    int hour = 0;
+    if (state.contains(ICEBERG_PARTITION_HOUR)) {
+      hour = state.getPropAsInt(ICEBERG_PARTITION_HOUR, 0);
+      Preconditions.checkArgument(hour >= 0 && hour <= 23,
+        String.format("iceberg.partition.hour must be between 0 and 23, got: 
%d", hour));
+    }
 
-    if (lookbackDays >= 1) {
-      log.info("Applying lookback period of {} days for date partition column 
'{}': {}", lookbackDays, datePartitionColumn, dateValue);
+    // Resolve the DateTimeFormatter used to render each partition value.
+    // resolvePartitionFormatter normalises both the new 
iceberg.partition.value.format path
+    // and the legacy iceberg.hourly.partition.enabled path into a single 
formatter.
+    DateTimeFormatter partitionFormatter = resolvePartitionFormatter(state, 
hour);
 
-      // Check if hourly partitioning is enabled
-      boolean isHourlyPartition = 
state.getPropAsBoolean(ICEBERG_HOURLY_PARTITION_ENABLED, 
DEFAULT_HOURLY_PARTITION_ENABLED);
+    // Parse the input date — always expected in canonical yyyy-MM-dd form.
+    LocalDate startDate;
+    try {
+      startDate = LocalDate.parse(dateValue);
+    } catch (java.time.format.DateTimeParseException e) {
+      String errorMsg = String.format(
+        "Invalid date format for '%s': '%s'. Expected format: yyyy-MM-dd. 
Error: %s",
+        ICEBERG_FILTER_DATE, dateValue, e.getMessage());
+      log.error(errorMsg);
+      throw new IllegalArgumentException(errorMsg, e);
+    }
 
-      // Parse the date in yyyy-MM-dd format
-      LocalDate start;
-      try {
-        start = LocalDate.parse(dateValue);
-      } catch (java.time.format.DateTimeParseException e) {
-        String errorMsg = String.format(
-          "Invalid date format for '%s': '%s'. Expected format: yyyy-MM-dd. 
Error: %s",
-          ICEBERG_FILTER_DATE, dateValue, e.getMessage());
-        log.error(errorMsg);
-        throw new IllegalArgumentException(errorMsg, e);
-      }
+    // Combine resolved date + hour into a single LocalDateTime for the 
generator.
+    LocalDateTime startDateTime = startDate.atTime(hour, 0);
+
+    // Delegate partition value list + OR expression to 
IcebergPartitionFilterGenerator.
+    // When iceberg.lookback.hours > 0 it takes precedence over 
iceberg.lookback.days.
+    int lookbackHours = state.getPropAsInt(ICEBERG_LOOKBACK_HOURS, 
DEFAULT_LOOKBACK_HOURS);
+    int lookbackDays  = state.getPropAsInt(ICEBERG_LOOKBACK_DAYS, 
DEFAULT_LOOKBACK_DAYS);
 

Review Comment:
   This is a expected behavior  and it's by design as mentioned in the 
comment's we want to gracefully fallback to lookup by days incase the value is 
missing or < 0



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to