gemini-code-assist[bot] commented on code in PR #37235:
URL: https://github.com/apache/beam/pull/37235#discussion_r2665475008
##########
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTimestampPicosIT.java:
##########
@@ -392,6 +398,111 @@ public void testReadWithNanosPrecision_Arrow() {
    runReadTest(TimestampPrecision.NANOS, DataFormat.ARROW, expectedOutput, simpleTableSpec);
}
+ // Schema with custom timestamp-nanos logical type
+ private static org.apache.avro.Schema createTimestampNanosAvroSchema() {
+ org.apache.avro.Schema longSchema =
+ org.apache.avro.Schema.create(org.apache.avro.Schema.Type.LONG);
+ longSchema.addProp("logicalType", "timestamp-nanos");
+ return org.apache.avro.SchemaBuilder.record("TimestampNanosRecord")
+ .fields()
+ .name("ts_nanos")
+ .type(longSchema)
+ .noDefault()
+ .endRecord();
+ }
+
+ private static final java.time.Instant TEST_INSTANT =
+ java.time.Instant.parse("2024-01-15T10:30:45.123456789Z");
+
+ private static final org.apache.avro.Schema TIMESTAMP_NANOS_AVRO_SCHEMA =
+ createTimestampNanosAvroSchema();
+
+ @Test
+ public void testWriteGenericRecordTimestampNanos() throws Exception {
+ String tableSpec =
+        String.format("%s:%s.%s", project, DATASET_ID, "generic_record_ts_nanos_test");
+
+ // Create GenericRecord with timestamp-nanos value
+ GenericRecord record =
+ new GenericRecordBuilder(TIMESTAMP_NANOS_AVRO_SCHEMA)
+            .set("ts_nanos", TEST_INSTANT.getEpochSecond() * 1_000_000_000 + TEST_INSTANT.getNano())
Review Comment:

Since `Instant.getEpochSecond()` already returns a `long`, this multiplication is performed in long
arithmetic and cannot overflow; still, using the long literal `1_000_000_000L` makes that explicit
and improves clarity.
```suggestion
            .set("ts_nanos", TEST_INSTANT.getEpochSecond() * 1_000_000_000L + TEST_INSTANT.getNano())
```
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java:
##########
@@ -587,7 +587,21 @@ private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
field.setFields(toTableFieldSchema(mapSchema));
field.setMode(Mode.REPEATED.toString());
}
- field.setType(toStandardSQLTypeName(type).toString());
+ if (type.getTypeName().isLogicalType()
+ && Preconditions.checkArgumentNotNull(type.getLogicalType())
+ .getIdentifier()
+ .equals(Timestamp.IDENTIFIER)) {
+ Schema.LogicalType<?, ?> logicalType =
+ Preconditions.checkArgumentNotNull(type.getLogicalType());
+        int precision = Preconditions.checkArgumentNotNull(logicalType.getArgument());
+ if (precision != 9) {
+ throw new IllegalArgumentException(
+ "Unsupported precision for Timestamp logical type " + precision);
+ }
+        field.setType(StandardSQLTypeName.TIMESTAMP.toString()).setTimestampPrecision(12L);
+ } else {
+ field.setType(toStandardSQLTypeName(type).toString());
+ }
Review Comment:

This block can be simplified to improve readability and avoid redundant
calls to `type.getLogicalType()`. You can fetch the logical type once and then
perform the checks.
```java
      Schema.LogicalType<?, ?> logicalType = type.getLogicalType();
      if (logicalType != null && Timestamp.IDENTIFIER.equals(logicalType.getIdentifier())) {
        int precision = Preconditions.checkArgumentNotNull(logicalType.getArgument());
        if (precision != 9) {
          throw new IllegalArgumentException(
              "Unsupported precision for Timestamp logical type " + precision);
        }
        field.setType(StandardSQLTypeName.TIMESTAMP.toString()).setTimestampPrecision(12L);
      } else {
        field.setType(toStandardSQLTypeName(type).toString());
      }
```
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java:
##########
@@ -502,10 +516,42 @@ static Object mapEntryToProtoValue(
return builder.build();
}
+ private static DynamicMessage buildTimestampPicosMessage(
+ Descriptor timestampPicosDescriptor, long seconds, long picoseconds) {
+    return DynamicMessage.newBuilder(timestampPicosDescriptor)
+        .setField(
+            Preconditions.checkNotNull(timestampPicosDescriptor.findFieldByName("seconds")),
+            seconds)
+        .setField(
+            Preconditions.checkNotNull(timestampPicosDescriptor.findFieldByName("picoseconds")),
+            picoseconds)
+        .build();
+ }
+
@VisibleForTesting
- static Object scalarToProtoValue(Schema fieldSchema, Object value) {
+ static Object scalarToProtoValue(
+ @Nullable FieldDescriptor descriptor, Schema fieldSchema, Object value) {
TypeWithNullability type = TypeWithNullability.create(fieldSchema);
+    if (TIMESTAMP_NANOS_LOGICAL_TYPE.equals(type.getType().getProp("logicalType"))) {
+      Preconditions.checkArgument(
+          value instanceof Long, "Expecting a value as Long type (timestamp-nanos).");
+ long nanos = (Long) value;
+
+ long seconds = nanos / 1_000_000_000L;
+ int nanoAdjustment = (int) (nanos % 1_000_000_000L);
+
+ // Handle negative timestamps (before epoch)
+ if (nanos < 0 && nanoAdjustment != 0) {
+ seconds -= 1;
+ nanoAdjustment += 1_000_000_000;
+ }
+
+ long picoseconds = nanoAdjustment * 1000L;
Review Comment:

The nanosecond-to-picosecond conversion logic uses magic numbers
`1_000_000_000L` and `1000L`. To improve readability and maintainability,
consider defining these as constants, for example `NANOS_PER_SECOND` and
`PICOS_PER_NANO`.
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/AvroGenericRecordToStorageApiProto.java:
##########
@@ -380,34 +384,44 @@ private static TableFieldSchema fieldDescriptorFromAvroField(org.apache.avro.Sch
.setType(unionFieldSchema.getType())
.setMode(unionFieldSchema.getMode())
.addAllFields(unionFieldSchema.getFieldsList());
+
+          if (unionFieldSchema.hasTimestampPrecision()) {
+            builder.setTimestampPrecision(unionFieldSchema.getTimestampPrecision());
+          }
break;
default:
elementType = TypeWithNullability.create(schema).getType();
- Optional<LogicalType> logicalType =
- Optional.ofNullable(LogicalTypes.fromSchema(elementType));
- @Nullable
- TableFieldSchema.Type primitiveType =
- logicalType
- .flatMap(AvroGenericRecordToStorageApiProto::logicalTypes)
- .orElse(PRIMITIVE_TYPES.get(elementType.getType()));
- if (primitiveType == null) {
-        throw new RuntimeException("Unsupported type " + elementType.getType());
- }
-      // a scalar will be required by default, if defined as part of union then
- // caller will set nullability requirements
- builder = builder.setType(primitiveType);
- // parametrized types
-      if (logicalType.isPresent() && logicalType.get().getName().equals("decimal")) {
-        LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType.get();
- int precision = decimal.getPrecision();
- int scale = decimal.getScale();
- if (!(precision == 38 && scale == 9) // NUMERIC
- && !(precision == 77 && scale == 38) // BIGNUMERIC
- ) {
- // parametrized type
- builder = builder.setPrecision(precision);
- if (scale != 0) {
- builder = builder.setScale(scale);
+          if (TIMESTAMP_NANOS_LOGICAL_TYPE.equals(elementType.getProp("logicalType"))) {
+            builder = builder.setType(TableFieldSchema.Type.TIMESTAMP);
+            builder.setTimestampPrecision(Int64Value.newBuilder().setValue(12L).build());
Review Comment:

The magic number `12L` for picosecond precision is used in multiple places
across the codebase. Consider defining it as a shared constant, for example
`private static final long PICOSECOND_PRECISION = 12L;`, to improve
maintainability and consistency.
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java:
##########
@@ -341,17 +358,39 @@ private static Object toProtoValue(
            fieldDescriptor.getMessageType(), keyType, valueType, entry))
.collect(Collectors.toList());
default:
- return scalarToProtoValue(beamFieldType, value);
+ return scalarToProtoValue(fieldDescriptor, beamFieldType, value);
}
}
+ private static DynamicMessage buildTimestampPicosMessage(
+ Descriptor timestampPicosDescriptor, Instant instant) {
+ long seconds = instant.getEpochSecond();
+ long picoseconds = instant.getNano() * 1000L; // nanos → picos
+
+    return DynamicMessage.newBuilder(timestampPicosDescriptor)
+        .setField(
+            Preconditions.checkNotNull(timestampPicosDescriptor.findFieldByName("seconds")),
+            seconds)
+        .setField(
+            Preconditions.checkNotNull(timestampPicosDescriptor.findFieldByName("picoseconds")),
+            picoseconds)
+        .build();
+ }
Review Comment:

This `buildTimestampPicosMessage` method is also implemented in
`AvroGenericRecordToStorageApiProto.java`. To avoid code duplication, consider
creating a shared utility class for timestamp conversions. This utility could
contain methods for converting from different sources (like `long` nanoseconds
or `Instant`) to picoseconds, and a single method to build the `DynamicMessage`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]