ahmedabu98 commented on code in PR #35435:
URL: https://github.com/apache/beam/pull/35435#discussion_r2213730337
########## sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableWriteSchemaTransformProvider.java: ########## @@ -140,40 +148,74 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { String.format( "Could not find expected input [%s] to %s.", INPUT_TAG, getClass().getSimpleName())); - Schema testOriginialSchema = - Schema.builder() - .addByteArrayField("key") - .addArrayField( - "mutations", - Schema.FieldType.map(Schema.FieldType.STRING, Schema.FieldType.BYTES)) - .build(); - Schema inputSchema = input.getSinglePCollection().getSchema(); - System.out.println("Input Schema for BigTableMutations: " + inputSchema); - PCollection<KV<ByteString, Iterable<Mutation>>> bigtableMutations = null; - if (inputSchema.equals(testOriginialSchema)) { + if (inputSchema.equals(BATCHED_MUTATIONS_SCHEMA)) { PCollection<Row> beamRowMutations = input.get(INPUT_TAG); bigtableMutations = beamRowMutations.apply( // Original schema inputs gets sent out to the original transform provider mutations // function - MapElements.via( - new BigtableWriteSchemaTransformProvider.GetMutationsFromBeamRow())); + MapElements.via(new GetMutationsFromBeamRow())); } else if (inputSchema.hasField("type")) { + checkState( + inputSchema.getField("type").getType().equals(Schema.FieldType.STRING), + "Schema field 'type' should be of type STRING."); + + if (inputSchema.hasField("value")) { + checkState( + inputSchema.getField("value").getType().equals(Schema.FieldType.BYTES), + "Schema field 'value' should be of type BYTES."); + } + + if (inputSchema.hasField("column_qualifier")) { + checkState( + inputSchema.getField("column_qualifier").getType().equals(Schema.FieldType.BYTES), + "Schema field 'column_qualifier' should be of type BYTES."); + } + + if (inputSchema.hasField("family_name")) { + checkState( + inputSchema.getField("family_name").getType().equals(Schema.FieldType.BYTES), + "Schema field 'family_name' should be of type BYTES."); + } + + if 
(inputSchema.hasField("timestamp_micros")) { + checkState( + inputSchema.getField("timestamp_micros").getType().equals(Schema.FieldType.INT64), + "Schema field 'timestamp_micros' should be of type BYTES."); Review Comment: ```suggestion "Schema field 'timestamp_micros' should be of type INT64."); ``` ########## sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableWriteSchemaTransformProvider.java: ########## @@ -140,40 +148,74 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { String.format( "Could not find expected input [%s] to %s.", INPUT_TAG, getClass().getSimpleName())); - Schema testOriginialSchema = - Schema.builder() - .addByteArrayField("key") - .addArrayField( - "mutations", - Schema.FieldType.map(Schema.FieldType.STRING, Schema.FieldType.BYTES)) - .build(); - Schema inputSchema = input.getSinglePCollection().getSchema(); - System.out.println("Input Schema for BigTableMutations: " + inputSchema); - PCollection<KV<ByteString, Iterable<Mutation>>> bigtableMutations = null; - if (inputSchema.equals(testOriginialSchema)) { + if (inputSchema.equals(BATCHED_MUTATIONS_SCHEMA)) { PCollection<Row> beamRowMutations = input.get(INPUT_TAG); bigtableMutations = beamRowMutations.apply( // Original schema inputs gets sent out to the original transform provider mutations // function - MapElements.via( - new BigtableWriteSchemaTransformProvider.GetMutationsFromBeamRow())); + MapElements.via(new GetMutationsFromBeamRow())); } else if (inputSchema.hasField("type")) { + checkState( + inputSchema.getField("type").getType().equals(Schema.FieldType.STRING), + "Schema field 'type' should be of type STRING."); + + if (inputSchema.hasField("value")) { + checkState( + inputSchema.getField("value").getType().equals(Schema.FieldType.BYTES), + "Schema field 'value' should be of type BYTES."); + } + + if (inputSchema.hasField("column_qualifier")) { + checkState( + 
inputSchema.getField("column_qualifier").getType().equals(Schema.FieldType.BYTES), + "Schema field 'column_qualifier' should be of type BYTES."); + } + + if (inputSchema.hasField("family_name")) { + checkState( + inputSchema.getField("family_name").getType().equals(Schema.FieldType.BYTES), + "Schema field 'family_name' should be of type BYTES."); + } + + if (inputSchema.hasField("timestamp_micros")) { + checkState( + inputSchema.getField("timestamp_micros").getType().equals(Schema.FieldType.INT64), + "Schema field 'timestamp_micros' should be of type BYTES."); + } + + if (inputSchema.hasField("start_timestamp_micros")) { + checkState( + inputSchema + .getField("start_timestamp_micros") + .getType() + .equals(Schema.FieldType.INT64), + "Schema field 'start_timestamp_micros' should be of type BYTES."); + } + + if (inputSchema.hasField("end_timestamp_micros")) { + checkState( + inputSchema.getField("end_timestamp_micros").getType().equals(Schema.FieldType.INT64), + "Schema field 'end_timestamp_micros' should be of type BYTES."); Review Comment: ```suggestion "Schema field 'end_timestamp_micros' should be of type INT64."); ``` ########## sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableWriteSchemaTransformProvider.java: ########## @@ -140,40 +148,74 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { String.format( "Could not find expected input [%s] to %s.", INPUT_TAG, getClass().getSimpleName())); - Schema testOriginialSchema = - Schema.builder() - .addByteArrayField("key") - .addArrayField( - "mutations", - Schema.FieldType.map(Schema.FieldType.STRING, Schema.FieldType.BYTES)) - .build(); - Schema inputSchema = input.getSinglePCollection().getSchema(); - System.out.println("Input Schema for BigTableMutations: " + inputSchema); - PCollection<KV<ByteString, Iterable<Mutation>>> bigtableMutations = null; - if (inputSchema.equals(testOriginialSchema)) { + if (inputSchema.equals(BATCHED_MUTATIONS_SCHEMA)) { 
PCollection<Row> beamRowMutations = input.get(INPUT_TAG); bigtableMutations = beamRowMutations.apply( // Original schema inputs gets sent out to the original transform provider mutations // function - MapElements.via( - new BigtableWriteSchemaTransformProvider.GetMutationsFromBeamRow())); + MapElements.via(new GetMutationsFromBeamRow())); } else if (inputSchema.hasField("type")) { + checkState( + inputSchema.getField("type").getType().equals(Schema.FieldType.STRING), + "Schema field 'type' should be of type STRING."); + + if (inputSchema.hasField("value")) { + checkState( + inputSchema.getField("value").getType().equals(Schema.FieldType.BYTES), + "Schema field 'value' should be of type BYTES."); + } + + if (inputSchema.hasField("column_qualifier")) { + checkState( + inputSchema.getField("column_qualifier").getType().equals(Schema.FieldType.BYTES), + "Schema field 'column_qualifier' should be of type BYTES."); + } + + if (inputSchema.hasField("family_name")) { + checkState( + inputSchema.getField("family_name").getType().equals(Schema.FieldType.BYTES), + "Schema field 'family_name' should be of type BYTES."); + } + + if (inputSchema.hasField("timestamp_micros")) { + checkState( + inputSchema.getField("timestamp_micros").getType().equals(Schema.FieldType.INT64), + "Schema field 'timestamp_micros' should be of type BYTES."); + } + + if (inputSchema.hasField("start_timestamp_micros")) { + checkState( + inputSchema + .getField("start_timestamp_micros") + .getType() + .equals(Schema.FieldType.INT64), + "Schema field 'start_timestamp_micros' should be of type BYTES."); Review Comment: ```suggestion "Schema field 'start_timestamp_micros' should be of type INT64."); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@beam.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org