aromanenko-dev commented on a change in pull request #13282:
URL: https://github.com/apache/beam/pull/13282#discussion_r520677337
##########
File path: .test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy
##########
@@ -61,14 +61,38 @@ job(jobName) {
autoscalingAlgorithm : 'NONE'
]
+ Map runnerV2SdfWrapperPipelineOptions = pipelineOptions + [
+ kafkaTopic : 'beam-runnerv2',
+ bigQueryTable : 'kafkaioit_results_sdf_wrapper',
+ influxMeasurement : 'kafkaioit_results_sdf_wrapper',
+ experiments : 'beam_fn_api,use_runner_v2,use_unified_worker',
+ ]
+
+ Map runnerV2SdfPipelineOptions = pipelineOptions + [
+ kafkaTopic : 'beam-sdf',
+ bigQueryTable : 'kafkaioit_results_runner_v2',
+ influxMeasurement : 'kafkaioit_results_runner_v2',
+ experiments : 'beam_fn_api,use_runner_v2,use_unified_worker,use_sdf_kafka_read',
+ ]
+
steps {
gradle {
rootBuildScriptDir(common.checkoutDir)
common.setGradleSwitches(delegate)
switches("--info")
- switches("-DintegrationTestPipelineOptions=\'${common.joinOptionsWithNestedJsonValues(pipelineOptions)}\'")
+ switches("-DintegrationTestPipelineOptions=\'${common.joinOptionsWithNestedJsonValues(runnerV2SdfWrapperPipelineOptions)}\'")
+ switches("-DintegrationTestRunner=dataflow")
+ switches("-Dexperiment=use_runner_v2")
Review comment:
What actually is "runner v2"?
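
For context: "Runner v2" is Dataflow's portability-based execution engine, opted into via the `use_runner_v2` / `use_unified_worker` experiments seen in the diff above. A minimal sketch of setting the same experiments programmatically (assuming the standard `DataflowPipelineOptions` API; the class name here is illustrative, not part of this PR):

```java
import java.util.Arrays;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class RunnerV2OptionsSketch {
  public static void main(String[] args) {
    // Parse command-line args, then opt the job into Dataflow Runner v2 with
    // the same experiment flags the Jenkins job passes via gradle switches.
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
    options.setExperiments(
        Arrays.asList("beam_fn_api", "use_runner_v2", "use_unified_worker"));
  }
}
```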
##########
File path: sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
##########
@@ -115,6 +122,43 @@ public static void setup() throws IOException {
.get();
}
+ @Test
+ public void testKafkaIOWithRunnerV2() throws IOException {
+ writePipeline
+ .apply("Generate records", Read.from(new
SyntheticBoundedSource(sourceOptions)))
+ .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE,
WRITE_TIME_METRIC_NAME)))
+ .apply("Write to Kafka", writeToKafka());
+
+ readPipeline.getOptions().as(Options.class).setStreaming(true);
+ PCollection<Integer> elementCount =
+ readPipeline
+ .apply("Read from Runner V2 Kafka", readFromKafka())
+ .apply(
+ "Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE,
READ_TIME_METRIC_NAME)))
+ .apply("Map records to strings", MapElements.via(new
MapKafkaRecordsToStrings()))
+ .apply(
+ "Keyed by empty key",
+ MapElements.into(new TypeDescriptor<KV<byte[], String>>() {})
+ .via(element -> KV.of(new byte[0], element)))
+ .apply(
+ "Counting elements", ParDo.of(new
CountingElementFn(options.getNumberOfRecords())));
Review comment:
Why not use metrics for counting?
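
A sketch of the metrics-based alternative being suggested (illustrative only; `CountElementsWithMetricFn` and the metric name are made up here, and the element type is simplified to `String` rather than the test's `KV<byte[], String>`):

```java
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.transforms.DoFn;

// Counts elements with a Beam Counter instead of a hand-rolled counting DoFn;
// the runner aggregates the metric and exposes it through MetricResults, so
// the test can query the count after pipeline.run() completes.
class CountElementsWithMetricFn extends DoFn<String, String> {
  private static final String NAMESPACE = "KafkaIOIT";
  private final Counter elementCounter = Metrics.counter(NAMESPACE, "element_count");

  @ProcessElement
  public void processElement(@Element String element, OutputReceiver<String> out) {
    elementCounter.inc();
    out.output(element);
  }
}
```

The test would then read the counter back from the `PipelineResult`'s metrics after the run, rather than asserting inside the pipeline.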
##########
File path: sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java
##########
@@ -407,4 +408,13 @@ public double getTotalSize(double numRecords) {
return avgRecordSize.get() * numRecords / (1 + avgRecordGap.get());
}
}
+
+ private static Instant ensureTimestampWithinBounds(Instant timestamp) {
Review comment:
Why do we need this function? Is it possible to have a timestamp out of window bounds?
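
For reference, this kind of guard usually clamps the Kafka record timestamp to Beam's representable range; a sketch of the common pattern (assuming the joda-time `Instant` Beam uses, and not necessarily the PR's exact body):

```java
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.joda.time.Instant;

class TimestampBounds {
  // Kafka timestamps (CreateTime is producer-supplied, LogAppendTime is the
  // broker's clock) can fall outside Beam's representable range, so clamp
  // them to the global window bounds before emitting elements downstream.
  static Instant ensureTimestampWithinBounds(Instant timestamp) {
    if (timestamp.isBefore(BoundedWindow.TIMESTAMP_MIN_VALUE)) {
      return BoundedWindow.TIMESTAMP_MIN_VALUE;
    }
    if (timestamp.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE)) {
      return BoundedWindow.TIMESTAMP_MAX_VALUE;
    }
    return timestamp;
  }
}
```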
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]