kamilwu commented on a change in pull request #13389: URL: https://github.com/apache/beam/pull/13389#discussion_r529395730
########## File path: .test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy ########## @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import CommonJobProperties as commonJobProperties +import CommonTestProperties +import LoadTestsBuilder as loadTestsBuilder +import PhraseTriggeringPostCommitBuilder +import Flink +import InfluxDBCredentialsHelper + +import static LoadTestsBuilder.DOCKER_CONTAINER_REGISTRY + +String now = new Date().format('MMddHHmmss', TimeZone.getTimeZone('UTC')) + +def batchScenarios = { + [ + [ + title : 'Group By Key Go Load test: 2GB of 10B records', Review comment: `Group By Key Go Load test` -> `Combine Go Load test` ########## File path: .test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy ########## @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import CommonJobProperties as commonJobProperties +import CommonTestProperties +import LoadTestsBuilder as loadTestsBuilder +import PhraseTriggeringPostCommitBuilder +import Flink +import InfluxDBCredentialsHelper + +import static LoadTestsBuilder.DOCKER_CONTAINER_REGISTRY + +String now = new Date().format('MMddHHmmss', TimeZone.getTimeZone('UTC')) + +def batchScenarios = { + [ + [ + title : 'Group By Key Go Load test: 2GB of 10B records', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-1-${now}", + influx_namespace : 'flink', + influx_measurement : 'go_batch_combine_1', + input_options : '\'{' + + '"num_records": 200000000,' + + '"key_size": 1,' + + '"value_size": 9}\'', + fanout : 1, + top_count : 20, + parallelism : 5, + endpoint : 'localhost:8099', + environment_type : 'DOCKER', + environment_config : "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest", + ] + ], + [ + title : 'Group By Key Go Load test: fanout 4 times with 2GB 10-byte records total', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-2-${now}", Review comment: Please replace "2" with "4" here and below, at line 59. This is for compatibility reasons. Some time ago, we had tests 2 and 3, but they were eventually removed. 
########## File path: sdks/go/test/load/combine/combine.go ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "flag" + + "github.com/apache/beam/sdks/go/pkg/beam" + "github.com/apache/beam/sdks/go/pkg/beam/io/synthetic" + "github.com/apache/beam/sdks/go/pkg/beam/log" + "github.com/apache/beam/sdks/go/pkg/beam/transforms/top" + "github.com/apache/beam/sdks/go/pkg/beam/x/beamx" + "github.com/apache/beam/sdks/go/test/load" +) + +var ( + fanout = flag.Int( Review comment: This formatting looks a bit strange. Can we do it in the same way as for the GBK test? ########## File path: sdks/go/test/load/combine/combine.go ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "flag" + + "github.com/apache/beam/sdks/go/pkg/beam" + "github.com/apache/beam/sdks/go/pkg/beam/io/synthetic" + "github.com/apache/beam/sdks/go/pkg/beam/log" + "github.com/apache/beam/sdks/go/pkg/beam/transforms/top" + "github.com/apache/beam/sdks/go/pkg/beam/x/beamx" + "github.com/apache/beam/sdks/go/test/load" +) + +var ( + fanout = flag.Int( + "fanout", + 1, + "A number of combine operations to perform in parallel.") + topCount = flag.Int( + "top_count", + 20, + "A number passed to the combiner.") + syntheticConfig = flag.String( + "input_options", + "", + "A JSON object that describes the configuration for synthetic source.") +) + +func parseSyntheticConfig() synthetic.SourceConfig { + if *syntheticConfig == "" { + panic("--input_options not provided") + } else { + encoded := []byte(*syntheticConfig) + return synthetic.DefaultSourceConfig().BuildFromJSON(encoded) + } +} + +func CompareLess(key []byte, value []byte) bool { Review comment: In Go, if a name begins with a capital letter, it is an exported name. I think there's no need to mark this function as an exported one. ########## File path: sdks/go/test/load/combine/combine.go ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "flag" + + "github.com/apache/beam/sdks/go/pkg/beam" + "github.com/apache/beam/sdks/go/pkg/beam/io/synthetic" + "github.com/apache/beam/sdks/go/pkg/beam/log" + "github.com/apache/beam/sdks/go/pkg/beam/transforms/top" + "github.com/apache/beam/sdks/go/pkg/beam/x/beamx" + "github.com/apache/beam/sdks/go/test/load" +) + +var ( + fanout = flag.Int( + "fanout", + 1, + "A number of combine operations to perform in parallel.") + topCount = flag.Int( + "top_count", + 20, + "A number passed to the combiner.") Review comment: What about something like this: `A number of greatest elements to extract from the PCollection.` ? ########## File path: .test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy ########## @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import CommonJobProperties as commonJobProperties +import CommonTestProperties +import LoadTestsBuilder as loadTestsBuilder +import PhraseTriggeringPostCommitBuilder +import Flink +import InfluxDBCredentialsHelper + +import static LoadTestsBuilder.DOCKER_CONTAINER_REGISTRY + +String now = new Date().format('MMddHHmmss', TimeZone.getTimeZone('UTC')) + +def batchScenarios = { + [ + [ + title : 'Group By Key Go Load test: 2GB of 10B records', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-1-${now}", + influx_namespace : 'flink', + influx_measurement : 'go_batch_combine_1', + input_options : '\'{' + + '"num_records": 200000000,' + + '"key_size": 1,' + + '"value_size": 9}\'', + fanout : 1, + top_count : 20, + parallelism : 5, + endpoint : 'localhost:8099', + environment_type : 'DOCKER', + environment_config : "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest", + ] + ], + [ + title : 'Group By Key Go Load test: fanout 4 times with 2GB 10-byte records total', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-2-${now}", + influx_namespace : 'flink', + influx_measurement : 'go_batch_combine_2', + input_options : '\'{' + + '"num_records": 5000000,' + + '"key_size": 10,' + + '"value_size": 90}\'', + fanout : 4, + top_count : 20, + parallelism : 16, + endpoint : 'localhost:8099', + environment_type : 'DOCKER', + environment_config : "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest", + ] 
+ ], + [ + title : 'Group By Key Go Load test: fanout 8 times with 2GB 10-byte records total', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-3-${now}", + influx_namespace : 'flink', + influx_measurement : 'go_batch_combine_3', + fanout : 8, + top_count : 20, + parallelism : 16, + input_options : '\'{' + + '"num_records": 2500000,' + + '"key_size": 10,' + + '"value_size": 90}\'', + endpoint : 'localhost:8099', + environment_type : 'DOCKER', + environment_config : "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest", + ] + ], + ].each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) } +} + +def loadTestJob = { scope, triggeringContext, mode -> + Map<Integer, List> testScenariosByParallelism = batchScenarios().groupBy { test -> + test.pipelineOptions.parallelism + } + Integer initialParallelism = testScenariosByParallelism.keySet().iterator().next() + List initialScenarios = testScenariosByParallelism.remove(initialParallelism) + + def flink = new Flink(scope, "beam_LoadTests_Go_Combine_Flink_${mode.capitalize()}") + flink.setUp( + [ + "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest" + ], + initialParallelism, + "${DOCKER_CONTAINER_REGISTRY}/beam_flink1.10_job_server:latest") + + // Execute all scenarios connected with initial parallelism. + loadTestsBuilder.loadTests(scope, CommonTestProperties.SDK.GO, initialScenarios, 'combine', mode) + + // Execute the rest of scenarios. 
+ testScenariosByParallelism.each { parallelism, scenarios -> + flink.scaleCluster(parallelism) + loadTestsBuilder.loadTests(scope, CommonTestProperties.SDK.GO, scenarios, 'combine', mode) + } +} + +PhraseTriggeringPostCommitBuilder.postCommitJob( + 'beam_LoadTests_Go_Combine_Flink_Batch', + 'Run Load Tests Go Combine Flink Batch', + 'Load Tests Go Combine Batch suite', + this + ) { + additionalPipelineArgs = [:] + loadTestJob(delegate, CommonTestProperties.TriggeringContext.PR, 'batch') + } + +CronJobBuilder.cronJob('beam_LoadTests_Go_Combine_Flink_Batch', 'H 10 * * *', this) { Review comment: We already have two tests firing at the same time (at 10 am UTC): Go ParDo (merged) and Go GBK (will be merged soon). Let's choose something else. I think `8` is a good choice. ########## File path: sdks/go/test/load/combine/combine.go ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "bytes" + "context" + "flag" + + "github.com/apache/beam/sdks/go/pkg/beam" + "github.com/apache/beam/sdks/go/pkg/beam/io/synthetic" + "github.com/apache/beam/sdks/go/pkg/beam/log" + "github.com/apache/beam/sdks/go/pkg/beam/transforms/top" + "github.com/apache/beam/sdks/go/pkg/beam/x/beamx" + "github.com/apache/beam/sdks/go/test/load" +) + +var ( + fanout = flag.Int( + "fanout", + 1, + "A number of combine operations to perform in parallel.") + topCount = flag.Int( + "top_count", + 20, + "A number passed to the combiner.") + syntheticConfig = flag.String( + "input_options", + "", + "A JSON object that describes the configuration for synthetic source.") +) + +func parseSyntheticConfig() synthetic.SourceConfig { + if *syntheticConfig == "" { + panic("--input_options not provided") + } else { + encoded := []byte(*syntheticConfig) + return synthetic.DefaultSourceConfig().BuildFromJSON(encoded) + } +} + +func CompareLess(key []byte, value []byte) bool { + return bytes.Compare(key, value) < 0 +} + +func main() { + flag.Parse() + beam.Init() + + ctx := context.Background() + + p, s := beam.NewPipelineWithRoot() + src := synthetic.SourceSingle(s, parseSyntheticConfig()) + pcoll := beam.ParDo(s, &load.RuntimeMonitor{}, src) + for i := 0; i < *fanout; i++ { + pcoll = top.LargestPerKey(s, pcoll, *topCount, CompareLess) Review comment: Two transforms are missing after the combine operation: - a ParDo for consuming output elements. Please visit `sdks/python/apache_beam/testing/load_tests/combine_test.py` to check how it was implemented in Python - a final ParDo with a RuntimeMonitor ########## File path: .test-infra/jenkins/job_LoadTests_Combine_Flink_Go.groovy ########## @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import CommonJobProperties as commonJobProperties +import CommonTestProperties +import LoadTestsBuilder as loadTestsBuilder +import PhraseTriggeringPostCommitBuilder +import Flink +import InfluxDBCredentialsHelper + +import static LoadTestsBuilder.DOCKER_CONTAINER_REGISTRY + +String now = new Date().format('MMddHHmmss', TimeZone.getTimeZone('UTC')) + +def batchScenarios = { + [ + [ + title : 'Group By Key Go Load test: 2GB of 10B records', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-1-${now}", + influx_namespace : 'flink', + influx_measurement : 'go_batch_combine_1', + input_options : '\'{' + + '"num_records": 200000000,' + + '"key_size": 1,' + + '"value_size": 9}\'', + fanout : 1, + top_count : 20, + parallelism : 5, + endpoint : 'localhost:8099', + environment_type : 'DOCKER', + environment_config : "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest", + ] + ], + [ + title : 'Group By Key Go Load test: fanout 4 times with 2GB 10-byte records total', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-2-${now}", + influx_namespace : 'flink', + influx_measurement : 'go_batch_combine_2', + input_options : '\'{' + + '"num_records": 5000000,' + + '"key_size": 10,' + + '"value_size": 90}\'', + fanout : 4, + top_count 
: 20, + parallelism : 16, + endpoint : 'localhost:8099', + environment_type : 'DOCKER', + environment_config : "${DOCKER_CONTAINER_REGISTRY}/beam_go_sdk:latest", + ] + ], + [ + title : 'Group By Key Go Load test: fanout 8 times with 2GB 10-byte records total', + test : 'combine', + runner : CommonTestProperties.Runner.FLINK, + pipelineOptions: [ + job_name : "load-tests-go-flink-batch-combine-3-${now}", Review comment: "3" -> "5" ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
