[ 
https://issues.apache.org/jira/browse/BEAM-5985?focusedWorklogId=208796&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-208796
 ]

ASF GitHub Bot logged work on BEAM-5985:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 06/Mar/19 13:21
            Start Date: 06/Mar/19 13:21
    Worklog Time Spent: 10m 
      Work Description: kkucharc commented on pull request #7903: [BEAM-5985] Dataflow batch load test jobs
URL: https://github.com/apache/beam/pull/7903#discussion_r262937050
 
 

 ##########
 File path: .test-infra/jenkins/job_LoadTests_Java.groovy
 ##########
 @@ -17,118 +17,213 @@
  */
 
 import CommonJobProperties as commonJobProperties
+import CommonTestProperties
 import LoadTestsBuilder as loadTestsBuilder
 import PhraseTriggeringPostCommitBuilder
+import CronJobBuilder
 
 def loadTestConfigurations = [
         [
-                jobName           : 
'beam_Java_LoadTests_GroupByKey_Dataflow_Small',
-                jobDescription    : 'Runs GroupByKey load tests on Dataflow 
runner small records 10b',
-                itClass           : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                prCommitStatusName: 'Java GroupByKey Small Load Test Dataflow',
-                prTriggerPhrase   : 'Run GroupByKey Small Java Load Test 
Dataflow',
-                runner            : CommonTestProperties.Runner.DATAFLOW,
+                title        : 'Load test: 2GB of 10B records',
+                itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+                runner       : CommonTestProperties.Runner.DATAFLOW,
                 jobProperties     : [
                         project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_1',
                         tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
                         publishToBigQuery   : true,
-                        bigQueryDataset     : 'load_test_PRs',
-                        bigQueryTable       : 'dataflow_gbk_small',
-                        sourceOptions       : 
'{"numRecords":1000000000,"splitPointFrequencyRecords":1,"keySizeBytes":1,"valueSizeBytes":9,"numHotKeys":0,"hotKeyFraction":0,"seed":123456,"bundleSizeDistribution":{"type":"const","const":42},"forceNumInitialBundles":100,"progressShape":"LINEAR","initializeDelayDistribution":{"type":"const","const":42}}',
-                        stepOptions         : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true,"perBundleDelay":10000,"perBundleDelayType":"MIXED","cpuUtilizationInMixedDelay":0.5}',
-                        fanout              : 10,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_1',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 200000000,
+                                              "keySizeBytes": 1,
+                                              "valueSizeBytes": 9
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
                         iterations          : 1,
-                        maxNumWorkers       : 32,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
-
         ],
-]
-
-for (testConfiguration in loadTestConfigurations) {
-    PhraseTriggeringPostCommitBuilder.postCommitJob(
-            testConfiguration.jobName,
-            testConfiguration.prTriggerPhrase,
-            testConfiguration.prCommitStatusName,
-            this
-    ) {
-        description(testConfiguration.jobDescription)
-        commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 
240)
-        loadTestsBuilder.loadTest(delegate, testConfiguration.jobDescription, 
testConfiguration.runner, testConfiguration.jobProperties, 
testConfiguration.itClass)
-    }
-}
-
-def smokeTestConfigurations = [
         [
-                title        : 'GroupByKey load test Direct',
+                title        : 'Load test: 2GB of 100B records',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                runner       : CommonTestProperties.Runner.DIRECT,
-                jobProperties: [
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'direct_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_2',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_2',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 20000000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
+                        iterations          : 1,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ],
         [
-                title        : 'GroupByKey load test Dataflow',
+                title        : 'Load test: 2GB of 100kB records',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
                 runner       : CommonTestProperties.Runner.DATAFLOW,
-                jobProperties: [
-                        project          : 'apache-beam-testing',
-                        tempLocation     : 
'gs://temp-storage-for-perf-tests/smoketests',
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'dataflow_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_3',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_3',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 2000,
+                                              "keySizeBytes": 100000,
+                                              "valueSizeBytes": 900000
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
+                        iterations          : 1,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ],
         [
-                title        : 'GroupByKey load test Flink',
+                title        : 'Load test: fanout 4 times with 2GB 10-byte 
records total',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                runner       : CommonTestProperties.Runner.FLINK,
-                jobProperties: [
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'flink_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_4',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_4',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 5000000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 4,
+                        iterations          : 1,
+                        maxNumWorkers       : 16,
+                        numWorkers          : 16,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ],
         [
-                title        : 'GroupByKey load test Spark',
+                title        : 'Load test: fanout 8 times with 2GB 10-byte 
records total',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                runner       : CommonTestProperties.Runner.SPARK,
-                jobProperties: [
-                        sparkMaster      : 'local[4]',
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'spark_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_5',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_5',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 2500000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 8,
+                        iterations          : 1,
+                        maxNumWorkers       : 16,
+                        numWorkers          : 16,
+                        autoscalingAlgorithm: "NONE"
+                ]
+        ],
+        [
+                title        : 'Load test: reiterate 4 times 10kB values',
+                itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_6',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_6',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 20000000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90,
+                                              "numHotKeys": 200,
+                                              "hotKeyFraction": 1
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
+                        iterations          : 4,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
+                ]
+        ],
+        [
+                title        : 'Load test: reiterate 4 times 2MB values',
+                itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_7',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_7',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 20000000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90,
+                                              "numHotKeys": 10,
+                                              "hotKeyFraction": 1
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
+                        iterations          : 4,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ]
 ]
 
+def loadTestJob = { scope ->
+  scope.description('Runs Java GBK load tests on Dataflow runner in batch 
mode')
+  commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 240)
+
+  for (testConfiguration in loadTestConfigurations) {
+    loadTestsBuilder.loadTest(scope, testConfiguration.title, 
testConfiguration.runner, testConfiguration.jobProperties, 
testConfiguration.itClass, CommonTestProperties.TriggeringContext.PR)
 
 Review comment:
   Reminder: merge to master and add the `SDK` enum parameter in the tests.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 208796)
    Time Spent: 15.5h  (was: 15h 20m)

> Create jenkins jobs to run the load tests for Java SDK
> ------------------------------------------------------
>
>                 Key: BEAM-5985
>                 URL: https://issues.apache.org/jira/browse/BEAM-5985
>             Project: Beam
>          Issue Type: Sub-task
>          Components: testing
>            Reporter: Lukasz Gajowy
>            Assignee: Kasia Kucharczyk
>            Priority: Major
>          Time Spent: 15.5h
>  Remaining Estimate: 0h
>
> How, how often, and in which cases we run those tests is yet to be decided
> (this is part of the task).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to