[ 
https://issues.apache.org/jira/browse/BEAM-5985?focusedWorklogId=209490&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-209490
 ]

ASF GitHub Bot logged work on BEAM-5985:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 07/Mar/19 10:32
            Start Date: 07/Mar/19 10:32
    Worklog Time Spent: 10m 
      Work Description: lgajowy commented on pull request #7903: [BEAM-5985] 
Dataflow batch load test jobs
URL: https://github.com/apache/beam/pull/7903#discussion_r263323037
 
 

 ##########
 File path: .test-infra/jenkins/job_LoadTests_Java.groovy
 ##########
 @@ -17,118 +17,213 @@
  */
 
 import CommonJobProperties as commonJobProperties
+import CommonTestProperties
 import LoadTestsBuilder as loadTestsBuilder
 import PhraseTriggeringPostCommitBuilder
+import CronJobBuilder
 
 def loadTestConfigurations = [
         [
-                jobName           : 
'beam_Java_LoadTests_GroupByKey_Dataflow_Small',
-                jobDescription    : 'Runs GroupByKey load tests on Dataflow 
runner small records 10b',
-                itClass           : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                prCommitStatusName: 'Java GroupByKey Small Load Test Dataflow',
-                prTriggerPhrase   : 'Run GroupByKey Small Java Load Test 
Dataflow',
-                runner            : CommonTestProperties.Runner.DATAFLOW,
+                title        : 'Load test: 2GB of 10B records',
+                itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+                runner       : CommonTestProperties.Runner.DATAFLOW,
                 jobProperties     : [
                         project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_1',
                         tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
                         publishToBigQuery   : true,
-                        bigQueryDataset     : 'load_test_PRs',
-                        bigQueryTable       : 'dataflow_gbk_small',
-                        sourceOptions       : 
'{"numRecords":1000000000,"splitPointFrequencyRecords":1,"keySizeBytes":1,"valueSizeBytes":9,"numHotKeys":0,"hotKeyFraction":0,"seed":123456,"bundleSizeDistribution":{"type":"const","const":42},"forceNumInitialBundles":100,"progressShape":"LINEAR","initializeDelayDistribution":{"type":"const","const":42}}',
-                        stepOptions         : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true,"perBundleDelay":10000,"perBundleDelayType":"MIXED","cpuUtilizationInMixedDelay":0.5}',
-                        fanout              : 10,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_1',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 200000000,
+                                              "keySizeBytes": 1,
+                                              "valueSizeBytes": 9
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
                         iterations          : 1,
-                        maxNumWorkers       : 32,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
-
         ],
-]
-
-for (testConfiguration in loadTestConfigurations) {
-    PhraseTriggeringPostCommitBuilder.postCommitJob(
-            testConfiguration.jobName,
-            testConfiguration.prTriggerPhrase,
-            testConfiguration.prCommitStatusName,
-            this
-    ) {
-        description(testConfiguration.jobDescription)
-        commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 
240)
-        loadTestsBuilder.loadTest(delegate, testConfiguration.jobDescription, 
testConfiguration.runner, testConfiguration.jobProperties, 
testConfiguration.itClass)
-    }
-}
-
-def smokeTestConfigurations = [
         [
-                title        : 'GroupByKey load test Direct',
+                title        : 'Load test: 2GB of 100B records',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                runner       : CommonTestProperties.Runner.DIRECT,
-                jobProperties: [
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'direct_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_2',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_2',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 20000000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
+                        iterations          : 1,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ],
         [
-                title        : 'GroupByKey load test Dataflow',
+                title        : 'Load test: 2GB of 100kB records',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
                 runner       : CommonTestProperties.Runner.DATAFLOW,
-                jobProperties: [
-                        project          : 'apache-beam-testing',
-                        tempLocation     : 
'gs://temp-storage-for-perf-tests/smoketests',
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'dataflow_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_3',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_3',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 2000,
+                                              "keySizeBytes": 100000,
+                                              "valueSizeBytes": 900000
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 1,
+                        iterations          : 1,
+                        maxNumWorkers       : 5,
+                        numWorkers          : 5,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ],
         [
-                title        : 'GroupByKey load test Flink',
+                title        : 'Load test: fanout 4 times with 2GB 10-byte 
records total',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                runner       : CommonTestProperties.Runner.FLINK,
-                jobProperties: [
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'flink_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_4',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_4',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 5000000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 4,
+                        iterations          : 1,
+                        maxNumWorkers       : 16,
+                        numWorkers          : 16,
+                        autoscalingAlgorithm: "NONE"
                 ]
         ],
         [
-                title        : 'GroupByKey load test Spark',
+                title        : 'Load test: fanout 8 times with 2GB 10-byte 
records total',
                 itClass      : 
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
-                runner       : CommonTestProperties.Runner.SPARK,
-                jobProperties: [
-                        sparkMaster      : 'local[4]',
-                        publishToBigQuery: true,
-                        bigQueryDataset  : 'load_test_SMOKE',
-                        bigQueryTable    : 'spark_gbk',
-                        sourceOptions    : 
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
-                        stepOptions      : 
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
-                        fanout           : 10,
-                        iterations       : 1,
+                runner       : CommonTestProperties.Runner.DATAFLOW,
+                jobProperties     : [
+                        project             : 'apache-beam-testing',
+                        appName             : 
'load_tests_Java_Dataflow_Batch_GBK_5',
+                        tempLocation        : 
'gs://temp-storage-for-perf-tests/loadtests',
+                        publishToBigQuery   : true,
+                        bigQueryDataset     : 'load_test',
+                        bigQueryTable       : 'java_dataflow_batch_GBK_5',
+                        sourceOptions       : """
+                                            {
+                                              "numRecords": 2500000,
+                                              "keySizeBytes": 10,
+                                              "valueSizeBytes": 90
+                                            }
+                                       """.trim().replaceAll("\\s", ""),
+                        fanout              : 8,
+                        iterations          : 1,
+                        maxNumWorkers       : 16,
+                        numWorkers          : 16,
 
 Review comment:
   Yes. The idea is to run fanout 8 with twice as many records as for fanout 4 
and see if the execution time is the same. I did some runs and confirmed that 
this is what happens on Dataflow. 
   
   Thanks for staying alert, Kasia. :)
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 209490)
    Time Spent: 15h 50m  (was: 15h 40m)

> Create jenkins jobs to run the load tests for Java SDK
> ------------------------------------------------------
>
>                 Key: BEAM-5985
>                 URL: https://issues.apache.org/jira/browse/BEAM-5985
>             Project: Beam
>          Issue Type: Sub-task
>          Components: testing
>            Reporter: Lukasz Gajowy
>            Assignee: Kasia Kucharczyk
>            Priority: Major
>          Time Spent: 15h 50m
>  Remaining Estimate: 0h
>
> How/how often/in what cases we run those tests is yet to be decided (this is 
> part of the task)



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to