upgrade new version Author: ahutsunshine <ahutsunsh...@gmail.com> Author: He Wang <wanghe...@qq.com>
Closes #185 from ahutsunshine/master. Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-griffin/commit/e7e4c3a7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-griffin/tree/e7e4c3a7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-griffin/diff/e7e4c3a7 Branch: refs/heads/master Commit: e7e4c3a76cf6b66aee8ea46639de8e5210ec0a59 Parents: 9c586ee Author: ahutsunshine <ahutsunsh...@gmail.com> Authored: Tue Jan 9 17:43:16 2018 +0800 Committer: Lionel Liu <bhlx3l...@163.com> Committed: Tue Jan 9 17:43:16 2018 +0800 ---------------------------------------------------------------------- griffin-doc/Testcase.md | 78 - griffin-doc/docker/griffin-docker-guide.md | 105 + griffin-doc/docker/measure-demo-docker.md | 63 + griffin-doc/dockerUIguide.md | 50 - griffin-doc/dsl-guide.md | 181 -- griffin-doc/griffin-docker-guide.md | 105 - griffin-doc/measure-batch-sample.md | 140 - griffin-doc/measure-configuration-guide.md | 211 -- griffin-doc/measure-demo-docker.md | 63 - griffin-doc/measure-streaming-sample-old.md | 204 -- griffin-doc/measure/dsl-guide.md | 181 ++ griffin-doc/measure/measure-batch-sample.md | 140 + .../measure/measure-configuration-guide.md | 211 ++ .../measure/measure-streaming-sample-old.md | 204 ++ griffin-doc/measure/measures.md | 173 ++ griffin-doc/measures.md | 173 -- griffin-doc/postman/griffin.json | 2642 ------------------ griffin-doc/postman/griffin_environment.json | 16 - griffin-doc/service/postman/griffin.json | 2635 +++++++++++++++++ .../service/postman/griffin_environment.json | 16 + griffin-doc/ui/dockerUIguide.md | 50 + griffin-doc/ui/test-case.md | 78 + griffin-doc/ui/user-guide.md | 193 ++ griffin-doc/userguide.md | 193 -- service/pom.xml | 23 +- .../apache/griffin/core/config/CacheConfig.java | 14 + .../griffin/core/config/PropertiesConfig.java | 96 + .../griffin/core/config/SchedulerConfig.java | 60 + .../AutowiringSpringBeanJobFactory.java | 54 - 
.../core/config/jobConfig/SchedulerConfig.java | 59 - .../core/config/jobConfig/SparkJobConfig.java | 34 - .../griffin/core/job/FileExistPredicator.java | 67 + .../apache/griffin/core/job/JobController.java | 43 +- .../apache/griffin/core/job/JobInstance.java | 282 ++ .../org/apache/griffin/core/job/JobService.java | 20 +- .../apache/griffin/core/job/JobServiceImpl.java | 629 +++-- .../org/apache/griffin/core/job/Predicator.java | 26 + .../apache/griffin/core/job/SparkSubmitJob.java | 331 +-- .../griffin/core/job/entity/AbstractJob.java | 88 + .../griffin/core/job/entity/GriffinJob.java | 79 + .../griffin/core/job/entity/JobDataBean.java | 98 + .../griffin/core/job/entity/JobDataSegment.java | 81 + .../griffin/core/job/entity/JobHealth.java | 2 + .../griffin/core/job/entity/JobInstance.java | 111 - .../core/job/entity/JobInstanceBean.java | 156 ++ .../griffin/core/job/entity/JobRequestBody.java | 114 - .../griffin/core/job/entity/JobSchedule.java | 188 ++ .../griffin/core/job/entity/LivyConf.java | 148 + .../core/job/entity/LivySessionStates.java | 27 +- .../core/job/entity/SegmentPredicate.java | 81 + .../griffin/core/job/entity/SegmentRange.java | 53 + .../griffin/core/job/entity/SparkJobDO.java | 148 - .../griffin/core/job/entity/VirtualJob.java | 34 + .../factory/AutowiringSpringBeanJobFactory.java | 54 + .../core/job/factory/PredicatorFactory.java | 38 + .../griffin/core/job/repo/GriffinJobRepo.java | 25 + .../core/job/repo/JobDataSegmentRepo.java | 26 + .../griffin/core/job/repo/JobInstanceRepo.java | 40 +- .../apache/griffin/core/job/repo/JobRepo.java | 40 + .../griffin/core/job/repo/JobScheduleRepo.java | 26 + .../griffin/core/job/repo/VirtualJobRepo.java | 25 + .../measure/ExternalMeasureOperationImpl.java | 102 + .../measure/GriffinMeasureOperationImpl.java | 114 + .../griffin/core/measure/MeasureController.java | 8 +- .../griffin/core/measure/MeasureOperation.java | 34 + .../core/measure/MeasureOrgController.java | 10 - 
.../griffin/core/measure/MeasureOrgService.java | 2 +- .../core/measure/MeasureOrgServiceImpl.java | 22 +- .../griffin/core/measure/MeasureService.java | 3 +- .../core/measure/MeasureServiceImpl.java | 96 +- .../core/measure/entity/DataConnector.java | 100 +- .../griffin/core/measure/entity/DataSource.java | 11 +- .../core/measure/entity/EvaluateRule.java | 3 +- .../core/measure/entity/ExternalMeasure.java | 71 + .../core/measure/entity/GriffinMeasure.java | 115 + .../griffin/core/measure/entity/Measure.java | 85 +- .../griffin/core/measure/entity/Rule.java | 40 +- .../core/measure/repo/DataConnectorRepo.java | 9 +- .../core/measure/repo/EvaluateRuleRepo.java | 1 - .../core/measure/repo/ExternalMeasureRepo.java | 25 + .../core/measure/repo/GriffinMeasureRepo.java | 25 + .../griffin/core/measure/repo/MeasureRepo.java | 19 +- .../hive/HiveMetaStoreServiceImpl.java | 32 +- .../griffin/core/metric/MetricController.java | 39 +- .../griffin/core/metric/MetricService.java | 15 +- .../griffin/core/metric/MetricServiceImpl.java | 71 +- .../apache/griffin/core/metric/MetricStore.java | 33 + .../griffin/core/metric/MetricStoreImpl.java | 90 + .../griffin/core/metric/model/Metric.java | 82 + .../griffin/core/metric/model/MetricValue.java | 64 + .../org/apache/griffin/core/util/FSUtil.java | 161 ++ .../org/apache/griffin/core/util/JsonUtil.java | 31 +- .../griffin/core/util/PropertiesUtil.java | 17 +- .../org/apache/griffin/core/util/TimeUtil.java | 119 + .../src/main/resources/Init_quartz_derby.sql | 187 ++ .../src/main/resources/application.properties | 34 +- service/src/main/resources/sparkJob.properties | 23 +- .../griffin/core/job/JobControllerTest.java | 311 +-- .../core/job/JobInstanceBeanRepoTest.java | 87 + .../griffin/core/job/JobInstanceRepoTest.java | 107 - .../griffin/core/job/JobServiceImplTest.java | 770 +++-- .../griffin/core/job/SparkSubmitJobTest.java | 121 +- .../core/measure/MeasureControllerTest.java | 401 ++- .../core/measure/MeasureOrgControllerTest.java 
| 22 - .../core/measure/MeasureOrgServiceImplTest.java | 196 +- .../core/measure/MeasureServiceImplTest.java | 124 +- .../griffin/core/measure/MeasureTestHelper.java | 26 +- .../core/measure/repo/MeasureRepoTest.java | 170 +- .../core/metric/MetricControllerTest.java | 65 - .../core/metric/MetricServiceImplTest.java | 62 - .../griffin/core/util/GriffinUtilTest.java | 12 +- .../apache/griffin/core/util/TimeUtilTest.java | 27 + 112 files changed, 9216 insertions(+), 6703 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/Testcase.md ---------------------------------------------------------------------- diff --git a/griffin-doc/Testcase.md b/griffin-doc/Testcase.md deleted file mode 100644 index 3b703eb..0000000 --- a/griffin-doc/Testcase.md +++ /dev/null @@ -1,78 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> -#Functional Test cases - - -|TestCase ID|Test Page|Test Case Description|Test Case Steps|Test Data|Expected Result|Actual Result|Test Result|Jira Bug ID| -|---|---|---|---|---|---|---|---|---| -|0101|login page|invalid corp id - check user cannot login the system with invalid corp id.|1. Input invalid corp id.<br>2. 
Input password.<br>3. click 'log in' button.||1. login failed.||Passed|| -|0102|login page|invalid password - check user cannot login the system with invalid password.|1. input valid corp id.<br>2.input invalid password<br>3.click 'log in' button.||1. login failed.||Passed|| -|0103|login page|valid corp id and passoword - check user can login the system with valid corp id and password.|1. Input the corp id and password.<br>2 click 'log in' button.||1. login succesfully||Passed|| -|0104|login page|remember password|1. Input the corp id and password.<br>2. select 'remember password'.<br>3.click 'log in' button.<br>4. close the brower.<br>5. open the brower again.<br>6. visit the griffin page.||1.the id and password are valid.<br>2.'remember password' is checked.<br>3.logged in the griffin homepage.<br>4.the brower is closed.<br>5.the brower is reopened.<br>6.the griffin homepage is opened, instead of the login page.||Passed|| -|0105|login page|not remember password|1. Input the corp id and password.<br>2. unselect 'remember password'.<br>3.click 'log in' button.<br>4. close the brower.<br>5. open the brower again.<br>6. visit the griffin page.||1.the id and password are valid.<br>2.'remember password' is unchecked.<br>3.logged in the griffin homepage.<br>4.the brower is closed.<br>5.the brower is reopened.<br>6.the login page is opened.||Passed|| -|0201|main page|menu bar - check all links in the menu work.|1. click 'health'.<br>2. click 'models'.<br>3.click 'Data profiling'.<br>4. click your username -> 'API Docs'.||1.show 'health' page.<br>2.show 'models' page.<br>3.show 'data profiling' page<br>4.open new page for API page.||Passed|| -|0202|main page|menu bar - search|1.input a word in the search box.<br>2.do search.||1. show search result.|unimplemented||| -|0203|main page|menu bar - user profile|1. click username -> 'user profile'||1. show user profile page|unimplemented||| -|0204|main page|menu bar - setting|1. click username -> 'setting'||1. 
show setting page.|unimplemented||| -|0205|main page|right side - DataAssets|1. click '*** DataAssets' link||1.show the data assets page.||Passed|| -|0206|main page|right side - DQ Metrics|1. click '*** DQ Metrics' link.||1. show DQ Metrics page||Passed|| -|0207|main page|right side - health percentage |1. check the pie for the health percentage.||1. show the health percentage.||Passed|| -|0208|main page|right side - issue tracking|1. click 'issue tracking'||1. show 'issue tracking' page|unimplemented||| -|0209|main page|right side - statistics for the DQ data.|1. check the DQ data with the name, last updated time, and the data quality.<br>2. show more for one item, check the dq trend chart. <br>3. click the chart.<br>4. close the zoomed-in chart.||1.show all the dq data.<br>2.show the latest dq trend chart for the item.<br>3.the dq chart is zoomed in.<br>4.the zoomed-in chart is closed.||Passed|| -|0210|main page|right side - report issue.|1. click 'Report issue'||1. open the jira page.||Passed|| -|0301|health page|heatmap|1. open 'heatmap' tab.<br>2. check the data quality metrics heatmap.<br>3. click inside the heatmap.||1.show the heatmap.<br>2.all the data are shown successfully.<br>3.show the metrics page.||Passed|| -|0302|health page|Topology|1. open 'Topology' tab.<br>2. check the data.||1. show topology.|unimplemented||| -|0303|health page|check the UI layout when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1. display the page correctly.||Passed|| -|0401|metrics page|check metrics data|1. check the dq charts for the metrics.<br>2. click one chart.||1. all the data in the dq charts are correct.<br>2. the chart is zoomed in.||Passed|| -|0402|metrics page|Download Sample|1. click 'download sample'.||1. the sample is downloaded to the local path.|unimplemented||| -|0403|metrics page|Profiling|1. click 'profiling'||1. 
show 'profiling'|unimplemented||| -|0404|metrics page|check the UI layout when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1. display the page correctly.||Passed|| -|0501|models page|check the models data|1. check all the columns are correct or not.<br>2. click one model name.||1. all the data are correct.<br>2. show more information of the model.||Passed|| -|0502|models page|edit model|1. click 'edit' icon.||1. open the edit page.|unimplemented||| -|0503|models page|delete model|1. click 'delete' icon for one model.<br>2. confirm to delete the model.||1. open delete confirmation page.<br>2. the model is removed from the models table.||Passed|| -|0504|models page|subscribe|1. click 'subscribe' icon for one model.||1. open subscribe page|unimplemented||| -|0505|models page|table paging|1. click other pages in the models table.||1.all the data in other pages are show correctly.||Passed|| -|0506|models page|create DQ model|1. click 'create DQ model' button||1. open 'create DQ model' page.||Passed|| -|0507|models page|check the UI layout when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1. display the page correctly.||Passed|| -|0601|create dq model - accuracy|create accuracy|1. click 'models' -> 'create DQ model' -> 'Accuracy'<br>2.choose the source. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3.select one or more attributes, e.g. uid, site_id.<br>4. click 'next'.<br>5. choose the target. Select a schema, e.g. 'appollo -> Bullseye -> adchoice_user_pref'.<br>6. select one or more attributes, e.g. user_id, scope.<br>7. click 'next'.<br>8. select a primary key, e.g. Bullseye.achoice_user_pref.user_id.<br>9. select 'Map To' exactly.<br>10. select a source field for each target.<br>11. click 'next'.<br>12. input the required information, e.g. model name 'atest', notification email 'a...@ebay.com'.<br>13.click 'submit'.<br>14. 
confirm to save.|source schema: 'apollo -> Sojorner -> sog_search_event'.<br>Source attributes: uid, site_id.<br>target schema: 'appollo -> Bullseye -> adchoice_user_pref'.<br>target attributes, e.g. user_id, scope.<br>primary key: Bullseye.achoice_user_pref.user_id.< br>model name: 'atest', <br>notification email: 'a...@ebay.com'.|1. open 'create accuracy' page.<br>2. the source shcema is selected. The corresponding attributes are shown in the attributes table.<br>3. the source attributes are selected.<br>4. go to 'choose target' step.<br>5. the target schema is selected. The corresponding attributes are shown in the attributes table.<br>6. the target attributes are selected.<br>7. go to 'mapping source and target' step.<br>8. the PK is selected.<br>9. exactly map to the source.<br>10. the source field is selected for each target.<br>11. go to 'configuration' step.<br>12. the required info are input correctly.<br>13. open a confirmation page.<br>14. the new model 'atest' is created. It is shown in the models table||Passed|| -|0602|create dq model - accuracy|show error message if no source attribute is selected.|1. click 'models' -> 'create DQ model' -> 'Accuracy'.<br>2. click 'next'||1. open 'create accuracy' page.<br>2. show error message to select at least one attribute.||Passed|| -|0603|create dq model - accuracy|show error message if no target attribute is selected.|1. click 'models' -> 'create DQ model' -> 'Accuracy'<br>2.choose the source. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3.select one or more attributes, e.g. uid, site_id.<br>4. click 'next'.<br>5. in the 'target' step, click 'next'.|source schema: 'apollo -> Sojorner -> sog_search_event'.<br>Source attributes: uid, site_id.|"1. open 'create accuracy' page.<br>2. the source shcema is selected. The corresponding attributes are shown in the attributes table.<br>3. the source attributes are selected.<br>4. go to 'choose target' step.<br>5. 
show error message to select at least one attribute.||Passed|| -|0604|create dq model - accuracy|show error message if 'map fields' is not set.|1. click 'models' -> 'create DQ model' -> 'Accuracy'<br>2.choose the source. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3.select one or more attributes, e.g. uid, site_id.<br>4. click 'next'.<br>5. choose the target. Select a schema, e.g. 'appollo -> Bullseye -> adchoice_user_pref'.<br>6. select one or more attributes, e.g. user_id, scope.<br>7. click 'next'.<br>8. no selection. click 'next'.<br>9. select a primary key. click 'next'.|source schema: 'apollo -> Sojorner -> sog_search_event'.<br>Source attributes: uid, site_id.<br>target schema: 'appollo -> Bullseye -> adchoice_user_pref'.<br>target attributes, e.g. user_id, scope.<br>primary key: Bullseye.achoice_user_pref.user_id.|1. open 'create accuracy' page.<br>2. the source shcema is selected. The corresponding attributes are shown in the attributes table.<br>3. the source attributes are selected.<br>4. go to 'choose target' s tep.<br>5. the target schema is selected. The corresponding attributes are shown in the attributes table.<br>6. the target attributes are selected.<br>7. go to 'mapping source and target' step.<br>8. no PK is selected.<br>9. show error message.||Passed|| -|0605|create dq model - accuracy|show error if the configuration is invalid|1. click 'models' -> 'create DQ model' -> 'Accuracy'<br>2.choose the source. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3.select one or more attributes, e.g. uid, site_id.<br>4. click 'next'.<br>5. choose the target. Select a schema, e.g. 'appollo -> Bullseye -> adchoice_user_pref'.<br>6. select one or more attributes, e.g. user_id, scope.<br>7. click 'next'.<br>8. select a primary key, e.g. Bullseye.achoice_user_pref.user_id.<br>9. select 'Map To' exactly.<br>10. select a source field for each target.<br>11. click 'next'.<br>12. input invalid value for each field, e.g. 
model name 'a test', notification email 'aa'.|source schema: 'apollo -> Sojorner -> sog_search_event'.<br>Source attributes: uid, site_id.<br>target schema: 'appollo -> Bullseye -> adchoice_user_pref'.<br>target attributes, e.g. user_id, scope.<br>primary key: Bullseye.achoice_user_pref.user_id.<br>model name: 'a test' , <br>notification email: 'aa'.|1. open 'create accuracy' page.<br>2. the source shcema is selected. The corresponding attributes are shown in the attributes table.<br>3. the source attributes are selected.<br>4. go to 'choose target' step.<br>5. the target schema is selected. The corresponding attributes are shown in the attributes table.<br>6. the target attributes are selected.<br>7. go to 'mapping source and target' step.<br>8. the PK is selected.<br>9. exactly map to the source.<br>10. the source field is selected for each target.<br>11. go to 'configuration' step.<br>12. show error for invalid value.||Passed|| -|0606|create dq model - accuracy|check the link to add new data asset.|1. click the link for adding new data asset.||1. go to the 'register data asset' page.||Passed|| -|0607|create dq model - accuracy|check the UI layout for all the steps when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1.all the steps in the page can be shown correctly.||Passed|| -|0701|create dq model - validity|check dq model with validity type can be created.|1. click 'models' -> 'create DQ model' -> Validity<br>2.choose the target. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3.select one attribute, e.g. uid.<br>4. click 'next'.<br>5. choose one validity model, e.g. unique count.<br>6. click 'next'.<br>7. input valid value for the configuration fields. e.g. model name 'avalidity', email 'a...@b.com'.<br>8. click 'submit'<br>9. click 'save'.|schema: 'apollo -> Sojorner -> sog_search_event'.<br>Attribute: uid.<br>validity model: unique count.<br>model name: 'a validity', <br>email: 'aa'.|1. 
open 'create validity' page.<br>2. the target schem is selected. The corresponding attributes are shown in the attributes table.<br>3. the attribute is selected.<br>4. go to 'select model' page.<br>5. the validity model is selected. The description of the model is shown as well.<br>6. go to 'configuration' step.<br>7. all the value are valid.<br>8. op en a confirmation page.<br>9. the new model 'avalidity' is created successfully. it is shown in the models page.||Passed|| -|0702|create dq model - validity|show error if no target is selected.|1. click 'models' -> 'create DQ model' -> Validity<br>2. not choose the target.<br>3. click 'next'.||1. open 'create validity' page.<br>2. no target schem is selected.<br>3. show error.||Passed|| -|0703|create dq model - validity|show error if any field is invalid.|1. click 'models' -> 'create DQ model' -> Validity<br>2.choose the target. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3.select one attribute, e.g. uid.<br>4. click 'next'.<br>5. choose one validity model, e.g. unique count.<br>6. click 'next'.<br>7. input invalid value for the configuration fields.|schema: 'apollo -> Sojorner -> sog_search_event'.<br>validity model: unique count.<br>Attribute: uid.<br>model name: 'a validity', <br>email: 'aa'.|1. open 'create validity' page.<br>2. the target schem is selected. The corresponding attributes are shown in the attributes table.<br>3. the attribute is selected.<br>4. go to 'select model' page.<br>5. the validity model is selected. The description of the model is shown as well.<br>6. go to 'configuration' step.<br>7. show error for the invalid value.||Passed|| -|0704|create dq model - validity|check the UI layout for all the steps when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1.all the steps in the page can be shown correctly.||Passed|| -|0801|create dq model - anomaly detection|check the dq model with anomaly detection can be created.|1. 
click 'models' -> 'create DQ model' -> Validity<br>2.choose the target. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3. click 'next'.<br>4. choose one statistical techniques, e.g. bollinger bands detection.<br>5. click 'next'.<br>6. input valid value for the configuration fields. e.g. model name 'anomaly', email 'a...@b.com'.<br>7. click 'submit'<br>8. click 'save'.|schema: 'apollo -> Sojorner -> sog_search_event'.<br>statistical techniques: bollinger bands detection.<br>model name 'anomaly', <br>email 'a...@b.com'.|1. open 'create validity' page.<br>2. the target schem is selected. The corresponding attributes are shown in the attributes table.<br>3. go to 'select model' page.<br>4. the validity model is selected. The description of the model is shown as well.<br>5. go to 'configuration' step.<br>6. all the value are valid.<br>7. open a confirmation page.<br>8. t wo new models, 'anomaly' with 'anomaly detection' type, and 'Count_anomaly_1' with 'validity' type are created successfully. They are shown in the models page.||Passed|| -|0802|create dq model - anomaly detection|show error if no target is selected.|1. click 'models' -> 'create DQ model' -> Validity<br>2. not choose the target.<br>3. click 'next'.||1. open 'create validity' page.<br>2. no target schem is selected.<br>3. show error.||Passed|| -|0803|create dq model - anomaly detection|show error if any field is invalid.|1. click 'models' -> 'create DQ model' -> Validity<br>2.choose the target. Select a schema, e.g. 'apollo -> Sojorner -> sog_search_event'.<br>3. click 'next'.<br>4. choose one statistical techniques, e.g. bollinger bands detection.<br>5. click 'next'.<br>6. input invalid value for the configuration fields.|schema: 'apollo.Sojorner. sog_search_event'<br>model name: 'a nomaly', <br>email: 'aa'.|1. open 'create validity' page.<br>. the target schem is selected. The corresponding attributes are shown in the attributes table.<br>3. go to 'select model' page.<br>4. 
the validity model is selected. The description of the model is shown as well.<br>5. go to 'configuration' step.<br>6. show error for the invalid value.||Passed|| -|0804|create dq model - anomaly detection|check the UI layout for all the steps when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1.all the steps in the page can be shown correctly.||Passed|| -|0901|create dq model - publish DQ data directly|check the dq model with publish type can be created.|1. click 'models' -> 'create DQ model' -> publish DQ data directly.<br>2.input valid value for the configuration fields.<br>3. click 'submit'<br>4. click 'save'.|model name 'apu', <br>organization 'hadoop', <br>email 'a...@b.com'.|1. open 'create validity' page.<br>2. all the value are valid.<br>3. open a confirmation page.<br>4. the new model 'apu' is created successfully. It is shown in the models page.||Passed|| -|0902|create dq model - publish DQ data directly|show error if any field is invalid.|1. click 'models' -> 'create DQ model' -> publish DQ data directly.<br>2.input invalid value for the configuration fields. |model name 'a pu', email 'aa'.|1. open 'create validity' page.<br>2. show error for the invalid value.||Passed|| -|0903|create dq model - publish DQ data directly|check the UI layout for all the steps when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1.all the steps in the page can be shown correctly.||Passed|| -|1001|data assets|check the data assets information|1. check all the columns are correct or not.<br>2. show more for an asset.||1. all the data are correct.<br>2. show the schemas of the asset.||Passed|| -|1002|data assets|edit asset|1. click 'edit' icon for an asset, e.g. 'abc'.<br>2. edit the schema description and sample.<br>3. click 'submit'.<br>4. confirm to save.<br>5. in the asset table, show more for the asset 'abc'.||1. open the edit page.<br>2. the schema description and sample are valid.<br>3. 
open a confirmation page.<br>4. the asset info are saved.<br>5. the schema info are updated.||Passed|| -|1003|data assets|delete asset|1. click 'delete' icon for an asset, e.g. 'abc'.<br>2. confirm to delete the asset.||1. open delete confirmation page.<br>2. the asset is removed from the table.||Passed|| -|1004|data assets|table paging|1. click other pages in the table.||1.all the data in other pages are show correctly.||Passed|| -|1005|data assets|check the UI layout when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1. display the page correctly.||Passed|| -|1101|register data asset|check data asset can be registered.|1. click 'register data asset' in the 'data assets' page.<br>2. input valid value.<br>3. click 'submit'.<br>4. confirm to save.|asset name: 'atest', <br>type: 'hdfsfile',<br>HDFS path: '/var', <br>data folder pattern: '16-06-01',<br>platform: 'Apollo',<br>organization: 'GPS',<br>schema: name 'dmg', type 'string'|1. open 'register data asset' page.<br>2. all the value are valid.<br>3. open a confirmation page.<br>4. the new asset is registered successfully. It is shown in the assets table.||Passed|| -|1102|register data asset|show error if any field is invalid.|1. click 'register data asset' in the 'data assets' page.<br>2. input some invalid value.<br>3. click 'submit'.|asset name: 'a test', <br>type: 'hdfsfile',<br>HDFS path: '/var', <br>data folder pattern: '16-06-01',<br>platform: 'Apollo',<br>organization: null,<br>schema: name 'dmg', type 'string',|1. open 'register data asset' page.<br>2. some value are invalid.<br>3. show error for the invalid value.||Passed|| -|1103|register data asset|check the UI layout when the page is zoomed in and out.|1.zoom in the page.<br>2.zoom out the page.||1. 
display the page correctly.||Passed|| http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/docker/griffin-docker-guide.md ---------------------------------------------------------------------- diff --git a/griffin-doc/docker/griffin-docker-guide.md b/griffin-doc/docker/griffin-docker-guide.md new file mode 100644 index 0000000..1fb5980 --- /dev/null +++ b/griffin-doc/docker/griffin-docker-guide.md @@ -0,0 +1,105 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Apache Griffin Docker Guide +Griffin docker images are pre-built on docker hub, users can pull them to try griffin in docker. + +## Preparation + +### Environment preparation +1. Install [docker](https://docs.docker.com/engine/installation/) and [docker compose](https://docs.docker.com/compose/install/). +2. Increase vm.max_map_count of your local machine, to use elasticsearch. + ``` + sysctl -w vm.max_map_count=262144 + ``` +3. Pull griffin pre-built docker images. + ``` + docker pull bhlx3lyx7/svc_msr:0.1.6 + docker pull bhlx3lyx7/elasticsearch + docker pull bhlx3lyx7/kafka + docker pull zookeeper:3.5 + ``` + Or you can pull the images faster through mirror acceleration if you are in China. 
+ ``` + docker pull registry.docker-cn.com/bhlx3lyx7/svc_msr:0.1.6 + docker pull registry.docker-cn.com/bhlx3lyx7/elasticsearch + docker pull registry.docker-cn.com/bhlx3lyx7/kafka + docker pull registry.docker-cn.com/zookeeper:3.5 + ``` + The docker images are the griffin environment images. + - `bhlx3lyx7/svc_msr`: This image contains mysql, hadoop, hive, spark, livy, griffin service, griffin measure, and some prepared demo data, it works as a single node spark cluster, providing spark engine and griffin service. + - `bhlx3lyx7/elasticsearch`: This image is based on official elasticsearch, adding some configurations to enable cors requests, to provide elasticsearch service for metrics persist. + - `bhlx3lyx7/kafka`: This image contains kafka 0.8, and some demo streaming data, to provide streaming data source in streaming mode. + - `zookeeper:3.5`: This image is official zookeeper, to provide zookeeper service in streaming mode. + +### How to use griffin docker images in batch mode +1. Copy [docker-compose-batch.yml](https://github.com/apache/incubator-griffin/blob/master/griffin-doc/docker/svc_msr/docker-compose-batch.yml) to your work path. +2. In your work path, start docker containers by using docker compose, wait for about one minutes, then griffin service is ready. + ``` + docker-compose -f docker-compose-batch.yml up -d + ``` +3. Now you can try griffin APIs by using postman after importing the [json files](https://github.com/apache/incubator-griffin/blob/master/griffin-doc/postman). + In which you need to modify the environment `BASE_PATH` value into `<your local IP address>:38080`. +4. You can try the api `Basic -> Get griffin version`, to make sure griffin service has started up. +5. Add an accuracy measure through api `Measures -> Add measure`, to create a measure in griffin. +6. Add a job to through api `jobs -> Add job`, to schedule a job to execute the measure. In the example, the schedule interval is 5 minutes. +7. 
After some minutes, you can get the metrics from elasticsearch. + ``` + curl -XGET '<your local IP address>:39200/griffin/accuracy/_search?pretty&filter_path=hits.hits._source' -d '{"query":{"match_all":{}}, "sort": [{"tmst": {"order": "asc"}}]}' + ``` + +### How to use griffin docker images in streaming mode +1. Copy [docker-compose-streaming.yml](https://github.com/apache/incubator-griffin/blob/master/griffin-doc/docker/svc_msr/docker-compose-streaming.yml) to your work path. +2. In your work path, start docker containers by using docker compose, wait for about one minutes, then griffin service is ready. + ``` + docker-compose -f docker-compose-streaming.yml up -d + ``` +3. Enter the griffin docker container. + ``` + docker exec -it griffin bash + ``` +4. Switch into the measure directory. + ``` + cd ~/measure + ``` +5. Execute the script of streaming-accu, to execute streaming accuracy measurement. + ``` + ./streaming-accu.sh + ``` + You can trace the log in streaming-accu.log. + ``` + tail -f streaming-accu.log + ``` +6. Limited by the docker container resource, you can only execute accuracy or profiling separately. + If you want to try streaming profiling measurement, please kill the streaming-accu process first. + ``` + kill -9 `ps -ef | awk '/griffin-measure/{print $2}'` + ``` + Then clear the checkpoint directory and other related directories of last streaming job. + ``` + ./clear.sh + ``` + Execute the script of streaming-prof, to execute streaming profiling measurement. + ``` + ./streaming-prof.sh + ``` + You can trace the log in streaming-prof.log. 
+ ``` + tail -f streaming-prof.log + ``` \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/docker/measure-demo-docker.md ---------------------------------------------------------------------- diff --git a/griffin-doc/docker/measure-demo-docker.md b/griffin-doc/docker/measure-demo-docker.md new file mode 100644 index 0000000..bdda030 --- /dev/null +++ b/griffin-doc/docker/measure-demo-docker.md @@ -0,0 +1,63 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Griffin Measure Demo Docker +We've prepared a docker for griffin measure demo. + +## Preparation +1. Install [docker](https://docs.docker.com/engine/installation/). +2. Download docker image. In this image, the environment for measure module has been prepared, including: hadoop, hive, spark, mysql. +``` +docker pull bhlx3lyx7/griffin_measure_demo:0.0.1 +``` +3. Run docker image. +``` +docker run -it -h griffin --name griffin_measure_demo -m 8G --memory-swap -1 \ +-p 42122:2122 -p 47077:7077 -p 46066:6066 -p 48088:8088 -p 48040:8040 \ +-p 43306:3306 -p 49000:9000 -p 48042:8042 -p 48080:8080 -p 47017:27017 \ +-p 49083:9083 -p 48998:8998 -p 49200:9200 bhlx3lyx7/griffin_measure_demo:0.0.1 +``` +4. 
In this docker container, run the prepared demo. +- **accuracy demo**: This demo is batch accuracy, source data is Hive table "demo_src", target data is Hive table "demo_tgt", metrics will be persisted in `hdfs:///griffin/persist/accu` after calculation. + + switch into `job/accu`. + ``` + cd job/accu + ``` + + run the prepared script. + ``` + ./bgwork.sh + ``` + + check job log. + ``` + tail -f accu.log + ``` +- **profiling demo**: This demo is batch profiling, source data is Hive table "demo_src", metrics will be persisted in `hdfs:///griffin/persist/prof` after calculation. + + switch into `job/prof`. + ``` + cd job/prof + ``` + + run the prepared script. + ``` + ./bgwork.sh + ``` + + check job log. + ``` + tail -f prof.log + ``` +5. You can modify the job configuration file `config.json` of the above demos, or create your own data sources, to get more metrics of data. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/dockerUIguide.md ---------------------------------------------------------------------- diff --git a/griffin-doc/dockerUIguide.md b/griffin-doc/dockerUIguide.md deleted file mode 100644 index 2d434e3..0000000 --- a/griffin-doc/dockerUIguide.md +++ /dev/null @@ -1,50 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. 
See the License for the -specific language governing permissions and limitations -under the License. ---> -## Docker webUI Guide - -### Preparatory work - -Follow the steps [here](https://github.com/apache/incubator-griffin#how-to-run-in-docker), prepare your docker container of griffin, and get your webUI ready. - -### webUI test case guide - -1. Click "Data Assets" at the top right corner, to watch all the exist data assets. - In docker, we've prepared two data asset in Hive, through this page, you can see all the table metadata in Hive. - -2. Click "Measures" button at the top left corner to watch all the measures here, and you can also create a new DQ measurement by following steps. - 1) Click "Create Measure" button at the top left corner, choose the top left block "Accuracy", at current we only support accuracy type. - 2) Choose Source: find "demo_src" in the left tree, select some or all attributes in the right block, click "Next". - 3) Choose Target: find "demo_tgt" in the left tree, select the matching attributes with source data asset in the right block, click "Next". - 4) Mapping Source and Target: select "Source Fields" of each row, to match the corresponding field in target table, e.g. id maps to id, age maps to age, desc maps to desc. - Finish all the mapping, click "Next". - 5) Fill out the required table as required, "Organization" is the group of this measurement. - Submit and save, you can see your new DQ measurement created in the measures list. - -3. Now you've created a new DQ measurement, the measurement needs to be scheduled to run in the docker container. Click "Jobs" button to watch all the jobs here, at current there is no job, you need to create a new one. - Click "Create Job" button at the top left corner, fill out all the blocks as below. 
- ``` - "Source Partition": YYYYMMdd-HH - "Target Partition": YYYYMMdd-HH - "Measure Name": <choose the measure you just created> - "Start After(s)": 0 - "Interval": 300 - ``` - The source and target partition means the partition pattern of the demo data, which is based on timestamp, "Start After(s)" means the job will start after n seconds, "Interval" is the interval of job, the unit is second. In the example above, the job will run every 5 minutes. - - Wait for about 1 minute, after the calculation, results would be published to web UI, then you can watch the dashboard by clicking "DQ Metrics" at the top right corner. http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/dsl-guide.md ---------------------------------------------------------------------- diff --git a/griffin-doc/dsl-guide.md b/griffin-doc/dsl-guide.md deleted file mode 100644 index e7f8569..0000000 --- a/griffin-doc/dsl-guide.md +++ /dev/null @@ -1,181 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Apache Griffin DSL Guide -Griffin DSL is designed for DQ measurement, as a SQL-like language, trying to describe the DQ domain request. - -## Griffin DSL Syntax Description -Griffin DSL is SQL-like, case insensitive, and easy to learn. 
- -### Supporting process -- logical operation: not, and, or, in, between, like, is null, is nan, =, !=, <=, >=, <, > -- mathematical operation: +, -, *, /, % -- sql statement: as, where, group by, having, order by, limit - - -### Keywords -- `null, nan, true, false` -- `not, and, or` -- `in, between, like, is` -- `select, from, as, where, group, by, having, order, desc, asc, limit` - -### Operators -- `!, &&, ||, =, !=, <, >, <=, >=, <>` -- `+, -, *, /, %` -- `(, )` -- `., [, ]` - -### Literals -- **string**: any string surrounded with a pair of " or ', with escape charactor \ if any request. - e.g. `"test"`, `'string 1'`, `"hello \" world \" "` -- **number**: double or integer number. - e.g. `123`, `33.5` -- **time**: a integer with unit in a string, will be translated to a integer number in millisecond. - e.g. `3d`, `5h`, `4ms` -- **boolean**: boolean value directly. - e.g. `true`, `false` - -### Selections -- **selection head**: data source name. - e.g. `source`, `target`, `` `my table name` `` -- **all field selection**: * or with data source name ahead. - e.g. `*`, `source.*`, `target.*` -- **field selection**: field name or with data source name ahead. - e.g. `source.age`, `target.name`, `user_id` -- **index selection**: interget between square brackets "[]" with field name ahead. - e.g. `source.attributes[3]` -- **function selection**: function name with brackets "()", with field name ahead or not. - e.g. `count(*)`, `*.count()`, `source.user_id.count()`, `max(source.age)` -- **alias**: declare an alias after a selection. - e.g. `source.user_id as id`, `target.user_name as name` - -### Math expressions -- **math factor**: literal or function or selection or math exression with brackets. - e.g. `123`, `max(1, 2, 3, 4)`, `source.age`, `(source.age + 13)` -- **unary math expression**: unary math operator with factor. - e.g. `-(100 - source.score)` -- **binary math expression**: math factors with binary math operators. - e.g. 
`source.age + 13`, `score * 2 + ratio` - -### Logical expression -- **in**: in clause like sql. - e.g. `source.country in ("USA", "CHN", "RSA")` -- **between**: between clause like sql. - e.g. `source.age between 3 and 30`, `source.age between (3, 30)` -- **like**: like clause like sql. - e.g. `source.name like "%abc%"` -- **is null**: is null operator like sql. - e.g. `source.desc is not null` -- **is nan**: check if the value is not a number, the syntax like `is null` - e.g. `source.age is not nan` -- **logical factor**: math expression or logical expressions above or other logical expressions with brackets. - e.g. `(source.user_id = target.user_id AND source.age > target.age)` -- **unary logical expression**: unary logical operator with factor. - e.g. `NOT source.has_data`, `!(source.age = target.age)` -- **binary logical expression**: logical factors with binary logical operators, including `and`, `or` and comparison operators. - e.g. `source.age = target.age OR source.ticket = target.tck` - - -### Expression -- **expression**: logical expression and math expression. - -### Function -- **argument**: expression. -- **function**: function name with arguments between brackets. - e.g. `max(source.age, target.age)`, `count(*)` - -### Clause -- **select clause**: the result columns like sql select clause, we can ignore the word "select" in Griffin DSL. - e.g. `select user_id.count(), age.max() as max`, `source.user_id.count() as cnt, source.age.min()` -- **from clause**: the table name like sql from clause, in which the data source name must be one of data source names or the output table name of the former rule steps, we can ignore this clause by configoring the data source name. - e.g. `from source`, ``from `target` `` -- **where clause**: the filter condition like sql where clause, optional. - e.g. `where source.id = target.id and source.age = target.age` -- **group-by clause**: like the group-by clause in sql, optional. Optional having clause could be following. 
- e.g. `group by cntry`, `group by gender having count(*) > 50` -- **order-by clause**: like the order-by clause, optional. - e.g. `order by name`, `order by first_name desc, age asc` -- **limit clause**: like the limit clause in sql, optional. - e.g. `limit 5` - -### Accuracy Rule -Accuracy rule expression in Griffin DSL is a logical expression, telling the mapping relation between data sources. - e.g. `source.id = target.id and source.name = target.name and source.age between (target.age, target.age + 5)` - -### Profiling Rule -Profiling rule expression in Griffin DSL is a sql-like expression, with select clause ahead, following optional from clause, where clause, group-by clause, order-by clause, limit clause in order. - e.g. `source.gender, source.id.count() where source.age > 20 group by source.gender`, `select country, max(age), min(age), count(*) as cnt from source group by country order by cnt desc limit 5` - -## Griffin DSL translation to SQL -Griffin DSL is defined for DQ measurement, to describe DQ domain problem. -Actually, in Griffin, we get Griffin DSL rules, translate them into spark-sql rules for calculation in spark-sql engine. -In DQ domain, there're multiple dimensions, we need to translate them in different ways. - -### Accuracy -For accuracy, we need to get the match count between source and target, the rule describes the mapping relation between data sources. Griffin needs to translate the dsl rule into multiple sql rules. -For example, the dsl rule is `source.id = target.id and source.name = target.name`, which represents the match condition of accuracy. After the translation, the sql rules are as below: -- **get miss items from source**: `SELECT source.* FROM source LEFT JOIN target ON coalesce(source.id, '') = coalesce(target.id, '') and coalesce(source.name, '') = coalesce(target.name, '') WHERE (NOT (source.id IS NULL AND source.name IS NULL)) AND (target.id IS NULL AND target.name IS NULL)`, save as table `miss_items`. 
-- **get miss count**: `SELECT COUNT(*) AS miss FROM miss_items`, save as table `miss_count`. -- **get total count from source**: `SELECT COUNT(*) AS total FROM source`, save as table `total_count`. -- **get accuracy metric**: `SELECT miss_count.miss AS miss, total_count.total AS total, (total_count.total - miss_count.miss) AS matched FROM miss_count FULL JOIN total_count`, save as table `accuracy`. - -After the translation, the metrics will be persisted in table `accuracy`. - -### Profiling -For profiling, the request is always the aggregation function of data, the rule is mainly the same as sql, but only supporting `select`, `from`, `where`, `group-by`, `having`, `order-by`, `limit` clauses, which can describe most of the profiling requests. If any complicate request, you can use sql rule directly to describe it. -For example, the dsl rule is `source.cntry, source.id.count(), source.age.max() group by source.cntry`, which represents the profiling requests. After the translation, the sql rule is as below: -- **profiling sql rule**: `SELECT source.cntry, count(source.id), max(source.age) FROM source GROUP BY source.cntry`, save as table `profiling`. - -After the translation, the metrics will be persisted in table `profiling`. - -## Alternative Rules -You can simply use Griffin DSL rule to describe your problem in DQ domain, for some complicate requirement, you can also use some alternative rules supported by Griffin. - -### Spark sql -Griffin supports spark-sql directly, you can write rule in sql like this: -``` -{ - "dsl.type": "spark-sql", - "name": "source", - "rule": "SELECT count(id) AS cnt, max(timestamp) AS fresh_time FROM source" -} -``` -Griffin will calculate it in spark-sql engine directly. - -### Data frame operation -Griffin supports some other operations on data frame in spark, like converting json string data frame into extracted data frame with extracted object schema. 
For example: -``` -{ - "dsl.type": "df-opr", - "name": "ext_source", - "rule": "from_json", - "details": { - "df.name": "json_source" - } -} -``` -Griffin will do the operation to extract json strings. -Actually, you can also extend the df-opr engine and df-opr adaptor in Griffin to support more types of data frame operations. - -## Tips -Griffin engine runs on spark, it might works in two phases, pre-proc phase and run phase. -- **Pre-proc phase**: Griffin calculates data source directly, to get appropriate data format, as a preparation for DQ calculation. In this phase, you can use df-opr and spark-sql rules. -After preparation, to support streaming DQ calculation, a timestamp column will be added in each row of data, so the data frame in run phase contains an extra column named "__tmst". -- **Run phase**: Griffin calculates with prepared data, to get the DQ metrics. In this phase, you can use griffin-dsl, spark-sql rules, and a part of df-opr rules. -For griffin-dsl rule, griffin translates it into spark-sql rule with a group-by condition for column "__tmst", it's useful for especially streaming DQ calculation. But for spark-sql rule, griffin use it directly, you need to add the "__tmst" column in your spark-sql rule explicitly, or you can't get correct metrics result after calculation. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/griffin-docker-guide.md ---------------------------------------------------------------------- diff --git a/griffin-doc/griffin-docker-guide.md b/griffin-doc/griffin-docker-guide.md deleted file mode 100644 index 1fb5980..0000000 --- a/griffin-doc/griffin-docker-guide.md +++ /dev/null @@ -1,105 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. 
The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Apache Griffin Docker Guide -Griffin docker images are pre-built on docker hub, users can pull them to try griffin in docker. - -## Preparation - -### Environment preparation -1. Install [docker](https://docs.docker.com/engine/installation/) and [docker compose](https://docs.docker.com/compose/install/). -2. Increase vm.max_map_count of your local machine, to use elasticsearch. - ``` - sysctl -w vm.max_map_count=262144 - ``` -3. Pull griffin pre-built docker images. - ``` - docker pull bhlx3lyx7/svc_msr:0.1.6 - docker pull bhlx3lyx7/elasticsearch - docker pull bhlx3lyx7/kafka - docker pull zookeeper:3.5 - ``` - Or you can pull the images faster through mirror acceleration if you are in China. - ``` - docker pull registry.docker-cn.com/bhlx3lyx7/svc_msr:0.1.6 - docker pull registry.docker-cn.com/bhlx3lyx7/elasticsearch - docker pull registry.docker-cn.com/bhlx3lyx7/kafka - docker pull registry.docker-cn.com/zookeeper:3.5 - ``` - The docker images are the griffin environment images. - - `bhlx3lyx7/svc_msr`: This image contains mysql, hadoop, hive, spark, livy, griffin service, griffin measure, and some prepared demo data, it works as a single node spark cluster, providing spark engine and griffin service. - - `bhlx3lyx7/elasticsearch`: This image is based on official elasticsearch, adding some configurations to enable cors requests, to provide elasticsearch service for metrics persist. 
- - `bhlx3lyx7/kafka`: This image contains kafka 0.8, and some demo streaming data, to provide streaming data source in streaming mode. - - `zookeeper:3.5`: This image is official zookeeper, to provide zookeeper service in streaming mode. - -### How to use griffin docker images in batch mode -1. Copy [docker-compose-batch.yml](https://github.com/apache/incubator-griffin/blob/master/griffin-doc/docker/svc_msr/docker-compose-batch.yml) to your work path. -2. In your work path, start docker containers by using docker compose, wait for about one minutes, then griffin service is ready. - ``` - docker-compose -f docker-compose-batch.yml up -d - ``` -3. Now you can try griffin APIs by using postman after importing the [json files](https://github.com/apache/incubator-griffin/blob/master/griffin-doc/postman). - In which you need to modify the environment `BASE_PATH` value into `<your local IP address>:38080`. -4. You can try the api `Basic -> Get griffin version`, to make sure griffin service has started up. -5. Add an accuracy measure through api `Measures -> Add measure`, to create a measure in griffin. -6. Add a job to through api `jobs -> Add job`, to schedule a job to execute the measure. In the example, the schedule interval is 5 minutes. -7. After some minutes, you can get the metrics from elasticsearch. - ``` - curl -XGET '<your local IP address>:39200/griffin/accuracy/_search?pretty&filter_path=hits.hits._source' -d '{"query":{"match_all":{}}, "sort": [{"tmst": {"order": "asc"}}]}' - ``` - -### How to use griffin docker images in streaming mode -1. Copy [docker-compose-streaming.yml](https://github.com/apache/incubator-griffin/blob/master/griffin-doc/docker/svc_msr/docker-compose-streaming.yml) to your work path. -2. In your work path, start docker containers by using docker compose, wait for about one minutes, then griffin service is ready. - ``` - docker-compose -f docker-compose-streaming.yml up -d - ``` -3. Enter the griffin docker container. 
- ``` - docker exec -it griffin bash - ``` -4. Switch into the measure directory. - ``` - cd ~/measure - ``` -5. Execute the script of streaming-accu, to execute streaming accuracy measurement. - ``` - ./streaming-accu.sh - ``` - You can trace the log in streaming-accu.log. - ``` - tail -f streaming-accu.log - ``` -6. Limited by the docker container resource, you can only execute accuracy or profiling separately. - If you want to try streaming profiling measurement, please kill the streaming-accu process first. - ``` - kill -9 `ps -ef | awk '/griffin-measure/{print $2}'` - ``` - Then clear the checkpoint directory and other related directories of last streaming job. - ``` - ./clear.sh - ``` - Execute the script of streaming-prof, to execute streaming profiling measurement. - ``` - ./streaming-prof.sh - ``` - You can trace the log in streaming-prof.log. - ``` - tail -f streaming-prof.log - ``` \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/measure-batch-sample.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure-batch-sample.md b/griffin-doc/measure-batch-sample.md deleted file mode 100644 index 3783f94..0000000 --- a/griffin-doc/measure-batch-sample.md +++ /dev/null @@ -1,140 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. 
See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Measure Batch Sample -Measures consists of batch measure and streaming measure. This document is for the batch measure sample. - -## Batch Accuracy Sample -``` -{ - "name": "accu_batch", - - "process.type": "batch", - - "data.sources": [ - { - "name": "src", - "connectors": [ - { - "type": "avro", - "version": "1.7", - "config": { - "file.name": "users_info_src.avro" - } - } - ] - }, { - "name": "tgt", - "connectors": [ - { - "type": "avro", - "version": "1.7", - "config": { - "file.name": "users_info_target.avro" - } - } - ] - } - ], - - "evaluateRule": { - "rules": [ - { - "dsl.type": "griffin-dsl", - "dq.type": "accuracy", - "rule": "src.user_id = tgt.user_id AND upper(src.first_name) = upper(tgt.first_name) AND src.last_name = tgt.last_name", - "details": { - "source": "src", - "target": "tgt", - "miss.records": { - "name": "miss.records", - "persist.type": "record" - }, - "accuracy": { - "name": "accu", - "persist.type": "metric" - }, - "miss": "miss_count", - "total": "total_count", - "matched": "matched_count" - } - } - ] - } -} -``` -Above is the configure file of batch accuracy job. - -### Data source -In this sample, we use avro file as source and target. - -### Evaluate rule -In this accuracy sample, the rule describes the match condition: `src.user_id = tgt.user_id AND upper(src.first_name) = upper(tgt.first_name) AND src.last_name = tgt.last_name`. -The accuracy metrics will be persisted as metric, with miss column named "miss_count", total column named "total_count", matched column named "matched_count". -The miss records of source will be persisted as record. 
- -## Batch Profiling Sample -``` -{ - "name": "prof_batch_test", - - "process.type": "batch", - - "data.sources": [ - { - "name": "source", - "connectors": [ - { - "type": "hive", - "version": "1.2", - "config": { - "database": "griffin", - "table.name": "demo_src" - } - } - ] - } - ], - - "evaluateRule": { - "rules": [ - { - "dsl.type": "griffin-dsl", - "dq.type": "profiling", - "rule": "country, country.count() as cnt group by country order by cnt desc limit 3", - "details": { - "source": "source", - "profiling": { - "name": "cntry-group", - "persist.type": "metric" - } - } - } - ] - } -} -``` -Above is the configure file of batch profiling job. - -### Data source -In this sample, we use hive table as source. - -### Evaluate rule -In this profiling sample, the rule describes the profiling request: `country, country.count() as cnt group by country order by cnt desc limit 3`. -The profiling metrics will be persisted as metric, listing the most 3 groups of items in same country. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/measure-configuration-guide.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure-configuration-guide.md b/griffin-doc/measure-configuration-guide.md deleted file mode 100644 index 0632927..0000000 --- a/griffin-doc/measure-configuration-guide.md +++ /dev/null @@ -1,211 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Griffin Measure Configuration Guide -Griffin measure module needs two configuration files to define the parameters of execution, one is for environment, the other is for dq job. - -## Environment Parameters -``` -{ - "spark": { - "log.level": "WARN", - "checkpoint.dir": "hdfs:///griffin/streaming/cp", - "batch.interval": "5s", - "process.interval": "30s", - "config": { - "spark.task.maxFailures": 5, - "spark.streaming.kafkaMaxRatePerPartition": 1000, - "spark.streaming.concurrentJobs": 4 - } - }, - - "persist": [ - { - "type": "log", - "config": { - "max.log.lines": 100 - } - }, { - "type": "hdfs", - "config": { - "path": "hdfs:///griffin/streaming/persist", - "max.persist.lines": 10000, - "max.lines.per.file": 10000 - } - } - ], - - "info.cache": [ - { - "type": "zk", - "config": { - "hosts": "<zookeeper host ip>:2181", - "namespace": "griffin/infocache", - "lock.path": "lock", - "mode": "persist", - "init.clear": true, - "close.clear": false - } - } - ] -} -``` -Above lists environment parameters. - -- **spark**: This field configures spark and spark streaming parameters. - + log.level: Level of spark log. - + checkpoint.dir: Check point directory of spark streaming, for streaming mode. - + batch.interval: Interval of dumping streaming data, for streaming mode. - + process.interval: Interval of processing dumped streaming data, for streaming mode. - + config: Configuration of spark parameters. -- **persist**: This field configures list of metrics persist parameters, multiple persist ways are supported. Details of persist configuration [here](#persist). 
-- **info.cache**: This field configures list of information cache parameters, multiple cache ways are supported. It is only for streaming dq case. Details of info cache configuration [here](#info-cache). - -### <a name="persist"></a>Persist -- **type**: Metrics persist type, "log", "hdfs" and "http". -- **config**: Configure parameters of each persist type. - + log persist - * max.log.lines: the max lines of log. - + hdfs persist - * path: hdfs path to persist metrics - * max.persist.lines: the max lines of total persist data. - * max.lines.per.file: the max lines of each persist file. - + http persist - * api: api to submit persist metrics. - * method: http method, "post" default. - -### <a name="info-cache"></a>Info Cache -- **type**: Information cache type, "zk" for zookeeper cache. -- **config**: Configure parameters of info cache type. - + zookeeper cache - * hosts: zookeeper hosts list as a string, separated by comma. - * namespace: namespace of cache info, "" as default. - * lock.path: path of lock info, "lock" as default. - * mode: create mode of zookeeper node, "persist" as default. - * init.clear: clear cache info when initialize, true default. - * close.clear: clear cache info when close connection, false default. 
- -## DQ Job Parameters -``` -{ - "name": "accu_batch", - - "process.type": "batch", - - "data.sources": [ - { - "name": "src", - "connectors": [ - { - "type": "avro", - "version": "1.7", - "config": { - "file.path": "<path>/<to>", - "file.name": "<source-file>.avro" - } - } - ] - }, { - "name": "tgt", - "connectors": [ - { - "type": "avro", - "version": "1.7", - "config": { - "file.path": "<path>/<to>", - "file.name": "<target-file>.avro" - } - } - ] - } - ], - - "evaluateRule": { - "rules": [ - { - "dsl.type": "griffin-dsl", - "dq.type": "accuracy", - "rule": "src.user_id = tgt.user_id AND upper(src.first_name) = upper(tgt.first_name) AND src.last_name = tgt.last_name", - "details": { - "source": "src", - "target": "tgt", - "miss.records": { - "name": "miss.records", - "persist.type": "record" - }, - "accuracy": { - "name": "accu", - "persist.type": "metric" - }, - "miss": "miss_count", - "total": "total_count", - "matched": "matched_count" - } - } - ] - } -} -``` -Above lists DQ job configure parameters. - -- **name**: Name of DQ job. -- **process.type**: Process type of DQ job, "batch" or "streaming". -- **data.sources**: List of data sources in this DQ job. - + name: Name of this data source, it should be different from other data sources. - + connectors: List of data connectors combined as the same data source. Details of data connector configuration [here](#data-connector). -- **evaluateRule**: Evaluate rule parameters of this DQ job. - + dsl.type: Default dsl type of all the rules. - + rules: List of rules, to define every rule step. Details of rule configuration [here](#rule). - -### <a name="data-connector"></a>Data Connector -- **type**: Data connector type, "avro", "hive", "text-dir" for batch mode, "kafka" for streaming mode. -- **version**: Version string of data connector type. -- **config**: Configure parameters of each data connector type. - + avro data connector - * file.path: avro file path, optional, "" as default. - * file.name: avro file name. 
- + hive data connector - * database: data base name, optional, "default" as default. - * table.name: table name. - * partitions: partition conditions string, split by ";" and ",", optional. - e.g. `dt=20170410, hour=15; dt=20170411, hour=15; dt=20170412, hour=15` - + text dir data connector - * dir.path: parent directory path. - * data.dir.depth: integer, depth of data directories, 0 as default. - * success.file: success file name, - * done.file: - -### <a name="rule"></a>Rule -- **dsl.type**: Rule dsl type, "spark-sql", "df-opr" and "griffin-dsl". -- **name** (step information): Result table name of this rule, optional for "griffin-dsl" type. -- **persist.type** (step information): Persist type of result table, optional for "griffin-dsl" type. Supporting "metric", "record" and "none" type, "metric" type indicates the result will be persisted as metrics, "record" type indicates the result will be persisted as record only, "none" type indicates the result will not be persisted. Default is "none" type. -- **update.data.source** (step information): If the result table needs to update the data source, this parameter is the data source name, for streaming accuracy case, optional. -- **dq.type**: DQ type of this rule, only for "griffin-dsl" type, supporting "accuracy" and "profiling". -- **details**: Details of this rule, optional. - + accuracy dq type detail configuration - * source: the data source name which as source in accuracy, default is the name of first data source in "data.sources" if not configured. - * target: the data source name which as target in accuracy, default is the name of second data source in "data.sources" if not configured. - * miss.records: step information of miss records result table step in accuracy. - * accuracy: step information of accuracy result table step in accuracy. - * miss: alias of miss column in result table. - * total: alias of total column in result table. - * matched: alias of matched column in result table. 
- + profiling dq type detail configuration - * source: the data source name which as source in profiling, default is the name of first data source in "data.sources" if not configured. If the griffin-dsl rule contains from clause, this parameter is ignored. - * profiling: step information of profiling result table step in profiling. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/measure-demo-docker.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure-demo-docker.md b/griffin-doc/measure-demo-docker.md deleted file mode 100644 index bdda030..0000000 --- a/griffin-doc/measure-demo-docker.md +++ /dev/null @@ -1,63 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Griffin Measure Demo Docker -We've prepared a docker for griffin measure demo. - -## Preparation -1. Install [docker](https://docs.docker.com/engine/installation/). -2. Download docker image. In this image, the environment for measure module has been prepared, including: hadoop, hive, spark, mysql. -``` -docker pull bhlx3lyx7/griffin_measure_demo:0.0.1 -``` -3. Run docker image. 
-``` -docker run -it -h griffin --name griffin_measure_demo -m 8G --memory-swap -1 \ --p 42122:2122 -p 47077:7077 -p 46066:6066 -p 48088:8088 -p 48040:8040 \ --p 43306:3306 -p 49000:9000 -p 48042:8042 -p 48080:8080 -p 47017:27017 \ --p 49083:9083 -p 48998:8998 -p 49200:9200 bhlx3lyx7/griffin_measure_demo:0.0.1 -``` -4. In this docker container, run the prepared demo. -- **accuracy demo**: This demo is batch accuracy, source data is Hive table "demo_src", target data is Hive table "demo_tgt", metrics will be persisted in `hdfs:///griffin/persist/accu` after calculation. - + switch into `job/accu`. - ``` - cd job/accu - ``` - + run the prepared script. - ``` - ./bgwork.sh - ``` - + check job log. - ``` - tail -f accu.log - ``` -- **profiling demo**: This demo is batch profiling, source data is Hive table "demo_src", metrics will be persisted in `hdfs:///griffin/persist/prof` after calculation. - + switch into `job/prof`. - ``` - cd job/prof - ``` - + run the prepared script. - ``` - ./bgwork.sh - ``` - + check job log. - ``` - tail -f prof.log - ``` -5. You can modify the job configuration file `config.json` of the above demos, or create your own data sources, to get more metrics of data. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/e7e4c3a7/griffin-doc/measure-streaming-sample-old.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure-streaming-sample-old.md b/griffin-doc/measure-streaming-sample-old.md deleted file mode 100644 index 004ed3b..0000000 --- a/griffin-doc/measure-streaming-sample-old.md +++ /dev/null @@ -1,204 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. 
The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> -# Measure streaming sample -Measures consists of batch measure and streaming measure. This document is for the streaming measure sample. - -### Data source -At current, we support kafka as streaming data source. -In this sample, we also need a kafka as data source. - -### Measure type -At current, we support accuracy measure in streaming mode. - -### Kafka decoder -In kafka, data always needs encode and decode, we support String type kafka data currently, you can also implement and use your decoder for kafka case. - -### Environment -For current griffin streaming case, we need some necessary environment dependencies, zookeeper and hdfs. -We use zookeeper to cache some checkpoint information, it's optional, but we recommend it. -We use hdfs to save the temporary data, it's also a recommend selection. - -### Streaming accuracy result -The streaming data will be separated into mini-batches of data, for each mini-batch data, there should be an accuracy result. Therefore, the streaming accuracy result should be a bunch of batch accuracy results with timestamp. -Considering the latency of streaming data, which means the source data and the matching target data will not exactly reach exactly at the same time, we have to accept some delay of data in streaming mode, by holding unmatched data in memory or disk, and try to match them later until the data is out-time. 
- -## How to run streaming sample -### Environment Preparation -At first, we need some environment preparation. -- Zookeeper: Zookeeper 3.4.10 -- Hadoop: Hadoop 2.6 -- Spark: Spark 1.6 -- Kafka: Kafka 0.8 - -### Data Preparation -Create two topics in kafka, for source and target data. For example, topic "source" for source data, and topic "target" for target data. -Streaming data should also be prepared, the format could be json string, for example: -Source data could be: -``` -{"name": "kevin", "age": 24} -{"name": "jason", "age": 25} -{"name": "jhon", "age": 28} -{"name": "steve", "age": 31} -``` -Target data could be: -``` -{"name": "kevin", "age": 24} -{"name": "jason", "age": 25} -{"name": "steve", "age": 20} -``` -You need to input the source data and target data into these two topics, through console producer might be a good choice for experimental purpose. - -### Configuration Preparation -Two configuration files are required. -Environment configuration file: env.json -``` -{ - "spark": { - "log.level": "WARN", - "checkpoint.dir": "hdfs:///griffin/streaming/cp", - "batch.interval": "5s", - "process.interval": "30s", - "config": { - "spark.task.maxFailures": 5, - "spark.streaming.kafkaMaxRatePerPartition": 1000, - "spark.streaming.concurrentJobs": 4 - } - }, - - "persist": [ - { - "type": "log", - "config": { - "max.log.lines": 100 - } - }, { - "type": "hdfs", - "config": { - "path": "hdfs:///griffin/streaming/persist", - "max.persist.lines": 10000, - "max.lines.per.file": 10000 - } - } - ], - - "info.cache": [ - { - "type": "zk", - "config": { - "hosts": "<zookeeper host ip>:2181", - "namespace": "griffin/infocache", - "lock.path": "lock", - "mode": "persist", - "init.clear": true, - "close.clear": false - } - } - ] -} -``` -In env.json, "spark" field configures the spark and spark streaming parameters, "persist" field configures the persist ways, we support "log", "hdfs" and "http" ways at current, "info.cache" field configures the information cache 
parameters; currently we only support zookeeper.
-"evaluateRule.rule" configures the match rule between each source and target data. - -### Run -Build the measure package. -``` -mvn clean install -``` -Get the measure package ```measure-<version>-incubating-SNAPSHOT.jar```, rename it to ```griffin-measure.jar```. -Put measure package together with env.json and config.json. -Run the following command: -``` -spark-submit --class org.apache.griffin.measure.Application \ ---master yarn-client --queue default \ -griffin-measure.jar \ -env.json config.json local,local -``` -The first two parameters are the paths of env.json and config.json, the third parameter represents the file system type of the two configuration files, "local" or "hdfs" are both supported. - -The spark streaming application will be long-time running, you can get the results of each mini-batch of data, during the run-time, you can also input more data into source and target topics, to check the results of the later mini-batches.