Repository: incubator-griffin Updated Branches: refs/heads/master 73a7d84b6 -> 0f9afce35
Fix conventions in batch-sample Bring conventions for enums in measure-batch-sample in sync with service module. While measure accepts any case, service requires them capitalized. Author: Nikolay Sokolov <chemika...@gmail.com> Closes #443 from chemikadze/patch-1. Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-griffin/commit/0f9afce3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-griffin/tree/0f9afce3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-griffin/diff/0f9afce3 Branch: refs/heads/master Commit: 0f9afce35bc7465181fceb68c9173b3f41a342e9 Parents: 73a7d84 Author: Nikolay Sokolov <chemika...@gmail.com> Authored: Thu Oct 25 22:41:14 2018 +0800 Committer: William Guo <gu...@apache.org> Committed: Thu Oct 25 22:41:14 2018 +0800 ---------------------------------------------------------------------- griffin-doc/measure/measure-batch-sample.md | 18 +++++++++--------- .../measure/measure-configuration-guide.md | 16 ++++++++-------- griffin-doc/measure/measure-streaming-sample.md | 16 ++++++++-------- 3 files changed, 25 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/0f9afce3/griffin-doc/measure/measure-batch-sample.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure/measure-batch-sample.md b/griffin-doc/measure/measure-batch-sample.md index 7867ea5..f45d040 100644 --- a/griffin-doc/measure/measure-batch-sample.md +++ b/griffin-doc/measure/measure-batch-sample.md @@ -25,7 +25,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc { "name": "accu_batch", - "process.type": "batch", + "process.type": "BATCH", "data.sources": [ { @@ -33,7 +33,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc "baseline": true, "connectors": [ { - "type": "avro", + "type": "AVRO", "version": "1.7", "config": { "file.name": "src/test/resources/users_info_src.avro" @@ -44,7 +44,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc "name": "target", "connectors": [ { - "type": "avro", + "type": "AVRO", "version": "1.7", "config": { "file.name": "src/test/resources/users_info_target.avro" @@ -58,7 +58,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc "rules": [ { "dsl.type": "griffin-dsl", - "dq.type": "accuracy", + "dq.type": "ACCURACY", "out.dataframe.name": "accu", "rule": "source.user_id = target.user_id AND upper(source.first_name) = upper(target.first_name) AND source.last_name = target.last_name AND source.address = target.address AND source.email = target.email AND source.phone = target.phone AND source.post_code = target.post_code", "details": { @@ -100,14 +100,14 @@ The miss records of source will be persisted as record. { "name": "prof_batch", - "process.type": "batch", + "process.type": "BATCH", "data.sources": [ { "name": "source", "connectors": [ { - "type": "hive", + "type": "HIVE", "version": "1.2", "config": { "database": "default", @@ -122,7 +122,7 @@ The miss records of source will be persisted as record. "rules": [ { "dsl.type": "griffin-dsl", - "dq.type": "profiling", + "dq.type": "PROFILING", "out.dataframe.name": "prof", "rule": "select max(age) as `max_age`, min(age) as `min_age` from source", "out": [ @@ -134,7 +134,7 @@ The miss records of source will be persisted as record. }, { "dsl.type": "griffin-dsl", - "dq.type": "profiling", + "dq.type": "PROFILING", "out.dataframe.name": "name_grp", "rule": "select name, count(*) as cnt from source group by name", "out": [ @@ -158,4 +158,4 @@ In this sample, we use hive table as source. ### Evaluate rule In this profiling sample, the rule describes the profiling request: `select max(age) as max_age, min(age) as min_age from source` and `select name, count(*) as cnt from source group by name`. -The profiling metrics will be persisted as metric, with the max and min value of age, and count group by name, like this: `{"max_age": 53, "min_age": 11, "name_grp": [{"name": "Adam", "cnt": 13}, {"name": "Fred", "cnt": 2}]}`. \ No newline at end of file +The profiling metrics will be persisted as metric, with the max and min value of age, and count group by name, like this: `{"max_age": 53, "min_age": 11, "name_grp": [{"name": "Adam", "cnt": 13}, {"name": "Fred", "cnt": 2}]}`. http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/0f9afce3/griffin-doc/measure/measure-configuration-guide.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure/measure-configuration-guide.md b/griffin-doc/measure/measure-configuration-guide.md index 43cb8ac..9ea7730 100644 --- a/griffin-doc/measure/measure-configuration-guide.md +++ b/griffin-doc/measure/measure-configuration-guide.md @@ -115,14 +115,14 @@ Above lists environment parameters. { "name": "accu_batch", - "process.type": "batch", + "process.type": "BATCH", "data.sources": [ { "name": "src", "connectors": [ { - "type": "avro", + "type": "AVRO", "version": "1.7", "config": { "file.path": "<path>/<to>", @@ -134,7 +134,7 @@ Above lists environment parameters. "name": "tgt", "connectors": [ { - "type": "avro", + "type": "AVRO", "version": "1.7", "config": { "file.path": "<path>/<to>", @@ -149,7 +149,7 @@ Above lists environment parameters. "rules": [ { "dsl.type": "griffin-dsl", - "dq.type": "accuracy", + "dq.type": "ACCURACY", "out.dataframe.name": "accu", "rule": "source.user_id = target.user_id AND upper(source.first_name) = upper(target.first_name) AND source.last_name = target.last_name AND source.address = target.address AND source.email = target.email AND source.phone = target.phone AND source.post_code = target.post_code", "details": { @@ -172,13 +172,13 @@ Above lists environment parameters. ] }, - "sinks": ["console", "http", "hdfs"] + "sinks": ["CONSOLE", "HTTP", "HDFS"] } ``` Above lists DQ job configure parameters. - **name**: Name of DQ job. -- **process.type**: Process type of DQ job, "batch" or "streaming". +- **process.type**: Process type of DQ job, "BATCH" or "STREAMING". - **data.sources**: List of data sources in this DQ job. + name: Name of this data source, it should be different from other data sources. + connectors: List of data connectors combined as the same data source. Details of data connector configuration [here](#data-connector). @@ -188,7 +188,7 @@ Above lists DQ job configure parameters. - **sinks**: Whitelisted sink types for this job. Note: no sinks will be used, if empty or omitted. ### <a name="data-connector"></a>Data Connector -- **type**: Data connector type, "avro", "hive", "text-dir" for batch mode, "kafka" for streaming mode. +- **type**: Data connector type, "AVRO", "HIVE", "TEXT-DIR" for batch mode, "KAFKA" for streaming mode. - **version**: Version string of data connector type. - **config**: Configure parameters of each data connector type. + avro data connector @@ -207,7 +207,7 @@ Above lists DQ job configure parameters. ### <a name="rule"></a>Rule - **dsl.type**: Rule dsl type, "spark-sql", "df-ops" and "griffin-dsl". -- **dq.type**: DQ type of this rule, only for "griffin-dsl" type. Supported types: "accuracy", "profiling", "timeliness", "uniqueness", "completeness". +- **dq.type**: DQ type of this rule, only for "griffin-dsl" type. Supported types: "ACCURACY", "PROFILING", "TIMELINESS", "UNIQUENESS", "COMPLETENESS". - **out.dataframe.name** (step information): Output table name of this rule, could be used in the following rules. - **in.dataframe.name** (step information): Input table name of this rule, only used for "df-ops" type. - **rule**: The rule string. http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/0f9afce3/griffin-doc/measure/measure-streaming-sample.md ---------------------------------------------------------------------- diff --git a/griffin-doc/measure/measure-streaming-sample.md b/griffin-doc/measure/measure-streaming-sample.md index 30ed718..9e47143 100644 --- a/griffin-doc/measure/measure-streaming-sample.md +++ b/griffin-doc/measure/measure-streaming-sample.md @@ -25,7 +25,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc { "name": "accu_streaming", - "process.type": "streaming", + "process.type": "STREAMING", "data.sources": [ { @@ -33,7 +33,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc "baseline": true, "connectors": [ { - "type": "kafka", + "type": "KAFKA", "version": "0.8", "config": { "kafka.config": { @@ -73,7 +73,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc "name": "target", "connectors": [ { - "type": "kafka", + "type": "KAFKA", "version": "0.8", "config": { "kafka.config": { @@ -115,7 +115,7 @@ Apache Griffin measures consist of batch measure and streaming measure, this doc "rules": [ { "dsl.type": "griffin-dsl", - "dq.type": "accuracy", + "dq.type": "ACCURACY", "out.dataframe.name": "accu", "rule": "source.name = target.name and source.age = target.age", "details": { @@ -176,14 +176,14 @@ The miss records of source will be persisted as record. { "name": "prof_streaming", - "process.type": "streaming", + "process.type": "STREAMING", "data.sources": [ { "name": "source", "connectors": [ { - "type": "kafka", + "type": "KAFKA", "version": "0.8", "config": { "kafka.config": { @@ -225,7 +225,7 @@ The miss records of source will be persisted as record. "rules": [ { "dsl.type": "griffin-dsl", - "dq.type": "profiling", + "dq.type": "PROFILING", "out.dataframe.name": "prof", "rule": "select count(name) as `cnt`, max(age) as `max`, min(age) as `min` from source", "out": [ @@ -237,7 +237,7 @@ The miss records of source will be persisted as record. }, { "dsl.type": "griffin-dsl", - "dq.type": "profiling", + "dq.type": "PROFILING", "out.dataframe.name": "grp", "rule": "select name, count(*) as `cnt` from source group by name", "out": [