Repository: incubator-atlas Updated Branches: refs/heads/master a230f4ffd -> 30a2ec1fa
ATLAS-263 Searching for a multi word trait always returns empty result (girishrp via shwethags) Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/30a2ec1f Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/30a2ec1f Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/30a2ec1f Branch: refs/heads/master Commit: 30a2ec1fa450664d6ae90f70a71fde80f8882b68 Parents: a230f4f Author: Shwetha GS <[email protected]> Authored: Tue Dec 22 15:46:00 2015 +0530 Committer: Shwetha GS <[email protected]> Committed: Tue Dec 22 15:46:00 2015 +0530 ---------------------------------------------------------------------- docs/src/site/twiki/Search.twiki | 3 +++ release-log.txt | 2 +- .../apache/atlas/BaseHiveRepositoryTest.java | 21 +++++++++++++++- .../GraphBackedDiscoveryServiceTest.java | 25 ++++++++++---------- .../org/apache/atlas/examples/QuickStart.java | 25 +++++++++++++++++--- 5 files changed, 59 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/30a2ec1f/docs/src/site/twiki/Search.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/Search.twiki b/docs/src/site/twiki/Search.twiki index 7e7be28..92a5300 100644 --- a/docs/src/site/twiki/Search.twiki +++ b/docs/src/site/twiki/Search.twiki @@ -81,6 +81,8 @@ Language Notes: * There are a couple of Predicate functions different from SQL: * _is_ or _isa_ can be used to filter Entities that have a particular Trait. * _has_ can be used to filter Entities that have a value for a particular Attribute. 
+ * When querying for a space-delimited multiple-word identifier, it needs to be enclosed within + backquotes (`) ---+++ DSL Examples @@ -91,6 +93,7 @@ Language Notes: * Column where Column isa PII * Table where name="sales_fact", columns * Table where name="sales_fact", columns as column select column.name, column.dataType, column.comment + * `Log Data` ---++ Full-text Search http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/30a2ec1f/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 6c8b5e8..cdee3f6 100644 --- a/release-log.txt +++ b/release-log.txt @@ -5,7 +5,7 @@ Apache Atlas Release Notes INCOMPATIBLE CHANGES: ALL CHANGES: - +ATLAS-263 Searching for a multi word trait always returns empty result (girishrp via shwethags) --Release 0.6-incubating INCOMPATIBLE CHANGES: http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/30a2ec1f/repository/src/test/java/org/apache/atlas/BaseHiveRepositoryTest.java ---------------------------------------------------------------------- diff --git a/repository/src/test/java/org/apache/atlas/BaseHiveRepositoryTest.java b/repository/src/test/java/org/apache/atlas/BaseHiveRepositoryTest.java index a49967f..70849ca 100644 --- a/repository/src/test/java/org/apache/atlas/BaseHiveRepositoryTest.java +++ b/repository/src/test/java/org/apache/atlas/BaseHiveRepositoryTest.java @@ -166,8 +166,10 @@ public class BaseHiveRepositoryTest { HierarchicalTypeDefinition<TraitType> jdbcTraitDef = TypesUtil.createTraitTypeDef("JdbcAccess", null); + HierarchicalTypeDefinition<TraitType> logTraitDef = TypesUtil.createTraitTypeDef("Log Data", null); + return TypesUtil.getTypesDef(ImmutableList.<EnumTypeDefinition>of(), ImmutableList.<StructTypeDefinition>of(), - ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef), + ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, 
jdbcTraitDef, logTraitDef), ImmutableList.of(dbClsDef, storageDescClsDef, columnClsDef, tblClsDef, loadProcessClsDef, viewClsDef, partClsDef)); } @@ -201,6 +203,10 @@ public class BaseHiveRepositoryTest { Id salesFact = table("sales_fact", "sales fact table", salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact"); + List<Referenceable> logFactColumns = ImmutableList + .of(column("time_id", "int", "time id"), column("app_id", "int", "app id"), + column("machine_id", "int", "machine id"), column("log", "string", "log data", "Log Data")); + List<Referenceable> timeDimColumns = ImmutableList .of(column("time_id", "int", "time id"), column("dayOfYear", "int", "day Of Year"), @@ -219,6 +225,12 @@ public class BaseHiveRepositoryTest { loadProcess("loadSalesDaily", "hive query for daily summary", "John ETL", ImmutableList.of(salesFact, timeDim), ImmutableList.of(salesFactDaily), "create table as select ", "plan", "id", "graph", "ETL"); + Id logDB = database("Logging", "logging database", "Tim ETL", "hdfs://host:8000/apps/warehouse/logging"); + + Id loggingFactDaily = + table("log_fact_daily_mv", "log fact daily materialized view", logDB, sd, "Tim ETL", "Managed", + logFactColumns, "Log Data"); + List<Referenceable> productDimColumns = ImmutableList .of(column("product_id", "int", "product id"), column("product_name", "string", "product name"), @@ -248,6 +260,13 @@ public class BaseHiveRepositoryTest { loadProcess("loadSalesMonthly", "hive query for monthly summary", "John ETL", ImmutableList.of(salesFactDaily), ImmutableList.of(salesFactMonthly), "create table as select ", "plan", "id", "graph", "ETL"); + Id loggingFactMonthly = + table("logging_fact_monthly_mv", "logging fact monthly materialized view", logDB, sd, "Tim ETL", + "Managed", logFactColumns, "Log Data"); + + loadProcess("loadLogsMonthly", "hive query for monthly summary", "Tim ETL", ImmutableList.of(loggingFactDaily), + ImmutableList.of(loggingFactMonthly), "create table as select ", "plan", "id", "graph", 
"ETL"); + partition(new ArrayList() {{ add("2015-01-01"); }}, salesFactDaily); } http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/30a2ec1f/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java ---------------------------------------------------------------------- diff --git a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java index ea8718d..74813a1 100755 --- a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java +++ b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java @@ -129,13 +129,13 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest { @DataProvider(name = "dslQueriesProvider") private Object[][] createDSLQueries() { return new Object[][]{ - {"from hive_db", 2}, - {"hive_db", 2}, + {"from hive_db", 3}, + {"hive_db", 3}, {"hive_db where hive_db.name=\"Reporting\"", 1}, {"hive_db hive_db.name = \"Reporting\"", 1}, {"hive_db where hive_db.name=\"Reporting\" select name, owner", 1}, - {"hive_db has name", 2}, - {"hive_db, hive_table", 6}, + {"hive_db has name", 3}, + {"hive_db, hive_table", 8}, {"View is JdbcAccess", 2}, {"hive_db as db1, hive_table where db1.name = \"Reporting\"", 0}, //Not working - ATLAS-145 // - Final working query -> discoveryService.searchByGremlin("L:{_var_0 = [] as Set;g.V().has(\"__typeName\", \"hive_db\").fill(_var_0);g.V().has(\"__superTypeNames\", \"hive_db\").fill(_var_0);_var_0._().as(\"db1\").in(\"__hive_table.db\").back(\"db1\").and(_().has(\"hive_db.name\", T.eq, \"Reporting\")).toList()}") @@ -143,21 +143,21 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest { {"hive_db, hive_process has name"}, //Invalid query {"hive_db where hive_db.name=\"Reporting\" and hive_db.createTime < " + System.currentTimeMillis()} */ - {"from 
hive_table", 6}, - {"hive_table", 6}, + {"from hive_table", 8}, + {"hive_table", 8}, {"hive_table isa Dimension", 3}, {"hive_column where hive_column isa PII", 6}, {"View is Dimension" , 2}, // {"hive_column where hive_column isa PII select hive_column.name", 6}, //Not working - ATLAS-175 - {"hive_column select hive_column.name", 27}, - {"hive_column select name", 27}, + {"hive_column select hive_column.name", 37}, + {"hive_column select name", 37}, {"hive_column where hive_column.name=\"customer_id\"", 4}, - {"from hive_table select hive_table.name", 6}, + {"from hive_table select hive_table.name", 8}, {"hive_db where (name = \"Reporting\")", 1}, {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1}, {"hive_db where hive_db is JdbcAccess", 0}, //Not supposed to work - {"hive_db hive_table", 6}, - {"hive_db where hive_db has name", 2}, + {"hive_db hive_table", 8}, + {"hive_db where hive_db has name", 3}, {"hive_db as db1 hive_table where (db1.name = \"Reporting\")", 0}, //Not working -> ATLAS-145 {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1}, {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1}, @@ -178,9 +178,10 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest { // trait searches {"Dimension", 5}, {"JdbcAccess", 2}, - {"ETL", 2}, + {"ETL", 3}, {"Metric", 5}, {"PII", 6}, + {"`Log Data`", 4}, /* Lineage queries are fired through ClosureQuery and are tested through HiveLineageJerseyResourceIt in webapp module. 
Commenting out the below queries since DSL to Gremlin parsing/translation fails with lineage queries when there are array types http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/30a2ec1f/webapp/src/main/java/org/apache/atlas/examples/QuickStart.java ---------------------------------------------------------------------- diff --git a/webapp/src/main/java/org/apache/atlas/examples/QuickStart.java b/webapp/src/main/java/org/apache/atlas/examples/QuickStart.java index 1232f8f..767f286 100755 --- a/webapp/src/main/java/org/apache/atlas/examples/QuickStart.java +++ b/webapp/src/main/java/org/apache/atlas/examples/QuickStart.java @@ -89,7 +89,7 @@ public class QuickStart { private static final String[] TYPES = {DATABASE_TYPE, TABLE_TYPE, STORAGE_DESC_TYPE, COLUMN_TYPE, LOAD_PROCESS_TYPE, VIEW_TYPE, "JdbcAccess", - "ETL", "Metric", "PII", "Fact", "Dimension"}; + "ETL", "Metric", "PII", "Fact", "Dimension", "Log Data"}; private final AtlasClient metadataServiceClient; @@ -162,8 +162,10 @@ public class QuickStart { HierarchicalTypeDefinition<TraitType> jdbcTraitDef = TypesUtil.createTraitTypeDef("JdbcAccess", null); + HierarchicalTypeDefinition<TraitType> logTraitDef = TypesUtil.createTraitTypeDef("Log Data", null); + return TypesUtil.getTypesDef(ImmutableList.<EnumTypeDefinition>of(), ImmutableList.<StructTypeDefinition>of(), - ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef), + ImmutableList.of(dimTraitDef, factTraitDef, piiTraitDef, metricTraitDef, etlTraitDef, jdbcTraitDef, logTraitDef), ImmutableList.of(dbClsDef, storageDescClsDef, columnClsDef, tblClsDef, loadProcessClsDef, viewClsDef)); } @@ -195,6 +197,10 @@ public class QuickStart { rawColumn("customer_id", "int", "customer id", "PII"), rawColumn("sales", "double", "product id", "Metric")); + List<Referenceable> logFactColumns = ImmutableList + .of(rawColumn("time_id", "int", "time id"), rawColumn("app_id", "int", "app id"), + rawColumn("machine_id", "int", 
"machine id"), rawColumn("log", "string", "log data", "Log Data")); + Id salesFact = table("sales_fact", "sales fact table", salesDB, sd, "Joe", "Managed", salesFactColumns, "Fact"); List<Referenceable> productDimColumns = ImmutableList @@ -225,10 +231,16 @@ public class QuickStart { Id reportingDB = database("Reporting", "reporting database", "Jane BI", "hdfs://host:8000/apps/warehouse/reporting"); + Id logDB = database("Logging", "logging database", "Tim ETL", "hdfs://host:8000/apps/warehouse/logging"); + Id salesFactDaily = table("sales_fact_daily_mv", "sales fact daily materialized view", reportingDB, sd, "Joe BI", "Managed", salesFactColumns, "Metric"); + Id loggingFactDaily = + table("log_fact_daily_mv", "log fact daily materialized view", logDB, sd, "Tim ETL", "Managed", + logFactColumns, "Log Data"); + loadProcess("loadSalesDaily", "hive query for daily summary", "John ETL", ImmutableList.of(salesFact, timeDim), ImmutableList.of(salesFactDaily), "create table as select ", "plan", "id", "graph", "ETL"); @@ -242,6 +254,13 @@ public class QuickStart { loadProcess("loadSalesMonthly", "hive query for monthly summary", "John ETL", ImmutableList.of(salesFactDaily), ImmutableList.of(salesFactMonthly), "create table as select ", "plan", "id", "graph", "ETL"); + + Id loggingFactMonthly = + table("logging_fact_monthly_mv", "logging fact monthly materialized view", logDB, sd, "Tim ETL", + "Managed", logFactColumns, "Log Data"); + + loadProcess("loadLogsMonthly", "hive query for monthly summary", "Tim ETL", ImmutableList.of(loggingFactDaily), + ImmutableList.of(loggingFactMonthly), "create table as select ", "plan", "id", "graph", "ETL"); } private Id createInstance(Referenceable referenceable) throws Exception { @@ -377,7 +396,7 @@ public class QuickStart { // trait searches "Dimension", /*"Fact", - todo: does not work*/ - "JdbcAccess", "ETL", "Metric", "PII", + "JdbcAccess", "ETL", "Metric", "PII", "`Log Data`", /* // Lineage - todo - fix this, its not working "Table 
hive_process outputTables",
