This is an automated email from the ASF dual-hosted git repository. nixon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/master by this push: new 289c87a ATLAS-3451 Export API connected export mismatch for lineage of entities from different DBs 289c87a is described below commit 289c87a67a2307ea3d1b3bdceee7bc322f35a291 Author: nikhilbonte <nikhil.bo...@freestoneinfotech.com> AuthorDate: Mon Oct 7 19:20:10 2019 +0530 ATLAS-3451 Export API connected export mismatch for lineage of entities from different DBs Signed-off-by: nixonrodrigues <ni...@apache.org> --- .../impexp/RelationshipAttributesExtractor.java | 79 ++++-- .../RelationshipAttributesExtractorTest.java | 65 ++++- .../resources/json/entities/column-lineage.json | 89 +++++++ .../src/test/resources/json/entities/db1.json | 27 ++ .../src/test/resources/json/entities/db2.json | 27 ++ .../resources/json/entities/table-lineage.json | 240 ++++++++++++++++++ .../src/test/resources/json/entities/table1.json | 276 +++++++++++++++++++++ .../src/test/resources/json/entities/table2.json | 276 +++++++++++++++++++++ 8 files changed, 1049 insertions(+), 30 deletions(-) diff --git a/repository/src/main/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractor.java b/repository/src/main/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractor.java index d609071..d8b0a20 100644 --- a/repository/src/main/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractor.java +++ b/repository/src/main/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractor.java @@ -19,7 +19,9 @@ package org.apache.atlas.repository.impexp; import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasRelatedObjectId; +import org.apache.atlas.model.typedef.AtlasBaseTypeDef; import org.apache.atlas.model.typedef.AtlasEntityDef; +import org.apache.atlas.repository.impexp.ExportService.TraversalDirection; import org.apache.atlas.type.AtlasTypeRegistry; import org.apache.atlas.type.AtlasTypeUtil; import org.slf4j.Logger; @@ -29,6 +31,12 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import static org.apache.atlas.repository.impexp.ExportService.ExportContext; +import static org.apache.atlas.repository.impexp.ExportService.TraversalDirection.BOTH; +import static org.apache.atlas.repository.impexp.ExportService.TraversalDirection.INWARD; +import static org.apache.atlas.repository.impexp.ExportService.TraversalDirection.OUTWARD; +import static org.apache.atlas.repository.impexp.ExportService.TraversalDirection.UNKNOWN; + public class RelationshipAttributesExtractor implements ExtractStrategy { private static final Logger LOG = LoggerFactory.getLogger(RelationshipAttributesExtractor.class); @@ -40,7 +48,7 @@ public class RelationshipAttributesExtractor implements ExtractStrategy { } @Override - public void fullFetch(AtlasEntity entity, ExportService.ExportContext context) { + public void fullFetch(AtlasEntity entity, ExportContext context) { if (LOG.isDebugEnabled()) { LOG.debug("==> fullFetch({}): guidsToProcess {}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size()); } @@ -53,7 +61,7 @@ public class RelationshipAttributesExtractor implements ExtractStrategy { if (context.skipLineage && isLineage) { continue; } - context.addToBeProcessed(isLineage, ar.getGuid(), ExportService.TraversalDirection.BOTH); + context.addToBeProcessed(isLineage, ar.getGuid(), BOTH); } if (LOG.isDebugEnabled()) { @@ -62,31 +70,24 @@ public class RelationshipAttributesExtractor implements ExtractStrategy { } @Override - public void connectedFetch(AtlasEntity entity, ExportService.ExportContext context) { + public void connectedFetch(AtlasEntity entity, ExportContext context) { if (LOG.isDebugEnabled()) { - LOG.debug("==> connectedFetch({}): guidsToProcess {} isSkipConnectedFetch :{}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size(), context.isSkipConnectedFetch); + LOG.debug("==> connectedFetch({}): guidsToProcess {}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size()); } - List<AtlasRelatedObjectId> atlasRelatedObjectIdList = getRelatedObjectIds(entity); - for (AtlasRelatedObjectId ar : atlasRelatedObjectIdList) { - boolean isLineage = isLineageType(ar.getTypeName()); + ExportService.TraversalDirection direction = context.guidDirection.get(entity.getGuid()); - if (context.skipLineage && isLineage) { - continue; + if (direction == null || direction == UNKNOWN) { + addToBeProcessed(entity, context, OUTWARD, INWARD); + } else { + if (isLineageType(entity.getTypeName())) { + direction = OUTWARD; } - if (!context.isSkipConnectedFetch || isLineage) { - context.addToBeProcessed(isLineage, ar.getGuid(), ExportService.TraversalDirection.BOTH); - } - } - - if(isLineageType(entity.getTypeName())){ - context.isSkipConnectedFetch = false; - }else{ - context.isSkipConnectedFetch = true; + addToBeProcessed(entity, context, direction); } if (LOG.isDebugEnabled()) { - LOG.debug("==> connectedFetch({}): guidsToProcess {}, isSkipConnectedFetch :{}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size(), context.isSkipConnectedFetch); + LOG.debug("==> connectedFetch({}): guidsToProcess {}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size()); } } @@ -94,9 +95,46 @@ public class RelationshipAttributesExtractor implements ExtractStrategy { public void close() { } + private void addToBeProcessed(AtlasEntity entity, ExportContext context, TraversalDirection... directions) { + if (directions == null || directions.length == 0) { + return; + } + + boolean isLineageEntity = isLineageType(entity.getTypeName()); + List<AtlasRelatedObjectId> relatedObjectIds = getRelatedObjectIds(entity); + + for (TraversalDirection direction : directions) { + for (AtlasRelatedObjectId id : relatedObjectIds) { + String guid = id.getGuid(); + TraversalDirection currentDirection = context.guidDirection.get(guid); + boolean isLineageId = isLineageType(id.getTypeName()); + TraversalDirection edgeDirection = getRelationshipEdgeDirection(id, entity.getTypeName()); + + if (context.skipLineage && isLineageId) continue; + + if (!isLineageEntity && direction != edgeDirection || + isLineageEntity && direction == edgeDirection) + continue; + + if (currentDirection == null) { + context.addToBeProcessed(isLineageId, guid, direction); + + } else if (currentDirection == OUTWARD && direction == INWARD) { + context.guidsProcessed.remove(guid); + context.addToBeProcessed(isLineageId, guid, direction); + } + } + } + } + + private TraversalDirection getRelationshipEdgeDirection(AtlasRelatedObjectId relatedObjectId, String entityTypeName) { + boolean isOutEdge = typeRegistry.getRelationshipDefByName(relatedObjectId.getRelationshipType()).getEndDef1().getType().equals(entityTypeName); + return isOutEdge ? OUTWARD : INWARD; + } + private boolean isLineageType(String typeName) { AtlasEntityDef entityDef = typeRegistry.getEntityDefByName(typeName); - return entityDef.getSuperTypes().contains("Process"); + return entityDef.getSuperTypes().contains(AtlasBaseTypeDef.ATLAS_TYPE_PROCESS); } private List<AtlasRelatedObjectId> getRelatedObjectIds(AtlasEntity entity) { @@ -109,7 +147,6 @@ public class RelationshipAttributesExtractor implements ExtractStrategy { relatedObjectIds.addAll((List) o); } } - return relatedObjectIds; } } diff --git a/repository/src/test/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractorTest.java b/repository/src/test/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractorTest.java index 920fc28..a1b512f 100644 --- a/repository/src/test/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractorTest.java +++ b/repository/src/test/java/org/apache/atlas/repository/impexp/RelationshipAttributesExtractorTest.java @@ -26,6 +26,7 @@ import org.apache.atlas.model.impexp.AtlasExportResult; import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasObjectId; import org.apache.atlas.repository.graph.AtlasGraphProvider; +import org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2; import org.apache.atlas.runner.LocalSolrRunner; import org.apache.atlas.store.AtlasTypeDefStore; import org.apache.atlas.type.AtlasTypeRegistry; @@ -48,26 +49,45 @@ import java.util.ArrayList; import java.util.HashMap; import static org.apache.atlas.graph.GraphSandboxUtil.useLocalSolr; -import static org.apache.atlas.repository.impexp.ZipFileResourceTestUtils.*; -import static org.testng.Assert.*; +import static org.apache.atlas.repository.impexp.ZipFileResourceTestUtils.getZipSource; +import static org.apache.atlas.repository.impexp.ZipFileResourceTestUtils.loadModelFromJson; +import static org.apache.atlas.repository.impexp.ZipFileResourceTestUtils.runImportWithNoParameters; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; @Guice(modules = TestModules.TestOnlyModule.class) -public class RelationshipAttributesExtractorTest { +public class RelationshipAttributesExtractorTest extends ExportImportTestBase { - private static final String EXPORT_FULL = "full"; + private static final String EXPORT_FULL = "full"; private static final String EXPORT_CONNECTED = "connected"; - private static final String QUALIFIED_NAME_DB = "db_test_1@02052019"; - private static final String QUALIFIED_NAME_TABLE_LINEAGE = "db_test_1.test_tbl_ctas_2@02052019"; + private static final String ENTITIES_SUB_DIR = "entities"; + + private static final String QUALIFIED_NAME_DB = "db_test_1@02052019"; + private static final String QUALIFIED_NAME_TABLE_LINEAGE = "db_test_1.test_tbl_ctas_2@02052019"; private static final String QUALIFIED_NAME_TABLE_NON_LINEAGE = "db_test_1.test_tbl_1@02052019"; - private static final String GUID_DB = "f0b72ab4-7452-4e42-ac74-2aee7728cce4"; - private static final String GUID_TABLE_1 = "4d5adf00-2c9b-4877-ad23-c41fd7319150"; - private static final String GUID_TABLE_2 = "8d0b834c-61ce-42d8-8f66-6fa51c36bccb"; + private static final String GUID_DB = "f0b72ab4-7452-4e42-ac74-2aee7728cce4"; + private static final String GUID_TABLE_1 = "4d5adf00-2c9b-4877-ad23-c41fd7319150"; + private static final String GUID_TABLE_2 = "8d0b834c-61ce-42d8-8f66-6fa51c36bccb"; private static final String GUID_TABLE_CTAS_2 = "eaec545b-3ac7-4e1b-a497-bd4a2b6434a2"; private static final String GUID_HIVE_PROCESS = "bd3138b2-f29e-4226-b859-de25eaa1c18b"; + private static final String DB1 = "db1"; + private static final String DB2 = "db2"; + private static final String TBL1 = "table1"; + private static final String TBL2 = "table2"; + private static final String HIVE_PROCESS = "table-lineage"; + private static final String HIVE_COLUMN_LINEAGE = "column-lineage"; + + private static final String GUID_DB1 = "1c4e939e-ff6b-4229-92a4-b60c00deb547"; + private static final String GUID_DB2 = "77c3bccf-ca3f-42e7-b2dd-f5a35f63eea6"; + private static final String GUID_TBL1 = "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e"; + private static final String GUID_TBL2 = "b8cbc39f-4467-429b-a7fe-4ba2c28cceca"; + private static final String GUID_PROCESS = "caf7f40a-b334-4f9e-9bf2-f24ce43db47f"; + private static final String GUID_COLUMN_LINEAGE = "d4cf482b-423c-4c88-9bd1-701477ed6fd8"; + @Inject private ImportService importService; @@ -80,6 +100,9 @@ public class RelationshipAttributesExtractorTest { @Inject private ExportService exportService; + @Inject + private AtlasEntityStoreV2 entityStore; + @BeforeClass public void setup() throws IOException, AtlasBaseException { loadBaseModel(); @@ -183,6 +206,22 @@ public class RelationshipAttributesExtractorTest { verifyTableWithoutLineageSkipLineageConn(source); } + @Test + public void interDbLineageConnectedExportTest() throws Exception { + setupInterDbLineageData(); + + ZipSource source = runExport(getExportRequestForHiveTable("db_1.table_1@cl1", EXPORT_CONNECTED, false)); + assertInterDbLineageConnectedExport(source); + } + + private void setupInterDbLineageData() { + RequestContext.get().setImportInProgress(true); + createEntities(entityStore, ENTITIES_SUB_DIR, new String[]{DB1, DB2, TBL1, TBL2, HIVE_PROCESS, HIVE_COLUMN_LINEAGE}); + final String[] entityGuids = {GUID_DB1, GUID_DB2, GUID_TBL1, GUID_TBL2, GUID_PROCESS, GUID_COLUMN_LINEAGE}; + verifyCreatedEntities(entityStore, entityGuids, 6); + RequestContext.get().setImportInProgress(false); + } + private void loadHiveModel() throws IOException, AtlasBaseException { loadModelFromJson("1000-Hadoop/1030-hive_model.json", typeDefStore, typeRegistry); } @@ -334,6 +373,14 @@ public class RelationshipAttributesExtractorTest { verifyExpectedEntities(getFileNames(zipSource), GUID_DB, GUID_TABLE_1); } + private void assertInterDbLineageConnectedExport(ZipSource zipSource) { + assertNotNull(zipSource.getCreationOrder()); + assertEquals(zipSource.getCreationOrder().size(), 5); + + assertTrue(zipSource.getCreationOrder().contains(GUID_PROCESS)); + verifyExpectedEntities(getFileNames(zipSource), GUID_DB1, GUID_DB2, GUID_TBL1, GUID_TBL2, GUID_PROCESS); + } + private void verifyExpectedEntities(List<String> fileNames, String... guids){ assertEquals(fileNames.size(), guids.length); for (String guid : guids) { diff --git a/repository/src/test/resources/json/entities/column-lineage.json b/repository/src/test/resources/json/entities/column-lineage.json new file mode 100644 index 0000000..a98122d --- /dev/null +++ b/repository/src/test/resources/json/entities/column-lineage.json @@ -0,0 +1,89 @@ +{ + "entity": { + "attributes": { + "depenendencyType": "SIMPLE", + "description": null, + "expression": null, + "inputs": [ + { + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "db_1.table_1.id@cl1" + } + } + ], + "name": "db_2.table_2@cl1:1569932823000:id", + "outputs": [ + { + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "db_2.table_2.id@cl1" + } + } + ], + "owner": null, + "qualifiedName": "db_2.table_2@cl1:1569932823000:id", + "query": { + "guid": "9827e05b-7f56-4eeb-8ce5-2ebc2f1bd001", + "typeName": "hive_process" + }, + "replicatedFrom": null, + "replicatedTo": null + }, + "createTime": 1570087822414, + "createdBy": "hive", + "guid": "d4cf482b-423c-4c88-9bd1-701477ed6fd8", + "isIncomplete": false, + "relationshipAttributes": { + "inputs": [ + { + "displayText": "id", + "entityStatus": "ACTIVE", + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "relationshipAttributes": { + "typeName": "dataset_process_inputs" + }, + "relationshipGuid": "4cce3e95-f780-4d8f-ab41-74d64dc91e23", + "relationshipStatus": "ACTIVE", + "relationshipType": "dataset_process_inputs", + "typeName": "hive_column" + } + ], + "meanings": [], + "outputs": [ + { + "displayText": "id", + "entityStatus": "ACTIVE", + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "relationshipAttributes": { + "typeName": "process_dataset_outputs" + }, + "relationshipGuid": "b0054651-7003-4de9-a9f6-49ef7c0b0616", + "relationshipStatus": "ACTIVE", + "relationshipType": "process_dataset_outputs", + "typeName": "hive_column" + } + ], + "query": { + "displayText": "create table db_2.table_2 as select * from db_1.table_1", + "entityStatus": "ACTIVE", + "guid": "9827e05b-7f56-4eeb-8ce5-2ebc2f1bd001", + "relationshipAttributes": { + "typeName": "hive_process_column_lineage" + }, + "relationshipGuid": "ad56ea7f-8556-421d-8979-7eaf79dd0050", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_process_column_lineage", + "typeName": "hive_process" + } + }, + "status": "ACTIVE", + "typeName": "hive_column_lineage", + "updateTime": 1570087822414, + "updatedBy": "hive", + "version": 0 + }, + "referredEntities": {} +} \ No newline at end of file diff --git a/repository/src/test/resources/json/entities/db1.json b/repository/src/test/resources/json/entities/db1.json new file mode 100644 index 0000000..c1a670f --- /dev/null +++ b/repository/src/test/resources/json/entities/db1.json @@ -0,0 +1,27 @@ +{ + "entity": { + "attributes": { + "clusterName": "cl1", + "description": null, + "location": "hdfs://localhost:8020/warehouse/tablespace/managed/hive/db_1.db", + "name": "db_1", + "owner": "hive", + "ownerType": "USER", + "parameters": {}, + "qualifiedName": "db_1@cl1", + "replicatedFrom": null, + "replicatedTo": null + }, + "createTime": 1570444587942, + "createdBy": "hive", + "guid": "1c4e939e-ff6b-4229-92a4-b60c00deb547", + "isIncomplete": false, + "relationshipAttributes": {}, + "status": "ACTIVE", + "typeName": "hive_db", + "updateTime": 1570444591401, + "updatedBy": "hive", + "version": 0 + }, + "referredEntities": {} +} \ No newline at end of file diff --git a/repository/src/test/resources/json/entities/db2.json b/repository/src/test/resources/json/entities/db2.json new file mode 100644 index 0000000..d71c707 --- /dev/null +++ b/repository/src/test/resources/json/entities/db2.json @@ -0,0 +1,27 @@ +{ + "entity": { + "attributes": { + "clusterName": "cl1", + "description": null, + "location": "hdfs://localhost:8020/warehouse/tablespace/managed/hive/db_2.db", + "name": "db_2", + "owner": "hive", + "ownerType": "USER", + "parameters": {}, + "qualifiedName": "db_2@cl1", + "replicatedFrom": null, + "replicatedTo": null + }, + "createTime": 1570444593617, + "createdBy": "hive", + "guid": "77c3bccf-ca3f-42e7-b2dd-f5a35f63eea6", + "isIncomplete": false, + "relationshipAttributes": {}, + "status": "ACTIVE", + "typeName": "hive_db", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "referredEntities": {} +} \ No newline at end of file diff --git a/repository/src/test/resources/json/entities/table-lineage.json b/repository/src/test/resources/json/entities/table-lineage.json new file mode 100644 index 0000000..40834bc --- /dev/null +++ b/repository/src/test/resources/json/entities/table-lineage.json @@ -0,0 +1,240 @@ +{ + "entity": { + "attributes": { + "clusterName": null, + "description": null, + "endTime": 0, + "inputs": [ + { + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "db_1.table_1@cl1" + } + } + ], + "name": "create table db_2.table_2 as select * from db_1.table_1", + "operationType": "CREATETABLE_AS_SELECT", + "outputs": [ + { + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "db_2.table_2@cl1" + } + } + ], + "owner": null, + "qualifiedName": "db_2.table_2@cl1:1570444602000", + "queryGraph": null, + "queryId": "", + "queryPlan": "Not Supported", + "queryText": "", + "recentQueries": [ + "create table db_2.table_2 as select * from db_1.table_1" + ], + "replicatedFrom": null, + "replicatedTo": null, + "startTime": 0, + "userName": "" + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "caf7f40a-b334-4f9e-9bf2-f24ce43db47f", + "isIncomplete": false, + "relationshipAttributes": { + "columnLineages": [ + { + "displayText": "db_2.table_2@cl1:1570444602000:id", + "entityStatus": "ACTIVE", + "guid": "52b4662b-999d-4f63-a977-89bc9ea8ff46", + "relationshipAttributes": { + "typeName": "hive_process_column_lineage" + }, + "relationshipGuid": "26c9dec4-f3c9-4a17-82df-7156d68568f4", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_process_column_lineage", + "typeName": "hive_column_lineage" + } + ], + "inputs": [ + { + "displayText": "table_1", + "entityStatus": "ACTIVE", + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "relationshipAttributes": { + "typeName": "dataset_process_inputs" + }, + "relationshipGuid": "69a92de6-2ff0-48bf-821a-5be9b3605aea", + "relationshipStatus": "ACTIVE", + "relationshipType": "dataset_process_inputs", + "typeName": "hive_table" + } + ], + "meanings": [], + "outputs": [ + { + "displayText": "table_2", + "entityStatus": "ACTIVE", + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "relationshipAttributes": { + "typeName": "process_dataset_outputs" + }, + "relationshipGuid": "d38c4409-65c4-4ba4-99bb-c655d8be8197", + "relationshipStatus": "ACTIVE", + "relationshipType": "process_dataset_outputs", + "typeName": "hive_table" + } + ], + "processExecutions": [ + { + "displayText": "create table db_2.table_2 as select * from db_1.table_1:1570444589240", + "entityStatus": "ACTIVE", + "guid": "10a024a4-cb38-450b-a601-0106ffb758ad", + "relationshipAttributes": { + "typeName": "hive_process_process_executions" + }, + "relationshipGuid": "e40f8127-3415-4c3e-ac7e-424d3349f116", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_process_process_executions", + "typeName": "hive_process_execution" + } + ] + }, + "status": "ACTIVE", + "typeName": "hive_process", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "referredEntities": { + "10a024a4-cb38-450b-a601-0106ffb758ad": { + "attributes": { + "description": null, + "endTime": 1570444602623, + "hostName": "nb-atl-2409-2.openstacklocal", + "name": "create table db_2.table_2 as select * from db_1.table_1:1570444589240", + "owner": null, + "qualifiedName": "db_2.table_2@cl1:1570444602000:1570444589240:1570444602623", + "queryGraph": null, + "queryId": "hive_20191007103629_e6f69952-08e4-4b0c-a3b8-faf42d819ff3", + "queryPlan": "Not Supported", + "queryText": "create table db_2.table_2 as select * from db_1.table_1", + "replicatedFrom": null, + "replicatedTo": null, + "startTime": 1570444589240, + "userName": "hive" + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "10a024a4-cb38-450b-a601-0106ffb758ad", + "isIncomplete": false, + "relationshipAttributes": { + "meanings": [], + "process": { + "displayText": "create table db_2.table_2 as select * from db_1.table_1", + "entityStatus": "ACTIVE", + "guid": "caf7f40a-b334-4f9e-9bf2-f24ce43db47f", + "relationshipAttributes": { + "typeName": "hive_process_process_executions" + }, + "relationshipGuid": "e40f8127-3415-4c3e-ac7e-424d3349f116", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_process_process_executions", + "typeName": "hive_process" + } + }, + "status": "ACTIVE", + "typeName": "hive_process_execution", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "52b4662b-999d-4f63-a977-89bc9ea8ff46": { + "attributes": { + "depenendencyType": "SIMPLE", + "description": null, + "expression": null, + "inputs": [ + { + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "db_1.table_1.id@cl1" + } + } + ], + "name": "db_2.table_2@cl1:1570444602000:id", + "outputs": [ + { + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "db_2.table_2.id@cl1" + } + } + ], + "owner": null, + "qualifiedName": "db_2.table_2@cl1:1570444602000:id", + "query": { + "guid": "caf7f40a-b334-4f9e-9bf2-f24ce43db47f", + "typeName": "hive_process" + }, + "replicatedFrom": null, + "replicatedTo": null + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "52b4662b-999d-4f63-a977-89bc9ea8ff46", + "isIncomplete": false, + "relationshipAttributes": { + "inputs": [ + { + "displayText": "id", + "entityStatus": "ACTIVE", + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "relationshipAttributes": { + "typeName": "dataset_process_inputs" + }, + "relationshipGuid": "82b2e578-e707-4c62-9e70-e94bfbe5caf2", + "relationshipStatus": "ACTIVE", + "relationshipType": "dataset_process_inputs", + "typeName": "hive_column" + } + ], + "meanings": [], + "outputs": [ + { + "displayText": "id", + "entityStatus": "ACTIVE", + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "relationshipAttributes": { + "typeName": "process_dataset_outputs" + }, + "relationshipGuid": "4c42c8a1-a0c5-4125-ace3-8fc59312b6dd", + "relationshipStatus": "ACTIVE", + "relationshipType": "process_dataset_outputs", + "typeName": "hive_column" + } + ], + "query": { + "displayText": "create table db_2.table_2 as select * from db_1.table_1", + "entityStatus": "ACTIVE", + "guid": "caf7f40a-b334-4f9e-9bf2-f24ce43db47f", + "relationshipAttributes": { + "typeName": "hive_process_column_lineage" + }, + "relationshipGuid": "26c9dec4-f3c9-4a17-82df-7156d68568f4", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_process_column_lineage", + "typeName": "hive_process" + } + }, + "status": "ACTIVE", + "typeName": "hive_column_lineage", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + } + } +} \ No newline at end of file diff --git a/repository/src/test/resources/json/entities/table1.json b/repository/src/test/resources/json/entities/table1.json new file mode 100644 index 0000000..a1411c1 --- /dev/null +++ b/repository/src/test/resources/json/entities/table1.json @@ -0,0 +1,276 @@ +{ + "entity": { + "attributes": { + "aliases": null, + "columns": [ + { + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "typeName": "hive_column" + } + ], + "comment": null, + "createTime": 1570444588000, + "db": { + "guid": "1c4e939e-ff6b-4229-92a4-b60c00deb547", + "typeName": "hive_db" + }, + "description": null, + "lastAccessTime": 1570444588000, + "name": "table_1", + "owner": "hive", + "parameters": { + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\"}}", + "bucketing_version": "2", + "numFiles": "0", + "numRows": "0", + "rawDataSize": "0", + "totalSize": "0", + "transactional": "true", + "transactional_properties": "default", + "transient_lastDdlTime": "1570444588" + }, + "partitionKeys": [], + "qualifiedName": "db_1.table_1@cl1", + "replicatedFrom": null, + "replicatedTo": null, + "retention": 0, + "sd": { + "guid": "52e8353a-36e5-475f-8626-908a07230dfd", + "typeName": "hive_storagedesc" + }, + "tableType": "MANAGED_TABLE", + "temporary": false, + "viewExpandedText": null, + "viewOriginalText": null + }, + "createTime": 1570444591401, + "createdBy": "hive", + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "isIncomplete": false, + "relationshipAttributes": { + "columns": [ + { + "displayText": "id", + "entityStatus": "ACTIVE", + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "relationshipAttributes": { + "typeName": "hive_table_columns" + }, + "relationshipGuid": "f9e10d9a-0021-485e-8f49-b015b2223faf", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_columns", + "typeName": "hive_column" + } + ], + "db": { + "displayText": "db_1", + "entityStatus": "ACTIVE", + "guid": "1c4e939e-ff6b-4229-92a4-b60c00deb547", + "relationshipAttributes": { + "typeName": "hive_table_db" + }, + "relationshipGuid": "1108f124-2dc9-4406-b063-a9751205308d", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_db", + "typeName": "hive_db" + }, + "ddlQueries": [ + { + "displayText": "db_1.table_1@cl1:1570444588003", + "entityStatus": "ACTIVE", + "guid": "942802d2-0310-40f3-ab0a-8e35a6e43b53", + "relationshipAttributes": { + "typeName": "hive_table_ddl_queries" + }, + "relationshipGuid": "30b3a990-c362-4044-9e78-5a7be7f35307", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_ddl_queries", + "typeName": "hive_table_ddl" + } + ], + "inputToProcesses": [ + { + "displayText": "create table db_2.table_2 as select * from db_1.table_1", + "entityStatus": "ACTIVE", + "guid": "caf7f40a-b334-4f9e-9bf2-f24ce43db47f", + "relationshipAttributes": { + "typeName": "dataset_process_inputs" + }, + "relationshipGuid": "69a92de6-2ff0-48bf-821a-5be9b3605aea", + "relationshipStatus": "ACTIVE", + "relationshipType": "dataset_process_inputs", + "typeName": "hive_process" + } + ], + "meanings": [], + "outputFromProcesses": [], + "partitionKeys": [], + "schema": [], + "sd": { + "displayText": "db_1.table_1@cl1_storage", + "entityStatus": "ACTIVE", + "guid": "52e8353a-36e5-475f-8626-908a07230dfd", + "relationshipAttributes": { + "typeName": "hive_table_storagedesc" + }, + "relationshipGuid": "5a40781f-edf1-4854-ba53-d94829039f4a", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_storagedesc", + "typeName": "hive_storagedesc" + } + }, + "status": "ACTIVE", + "typeName": "hive_table", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "referredEntities": { + "52e8353a-36e5-475f-8626-908a07230dfd": { + "attributes": { + "bucketCols": null, + "compressed": false, + "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "location": "hdfs://localhost:8020/warehouse/tablespace/managed/hive/db_1.db/table_1", + "numBuckets": -1, + "outputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "parameters": {}, + "qualifiedName": "db_1.table_1@cl1_storage", + "replicatedFrom": null, + "replicatedTo": null, + "serdeInfo": { + "attributes": { + "name": null, + "parameters": { + "serialization.format": "1" + }, + "serializationLib": "org.apache.hadoop.hive.ql.io.orc.OrcSerde" + }, + "typeName": "hive_serde" + }, + "sortCols": [], + "storedAsSubDirectories": false, + "table": { + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "typeName": "hive_table" + } + }, + "createTime": 1570444591401, + "createdBy": "hive", + "guid": "52e8353a-36e5-475f-8626-908a07230dfd", + "isIncomplete": false, + "relationshipAttributes": { + "meanings": [], + "table": { + "displayText": "table_1", + "entityStatus": "ACTIVE", + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "relationshipAttributes": { + "typeName": "hive_table_storagedesc" + }, + "relationshipGuid": "5a40781f-edf1-4854-ba53-d94829039f4a", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_storagedesc", + "typeName": "hive_table" + } + }, + "status": "ACTIVE", + "typeName": "hive_storagedesc", + "updateTime": 1570444591401, + "updatedBy": "hive", + "version": 0 + }, + "942802d2-0310-40f3-ab0a-8e35a6e43b53": { + "attributes": { + "execTime": 1570444588003, + "qualifiedName": "db_1.table_1@cl1:1570444588003", + "queryText": "create table db_1.table_1(id int)", + "replicatedFrom": null, + "replicatedTo": null, + "serviceType": "hive", + "userName": "hive" + }, + "createTime": 1570444591401, + "createdBy": "hive", + "guid": "942802d2-0310-40f3-ab0a-8e35a6e43b53", + "isIncomplete": false, + "relationshipAttributes": { + "meanings": [], + "table": { + "displayText": "table_1", + "entityStatus": "ACTIVE", + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "relationshipAttributes": { + "typeName": "hive_table_ddl_queries" + }, + "relationshipGuid": "30b3a990-c362-4044-9e78-5a7be7f35307", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_ddl_queries", + "typeName": "hive_table" + } + }, + "status": "ACTIVE", + "typeName": "hive_table_ddl", + "updateTime": 1570444591401, + "updatedBy": "hive", + "version": 0 + }, + "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb": { + "attributes": { + "comment": null, + "description": null, + "name": "id", + "owner": "hive", + "position": 0, + "qualifiedName": "db_1.table_1.id@cl1", + "replicatedFrom": null, + "replicatedTo": null, + "table": { + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "typeName": "hive_table" + }, + "type": "int" + }, + "createTime": 1570444591401, + "createdBy": "hive", + "guid": "9f6a3c7d-2b93-485b-b7cd-47775c50d6fb", + "isIncomplete": false, + "relationshipAttributes": { + "inputToProcesses": [ + { + "displayText": "db_2.table_2@cl1:1570444602000:id", + "entityStatus": "ACTIVE", + "guid": "52b4662b-999d-4f63-a977-89bc9ea8ff46", + "relationshipAttributes": { + "typeName": "dataset_process_inputs" + }, + "relationshipGuid": "82b2e578-e707-4c62-9e70-e94bfbe5caf2", + "relationshipStatus": "ACTIVE", + "relationshipType": "dataset_process_inputs", + "typeName": "hive_column_lineage" + } + ], + "meanings": [], + "outputFromProcesses": [], + "schema": [], + "table": { + "displayText": "table_1", + "entityStatus": "ACTIVE", + "guid": "3f6c02be-61e8-4dae-a7b8-cc37f289ce6e", + "relationshipAttributes": { + "typeName": "hive_table_columns" + }, + "relationshipGuid": "f9e10d9a-0021-485e-8f49-b015b2223faf", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_columns", + "typeName": "hive_table" + } + }, + "status": "ACTIVE", + "typeName": "hive_column", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + } + } +} \ No newline at end of file diff --git a/repository/src/test/resources/json/entities/table2.json b/repository/src/test/resources/json/entities/table2.json new file mode 100644 index 0000000..5374d86 --- /dev/null +++ b/repository/src/test/resources/json/entities/table2.json @@ -0,0 +1,276 @@ +{ + "entity": { + "attributes": { + "aliases": null, + "columns": [ + { + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "typeName": "hive_column" + } + ], + "comment": null, + "createTime": 1570444602000, + "db": { + "guid": "77c3bccf-ca3f-42e7-b2dd-f5a35f63eea6", + "typeName": "hive_db" + }, + "description": null, + "lastAccessTime": 1570444602000, + "name": "table_2", + "owner": "hive", + "parameters": { + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\"}}", + "bucketing_version": "2", + "numFiles": "0", + "numRows": "0", + "rawDataSize": "0", + "totalSize": "0", + "transactional": "true", + "transactional_properties": "default", + "transient_lastDdlTime": "1570444602" + }, + "partitionKeys": [], + "qualifiedName": "db_2.table_2@cl1", + "replicatedFrom": null, + "replicatedTo": null, + "retention": 0, + "sd": { + "guid": "403607a6-2092-43a8-8816-9d128592004f", + "typeName": "hive_storagedesc" + }, + "tableType": "MANAGED_TABLE", + "temporary": false, + "viewExpandedText": null, + "viewOriginalText": null + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "isIncomplete": false, + "relationshipAttributes": { + "columns": [ + { + "displayText": "id", + "entityStatus": "ACTIVE", + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "relationshipAttributes": { + "typeName": "hive_table_columns" + }, + "relationshipGuid": "a7c41412-35c9-408c-894b-aa58005b46ae", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_columns", + "typeName": "hive_column" + } + ], + "db": { + "displayText": "db_2", + "entityStatus": "ACTIVE", + "guid": "77c3bccf-ca3f-42e7-b2dd-f5a35f63eea6", + "relationshipAttributes": { + "typeName": "hive_table_db" + }, + "relationshipGuid": "a6cf6e79-2614-4284-b3e8-48d1d261c2e9", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_db", + "typeName": "hive_db" + }, + "ddlQueries": [ + { + "displayText": "db_2.table_2@cl1:1570444589240", + "entityStatus": "ACTIVE", + "guid": "eb5d6056-f5a3-46c4-948f-dae2f9481076", + "relationshipAttributes": { + "typeName": "hive_table_ddl_queries" + }, + "relationshipGuid": "e807ef3b-d7f0-4e1f-b358-57525790e376", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_ddl_queries", + "typeName": "hive_table_ddl" + } + ], + "inputToProcesses": [], + "meanings": [], + "outputFromProcesses": [ + { + "displayText": "create table db_2.table_2 as select * from db_1.table_1", + "entityStatus": "ACTIVE", + "guid": "caf7f40a-b334-4f9e-9bf2-f24ce43db47f", + "relationshipAttributes": { + "typeName": "process_dataset_outputs" + }, + "relationshipGuid": "d38c4409-65c4-4ba4-99bb-c655d8be8197", + "relationshipStatus": "ACTIVE", + "relationshipType": "process_dataset_outputs", + "typeName": "hive_process" + } + ], + "partitionKeys": [], + "schema": [], + "sd": { + "displayText": "db_2.table_2@cl1_storage", + "entityStatus": "ACTIVE", + "guid": "403607a6-2092-43a8-8816-9d128592004f", + "relationshipAttributes": { + "typeName": "hive_table_storagedesc" + }, + "relationshipGuid": "66633928-9605-428a-8b1e-270ebbaec62b", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_storagedesc", + "typeName": "hive_storagedesc" + } + }, + "status": "ACTIVE", + "typeName": "hive_table", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "referredEntities": { + "403607a6-2092-43a8-8816-9d128592004f": { + "attributes": { + "bucketCols": null, + "compressed": false, + "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "location": "hdfs://localhost:8020/warehouse/tablespace/managed/hive/db_2.db/table_2", + "numBuckets": -1, + "outputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "parameters": {}, + "qualifiedName": "db_2.table_2@cl1_storage", + "replicatedFrom": null, + "replicatedTo": null, + "serdeInfo": { + "attributes": { + "name": null, + "parameters": { + "serialization.format": "1" + }, + "serializationLib": "org.apache.hadoop.hive.ql.io.orc.OrcSerde" + }, + "typeName": "hive_serde" + }, + "sortCols": [], + "storedAsSubDirectories": false, + "table": { + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "typeName": "hive_table" + } + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "403607a6-2092-43a8-8816-9d128592004f", + "isIncomplete": false, + "relationshipAttributes": { + "meanings": [], + "table": { + "displayText": "table_2", + "entityStatus": "ACTIVE", + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "relationshipAttributes": { + "typeName": "hive_table_storagedesc" + }, + "relationshipGuid": "66633928-9605-428a-8b1e-270ebbaec62b", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_storagedesc", + "typeName": "hive_table" + } + }, + "status": "ACTIVE", + "typeName": "hive_storagedesc", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0": { + "attributes": { + "comment": null, + "description": null, + "name": "id", + "owner": "hive", + "position": 0, + "qualifiedName": "db_2.table_2.id@cl1", + "replicatedFrom": null, + "replicatedTo": null, + "table": { + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "typeName": "hive_table" + }, + "type": "int" + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "d3ba7485-6b77-4f32-b3c9-e3d45d0949c0", + "isIncomplete": false, + "relationshipAttributes": { + "inputToProcesses": [], + "meanings": [], + "outputFromProcesses": [ + { + "displayText": "db_2.table_2@cl1:1570444602000:id", + "entityStatus": "ACTIVE", + "guid": "52b4662b-999d-4f63-a977-89bc9ea8ff46", + "relationshipAttributes": { + "typeName": "process_dataset_outputs" + }, + "relationshipGuid": "4c42c8a1-a0c5-4125-ace3-8fc59312b6dd", + "relationshipStatus": "ACTIVE", + "relationshipType": "process_dataset_outputs", + "typeName": "hive_column_lineage" + } + ], + "schema": [], + "table": { + "displayText": "table_2", + "entityStatus": "ACTIVE", + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "relationshipAttributes": { + "typeName": "hive_table_columns" + }, + "relationshipGuid": "a7c41412-35c9-408c-894b-aa58005b46ae", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_columns", + "typeName": "hive_table" + } + }, + "status": "ACTIVE", + "typeName": "hive_column", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + }, + "eb5d6056-f5a3-46c4-948f-dae2f9481076": { + "attributes": { + "execTime": 1570444589240, + "qualifiedName": "db_2.table_2@cl1:1570444589240", + "queryText": "create table db_2.table_2 as select * from db_1.table_1", + "replicatedFrom": null, + "replicatedTo": null, + "serviceType": "hive", + "userName": "hive" + }, + "createTime": 1570444602640, + "createdBy": "hive", + "guid": "eb5d6056-f5a3-46c4-948f-dae2f9481076", + "isIncomplete": false, + "relationshipAttributes": { + "meanings": [], + "table": { + "displayText": "table_2", + "entityStatus": "ACTIVE", + "guid": "b8cbc39f-4467-429b-a7fe-4ba2c28cceca", + "relationshipAttributes": { + "typeName": "hive_table_ddl_queries" + }, + "relationshipGuid": "e807ef3b-d7f0-4e1f-b358-57525790e376", + "relationshipStatus": "ACTIVE", + "relationshipType": "hive_table_ddl_queries", + "typeName": "hive_table" + } + }, + "status": "ACTIVE", + "typeName": "hive_table_ddl", + "updateTime": 1570444602640, + "updatedBy": "hive", + "version": 0 + } + } +} \ No newline at end of file