This is the relevant debug log output from Atlas: 2018-10-10 13:27:31,717 DEBUG - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ Finding edges for vertex[id=8248 type=spark_table guid=b6183317-63c8-4147-a91f-cddef2e5fdab] with label __spark_table.schema (GraphHelper:337) 2018-10-10 13:27:31,717 DEBUG - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ getRelationshipEdgeLabel(avro_schema_associatedEntities) (AtlasRelationshipStoreV2:757) 2018-10-10 13:27:31,718 DEBUG - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ Finding edges for vertex[id=8248 type=spark_table guid=b6183317-63c8-4147-a91f-cddef2e5fdab] with label r:avro_schema_associatedEntities (GraphHelper:337) 2018-10-10 13:27:31,719 DEBUG - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ <== AtlasErrorCode.getMessage([avro_schema_associatedEntities, DataSet, spark_table]) (AtlasErrorCode:221) 2018-10-10 13:27:31,720 DEBUG - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ ==> AtlasErrorCode.getMessage([avro_schema_associatedEntities, DataSet, spark_table]): invalid relationshipDef: avro_schema_associatedEntities: end type 1: DataSet, end type 2: spark_table (AtlasErrorCode:228) 2018-10-10 13:27:31,720 DEBUG - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ PERF|createOrUpdate()|1527 (AtlasPerfTracer:77) 2018-10-10 13:27:31,721 ERROR - [pool-1-thread-9 - fc7a2150-23f3-4f53-8d09-420dbcbd4ffd:] ~ graph rollback due to exception (GraphTransactionInterceptor:154)
> On 10 Oct 2018, at 15:13, Bolke de Bruin <[email protected]> wrote: > > Hi, > > We are trying to use the Spark connector for Atlas and we are encountering an > issue we do not understand. To reproduce, use a clean Atlas installation and > build the Atlas 1.0 connector from > https://github.com/hortonworks-spark/spark-atlas-connector > <https://github.com/hortonworks-spark/spark-atlas-connector> and use Spark > 2.3. Follow the instructions to add the listeners and then run: > > scala> > Seq((1,2)).toDF("i","j").write.mode("overwrite").saveAsTable("default.atlas_bolke") > > This is the result: > > org.apache.atlas.AtlasServiceException: Metadata service API > org.apache.atlas.AtlasClientV2$API_V2@1dfc2ecd failed with status 400 (Bad > Request) Response Body > ({"errorCode":"ATLAS-400-00-036","errorMessage":"invalid relationshipDef: > avro_schema_associatedEntities: end type 1: DataSet, end type 2: > spark_table"}) > > We have no clue why this error occurs. This relationship is not defined by > the Spark connector, nor is it referenced. 
This is the JSON dump of the > entity definition of spark_table: > > > { > "enumDefs": [], > "structDefs": [], > "classificationDefs": [], > "entityDefs": [{ > "category": "ENTITY", > "guid": "3bd9315c-f159-4865-ac8d-11dbcca79adc", > "createdBy": "admin", > "updatedBy": "admin", > "createTime": 1539112556115, > "updateTime": 1539112556115, > "version": 1, > "name": "spark_table", > "description": "spark_table", > "typeVersion": "1.0", > "attributeDefs": [{ > "name": "qualifiedName", > "typeName": "string", > "isOptional": false, > "cardinality": "SINGLE", > "valuesMinCount": 1, > "valuesMaxCount": 1, > "isUnique": true, > "isIndexable": true, > "includeInNotification": false > }, { > "name": "database", > "typeName": "spark_db", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "tableType", > "typeName": "string", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "storage", > "typeName": "spark_storagedesc", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false, > "constraints": [{ > "type": "ownedRef" > }] > }, { > "name": "schema", > "typeName": "array<spark_column>", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false, > "constraints": [{ > "type": "ownedRef" > }] > }, { > "name": "provider", > "typeName": "string", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "partitionColumnNames", > "typeName": 
"array<string>", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "bucketSpec", > "typeName": "map<string,string>", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "owner", > "typeName": "string", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "createTime", > "typeName": "long", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "lastAccessTime", > "typeName": "long", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "properties", > "typeName": "map<string,string>", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "comment", > "typeName": "string", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }, { > "name": "unsupportedFeatures", > "typeName": "array<string>", > "isOptional": true, > "cardinality": "SINGLE", > "valuesMinCount": 0, > "valuesMaxCount": 1, > "isUnique": false, > "isIndexable": false, > "includeInNotification": false > }], > "superTypes": ["DataSet"], > "subTypes": [] > }], > "relationshipDefs": [] > } > > This is the request that errors: > > { > "referredEntities": null, > "entities": [{ > 
"typeName": "spark_table", > "attributes": { > "schema": [{ > "typeName": "spark_column", > "attributes": { > "metadata": "{}", > "nullable": true, > "qualifiedName": > "local-1539114333944.default.atlas_bolke.col-i", > "name": "i", > "type": "integer" > }, > "guid": "-79141485348090", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "spark_column", > "attributes": { > "metadata": "{}", > "nullable": true, > "qualifiedName": > "local-1539114333944.default.atlas_bolke.col-j", > "name": "j", > "type": "integer" > }, > "guid": "-79141485348091", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }], > "owner": "bolke", > "lastAccessTime": 0, > "unsupportedFeatures": [], > "qualifiedName": > "local-1539114333944.default.atlas_bolke", > "storage": { > "typeName": "spark_storagedesc", > "attributes": { > "serde": > "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", > "qualifiedName": > "local-1539114333944.default.atlas_bolke.storageFormat", > "compressed": false, > "locationUri": { > "typeName": "fs_path", > "attributes": { > "path": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke", > "qualifiedName": > "file:/Users/bolke/Downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke", > "name": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke" > }, > "guid": "-79141485348089", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, > "inputFormat": > "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", > "outputFormat": 
> "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", > "properties": { > "serialization.format": "1" > } > }, > "guid": "-79141485348088", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, > "tableType": "MANAGED", > "partitionColumnNames": [], > "database": { > "typeName": "spark_db", > "attributes": { > "owner": "bolke", > "qualifiedName": > "local-1539114333944.default", > "name": "default", > "description": "Default Hive database", > "locationUri": { > "typeName": "fs_path", > "attributes": { > "path": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse", > "qualifiedName": > "file:/Users/bolke/Downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse", > "name": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse" > }, > "guid": "-79141485348087", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, > "properties": {} > }, > "guid": "-79141485348086", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, > "provider": "parquet", > "createTime": 1539114452000, > "name": "atlas_bolke", > "properties": { > "transient_lastDdlTime": "1539114452" > } > }, > "guid": "-79141485348092", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "spark_db", > "attributes": { > "owner": "bolke", > "qualifiedName": "local-1539114333944.default", > "name": "default", > "description": "Default Hive database", > "locationUri": 
{ > "typeName": "fs_path", > "attributes": { > "path": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse", > "qualifiedName": > "file:/Users/bolke/Downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse", > "name": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse" > }, > "guid": "-79141485348087", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, > "properties": {} > }, > "guid": "-79141485348086", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "fs_path", > "attributes": { > "path": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse", > "qualifiedName": > "file:/Users/bolke/Downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse", > "name": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse" > }, > "guid": "-79141485348087", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "spark_storagedesc", > "attributes": { > "serde": > "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", > "qualifiedName": > "local-1539114333944.default.atlas_bolke.storageFormat", > "compressed": false, > "locationUri": { > "typeName": "fs_path", > "attributes": { > "path": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke", > "qualifiedName": > "file:/Users/bolke/Downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke", > "name": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke" > }, > "guid": "-79141485348089", > "status": null, > "createdBy": null, > 
"updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, > "inputFormat": > "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", > "outputFormat": > "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", > "properties": { > "serialization.format": "1" > } > }, > "guid": "-79141485348088", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "fs_path", > "attributes": { > "path": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke", > "qualifiedName": > "file:/Users/bolke/Downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke", > "name": > "/users/bolke/downloads/spark-2.3.2-bin-hadoop2.7/spark-warehouse/atlas_bolke" > }, > "guid": "-79141485348089", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "spark_column", > "attributes": { > "metadata": "{}", > "nullable": true, > "qualifiedName": > "local-1539114333944.default.atlas_bolke.col-i", > "name": "i", > "type": "integer" > }, > "guid": "-79141485348090", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > "relationshipAttributes": null, > "classifications": null, > "meanings": null > }, { > "typeName": "spark_column", > "attributes": { > "metadata": "{}", > "nullable": true, > "qualifiedName": > "local-1539114333944.default.atlas_bolke.col-j", > "name": "j", > "type": "integer" > }, > "guid": "-79141485348091", > "status": null, > "createdBy": null, > "updatedBy": null, > "createTime": null, > "updateTime": null, > "version": 0, > 
"relationshipAttributes": null, > "classifications": null, > "meanings": null > }] > } > > > Can someone shed some light on this? > > Thanks > > Bolke
