Sidharth Kumar Mishra created ATLAS-4548: --------------------------------------------
Summary: Set Empty attribute value for queryStr and queryId in case of null value when atlas.hook.hive.hive_process.populate.deprecated.attributes=true Key: ATLAS-4548 URL: https://issues.apache.org/jira/browse/ATLAS-4548 Project: Atlas Issue Type: Bug Reporter: Sidharth Kumar Mishra Assignee: Sidharth Kumar Mishra As part of https://issues.apache.org/jira/browse/ATLAS-3606, the userName, queryId and queryText attributes of hive_process were deprecated by default, and for some queries we don't get values for these attributes from the Hive context. For example, for the create-external-table query below, we don't get queryText and queryId from the Hive context, so we can't populate either. If you set the flag "atlas.hook.hive.hive_process.populate.deprecated.attributes=true", this causes a problem in this case, because the Hive context has null values while Atlas expects a value to be present for these attributes. Repro steps: -------------- First set "atlas.hook.hive.hive_process.populate.deprecated.attributes=true". Then run "beeline -e 'create external table csaba_table_3(msg string);'". Check the Atlas log: {noformat} 2022-01-26 12:26:44,950 ERROR - [NotificationHookConsumer thread-0:] ~ graph rollback due to exception AtlasBaseException:Invalid instance creation/updation parameters passed : hive_process.queryText: mandatory attribute value missing in type hive_process (GraphTransactionInterceptor:202) 2022-01-26 12:26:44,951 WARN - [NotificationHookConsumer thread-0:] ~ Max retries exceeded for message 
{"version":{"version":"1.0.0","versionParts":[1]},"msgCompressionKind":"NONE","msgSplitIdx":1,"msgSplitCount":1,"msgCreationTime":1643200004951,"spooled":false,"message":{"type":"ENTITY_CREATE_V2","user":"hive","entities":{"referredEntities":{"-30336865853165805":{"typeName":"hive_storagedesc","attributes":{"qualifiedName":"default.csaba_table_3@cm_storage","storedAsSubDirectories":false,"location":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3","compressed":false,"inputFormat":"org.apache.hadoop.mapred.TextInputFormat","parameters":{},"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serdeInfo":{"typeName":"hive_serde","attributes":{"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name":null,"parameters":{"serialization.format":"1"}}},"numBuckets":-1},"guid":"-30336865853165805","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"table":{"guid":"-30336865853165804","typeName":"hive_table","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm"},"relationshipType":"hive_table_storagedesc"}},"proxy":false},"-30336865853165806":{"typeName":"hive_column","attributes":{"owner":"hrt_qa","qualifiedName":"default.csaba_table_3.msg@cm","name":"msg","comment":null,"position":0,"type":"string"},"guid":"-30336865853165806","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"table":{"guid":"-30336865853165804","typeName":"hive_table","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm"},"relationshipType":"hive_table_columns"}},"proxy":false},"-30336865853165807":{"typeName":"hdfs_path","attributes":{"nameServiceId":"ns1","path":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3","qualifiedName":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3@cm","clusterName":"cm","name":"/warehouse/tablespace/external/hive/csaba_table_3"},"guid":"-30336865853165807","isIncomplete":false,"provenanceType":0,"version":0,"proxy":false}},"en
tities":[{"typeName":"hive_table","attributes":{"owner":"hrt_qa","tableType":"EXTERNAL_TABLE","temporary":false,"lastAccessTime":1643200003000,"createTime":1643200003000,"qualifiedName":"default.csaba_table_3@cm","name":"csaba_table_3","comment":null,"parameters":{"totalSize":"0","EXTERNAL":"TRUE","numRows":"0","rawDataSize":"0","COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"msg\":\"true\"}}","numFiles":"0","transient_lastDdlTime":"1643200003","bucketing_version":"2","numFilesErasureCoded":"0"},"retention":0},"guid":"-30336865853165804","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"sd":{"guid":"-30336865853165805","typeName":"hive_storagedesc","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm_storage"},"relationshipType":"hive_table_storagedesc"},"columns":[{"guid":"-30336865853165806","typeName":"hive_column","uniqueAttributes":{"qualifiedName":"default.csaba_table_3.msg@cm"},"relationshipType":"hive_table_columns"}],"partitionKeys":[],"db":{"typeName":"hive_db","uniqueAttributes":{"qualifiedName":"default@cm"},"relationshipType":"hive_table_db"}},"proxy":false},{"typeName":"hive_process","attributes":{"recentQueries":[null],"qualifiedName":"default.csaba_table_3@cm:1643200003000","clusterName":"cm","name":"default.csaba_table_3@cm:1643200003000","queryText":null,"operationType":"CREATETABLE","startTime":1643200003871,"queryPlan":"Not 
Supported","endTime":1643200003871,"userName":"hive","queryId":null},"guid":"-30336865853165808","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"outputs":[{"guid":"-30336865853165804","typeName":"hive_table","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm"},"relationshipType":"process_dataset_outputs"}],"inputs":[{"guid":"-30336865853165807","typeName":"hdfs_path","uniqueAttributes":{"qualifiedName":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3@cm"},"relationshipType":"dataset_process_inputs"}]},"proxy":false}]}}} (NotificationHookConsumer$HookConsumer:808) org.apache.atlas.exception.AtlasBaseException: Invalid instance creation/updation parameters passed : hive_process.queryText: mandatory attribute value missing in type hive_process at org.apache.atlas.repository.store.graph.v2.AtlasEntityGraphDiscoveryV2.validateAndNormalize(AtlasEntityGraphDiscoveryV2.java:109) at org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2.preCreateOrUpdate(AtlasEntityStoreV2.java:1275) at org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2.createOrUpdate(AtlasEntityStoreV2.java:1145) at org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2.createOrUpdate(AtlasEntityStoreV2.java:366) at org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2$$FastClassBySpringCGLIB$$6861dca9.invoke(<generated>) at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218) at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:779) at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163) at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750) at org.apache.atlas.GraphTransactionInterceptor.invoke(GraphTransactionInterceptor.java:111) at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186) at 
org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750) at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:692) at org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2$$EnhancerBySpringCGLIB$$6c395b63.createOrUpdate(<generated>) at org.apache.atlas.notification.NotificationHookConsumer$HookConsumer.createOrUpdate(NotificationHookConsumer.java:879) at org.apache.atlas.notification.NotificationHookConsumer$HookConsumer.handleMessage(NotificationHookConsumer.java:731) at org.apache.atlas.notification.NotificationHookConsumer$HookConsumer.doWork(NotificationHookConsumer.java:555) at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:96) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149{noformat} -- This message was sent by Atlassian Jira (v8.20.1#820001)