Sidharth Kumar Mishra created ATLAS-4548:
--------------------------------------------

             Summary: Set Empty attribute value for queryStr and queryId in 
case of null value when 
atlas.hook.hive.hive_process.populate.deprecated.attributes=true
                 Key: ATLAS-4548
                 URL: https://issues.apache.org/jira/browse/ATLAS-4548
             Project: Atlas
          Issue Type: Bug
            Reporter: Sidharth Kumar Mishra
            Assignee: Sidharth Kumar Mishra


As part of https://issues.apache.org/jira/browse/ATLAS-3606, the userName, 
queryId and queryText attributes of hive_process were deprecated by default, 
and the Hive context does not provide these attribute values for some queries. 
For example, for the create-external-table query below, the Hive context 
supplies neither queryText nor queryId, so we cannot populate either of them. 
If you set the flag 
"atlas.hook.hive.hive_process.populate.deprecated.attributes=true", this causes 
a failure in that case, because the Hive context holds a null value while Atlas 
expects some value to be present. 

Repro steps:

--------------

First, set "atlas.hook.hive.hive_process.populate.deprecated.attributes=true".

Then run "beeline -e 'create external table csaba_table_3(msg string);'".

Finally, check the Atlas log:
{noformat}
2022-01-26 12:26:44,950 ERROR - [NotificationHookConsumer thread-0:] ~ graph 
rollback due to exception AtlasBaseException:Invalid instance creation/updation 
parameters passed : hive_process.queryText: mandatory attribute value missing 
in type hive_process (GraphTransactionInterceptor:202)
2022-01-26 12:26:44,951 WARN  - [NotificationHookConsumer thread-0:] ~ Max 
retries exceeded for message 
{"version":{"version":"1.0.0","versionParts":[1]},"msgCompressionKind":"NONE","msgSplitIdx":1,"msgSplitCount":1,"msgCreationTime":1643200004951,"spooled":false,"message":{"type":"ENTITY_CREATE_V2","user":"hive","entities":{"referredEntities":{"-30336865853165805":{"typeName":"hive_storagedesc","attributes":{"qualifiedName":"default.csaba_table_3@cm_storage","storedAsSubDirectories":false,"location":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3","compressed":false,"inputFormat":"org.apache.hadoop.mapred.TextInputFormat","parameters":{},"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serdeInfo":{"typeName":"hive_serde","attributes":{"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name":null,"parameters":{"serialization.format":"1"}}},"numBuckets":-1},"guid":"-30336865853165805","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"table":{"guid":"-30336865853165804","typeName":"hive_table","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm"},"relationshipType":"hive_table_storagedesc"}},"proxy":false},"-30336865853165806":{"typeName":"hive_column","attributes":{"owner":"hrt_qa","qualifiedName":"default.csaba_table_3.msg@cm","name":"msg","comment":null,"position":0,"type":"string"},"guid":"-30336865853165806","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"table":{"guid":"-30336865853165804","typeName":"hive_table","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm"},"relationshipType":"hive_table_columns"}},"proxy":false},"-30336865853165807":{"typeName":"hdfs_path","attributes":{"nameServiceId":"ns1","path":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3","qualifiedName":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3@cm","clusterName":"cm","name":"/warehouse/tablespace/external/hive/csaba_table_3"},"guid":"-30336865853165807","isIncomplete":false,"provenanceType":0,"version":0,"proxy":false}},"en
tities":[{"typeName":"hive_table","attributes":{"owner":"hrt_qa","tableType":"EXTERNAL_TABLE","temporary":false,"lastAccessTime":1643200003000,"createTime":1643200003000,"qualifiedName":"default.csaba_table_3@cm","name":"csaba_table_3","comment":null,"parameters":{"totalSize":"0","EXTERNAL":"TRUE","numRows":"0","rawDataSize":"0","COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"msg\":\"true\"}}","numFiles":"0","transient_lastDdlTime":"1643200003","bucketing_version":"2","numFilesErasureCoded":"0"},"retention":0},"guid":"-30336865853165804","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"sd":{"guid":"-30336865853165805","typeName":"hive_storagedesc","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm_storage"},"relationshipType":"hive_table_storagedesc"},"columns":[{"guid":"-30336865853165806","typeName":"hive_column","uniqueAttributes":{"qualifiedName":"default.csaba_table_3.msg@cm"},"relationshipType":"hive_table_columns"}],"partitionKeys":[],"db":{"typeName":"hive_db","uniqueAttributes":{"qualifiedName":"default@cm"},"relationshipType":"hive_table_db"}},"proxy":false},{"typeName":"hive_process","attributes":{"recentQueries":[null],"qualifiedName":"default.csaba_table_3@cm:1643200003000","clusterName":"cm","name":"default.csaba_table_3@cm:1643200003000","queryText":null,"operationType":"CREATETABLE","startTime":1643200003871,"queryPlan":"Not
 
Supported","endTime":1643200003871,"userName":"hive","queryId":null},"guid":"-30336865853165808","isIncomplete":false,"provenanceType":0,"version":0,"relationshipAttributes":{"outputs":[{"guid":"-30336865853165804","typeName":"hive_table","uniqueAttributes":{"qualifiedName":"default.csaba_table_3@cm"},"relationshipType":"process_dataset_outputs"}],"inputs":[{"guid":"-30336865853165807","typeName":"hdfs_path","uniqueAttributes":{"qualifiedName":"hdfs://ns1/warehouse/tablespace/external/hive/csaba_table_3@cm"},"relationshipType":"dataset_process_inputs"}]},"proxy":false}]}}}
 (NotificationHookConsumer$HookConsumer:808)
org.apache.atlas.exception.AtlasBaseException: Invalid instance 
creation/updation parameters passed : hive_process.queryText: mandatory 
attribute value missing in type hive_process
        at 
org.apache.atlas.repository.store.graph.v2.AtlasEntityGraphDiscoveryV2.validateAndNormalize(AtlasEntityGraphDiscoveryV2.java:109)
        at 
org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2.preCreateOrUpdate(AtlasEntityStoreV2.java:1275)
        at 
org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2.createOrUpdate(AtlasEntityStoreV2.java:1145)
        at 
org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2.createOrUpdate(AtlasEntityStoreV2.java:366)
        at 
org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2$$FastClassBySpringCGLIB$$6861dca9.invoke(<generated>)
        at 
org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)
        at 
org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:779)
        at 
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163)
        at 
org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
        at 
org.apache.atlas.GraphTransactionInterceptor.invoke(GraphTransactionInterceptor.java:111)
        at 
org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186)
        at 
org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
        at 
org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:692)
        at 
org.apache.atlas.repository.store.graph.v2.AtlasEntityStoreV2$$EnhancerBySpringCGLIB$$6c395b63.createOrUpdate(<generated>)
        at 
org.apache.atlas.notification.NotificationHookConsumer$HookConsumer.createOrUpdate(NotificationHookConsumer.java:879)
        at 
org.apache.atlas.notification.NotificationHookConsumer$HookConsumer.handleMessage(NotificationHookConsumer.java:731)
        at 
org.apache.atlas.notification.NotificationHookConsumer$HookConsumer.doWork(NotificationHookConsumer.java:555)
        at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:96)
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
{noformat}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to