[ https://issues.apache.org/jira/browse/HIVE-25510?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shivangi updated HIVE-25510: ---------------------------- Description: Incorrect lineage is generated for the queries where compare expressions are present in select statements. For example: *`Case-when` in select statement:* Query: {code:java} select place, (case when city == "aa" then id else 0 end)/id from t1; {code} Corresponding Lineage: {code:java} { "edges": [ { "sources": [ 2 ], "targets": [ 0 ], "edgeType": "PROJECTION" }, { "sources": [ 3, 4 ], "targets": [ 1 ], "expression": "(UDFToDouble(CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) ELSE (0) END) / UDFToDouble(t1.id))", "edgeType": "PROJECTION" } ], "vertices": [ { "id": 0, "vertexType": "COLUMN", "vertexId": "place" }, { "id": 1, "vertexType": "COLUMN", "vertexId": "_c1" }, { "id": 2, "vertexType": "COLUMN", "vertexId": "default.t1.place" }, { "id": 3, "vertexType": "COLUMN", "vertexId": "default.t1.city" }, { "id": 4, "vertexType": "COLUMN", "vertexId": "default.t1.id" } ] } {code} Expected Lineage: {code:java} { "edges": [ { "sources": [ 2 ], "targets": [ 0 ], "edgeType": "PROJECTION" }, { "sources": [ 3 ], "targets": [ 1 ], "expression": "(UDFToDouble(CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) ELSE (0) END) / UDFToDouble(t1.id))", "edgeType": "PROJECTION" }, { "sources": [ 4 ], "targets": [ 1 ], "expression": "CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) ELSE (0) END", "edgeType": "PREDICATE" } ], "vertices": [ { "id": 0, "vertexType": "COLUMN", "vertexId": "place" }, { "id": 1, "vertexType": "COLUMN", "vertexId": "_c1" }, { "id": 2, "vertexType": "COLUMN", "vertexId": "default.t1.place" }, { "id": 3, "vertexType": "COLUMN", "vertexId": "default.t1.id" }, { "id": 4, "vertexType": "COLUMN", "vertexId": "default.t1.city" } ] } {code} *`IF` statement in select statement:* Query: {code:java} select IF(city='aa',place,'FALSE') from t1; {code} Corresponding lineage: {code:java} { "edges": [ { "sources": [ 1, 2 ], "targets": [ 0 ], "expression": "if((UDFToString(t1.city) = 'aa'), t1.place, 'FALSE')", "edgeType": "PROJECTION" } ], "vertices": [ { "id": 0, "vertexType": "COLUMN", "vertexId": "_c0" }, { "id": 1, "vertexType": "COLUMN", "vertexId": "default.t1.city" }, { "id": 2, "vertexType": "COLUMN", "vertexId": "default.t1.place" } ] }{code} Expected Lineage: Projection edge for target `vertex 0` should have only `vertex 2` as source and there should be one predicate edge as well, where source would be `vertex 1` and target `vertex 0`. The table under use above is: select * from t1; |t1.id |t1.place |t1.city | |1 |a |aa | |2 |b |bb | was: Incorrect lineage is generated for the queries where compare expressions are present in select statements. For example: *`Case-when` in select statement:* Query: {code:java} select place, (case when city == "aa" then id else 0 end)/id from t1; {code} Corresponding Lineage: {code:java} { "edges": [ { "sources": [ 2 ], "targets": [ 0 ], "edgeType": "PROJECTION" }, { "sources": [ 3, 4 ], "targets": [ 1 ], "expression": "(UDFToDouble(CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) ELSE (0) END) / UDFToDouble(t1.id))", "edgeType": "PROJECTION" } ], "vertices": [ { "id": 0, "vertexType": "COLUMN", "vertexId": "place" }, { "id": 1, "vertexType": "COLUMN", "vertexId": "_c1" }, { "id": 2, "vertexType": "COLUMN", "vertexId": "default.t1.place" }, { "id": 3, "vertexType": "COLUMN", "vertexId": "default.t1.city" }, { "id": 4, "vertexType": "COLUMN", "vertexId": "default.t1.id" } ] } {code} Expected Lineage: {code:java} { "edges": [ { "sources": [ 2 ], "targets": [ 0 ], "edgeType": "PROJECTION" }, { "sources": [ 3 ], "targets": [ 1 ], "expression": "(UDFToDouble(CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) ELSE (0) END) / UDFToDouble(t1.id))", "edgeType": "PROJECTION" }, { "sources": [ 4 ], "targets": [ 1 ], "expression": "CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) ELSE (0) END", "edgeType": "PREDICATE" } ], "vertices": [ { "id": 0, "vertexType": "COLUMN", "vertexId": "place" }, { "id": 1, "vertexType": "COLUMN", "vertexId": "_c1" }, { "id": 2, "vertexType": "COLUMN", "vertexId": "default.t1.place" }, { "id": 3, "vertexType": "COLUMN", "vertexId": "default.t1.id" }, { "id": 4, "vertexType": "COLUMN", "vertexId": "default.t1.city" } ] } {code} *`IF` statement in select statement:* Query: {code:java} select IF(city='aa',place,'FALSE') from t1; {code} Corresponding lineage: {code:java} { "edges": [ { "sources": [ 1, 2 ], "targets": [ 0 ], "expression": "if((UDFToString(t1.city) = 'aa'), t1.place, 'FALSE')", "edgeType": "PROJECTION" } ], "vertices": [ { "id": 0, "vertexType": "COLUMN", "vertexId": "_c0" }, { "id": 1, "vertexType": "COLUMN", "vertexId": "default.t1.city" }, { "id": 2, "vertexType": "COLUMN", "vertexId": "default.t1.place" } ] }{code} Expected Lineage: Projection edge for target `vertex 0` should have only `vertex 2` as source and there should be one predicate edge as well, where source would be `vertex 1` and target `vertex 0`. The table under use above is: select * from t1; +--------+-----------+----------+ | t1.id | t1.place | t1.city | +--------+-----------+----------+ | 1 | a | aa | | 2 | b | bb | +--------+-----------+----------+ > Incorrect lineage for compare expressions in select statements > -------------------------------------------------------------- > > Key: HIVE-25510 > URL: https://issues.apache.org/jira/browse/HIVE-25510 > Project: Hive > Issue Type: Bug > Components: lineage > Reporter: Shivangi > Assignee: Shivangi > Priority: Major > > Incorrect lineage is generated for the queries where compare expressions are > present in select statements. For example: > *`Case-when` in select statement:* > Query: > {code:java} > select place, (case when city == "aa" then id else 0 end)/id from t1; > {code} > Corresponding Lineage: > {code:java} > { > "edges": [ > { > "sources": [ > 2 > ], > "targets": [ > 0 > ], > "edgeType": "PROJECTION" > }, > { > "sources": [ > 3, > 4 > ], > "targets": [ > 1 > ], > "expression": "(UDFToDouble(CASE WHEN ((UDFToString(t1.city) = 'aa')) > THEN (t1.id) ELSE (0) END) / UDFToDouble(t1.id))", > "edgeType": "PROJECTION" > } > ], > "vertices": [ > { > "id": 0, > "vertexType": "COLUMN", > "vertexId": "place" > }, > { > "id": 1, > "vertexType": "COLUMN", > "vertexId": "_c1" > }, > { > "id": 2, > "vertexType": "COLUMN", > "vertexId": "default.t1.place" > }, > { > "id": 3, > "vertexType": "COLUMN", > "vertexId": "default.t1.city" > }, > { > "id": 4, > "vertexType": "COLUMN", > "vertexId": "default.t1.id" > } > ] > } > {code} > Expected Lineage: > {code:java} > { > "edges": [ > { > "sources": [ > 2 > ], > "targets": [ > 0 > ], > "edgeType": "PROJECTION" > }, > { > "sources": [ > 3 > ], > "targets": [ > 1 > ], > "expression": "(UDFToDouble(CASE WHEN ((UDFToString(t1.city) = 'aa')) > THEN (t1.id) ELSE (0) END) / UDFToDouble(t1.id))", > "edgeType": "PROJECTION" > }, > { > "sources": [ > 4 > ], > "targets": [ > 1 > ], > "expression": "CASE WHEN ((UDFToString(t1.city) = 'aa')) THEN (t1.id) > ELSE (0) END", > "edgeType": "PREDICATE" > } > ], > "vertices": [ > { > "id": 0, > "vertexType": "COLUMN", > "vertexId": "place" > }, > { > "id": 1, > "vertexType": "COLUMN", > "vertexId": "_c1" > }, > { > "id": 2, > "vertexType": "COLUMN", > "vertexId": "default.t1.place" > }, > { > "id": 3, > "vertexType": "COLUMN", > "vertexId": "default.t1.id" > }, > { > "id": 4, > "vertexType": "COLUMN", > "vertexId": "default.t1.city" > } > ] > } > {code} > > *`IF` statement in select statement:* > Query: > {code:java} > select IF(city='aa',place,'FALSE') from t1; > {code} > Corresponding lineage: > {code:java} > { > "edges": [ > { > "sources": [ > 1, > 2 > ], > "targets": [ > 0 > ], > "expression": "if((UDFToString(t1.city) = 'aa'), t1.place, 'FALSE')", > "edgeType": "PROJECTION" > } > ], > "vertices": [ > { > "id": 0, > "vertexType": "COLUMN", > "vertexId": "_c0" > }, > { > "id": 1, > "vertexType": "COLUMN", > "vertexId": "default.t1.city" > }, > { > "id": 2, > "vertexType": "COLUMN", > "vertexId": "default.t1.place" > } > ] > }{code} > Expected Lineage: > Projection edge for target `vertex 0` should have only `vertex 2` as source > and there should be one predicate edge as well, where source would be `vertex > 1` and target `vertex 0`. > > The table under use above is: > select * from t1; > |t1.id |t1.place |t1.city | > |1 |a |aa | > |2 |b |bb | -- This message was sent by Atlassian Jira (v8.3.4#803005)