This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-3 by this push: new 9be0397e84b HIVE-27551: Backport of HIVE-22208: Column name with reserved keyword is unescaped when query including join on table with mask column is re-written 9be0397e84b is described below commit 9be0397e84b06bd4480c341373bb2c5b0738ce6a Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Mon Aug 14 13:02:13 2023 +0530 HIVE-27551: Backport of HIVE-22208: Column name with reserved keyword is unescaped when query including join on table with mask column is re-written Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#4534) --- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 22 ++- .../test/queries/clientpositive/masking_reserved.q | 12 ++ .../results/clientpositive/masking_reserved.q.out | 198 +++++++++++++++++++++ 3 files changed, 230 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 8abe8407aa5..0f1577353b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12082,8 +12082,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // the table needs to be masked or filtered. // For the replacement, we leverage the methods that are used for // unparseTranslator. - protected static ASTNode rewriteASTWithMaskAndFilter(TableMask tableMask, ASTNode ast, TokenRewriteStream tokenRewriteStream, - Context ctx, Hive db, Map<String, Table> tabNameToTabObject, Set<Integer> ignoredTokens) + protected ASTNode rewriteASTWithMaskAndFilter(TableMask tableMask, ASTNode ast, TokenRewriteStream tokenRewriteStream, + Context ctx, Hive db, Map<String, Table> tabNameToTabObject, Set<Integer> ignoredTokens) throws SemanticException { // 1. collect information about CTE if there is any. // The base table of CTE should be masked. @@ -12124,6 +12124,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } // 2. rewrite the AST, replace TABREF with masking/filtering if (tableMask.needsRewrite()) { + quoteIdentifierTokens(tokenRewriteStream); tableMask.applyTranslations(tokenRewriteStream); String rewrittenQuery = tokenRewriteStream.toString( ast.getTokenStartIndex(), ast.getTokenStopIndex()); @@ -14874,6 +14875,23 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return sb.toString(); } + private void quoteIdentifierTokens(TokenRewriteStream tokenRewriteStream) { + if (conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT).equals("none")) { + return; + } + + for (int idx = tokenRewriteStream.MIN_TOKEN_INDEX; idx <= tokenRewriteStream.size()-1; idx++) { + Token curTok = tokenRewriteStream.get(idx); + if (curTok.getType() == HiveLexer.Identifier) { + // The Tokens have no distinction between Identifiers and QuotedIdentifiers. + // Ugly solution is just to surround all identifiers with quotes. + // Re-escape any backtick (`) characters in the identifier. + String escapedTokenText = curTok.getText().replaceAll("`", "``"); + tokenRewriteStream.replace(curTok, "`" + escapedTokenText + "`"); + } + } + } + /** * Generate the query string for this query (with fully resolved table references). * @return The query string with resolved references. NULL if an error occurred. diff --git a/ql/src/test/queries/clientpositive/masking_reserved.q b/ql/src/test/queries/clientpositive/masking_reserved.q new file mode 100644 index 00000000000..7fe94fa7e3a --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_reserved.q @@ -0,0 +1,12 @@ +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +create table keyword_test_off (id int, `etad` string, key int); +create table keyword_test_on (id int, `date` string, key int); +create table masking_test_n_masking_reserved (id int, value string, key int); + +explain select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id; +select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id; + +explain select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id; +select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id; diff --git a/ql/src/test/results/clientpositive/masking_reserved.q.out b/ql/src/test/results/clientpositive/masking_reserved.q.out new file mode 100644 index 00000000000..e1f27f61028 --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_reserved.q.out @@ -0,0 +1,198 @@ +PREHOOK: query: create table keyword_test_off (id int, `etad` string, key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@keyword_test_off +POSTHOOK: query: create table keyword_test_off (id int, `etad` string, key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@keyword_test_off +PREHOOK: query: create table keyword_test_on (id int, `date` string, key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@keyword_test_on +POSTHOOK: query: create table keyword_test_on (id int, `date` string, key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@keyword_test_on +PREHOOK: query: create table masking_test_n_masking_reserved (id int, value string, key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test_n_masking_reserved +POSTHOOK: query: create table masking_test_n_masking_reserved (id int, value string, key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test_n_masking_reserved +PREHOOK: query: explain select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_off +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: explain select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_off +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: int), etad (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: masking_test_n_masking_reserved + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10) and id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_off +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_off +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +PREHOOK: query: explain select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_on +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: explain select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_on +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: int), date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: masking_test_n_masking_reserved + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10) and id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_on +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_on +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here ####