Github user mattyb149 commented on a diff in the pull request: https://github.com/apache/nifi/pull/2239#discussion_r148636452 --- Diff: nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java --- @@ -216,4 +225,104 @@ protected void setParameter(final PreparedStatement stmt, final String attrName, } } + protected static class TableName { + private final String database; + private final String table; + private boolean input = true; + + public TableName(String database, String table) { + this.database = database; + this.table = table; + } + + public void setInput(boolean input) { + this.input = input; + } + + public boolean isInput() { + return input; + } + + @Override + public String toString() { + return database == null || database.isEmpty() ? table : database + '.' + table; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + TableName tableName = (TableName) o; + + if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false; + return table.equals(tableName.table); + } + + @Override + public int hashCode() { + int result = database != null ? database.hashCode() : 0; + result = 31 * result + table.hashCode(); + return result; + } + } + + protected Set<TableName> findTableNames(final String query) throws ParseException { + final ASTNode node = new ParseDriver().parse(normalize(query)); + final HashSet<TableName> tableNames = new HashSet<>(); + findTableNames(node, tableNames); + return tableNames; + } + + /** + * Normalize query. + * Hive resolves prepared statement parameters before executing a query, + * see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail. + * HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one. + * In this normalize method, '?' 
is replaced to 'x' to avoid that. + */ + private String normalize(String query) { + return query.replace('?', 'x'); + } + + private void findTableNames(final Object obj, final Set<TableName> tableNames) { + if (!(obj instanceof CommonTree)) { + return; + } + final CommonTree tree = (CommonTree) obj; + final int childCount = tree.getChildCount(); + if ("TOK_TABNAME".equals(tree.getText())) { + final TableName tableName; + switch (childCount) { + case 1 : + tableName = new TableName(null, tree.getChild(0).getText()); + break; + case 2: + tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText()); + break; + default: + throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount); + } + // If parent is TOK_TABREF, then it is an input table. + tableName.setInput("TOK_TABREF".equals(tree.getParent().getText())); --- End diff -- We may need both setInput and setOutput (and the corresponding getters) here, since a table could technically be both an input and an output, either within the same statement or across multiple statements. See my other comment for an example. Also, the following script generates nothing in query.input_tables: ``` insert into t values (8,"eight"); create table if not exists t3 (id int, name string); insert overwrite table t3 select distinct id,name from t; ```
---