Github user mattyb149 commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/2239#discussion_r148636452
  
    --- Diff: 
nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java
 ---
    @@ -216,4 +225,104 @@ protected void setParameter(final PreparedStatement 
stmt, final String attrName,
             }
         }
     
    +    protected static class TableName {
    +        private final String database;
    +        private final String table;
    +        private boolean input = true;
    +
    +        public TableName(String database, String table) {
    +            this.database = database;
    +            this.table = table;
    +        }
    +
    +        public void setInput(boolean input) {
    +            this.input = input;
    +        }
    +
    +        public boolean isInput() {
    +            return input;
    +        }
    +
    +        @Override
    +        public String toString() {
    +            return database == null || database.isEmpty() ? table : 
database + '.' + table;
    +        }
    +
    +        @Override
    +        public boolean equals(Object o) {
    +            if (this == o) return true;
    +            if (o == null || getClass() != o.getClass()) return false;
    +
    +            TableName tableName = (TableName) o;
    +
    +            if (database != null ? !database.equals(tableName.database) : 
tableName.database != null) return false;
    +            return table.equals(tableName.table);
    +        }
    +
    +        @Override
    +        public int hashCode() {
    +            int result = database != null ? database.hashCode() : 0;
    +            result = 31 * result + table.hashCode();
    +            return result;
    +        }
    +    }
    +
    +    protected Set<TableName> findTableNames(final String query) throws 
ParseException {
    +        final ASTNode node = new ParseDriver().parse(normalize(query));
    +        final HashSet<TableName> tableNames = new HashSet<>();
    +        findTableNames(node, tableNames);
    +        return tableNames;
    +    }
    +
    +    /**
    +     * Normalize query.
    +     * Hive resolves prepared statement parameters before executing a 
query,
    +     * see {@link 
org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for 
detail.
    +     * HiveParser does not expect '?' to be in a query string, and throws 
an Exception if there is one.
    +     * In this normalize method, '?' is replaced to 'x' to avoid that.
    +     */
    +    private String normalize(String query) {
    +        return query.replace('?', 'x');
    +    }
    +
    +    private void findTableNames(final Object obj, final Set<TableName> 
tableNames) {
    +        if (!(obj instanceof CommonTree)) {
    +            return;
    +        }
    +        final CommonTree tree = (CommonTree) obj;
    +        final int childCount = tree.getChildCount();
    +        if ("TOK_TABNAME".equals(tree.getText())) {
    +            final TableName tableName;
    +            switch (childCount) {
    +                case 1 :
    +                    tableName = new TableName(null, 
tree.getChild(0).getText());
    +                    break;
    +                case 2:
    +                    tableName = new TableName(tree.getChild(0).getText(), 
tree.getChild(1).getText());
    +                    break;
    +                default:
    +                    throw new IllegalStateException("TOK_TABNAME does not 
have expected children, childCount=" + childCount);
    +            }
    +            // If parent is TOK_TABREF, then it is an input table.
    +            
tableName.setInput("TOK_TABREF".equals(tree.getParent().getText()));
    --- End diff --
    
    We may need setInput and setOutput (and the corresponding getters) here, 
since a table could technically be both an input and an output, either in the 
same statement or across multiple statements. See my other comment for an 
example. Also, the following script generates nothing in query.input_tables:
    
    ```
    insert into t values (8,"eight");
    create table if not exists t3 (id int, name string);
    insert overwrite table t3 select distinct id,name from t;
    ```


---

Reply via email to