HIVE-11055 HPL/SQL - Implementing Procedural SQL in Hive (PL/HQL Contribution) (Dmitry Tolpeko via gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/052643cb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/052643cb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/052643cb Branch: refs/heads/master Commit: 052643cb8d1fa3811cbc0e1f56b0cd10ca419b8d Parents: edb7b88 Author: Alan Gates <ga...@hortonworks.com> Authored: Wed Jul 1 09:16:44 2015 -0700 Committer: Alan Gates <ga...@hortonworks.com> Committed: Wed Jul 1 09:16:44 2015 -0700 ---------------------------------------------------------------------- bin/ext/hplsql.sh | 37 + bin/hplsql | 25 + bin/hplsql.cmd | 58 + hplsql/pom.xml | 123 ++ .../antlr4/org/apache/hive/hplsql/Hplsql.g4 | 1426 +++++++++++++ .../java/org/apache/hive/hplsql/Arguments.java | 206 ++ .../main/java/org/apache/hive/hplsql/Conf.java | 175 ++ .../main/java/org/apache/hive/hplsql/Conn.java | 243 +++ .../java/org/apache/hive/hplsql/Converter.java | 56 + .../main/java/org/apache/hive/hplsql/Copy.java | 426 ++++ .../main/java/org/apache/hive/hplsql/Exec.java | 1950 ++++++++++++++++++ .../java/org/apache/hive/hplsql/Expression.java | 574 ++++++ .../main/java/org/apache/hive/hplsql/File.java | 132 ++ .../java/org/apache/hive/hplsql/Handler.java | 41 + .../java/org/apache/hive/hplsql/Hplsql.java | 25 + .../java/org/apache/hive/hplsql/Interval.java | 109 + .../main/java/org/apache/hive/hplsql/Query.java | 155 ++ .../main/java/org/apache/hive/hplsql/Scope.java | 69 + .../java/org/apache/hive/hplsql/Select.java | 411 ++++ .../java/org/apache/hive/hplsql/Signal.java | 48 + .../main/java/org/apache/hive/hplsql/Stmt.java | 1021 +++++++++ .../org/apache/hive/hplsql/StreamGobbler.java | 51 + .../main/java/org/apache/hive/hplsql/Timer.java | 59 + .../main/java/org/apache/hive/hplsql/Udf.java | 117 ++ .../main/java/org/apache/hive/hplsql/Utils.java | 289 +++ .../main/java/org/apache/hive/hplsql/Var.java | 430 ++++ .../apache/hive/hplsql/functions/Function.java | 709 +++++++ 
.../hive/hplsql/functions/FunctionDatetime.java | 151 ++ .../hive/hplsql/functions/FunctionMisc.java | 188 ++ .../hive/hplsql/functions/FunctionOra.java | 231 +++ .../hive/hplsql/functions/FunctionString.java | 276 +++ pom.xml | 1 + 32 files changed, 9812 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/bin/ext/hplsql.sh ---------------------------------------------------------------------- diff --git a/bin/ext/hplsql.sh b/bin/ext/hplsql.sh new file mode 100644 index 0000000..ddaf324 --- /dev/null +++ b/bin/ext/hplsql.sh @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +THISSERVICE=hplsql +export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " + +hplsql () { + CLASS=org.apache.hive.hplsql.Hplsql; + + # include only the HPL/SQL jar and its dependencies + hplsqlJarPath=`ls ${HIVE_LIB}/hive-hplsql-*.jar` + antlrJarPath="${HIVE_LIB}/antlr-runtime-4.5.jar" + hadoopClasspath="" + if [[ -n "${HADOOP_CLASSPATH}" ]] + then + hadoopClasspath="${HADOOP_CLASSPATH}:" + fi + export HADOOP_CLASSPATH="${hadoopClasspath}${HIVE_CONF_DIR}:${hplsqlJarPath}:${antlrJarPath}" + + exec $HADOOP jar ${hplsqlJarPath} $CLASS $HIVE_OPTS "$@" +} + +hplsql_help () { + hplsql "--help" +} http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/bin/hplsql ---------------------------------------------------------------------- diff --git a/bin/hplsql b/bin/hplsql new file mode 100644 index 0000000..6a5da7e --- /dev/null +++ b/bin/hplsql @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bin=`dirname "$0"` +bin=`cd "$bin"; pwd` + +# Set Hadoop User classpath to true so that httpclient jars are taken from +# hive lib instead of hadoop lib. +export HADOOP_USER_CLASSPATH_FIRST=true + +. 
"$bin"/hive --service hplsql "$@" http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/bin/hplsql.cmd ---------------------------------------------------------------------- diff --git a/bin/hplsql.cmd b/bin/hplsql.cmd new file mode 100644 index 0000000..6717a1c --- /dev/null +++ b/bin/hplsql.cmd @@ -0,0 +1,58 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +SetLocal EnableDelayedExpansion + +pushd %CD%\.. +if not defined HIVE_HOME ( + set HIVE_HOME=%CD% +) +popd + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +if not defined JAVA_HOME ( + echo Error: JAVA_HOME is not set. 
+ goto :eof +) + +@rem get the hadoop environment +if not exist %HADOOP_HOME%\libexec\hadoop-config.cmd ( + @echo +================================================================+ + @echo ^| Error: HADOOP_HOME is not set correctly ^| + @echo +----------------------------------------------------------------+ + @echo ^| Please set your HADOOP_HOME variable to the absolute path of ^| + @echo ^| the directory that contains \libexec\hadoop-config.cmd ^| + @echo +================================================================+ + exit /b 1 +) +@rem suppress the HADOOP_HOME warnings in 1.x.x +set HADOOP_HOME_WARN_SUPPRESS=true + +@rem include only the HPL/SQL jar and its dependencies +pushd %HIVE_HOME%\lib +for /f %%a IN ('dir /b hive-hplsql-**.jar') do ( + set HADOOP_CLASSPATH=%HADOOP_CLASSPATH%;%HIVE_HOME%\lib\%%a +) +set HADOOP_CLASSPATH=%HADOOP_CLASSPATH%;%HIVE_HOME%\lib\antlr-runtime-4.5.jar +popd +set HADOOP_USER_CLASSPATH_FIRST=true +call %HADOOP_HOME%\libexec\hadoop-config.cmd + +call "%JAVA_HOME%\bin\java" %JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %HADOOP_CLASSPATH% org.apache.hive.hplsql.Hplsql %* + +endlocal http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/hplsql/pom.xml ---------------------------------------------------------------------- diff --git a/hplsql/pom.xml b/hplsql/pom.xml new file mode 100644 index 0000000..d096e90 --- /dev/null +++ b/hplsql/pom.xml @@ -0,0 +1,123 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.hive</groupId> + <artifactId>hive</artifactId> + <version>2.0.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>hive-hplsql</artifactId> + <packaging>jar</packaging> + <name>Hive HPL/SQL</name> + + <properties> + <hive.path.to.root>..</hive.path.to.root> + <packaging.minimizeJar>false</packaging.minimizeJar> + </properties> + + <dependencies> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${guava.version}</version> + </dependency> + <dependency> + <groupId>commons-collections</groupId> + <artifactId>commons-collections</artifactId> + <version>3.2.1</version> + </dependency> + <dependency> + <groupId>commons-cli</groupId> + <artifactId>commons-cli</artifactId> + <version>${commons-cli.version}</version> + </dependency> + <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + <version>${commons-lang.version}</version> + </dependency> + <dependency> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + <version>${commons-logging.version}</version> + </dependency> + + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>${commons-io.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.antlr</groupId> + <artifactId>antlr4-runtime</artifactId> + <version>4.5</version> + </dependency> + </dependencies> + + <profiles> + <profile> + 
<id>hadoop-1</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-core</artifactId> + <version>${hadoop-20S.version}</version> + <optional>true</optional> + </dependency> + </dependencies> + </profile> + <profile> + <id>hadoop-2</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop-23.version}</version> + <optional>true</optional> + </dependency> + </dependencies> + </profile> + </profiles> + + <build> + <plugins> + <plugin> + <groupId>org.antlr</groupId> + <artifactId>antlr4-maven-plugin</artifactId> + <version>4.5</version> + <configuration> + <visitor>true</visitor> + </configuration> + <executions> + <execution> + <goals> + <goal>antlr4</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 ---------------------------------------------------------------------- diff --git a/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 b/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 new file mode 100644 index 0000000..852716b --- /dev/null +++ b/hplsql/src/main/antlr4/org/apache/hive/hplsql/Hplsql.g4 @@ -0,0 +1,1426 @@ +/** + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// HPL/SQL Procedural SQL Extension Grammar +grammar Hplsql; + +program : block ; + +block : (begin_end_block | stmt)+ ; // Multiple consecutive blocks/statements + +begin_end_block : + declare_block? T_BEGIN block exception_block? T_END + ; + +single_block_stmt : // Single BEGIN END block (but nested blocks are possible) or single statement + T_BEGIN block T_END + | stmt T_SEMICOLON? + ; + +stmt : + assignment_stmt + | break_stmt + | call_stmt + | close_stmt + | copy_from_local_stmt + | copy_stmt + | commit_stmt + | create_function_stmt + | create_index_stmt + | create_local_temp_table_stmt + | create_procedure_stmt + | create_table_stmt + | declare_stmt + | delete_stmt + | drop_stmt + | exec_stmt + | exit_stmt + | fetch_stmt + | for_cursor_stmt + | for_range_stmt + | if_stmt + | include_stmt + | insert_stmt + | get_diag_stmt + | grant_stmt + | leave_stmt + | map_object_stmt + | merge_stmt + | open_stmt + | print_stmt + | resignal_stmt + | return_stmt + | rollback_stmt + | select_stmt + | signal_stmt + | update_stmt + | use_stmt + | values_into_stmt + | while_stmt + | label + | hive + | host + | expr_stmt + | semicolon_stmt // Placed here to allow null statements ;;... + ; + +semicolon_stmt : + T_SEMICOLON + | '@' | '#' | '/' + ; + +exception_block : // Exception block + T_EXCEPTION exception_block_item+ + ; + +exception_block_item : + T_WHEN L_ID T_THEN block ~(T_WHEN | T_END) + ; + +expr_stmt : // Standalone expression + expr + ; + +assignment_stmt : // Assignment statement + T_SET? 
assignment_stmt_item (T_COMMA assignment_stmt_item)* + ; + +assignment_stmt_item : + assignment_stmt_single_item + | assignment_stmt_multiple_item + | assignment_stmt_select_item + ; + +assignment_stmt_single_item : + ident T_COLON? T_EQUAL expr + ; + +assignment_stmt_multiple_item : + T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P T_COLON? T_EQUAL T_OPEN_P expr (T_COMMA expr)* T_CLOSE_P + ; + +assignment_stmt_select_item : + (ident | (T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P)) T_COLON? T_EQUAL T_OPEN_P select_stmt T_CLOSE_P + ; + +break_stmt : + T_BREAK + ; + +call_stmt : + T_CALL ident expr_func_params? + ; + +declare_stmt : // Declaration statement + T_DECLARE declare_stmt_item (T_COMMA declare_stmt_item)* + ; + +declare_block : // Declaration block + T_DECLARE declare_stmt_item T_SEMICOLON (declare_stmt_item T_SEMICOLON)* + ; + + +declare_stmt_item : + declare_var_item + | declare_condition_item + | declare_cursor_item + | declare_handler_item + | declare_temporary_table_item + ; + +declare_var_item : + ident (T_COMMA ident)* T_AS? dtype dtype_len? dtype_attr* dtype_default? + ; + +declare_condition_item : // Condition declaration + ident T_CONDITION + ; + +declare_cursor_item : // Cursor declaration + (T_CURSOR ident | ident T_CURSOR) declare_cursor_return? (T_IS | T_AS | T_FOR) (select_stmt | expr ) + ; + +declare_cursor_return : + T_WITHOUT T_RETURN + | T_WITH T_RETURN T_ONLY? (T_TO (T_CALLER | T_CLIENT))? + ; + +declare_handler_item : // Condition handler declaration + (T_CONTINUE | T_EXIT) T_HANDLER T_FOR (T_SQLEXCEPTION | T_SQLWARNING | T_NOT T_FOUND | ident) single_block_stmt + ; + +declare_temporary_table_item : // DECLARE TEMPORARY TABLE statement + T_GLOBAL? T_TEMPORARY T_TABLE ident T_OPEN_P create_table_columns T_CLOSE_P create_table_options? + ; + +create_table_stmt : + T_CREATE T_TABLE (T_IF T_NOT T_EXISTS)? ident T_OPEN_P create_table_columns T_CLOSE_P create_table_options? 
+ ; + +create_local_temp_table_stmt : + T_CREATE (T_LOCAL T_TEMPORARY | (T_SET | T_MULTISET)? T_VOLATILE) T_TABLE ident T_OPEN_P create_table_columns T_CLOSE_P create_table_options? + ; + +create_table_columns : + create_table_columns_item (T_COMMA create_table_columns_item)* + ; + +create_table_columns_item : + ident dtype dtype_len? dtype_attr* create_table_column_inline_cons? + | T_CONSTRAINT ident create_table_column_cons + ; + +create_table_column_inline_cons : + dtype_default + | T_NOT? T_NULL + | T_PRIMARY T_KEY + ; + +create_table_column_cons : + T_PRIMARY T_KEY T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P + ; + +create_table_options : + create_table_options_item+ + ; + +create_table_options_item : + T_ON T_COMMIT (T_DELETE | T_PRESERVE) T_ROWS + | create_table_options_db2_item + | create_table_options_hive_item + ; + +create_table_options_db2_item : + T_IN ident + | T_WITH T_REPLACE + | T_DISTRIBUTE T_BY T_HASH T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P + | T_LOGGED + | T_NOT T_LOGGED + ; + +create_table_options_hive_item : + create_table_hive_row_format + ; + +create_table_hive_row_format : + T_ROW T_FORMAT T_DELIMITED create_table_hive_row_format_fields* + ; + +create_table_hive_row_format_fields : + T_FIELDS T_TERMINATED T_BY expr (T_ESCAPED T_BY expr)? + | T_COLLECTION T_ITEMS T_TERMINATED T_BY expr + | T_MAP T_KEYS T_TERMINATED T_BY expr + | T_LINES T_TERMINATED T_BY expr + | T_NULL T_DEFINED T_AS expr + ; + +dtype : // Data types + T_CHAR + | T_BIGINT + | T_DATE + | T_DEC + | T_DECIMAL + | T_FLOAT + | T_INT + | T_INTEGER + | T_NUMBER + | T_SMALLINT + | T_STRING + | T_TIMESTAMP + | T_VARCHAR + | T_VARCHAR2 + | L_ID // User-defined data type + ; + +dtype_len : // Data type length or size specification + T_OPEN_P L_INT (T_COMMA L_INT)? T_CLOSE_P + ; + +dtype_attr : + T_CHARACTER T_SET ident + | T_NOT? (T_CASESPECIFIC | T_CS) + ; + +dtype_default : // Default clause in variable declaration + T_COLON? 
T_EQUAL expr + | T_DEFAULT expr + ; + +create_function_stmt : + (T_ALTER | T_CREATE (T_OR T_REPLACE)? | T_REPLACE) T_FUNCTION ident create_routine_params create_function_return (T_AS | T_IS)? single_block_stmt + ; + +create_function_return : + (T_RETURN | T_RETURNS) dtype dtype_len? + ; + +create_procedure_stmt : + (T_ALTER | T_CREATE (T_OR T_REPLACE)? | T_REPLACE) (T_PROCEDURE | T_PROC) ident create_routine_params create_routine_options? (T_AS | T_IS)? label? single_block_stmt (ident T_SEMICOLON)? + ; + +create_routine_params : + T_OPEN_P (create_routine_param_item (T_COMMA create_routine_param_item)*)? T_CLOSE_P + ; + +create_routine_param_item : + (T_IN | T_OUT | T_INOUT | T_IN T_OUT)? ident dtype dtype_len? dtype_attr* dtype_default? + | ident (T_IN | T_OUT | T_INOUT | T_IN T_OUT)? dtype dtype_len? dtype_attr* dtype_default? + ; + +create_routine_options : + create_routine_option+ + ; +create_routine_option : + T_LANGUAGE T_SQL + | T_SQL T_SECURITY (T_CREATOR | T_DEFINER | T_INVOKER | T_OWNER) + | T_DYNAMIC T_RESULT T_SETS L_INT + ; + +drop_stmt : // DROP statement + T_DROP T_TABLE (T_IF T_EXISTS)? table_name + ; + +exec_stmt : // EXEC, EXECUTE IMMEDIATE statement + (T_EXEC | T_EXECUTE) T_IMMEDIATE? expr (T_INTO L_ID (T_COMMA L_ID)*)? using_clause? + ; + +if_stmt : // IF statement + if_plsql_stmt + | if_tsql_stmt + ; + +if_plsql_stmt : + T_IF bool_expr T_THEN block elseif_block* else_block? T_END T_IF + ; + +if_tsql_stmt : + T_IF bool_expr single_block_stmt (T_ELSE single_block_stmt)? + ; + +elseif_block : + (T_ELSIF | T_ELSEIF) bool_expr T_THEN block + ; + +else_block : + T_ELSE block + ; + +include_stmt : // INCLUDE statement + T_INCLUDE file_name + ; + +insert_stmt : // INSERT statement + T_INSERT (T_OVERWRITE T_TABLE | T_INTO T_TABLE?) table_name insert_stmt_cols? 
(select_stmt | insert_stmt_rows) + ; + +insert_stmt_cols : + T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P + ; + +insert_stmt_rows : + T_VALUES insert_stmt_row (T_COMMA insert_stmt_row)* + ; + +insert_stmt_row: + T_OPEN_P expr (T_COMMA expr)* T_CLOSE_P + ; + +exit_stmt : + T_EXIT L_ID? (T_WHEN bool_expr)? + ; + +get_diag_stmt : // GET DIAGNOSTICS statement + T_GET T_DIAGNOSTICS get_diag_stmt_item + ; + +get_diag_stmt_item : + get_diag_stmt_exception_item + | get_diag_stmt_rowcount_item + ; + +get_diag_stmt_exception_item : + T_EXCEPTION L_INT ident T_EQUAL T_MESSAGE_TEXT + ; + +get_diag_stmt_rowcount_item : + ident T_EQUAL T_ROW_COUNT + ; + +grant_stmt : + T_GRANT grant_stmt_item (T_COMMA grant_stmt_item)* T_TO ident + ; + +grant_stmt_item : + T_EXECUTE T_ON T_PROCEDURE ident + ; + +leave_stmt : + T_LEAVE L_ID? + ; + +map_object_stmt : + T_MAP T_OBJECT expr (T_TO expr)? (T_AT expr)? + ; + +open_stmt : // OPEN cursor statement + T_OPEN L_ID (T_FOR (expr | select_stmt))? + ; + +fetch_stmt : // FETCH cursor statement + T_FETCH T_FROM? L_ID T_INTO L_ID (T_COMMA L_ID)* + ; + +close_stmt : // CLOSE cursor statement + T_CLOSE L_ID + ; + +copy_from_local_stmt : // COPY FROM LOCAL statement + T_COPY T_FROM T_LOCAL copy_source (T_COMMA copy_source)* T_TO copy_target copy_file_option* + ; + +copy_stmt : // COPY statement + T_COPY (table_name | T_OPEN_P select_stmt T_CLOSE_P) T_TO copy_target copy_option* + ; + +copy_source : + (ident | expr | L_FILE) + ; + +copy_target : + (ident | expr | L_FILE) + ; + +copy_option : + T_AT ident + | T_BATCHSIZE expr + | T_DELIMITER expr + | T_SQLINSERT ident + ; + +copy_file_option : + T_DELETE + | T_IGNORE + | T_OVERWRITE + ; + +commit_stmt : // COMMIT statement + T_COMMIT T_WORK? + ; + +create_index_stmt : // CREATE INDEX statement + T_CREATE T_UNIQUE? T_INDEX ident T_ON table_name T_OPEN_P create_index_col (T_COMMA create_index_col)* T_CLOSE_P + ; + +create_index_col : + ident (T_ASC | T_DESC)? 
+ ; + +print_stmt : // PRINT statement + T_PRINT expr + | T_PRINT T_OPEN_P expr T_CLOSE_P + ; + +resignal_stmt : // RESIGNAL statement + T_RESIGNAL (T_SQLSTATE T_VALUE? expr (T_SET T_MESSAGE_TEXT T_EQUAL expr)? )? + ; + +return_stmt : // RETURN statement + T_RETURN expr? + ; + +rollback_stmt : // ROLLBACK statement + T_ROLLBACK T_WORK? + ; + +signal_stmt : // SIGNAL statement + T_SIGNAL ident + ; + +use_stmt : // USE statement + T_USE expr + ; + +values_into_stmt : // VALUES INTO statement + T_VALUES T_OPEN_P? expr (T_COMMA expr)* T_CLOSE_P? T_INTO T_OPEN_P? ident (T_COMMA ident)* T_CLOSE_P? + ; + +while_stmt : // WHILE loop statement + T_WHILE bool_expr (T_DO | T_LOOP | T_THEN | T_BEGIN) block T_END (T_WHILE | T_LOOP)? + ; + +for_cursor_stmt : // FOR (cursor) statement + T_FOR L_ID T_IN T_OPEN_P? select_stmt T_CLOSE_P? T_LOOP block T_END T_LOOP + ; + +for_range_stmt : // FOR (Integer range) statement + T_FOR L_ID T_IN T_REVERSE? expr T_DOT2 expr ((T_BY | T_STEP) expr)? T_LOOP block T_END T_LOOP + ; + +label : + L_LABEL + | T_LESS T_LESS L_ID T_GREATER T_GREATER + ; + +using_clause : // USING var,... clause + T_USING expr (T_COMMA expr)* + ; + +select_stmt : // SELECT statement + cte_select_stmt? fullselect_stmt + ; + +cte_select_stmt : + T_WITH cte_select_stmt_item (T_COMMA cte_select_stmt_item)* + ; + +cte_select_stmt_item : + ident cte_select_cols? T_AS T_OPEN_P fullselect_stmt T_CLOSE_P + ; + +cte_select_cols : + T_OPEN_P ident (T_COMMA ident)* T_CLOSE_P + ; + +fullselect_stmt : + fullselect_stmt_item (fullselect_set_clause fullselect_stmt_item)* + ; + +fullselect_stmt_item : + subselect_stmt + | T_OPEN_P fullselect_stmt T_CLOSE_P + ; + +fullselect_set_clause : + T_UNION T_ALL? + | T_EXCEPT T_ALL? + | T_INTERSECT T_ALL? + ; + +subselect_stmt : + (T_SELECT | T_SEL) select_list into_clause? from_clause? where_clause? group_by_clause? having_clause? order_by_clause? select_options? + ; + +select_list : + select_list_set? select_list_limit? 
select_list_item (T_COMMA select_list_item)* + ; + +select_list_set : + T_ALL + | T_DISTINCT + ; + +select_list_limit : + T_TOP expr + ; + +select_list_item : + (expr select_list_alias? | select_list_asterisk) + ; + +select_list_alias : + T_AS? L_ID + | T_OPEN_P T_TITLE L_S_STRING T_CLOSE_P + ; + +select_list_asterisk : + (L_ID '.')? '*' + ; + +into_clause : + T_INTO ident (T_COMMA ident)* + ; + +from_clause : + T_FROM from_table_clause (from_join_clause)* + ; + +from_table_clause : + from_table_name_clause + | from_subselect_clause + | from_table_values_clause + ; + +from_table_name_clause : + table_name from_alias_clause? + ; + +from_subselect_clause : + T_OPEN_P subselect_stmt T_CLOSE_P from_alias_clause? + ; + +from_join_clause : + T_COMMA from_table_clause + | from_join_type_clause from_table_clause T_ON bool_expr + ; + +from_join_type_clause : + T_INNER T_JOIN + | (T_LEFT | T_RIGHT | T_FULL) T_OUTER? T_JOIN + ; + +from_table_values_clause: + T_TABLE T_OPEN_P T_VALUES from_table_values_row (T_COMMA from_table_values_row)* T_CLOSE_P from_alias_clause? + ; + +from_table_values_row: + expr + | T_OPEN_P expr (T_COMMA expr)* T_CLOSE_P + ; + +from_alias_clause : + {!_input.LT(1).getText().equalsIgnoreCase("GROUP") && + !_input.LT(1).getText().equalsIgnoreCase("ORDER") && + !_input.LT(1).getText().equalsIgnoreCase("LIMIT")}? + T_AS? ident (T_OPEN_P L_ID (T_COMMA L_ID)* T_CLOSE_P)? + ; + +table_name : + ident + ; + +where_clause : + T_WHERE bool_expr + ; + +group_by_clause : + T_GROUP T_BY expr (T_COMMA expr)* + ; + +having_clause : + T_HAVING bool_expr + ; + +order_by_clause : + T_ORDER T_BY expr (T_ASC | T_DESC)? (T_COMMA expr (T_ASC | T_DESC)?)* + ; + +select_options : + select_options_item+ + ; + +select_options_item : + T_LIMIT expr + | T_WITH (T_RR | T_RS | T_CS | T_UR) + ; + +update_stmt : // UPDATE statement + T_UPDATE update_table T_SET assignment_stmt_item (T_COMMA assignment_stmt_item)* where_clause? update_upsert? 
+ ; + +update_table : + (table_name | (T_OPEN_P select_stmt T_CLOSE_P)) (T_AS? ident)? + ; + +update_upsert : + T_ELSE insert_stmt + ; + +merge_stmt : // MERGE statement + T_MERGE T_INTO merge_table T_USING merge_table T_ON bool_expr merge_condition+ + ; + +merge_table : + (table_name | (T_OPEN_P select_stmt T_CLOSE_P)) (T_AS? ident)? + ; + +merge_condition : + T_WHEN T_NOT? T_MATCHED (T_AND bool_expr)? T_THEN merge_action + | T_ELSE T_IGNORE + ; + +merge_action : + T_INSERT insert_stmt_cols? T_VALUES insert_stmt_row + | T_UPDATE T_SET assignment_stmt_item (T_COMMA assignment_stmt_item)* + | T_DELETE + ; + +delete_stmt : // DELETE statement + T_DELETE T_FROM? table_name (T_AS? ident)? where_clause? + ; + +bool_expr : // Boolean condition + T_OPEN_P bool_expr T_CLOSE_P + | bool_expr bool_expr_logical_operator bool_expr + | bool_expr_atom + ; + +bool_expr_atom : + bool_expr_unary + | bool_expr_binary + ; + +bool_expr_unary : + expr T_IS T_NOT? T_NULL + | expr T_BETWEEN expr T_AND expr + | bool_expr_single_in + | bool_expr_multi_in + ; + +bool_expr_single_in : + expr T_NOT? T_IN T_OPEN_P ((expr (T_COMMA expr)*) | select_stmt) T_CLOSE_P + ; + +bool_expr_multi_in : + T_OPEN_P expr (T_COMMA expr)* T_CLOSE_P T_NOT? T_IN T_OPEN_P select_stmt T_CLOSE_P + ; + +bool_expr_binary : + expr bool_expr_binary_operator expr + ; + +bool_expr_logical_operator : + T_AND + | T_OR + ; + +bool_expr_binary_operator : + T_EQUAL + | T_EQUAL2 + | T_NOTEQUAL + | T_NOTEQUAL2 + | T_LESS + | T_LESSEQUAL + | T_GREATER + | T_GREATEREQUAL + | T_NOT? 
(T_LIKE | T_RLIKE | T_REGEXP) + ; + +expr : + expr interval_item + | expr T_MUL expr + | expr T_DIV expr + | expr T_ADD expr + | expr T_SUB expr + | T_OPEN_P expr T_CLOSE_P + | expr_concat + | expr_case + | expr_agg_window_func + | expr_spec_func + | expr_func + | expr_atom + ; + + +expr_atom : + date_literal + | timestamp_literal + | ident + | string + | dec_number + | interval_number + | int_number + | null_const + ; + +interval_item : + T_DAY + | T_DAYS + | T_MICROSECOND + | T_MICROSECONDS + ; + +interval_number : + int_number interval_item + ; + +expr_concat : // String concatenation operator + expr_concat_item (T_PIPE | T_CONCAT) expr_concat_item ((T_PIPE | T_CONCAT) expr_concat_item)* + ; + +expr_concat_item : + T_OPEN_P expr T_CLOSE_P + | expr_case + | expr_agg_window_func + | expr_spec_func + | expr_func + | expr_atom + ; + +expr_case : // CASE expression + expr_case_simple + | expr_case_searched + ; + +expr_case_simple : + T_CASE expr (T_WHEN expr T_THEN expr)+ (T_ELSE expr)? T_END + ; + +expr_case_searched : + T_CASE (T_WHEN bool_expr T_THEN expr)+ (T_ELSE expr)? T_END + ; + +expr_agg_window_func : + T_AVG T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? + | T_COUNT T_OPEN_P ((expr_func_all_distinct? expr) | '*') T_CLOSE_P expr_func_over_clause? + | T_COUNT_BIG T_OPEN_P ((expr_func_all_distinct? expr) | '*') T_CLOSE_P expr_func_over_clause? + | T_DENSE_RANK T_OPEN_P T_CLOSE_P expr_func_over_clause + | T_FIRST_VALUE T_OPEN_P expr T_CLOSE_P expr_func_over_clause + | T_LAG T_OPEN_P expr (T_COMMA expr (T_COMMA expr)?)? T_CLOSE_P expr_func_over_clause + | T_LAST_VALUE T_OPEN_P expr T_CLOSE_P expr_func_over_clause + | T_LEAD T_OPEN_P expr (T_COMMA expr (T_COMMA expr)?)? T_CLOSE_P expr_func_over_clause + | T_MAX T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? + | T_MIN T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? 
+ | T_RANK T_OPEN_P T_CLOSE_P expr_func_over_clause + | T_ROW_NUMBER T_OPEN_P T_CLOSE_P expr_func_over_clause + | T_STDEV T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? + | T_SUM T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? + | T_VAR T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? + | T_VARIANCE T_OPEN_P expr_func_all_distinct? expr T_CLOSE_P expr_func_over_clause? + ; + +expr_func_all_distinct : + T_ALL + | T_DISTINCT + ; + +expr_func_over_clause : + T_OVER T_OPEN_P expr_func_partition_by_clause? order_by_clause? T_CLOSE_P + ; + +expr_func_partition_by_clause : + T_PARTITION T_BY ident (T_COMMA ident)* + ; + +expr_spec_func : + T_ACTIVITY_COUNT + | T_CAST T_OPEN_P expr T_AS dtype dtype_len? T_CLOSE_P + | T_COUNT T_OPEN_P (expr | '*') T_CLOSE_P + | T_CURRENT_DATE | T_CURRENT T_DATE + | (T_CURRENT_TIMESTAMP | T_CURRENT T_TIMESTAMP) (T_OPEN_P expr T_CLOSE_P)? + | T_CURRENT_USER | T_CURRENT T_USER + | T_MAX_PART_STRING T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P + | T_MIN_PART_STRING T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P + | T_MAX_PART_INT T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P + | T_MIN_PART_INT T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P + | T_MAX_PART_DATE T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P + | T_MIN_PART_DATE T_OPEN_P expr (T_COMMA expr (T_COMMA expr T_EQUAL expr)*)? T_CLOSE_P + | T_PART_LOC T_OPEN_P expr (T_COMMA expr T_EQUAL expr)+ (T_COMMA expr)? T_CLOSE_P + | T_TRIM T_OPEN_P expr T_CLOSE_P + | T_SUBSTRING T_OPEN_P expr T_FROM expr (T_FOR expr)? T_CLOSE_P + | T_SYSDATE + | T_USER + ; + +expr_func : + ident expr_func_params + ; + +expr_func_params : + T_OPEN_P (expr (T_COMMA expr)*)? 
T_CLOSE_P + ; + +hive : + T_HIVE hive_item* + ; + +hive_item : + P_e expr + | P_f expr + | P_hiveconf L_ID T_EQUAL expr + | P_i expr + | P_S + | P_h + ; + +host : + '!' host_cmd ';' // OS command + | host_stmt + ; + +host_cmd : + .*? + ; + +host_stmt : + T_HOST expr + ; + +file_name : + L_ID | L_FILE + ; + +date_literal : // DATE 'YYYY-MM-DD' literal + T_DATE string + ; + +timestamp_literal : // TIMESTAMP 'YYYY-MM-DD HH:MI:SS.FFF' literal + T_TIMESTAMP string + ; + +ident : + L_ID + | non_reserved_words + ; + +string : // String literal (single or double quoted) + L_S_STRING # single_quotedString + | L_D_STRING # double_quotedString + ; + +int_number : // Integer (positive or negative) + ('-' | '+')? L_INT + ; + +dec_number : // Decimal number (positive or negative) + ('-' | '+')? L_DEC + ; + +null_const : // NULL constant + T_NULL + ; + +non_reserved_words : // Tokens that are not reserved words and can be used as identifiers + T_ACTIVITY_COUNT + | T_ALL + | T_ALTER + | T_AND + | T_AS + | T_ASC + | T_AT + | T_AVG + | T_BATCHSIZE + | T_BEGIN + | T_BETWEEN + | T_BIGINT + | T_BREAK + | T_BY + | T_CALL + | T_CALLER + | T_CASE + | T_CASESPECIFIC + | T_CAST + | T_CHAR + | T_CHARACTER + | T_CLIENT + | T_CLOSE + | T_COLLECTION + | T_COPY + | T_COMMIT + | T_CONCAT + | T_CONDITION + | T_CONSTRAINT + | T_CONTINUE + | T_COUNT + | T_COUNT_BIG + | T_CREATE + | T_CREATOR + | T_CS + | T_CURRENT + | T_CURRENT_DATE + | T_CURRENT_TIMESTAMP + | T_CURRENT_USER + | T_CURSOR + | T_DATE + | T_DAY + | T_DAYS + | T_DEC + | T_DECIMAL + | T_DECLARE + | T_DEFAULT + | T_DEFINED + | T_DEFINER + | T_DELETE + | T_DELIMITED + | T_DELIMITER + | T_DENSE_RANK + | T_DESC + | T_DIAGNOSTICS + | T_DISTINCT + | T_DISTRIBUTE + | T_DO + | T_DROP + | T_DYNAMIC + // T_ELSE reserved word + // T_ELSEIF reserved word + // T_ELSIF reserved word + // T_END reserved word + | T_ESCAPED + | T_EXCEPT + | T_EXEC + | T_EXECUTE + | T_EXCEPTION + | T_EXISTS + | T_EXIT + | T_FETCH + | T_FIELDS + | T_FILE + | T_FIRST_VALUE 
+ | T_FLOAT + | T_FOR + | T_FORMAT + | T_FOUND + | T_FROM + | T_FULL + | T_FUNCTION + | T_GET + | T_GLOBAL + | T_GRANT + | T_GROUP + | T_HANDLER + | T_HASH + | T_HAVING + | T_HIVE + | T_HOST + | T_IF + | T_IGNORE + | T_IMMEDIATE + | T_IN + | T_INCLUDE + | T_INDEX + | T_INNER + | T_INOUT + | T_INSERT + | T_INT + | T_INTEGER + | T_INTERSECT + | T_INTO + | T_INVOKER + | T_ITEMS + | T_IS + | T_JOIN + | T_KEY + | T_KEYS + | T_LAG + | T_LANGUAGE + | T_LAST_VALUE + | T_LEAD + | T_LEAVE + | T_LEFT + | T_LIKE + | T_LIMIT + | T_LINES + | T_LOCAL + | T_LOGGED + | T_LOOP + | T_MAP + | T_MATCHED + | T_MAX + | T_MERGE + | T_MESSAGE_TEXT + | T_MICROSECOND + | T_MICROSECONDS + | T_MIN + | T_MULTISET + | T_NOT + // T_NULL reserved word + | T_NUMBER + | T_OBJECT + | T_ON + | T_ONLY + | T_OPEN + | T_OR + | T_ORDER + | T_OUT + | T_OUTER + | T_OVER + | T_OVERWRITE + | T_OWNER + | T_PART_LOC + | T_PARTITION + | T_PRESERVE + | T_PRIMARY + | T_PRINT + | T_PROC + | T_PROCEDURE + | T_RANK + | T_REGEXP + | T_RR + | T_REPLACE + | T_RESIGNAL + | T_RESULT + | T_RETURN + | T_RETURNS + | T_REVERSE + | T_RIGHT + | T_RLIKE + | T_RS + | T_ROLLBACK + | T_ROW + | T_ROWS + | T_ROW_COUNT + | T_ROW_NUMBER + | T_SECURITY + | T_SEL + | T_SELECT + | T_SET + | T_SETS + | T_SIGNAL + | T_SMALLINT + | T_SQL + | T_SQLEXCEPTION + | T_SQLINSERT + | T_SQLSTATE + | T_SQLWARNING + | T_STEP + | T_STDEV + | T_STRING + | T_SUBSTRING + | T_SUM + | T_SYSDATE + | T_TABLE + | T_TEMPORARY + | T_TERMINATED + | T_THEN + | T_TIMESTAMP + | T_TITLE + | T_TO + | T_TOP + | T_TRIM + // T_UNION reserved word + | T_UNIQUE + | T_UPDATE + | T_UR + | T_USE + | T_USER + | T_USING + | T_VALUE + | T_VALUES + | T_VAR + | T_VARCHAR + | T_VARCHAR2 + | T_VARIANCE + | T_VOLATILE + // T_WHEN reserved word + // T_WHERE reserved word + | T_WHILE + | T_WITH + | T_WITHOUT + | T_WORK + ; + +// Lexer rules +T_ALL : A L L ; +T_ALTER : A L T E R ; +T_AND : A N D ; +T_AS : A S ; +T_ASC : A S C ; +T_AT : A T ; +T_AVG : A V G ; +T_BATCHSIZE : B A T C H S I 
Z E ; +T_BEGIN : B E G I N ; +T_BETWEEN : B E T W E E N ; +T_BIGINT : B I G I N T ; +T_BREAK : B R E A K ; +T_BY : B Y ; +T_CALL : C A L L ; +T_CALLER : C A L L E R ; +T_CASE : C A S E ; +T_CASESPECIFIC : C A S E S P E C I F I C ; +T_CAST : C A S T ; +T_CHAR : C H A R ; +T_CHARACTER : C H A R A C T E R ; +T_CLIENT : C L I E N T ; +T_CLOSE : C L O S E ; +T_COLLECTION : C O L L E C T I O N ; +T_COPY : C O P Y ; +T_COMMIT : C O M M I T ; +T_CONCAT : C O N C A T; +T_CONDITION : C O N D I T I O N ; +T_CONSTRAINT : C O N S T R A I N T ; +T_CONTINUE : C O N T I N U E ; +T_COUNT : C O U N T ; +T_COUNT_BIG : C O U N T '_' B I G; +T_CREATE : C R E A T E ; +T_CREATOR : C R E A T O R ; +T_CS : C S; +T_CURRENT : C U R R E N T ; +T_CURSOR : C U R S O R ; +T_DATE : D A T E ; +T_DAY : D A Y ; +T_DAYS : D A Y S ; +T_DEC : D E C ; +T_DECIMAL : D E C I M A L ; +T_DECLARE : D E C L A R E ; +T_DEFAULT : D E F A U L T ; +T_DEFINED : D E F I N E D ; +T_DEFINER : D E F I N E R ; +T_DELETE : D E L E T E ; +T_DELIMITED : D E L I M I T E D ; +T_DELIMITER : D E L I M I T E R ; +T_DESC : D E S C ; +T_DIAGNOSTICS : D I A G N O S T I C S ; +T_DISTINCT : D I S T I N C T ; +T_DISTRIBUTE : D I S T R I B U T E ; +T_DO : D O ; +T_DROP : D R O P ; +T_DYNAMIC : D Y N A M I C ; +T_ELSE : E L S E ; +T_ELSEIF : E L S E I F ; +T_ELSIF : E L S I F ; +T_END : E N D ; +T_ESCAPED : E S C A P E D ; +T_EXCEPT : E X C E P T ; +T_EXEC : E X E C ; +T_EXECUTE : E X E C U T E ; +T_EXCEPTION : E X C E P T I O N ; +T_EXISTS : E X I S T S ; +T_EXIT : E X I T ; +T_FETCH : F E T C H ; +T_FIELDS : F I E L D S ; +T_FILE : F I L E ; +T_FLOAT : F L O A T ; +T_FOR : F O R ; +T_FORMAT : F O R M A T ; +T_FOUND : F O U N D ; +T_FROM : F R O M ; +T_FULL : F U L L ; +T_FUNCTION : F U N C T I O N ; +T_GET : G E T ; +T_GLOBAL : G L O B A L ; +T_GRANT : G R A N T ; +T_GROUP : G R O U P ; +T_HANDLER : H A N D L E R ; +T_HASH : H A S H ; +T_HAVING : H A V I N G ; +T_HIVE : H I V E ; +T_HOST : H O S T ; +T_IF : I F ; +T_IGNORE : I G N O 
R E ; +T_IMMEDIATE : I M M E D I A T E ; +T_IN : I N ; +T_INCLUDE : I N C L U D E ; +T_INDEX : I N D E X ; +T_INNER : I N N E R ; +T_INOUT : I N O U T; +T_INSERT : I N S E R T ; +T_INT : I N T ; +T_INTEGER : I N T E G E R ; +T_INTERSECT : I N T E R S E C T ; +T_INTO : I N T O ; +T_INVOKER : I N V O K E R ; +T_IS : I S ; +T_ITEMS : I T E M S ; +T_JOIN : J O I N ; +T_KEY : K E Y ; +T_KEYS : K E Y S ; +T_LANGUAGE : L A N G U A G E ; +T_LEAVE : L E A V E ; +T_LEFT : L E F T ; +T_LIKE : L I K E ; +T_LIMIT : L I M I T ; +T_LINES : L I N E S ; +T_LOCAL : L O C A L ; +T_LOGGED : L O G G E D ; +T_LOOP : L O O P ; +T_MAP : M A P ; +T_MATCHED : M A T C H E D ; +T_MAX : M A X ; +T_MERGE : M E R G E ; +T_MESSAGE_TEXT : M E S S A G E '_' T E X T ; +T_MICROSECOND : M I C R O S E C O N D ; +T_MICROSECONDS : M I C R O S E C O N D S; +T_MIN : M I N ; +T_MULTISET : M U L T I S E T ; +T_NOT : N O T ; +T_NULL : N U L L ; +T_NUMBER : N U M B E R ; +T_OBJECT : O B J E C T ; +T_ON : O N ; +T_ONLY : O N L Y ; +T_OPEN : O P E N ; +T_OR : O R ; +T_ORDER : O R D E R; +T_OUT : O U T ; +T_OUTER : O U T E R ; +T_OVER : O V E R ; +T_OVERWRITE : O V E R W R I T E ; +T_OWNER : O W N E R ; +T_PARTITION : P A R T I T I O N ; +T_PRESERVE : P R E S E R V E ; +T_PRIMARY : P R I M A R Y ; +T_PRINT : P R I N T ; +T_PROC : P R O C ; +T_PROCEDURE : P R O C E D U R E; +T_REGEXP : R E G E X P ; +T_REPLACE : R E P L A C E ; +T_RESIGNAL : R E S I G N A L ; +T_RESULT : R E S U L T ; +T_RETURN : R E T U R N ; +T_RETURNS : R E T U R N S ; +T_REVERSE : R E V E R S E ; +T_RIGHT : R I G H T ; +T_RLIKE : R L I K E ; +T_ROLLBACK : R O L L B A C K ; +T_ROW : R O W ; +T_ROWS : R O W S ; +T_ROW_COUNT : R O W '_' C O U N T ; +T_RR : R R; +T_RS : R S ; +T_TRIM : T R I M ; +T_SECURITY : S E C U R I T Y ; +T_SEL : S E L ; +T_SELECT : S E L E C T ; +T_SET : S E T ; +T_SETS : S E T S; +T_SIGNAL : S I G N A L ; +T_SMALLINT : S M A L L I N T ; +T_SQL : S Q L ; +T_SQLEXCEPTION : S Q L E X C E P T I O N ; +T_SQLINSERT : S Q L I N S 
E R T ; +T_SQLSTATE : S Q L S T A T E ; +T_SQLWARNING : S Q L W A R N I N G ; +T_STEP : S T E P ; +T_STRING : S T R I N G ; +T_SUBSTRING : S U B S T R I N G ; +T_SUM : S U M ; +T_TABLE : T A B L E ; +T_TEMPORARY : T E M P O R A R Y ; +T_TERMINATED : T E R M I N A T E D ; +T_THEN : T H E N ; +T_TIMESTAMP : T I M E S T A M P ; +T_TITLE : T I T L E ; +T_TO : T O ; +T_TOP : T O P ; +T_UNION : U N I O N ; +T_UNIQUE : U N I Q U E ; +T_UPDATE : U P D A T E ; +T_UR : U R ; +T_USE : U S E ; +T_USING : U S I N G ; +T_VALUE : V A L U E ; +T_VALUES : V A L U E S ; +T_VAR : V A R ; +T_VARCHAR : V A R C H A R ; +T_VARCHAR2 : V A R C H A R '2' ; +T_VOLATILE : V O L A T I L E ; +T_WHEN : W H E N ; +T_WHERE : W H E R E ; +T_WHILE : W H I L E ; +T_WITH : W I T H ; +T_WITHOUT : W I T H O U T ; +T_WORK : W O R K ; + +// Functions with specific syntax +T_ACTIVITY_COUNT : A C T I V I T Y '_' C O U N T ; +T_CURRENT_DATE : C U R R E N T '_' D A T E ; +T_CURRENT_TIMESTAMP : C U R R E N T '_' T I M E S T A M P ; +T_CURRENT_USER : C U R R E N T '_' U S E R ; +T_DENSE_RANK : D E N S E '_' R A N K ; +T_FIRST_VALUE : F I R S T '_' V A L U E; +T_LAG : L A G ; +T_LAST_VALUE : L A S T '_' V A L U E; +T_LEAD : L E A D ; +T_MAX_PART_STRING : M A X '_' P A R T '_' S T R I N G ; +T_MIN_PART_STRING : M I N '_' P A R T '_' S T R I N G ; +T_MAX_PART_INT : M A X '_' P A R T '_' I N T ; +T_MIN_PART_INT : M I N '_' P A R T '_' I N T ; +T_MAX_PART_DATE : M A X '_' P A R T '_' D A T E ; +T_MIN_PART_DATE : M I N '_' P A R T '_' D A T E ; +T_PART_LOC : P A R T '_' L O C ; +T_RANK : R A N K ; +T_ROW_NUMBER : R O W '_' N U M B E R; +T_STDEV : S T D E V ; +T_SYSDATE : S Y S D A T E ; +T_VARIANCE : V A R I A N C E ; +T_USER : U S E R; + +T_ADD : '+' ; +T_COLON : ':' ; +T_COMMA : ',' ; +T_PIPE : '||' ; +T_DIV : '/' ; +T_DOT2 : '..' 
; +T_EQUAL : '=' ; +T_EQUAL2 : '==' ; +T_NOTEQUAL : '<>' ; +T_NOTEQUAL2 : '!=' ; +T_GREATER : '>' ; +T_GREATEREQUAL : '>=' ; +T_LESS : '<' ; +T_LESSEQUAL : '<=' ; +T_MUL : '*' ; +T_OPEN_B : '{' ; +T_OPEN_P : '(' ; +T_CLOSE_B : '}' ; +T_CLOSE_P : ')' ; +T_SEMICOLON : ';' ; +T_SUB : '-' ; + +P_e : '-e' ; +P_f : '-f' ; +P_hiveconf : '-hiveconf' ; +P_i : '-i' ; +P_S : '-S' ; +P_h : '-h' ; + +L_ID : L_ID_PART (L_BLANK* '.' L_BLANK* L_ID_PART)* // Identifier + ; +L_S_STRING : '\'' (('\'' '\'') | ('\\' '\'') | ~('\''))* '\'' // Single quoted string literal + ; +L_D_STRING : '"' (L_STR_ESC_D | .)*? '"' // Double quoted string literal + ; +L_INT : L_DIGIT+ ; // Integer +L_DEC : L_DIGIT+ '.' ~'.' L_DIGIT* // Decimal number + | '.' L_DIGIT+ + ; +L_WS : L_BLANK+ -> skip ; // Whitespace +L_M_COMMENT : '/*' .*? '*/' -> channel(HIDDEN) ; // Multiline comment +L_S_COMMENT : ('--' | '//') .*? '\r'? '\n' -> channel(HIDDEN) ; // Single line comment + +L_FILE : '/'? L_ID ('/' L_ID)* // File path + | ([a-zA-Z] ':' '\\'?)? L_ID ('\\' L_ID)* + ; + +L_LABEL : ([a-zA-Z] | L_DIGIT | '_')* ':' + ; + +fragment +L_ID_PART : + [a-zA-Z] ([a-zA-Z] | L_DIGIT | '_')* // Identifier part + | ('_' | '@' | ':' | '#' | '$') ([a-zA-Z] | L_DIGIT | '_' | '@' | ':' | '#' | '$')+ // (at least one char must follow special char) + | '"' .*? '"' // Quoted identifiers + | '[' .*? ']' + | '`' .*? 
'`' + ; +fragment +L_STR_ESC_D : // Double quoted string escape sequence + '""' | '\\"' + ; +fragment +L_DIGIT : [0-9] // Digit + ; +fragment +L_BLANK : (' ' | '\t' | '\r' | '\n') + ; + +// Support case-insensitive keywords and allowing case-sensitive identifiers +fragment A : ('a'|'A') ; +fragment B : ('b'|'B') ; +fragment C : ('c'|'C') ; +fragment D : ('d'|'D') ; +fragment E : ('e'|'E') ; +fragment F : ('f'|'F') ; +fragment G : ('g'|'G') ; +fragment H : ('h'|'H') ; +fragment I : ('i'|'I') ; +fragment J : ('j'|'J') ; +fragment K : ('k'|'K') ; +fragment L : ('l'|'L') ; +fragment M : ('m'|'M') ; +fragment N : ('n'|'N') ; +fragment O : ('o'|'O') ; +fragment P : ('p'|'P') ; +fragment Q : ('q'|'Q') ; +fragment R : ('r'|'R') ; +fragment S : ('s'|'S') ; +fragment T : ('t'|'T') ; +fragment U : ('u'|'U') ; +fragment V : ('v'|'V') ; +fragment W : ('w'|'W') ; +fragment X : ('x'|'X') ; +fragment Y : ('y'|'Y') ; +fragment Z : ('z'|'Z') ; http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/hplsql/src/main/java/org/apache/hive/hplsql/Arguments.java ---------------------------------------------------------------------- diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Arguments.java b/hplsql/src/main/java/org/apache/hive/hplsql/Arguments.java new file mode 100644 index 0000000..604d9a7 --- /dev/null +++ b/hplsql/src/main/java/org/apache/hive/hplsql/Arguments.java @@ -0,0 +1,206 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.ParseException;

/**
 * Command-line option definitions and parsing for the HPL/SQL tool.
 *
 * Call {@link #parse(String[])} first; the accessors return the parsed values.
 */
public class Arguments {
  private CommandLine commandLine;
  private Options options = new Options();

  String execString;   // value of -e (inline HPL/SQL text)
  String fileName;     // value of -f (script file name)
  String main;         // value of -main (entry point procedure/function)
  // Variables collected from -hiveconf, --hivevar and -d/--define (merged, later wins)
  Map<String, String> vars = new HashMap<String, String>();

  @SuppressWarnings("static-access")
  Arguments() {
    // -e 'query'
    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("quoted-query-string")
        .withDescription("HPL/SQL from command line")
        .create('e'));

    // -f <file>
    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("filename")
        .withDescription("HPL/SQL from a file")
        .create('f'));

    // -main entry_point_name
    options.addOption(OptionBuilder
        .hasArg()
        .withArgName("procname")
        .withDescription("Entry point (procedure or function name)")
        .create("main"));

    // -hiveconf x=y
    options.addOption(OptionBuilder
        .withValueSeparator()
        .hasArgs(2)
        .withArgName("property=value")
        .withLongOpt("hiveconf")
        .withDescription("Value for given property")
        .create());

    // Substitution option -d, --define
    options.addOption(OptionBuilder
        .withValueSeparator()
        .hasArgs(2)
        .withArgName("key=value")
        .withLongOpt("define")
        .withDescription("Variable substitution e.g. -d A=B or --define A=B")
        .create('d'));

    // Substitution option --hivevar
    options.addOption(OptionBuilder
        .withValueSeparator()
        .hasArgs(2)
        .withArgName("key=value")
        .withLongOpt("hivevar")
        .withDescription("Variable substitution e.g. --hivevar A=B")
        .create());

    // [-version|--version]
    options.addOption(new Option("version", "version", false, "Print HPL/SQL version"));

    // [-trace|--trace]
    options.addOption(new Option("trace", "trace", false, "Print debug information"));

    // [-offline|--offline]
    options.addOption(new Option("offline", "offline", false, "Offline mode - skip SQL execution"));

    // [-H|--help]
    options.addOption(new Option("H", "help", false, "Print help information"));
  }

  /**
   * Parse the command line arguments.
   *
   * @return true on success; false (after printing the error) on a parse failure
   */
  public boolean parse(String[] args) {
    try {
      commandLine = new GnuParser().parse(options, args);
      execString = commandLine.getOptionValue('e');
      fileName = commandLine.getOptionValue('f');
      main = commandLine.getOptionValue("main");
      // Merge all three substitution sources into a single variable map
      Properties p = commandLine.getOptionProperties("hiveconf");
      for (String key : p.stringPropertyNames()) {
        vars.put(key, p.getProperty(key));
      }
      p = commandLine.getOptionProperties("hivevar");
      for (String key : p.stringPropertyNames()) {
        vars.put(key, p.getProperty(key));
      }
      p = commandLine.getOptionProperties("define");
      for (String key : p.stringPropertyNames()) {
        vars.put(key, p.getProperty(key));
      }
    } catch (ParseException e) {
      System.err.println(e.getMessage());
      return false;
    }
    return true;
  }

  /**
   * Get the value of execution option -e
   */
  public String getExecString() {
    return execString;
  }

  /**
   * Get the value of file option -f
   */
  public String getFileName() {
    return fileName;
  }

  /**
   * Get the value of -main option
   */
  public String getMain() {
    return main;
  }

  /**
   * Get the substitution variables
   */
  public Map<String, String> getVars() {
    return vars;
  }

  /**
   * Test whether version option is set (false when parse() has not run)
   */
  public boolean hasVersionOption() {
    return commandLine != null && commandLine.hasOption("version");
  }

  /**
   * Test whether debug option is set (false when parse() has not run)
   */
  public boolean hasTraceOption() {
    return commandLine != null && commandLine.hasOption("trace");
  }

  /**
   * Test whether offline option is set (false when parse() has not run)
   */
  public boolean hasOfflineOption() {
    return commandLine != null && commandLine.hasOption("offline");
  }

  /**
   * Test whether help option is set (false when parse() has not run)
   */
  public boolean hasHelpOption() {
    return commandLine != null && commandLine.hasOption('H');
  }

  /**
   * Print help information
   */
  public void printHelp() {
    new HelpFormatter().printHelp("hplsql", options);
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.net.URL;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hplsql.Exec.OnError;

/**
 * HPL/SQL run-time configuration, loaded from hplsql-site.xml and/or set at run time.
 */
public class Conf extends Configuration {

  public static final String SITE_XML = "hplsql-site.xml";
  public static final String DOT_HPLSQLRC = ".hplsqlrc";
  public static final String HPLSQLRC = "hplsqlrc";
  public static final String HPLSQL_LOCALS_SQL = "hplsql_locals.sql";

  public static final String CONN_CONVERT = "hplsql.conn.convert.";
  public static final String CONN_DEFAULT = "hplsql.conn.default";
  public static final String DUAL_TABLE = "hplsql.dual.table";
  public static final String INSERT_VALUES = "hplsql.insert.values";
  public static final String ONERROR = "hplsql.onerror";
  public static final String TEMP_TABLES = "hplsql.temp.tables";
  public static final String TEMP_TABLES_SCHEMA = "hplsql.temp.tables.schema";
  public static final String TEMP_TABLES_LOCATION = "hplsql.temp.tables.location";

  public static final String TRUE = "true";
  public static final String FALSE = "false";
  public static final String YES = "yes";
  public static final String NO = "no";

  public enum InsertValues {NATIVE, SELECT};
  public enum TempTables {NATIVE, MANAGED};

  public String defaultConnection;

  OnError onError = OnError.EXCEPTION;
  InsertValues insertValues = InsertValues.NATIVE;
  TempTables tempTables = TempTables.NATIVE;

  String dualTable = "default.dual";

  String tempTablesSchema = "";
  String tempTablesLocation = "/tmp/hplsql";

  // Per-connection-profile flag: convert SQL on the fly for that profile
  HashMap<String, Boolean> connConvert = new HashMap<String, Boolean>();

  /**
   * Set an option by its hplsql.* key.
   */
  public void setOption(String key, String value) {
    if (key.startsWith(CONN_CONVERT)) {
      // Strip the whole prefix; a hard-coded substring(19) left a leading '.'
      // on the profile name since CONN_CONVERT is 20 characters long
      setConnectionConvert(key.substring(CONN_CONVERT.length()), value);
    }
    else if (key.equalsIgnoreCase(CONN_DEFAULT)) {
      defaultConnection = value;
    }
    else if (key.equalsIgnoreCase(DUAL_TABLE)) {
      dualTable = value;
    }
    else if (key.equalsIgnoreCase(INSERT_VALUES)) {
      setInsertValues(value);
    }
    else if (key.equalsIgnoreCase(ONERROR)) {
      setOnError(value);
    }
    else if (key.equalsIgnoreCase(TEMP_TABLES)) {
      setTempTables(value);
    }
    else if (key.equalsIgnoreCase(TEMP_TABLES_SCHEMA)) {
      tempTablesSchema = value;
    }
    else if (key.equalsIgnoreCase(TEMP_TABLES_LOCATION)) {
      tempTablesLocation = value;
    }
  }

  /**
   * Set hplsql.insert.values option (unknown values leave the current setting)
   */
  private void setInsertValues(String value) {
    if (value.equalsIgnoreCase("NATIVE")) {
      insertValues = InsertValues.NATIVE;
    }
    else if (value.equalsIgnoreCase("SELECT")) {
      insertValues = InsertValues.SELECT;
    }
  }

  /**
   * Set hplsql.temp.tables option (unknown values leave the current setting)
   */
  private void setTempTables(String value) {
    if (value.equalsIgnoreCase("NATIVE")) {
      tempTables = TempTables.NATIVE;
    }
    else if (value.equalsIgnoreCase("MANAGED")) {
      tempTables = TempTables.MANAGED;
    }
  }

  /**
   * Set error handling approach (EXCEPTION, SETERROR or STOP)
   */
  private void setOnError(String value) {
    if (value.equalsIgnoreCase("EXCEPTION")) {
      onError = OnError.EXCEPTION;
    }
    else if (value.equalsIgnoreCase("SETERROR")) {
      onError = OnError.SETERROR;
    }
    else if (value.equalsIgnoreCase("STOP")) {   // was a bare if; joined into the chain
      onError = OnError.STOP;
    }
  }

  /**
   * Set whether convert or not SQL for the specified connection profile.
   * Accepts "true"/"yes" (case-insensitive) as true; anything else is false.
   */
  void setConnectionConvert(String name, String value) {
    boolean convert = value.equalsIgnoreCase(TRUE) || value.equalsIgnoreCase(YES);
    connConvert.put(name, convert);
  }

  /**
   * Get whether convert or not SQL for the specified connection profile
   * (false when the profile was never configured)
   */
  boolean getConnectionConvert(String name) {
    Boolean convert = connConvert.get(name);
    if (convert != null) {
      return convert.booleanValue();
    }
    return false;
  }

  /**
   * Load parameters from hplsql-site.xml on the classpath
   */
  public void init() {
    addResource(SITE_XML);
  }

  /**
   * Get the location of the configuration file ("" when not found)
   */
  public String getLocation() {
    URL url = getResource(SITE_XML);
    if (url != null) {
      return url.toString();
    }
    return "";
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Stack;
import java.sql.DriverManager;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.ResultSet;

/**
 * Manages JDBC connections for named connection profiles: simple pooling,
 * per-connection init SQL and one-shot pre-SQL statements.
 */
public class Conn {

  public enum Type {DB2, HIVE, MYSQL, TERADATA};

  // Pool of idle connections per profile name
  HashMap<String, Stack<Connection>> connections = new HashMap<String, Stack<Connection>>();
  // "driver;url;user;password" connection string per profile name
  HashMap<String, String> connStrings = new HashMap<String, String>();
  HashMap<String, Type> connTypes = new HashMap<String, Type>();

  // Statements run once when a physical connection is opened
  HashMap<String, ArrayList<String>> connInits = new HashMap<String, ArrayList<String>>();
  // Statements run once before the next statement on a profile
  HashMap<String, ArrayList<String>> preSql = new HashMap<String, ArrayList<String>>();

  Exec exec;
  Timer timer = new Timer();
  boolean trace = false;

  Conn(Exec e) {
    exec = e;
    trace = exec.getTrace();
  }

  /**
   * Execute a SQL query. Errors are recorded on the Query object rather than thrown;
   * on success the Query owns the connection/statement/result set until closeQuery().
   */
  public Query executeQuery(Query query, String connName) {
    try {
      Connection conn = getConnection(connName);
      runPreSql(connName, conn);
      Statement stmt = conn.createStatement();
      timer.start();
      ResultSet rs = stmt.executeQuery(query.sql);
      timer.stop();
      query.set(conn, stmt, rs);
      if (trace) {
        exec.trace(null, "Query executed successfully (" + timer.format() + ")");
      }
    } catch (Exception e) {
      query.setError(e);
    }
    return query;
  }

  public Query executeQuery(String sql, String connName) {
    return executeQuery(new Query(sql), connName);
  }

  /**
   * Execute a SQL statement; a result set is attached when the statement produces one.
   */
  public Query executeSql(String sql, String connName) {
    Query query = new Query(sql);
    try {
      Connection conn = getConnection(connName);
      runPreSql(connName, conn);
      Statement stmt = conn.createStatement();
      ResultSet rs = null;
      if (stmt.execute(sql)) {
        rs = stmt.getResultSet();
      }
      query.set(conn, stmt, rs);
    } catch (Exception e) {
      query.setError(e);
    }
    return query;
  }

  /**
   * Close the query object and return its connection to the pool
   */
  public void closeQuery(Query query, String connName) {
    query.closeStatement();
    returnConnection(connName, query.getConnection());
  }

  /**
   * Run pre-SQL statements for the profile; they are consumed (run once) on success.
   * @throws SQLException
   */
  void runPreSql(String connName, Connection conn) throws SQLException {
    ArrayList<String> sqls = preSql.get(connName);
    if (sqls != null) {
      Statement s = conn.createStatement();
      try {
        for (String sql : sqls) {
          s.execute(sql);
        }
      } finally {
        s.close();   // close even on failure so the connection stays usable
      }
      preSql.remove(connName);
    }
  }

  /**
   * Get a connection: reuse one from the pool, or open a new one and run its
   * initialization statements.
   * @throws Exception when the profile is unknown or the connection cannot be opened
   */
  synchronized Connection getConnection(String connName) throws Exception {
    Stack<Connection> connStack = connections.get(connName);
    String connStr = connStrings.get(connName);
    if (connStr == null) {
      throw new Exception("Unknown connection profile: " + connName);
    }
    if (connStack != null && !connStack.empty()) {   // Reuse an existing connection
      return connStack.pop();
    }
    Connection c = openConnection(connStr);
    ArrayList<String> sqls = connInits.get(connName); // Run initialization statements
    if (sqls != null) {
      try {
        Statement s = c.createStatement();
        try {
          for (String sql : sqls) {
            s.execute(sql);
          }
        } finally {
          s.close();
        }
      } catch (SQLException e) {
        // Don't leak the physical connection when initialization fails
        try {
          c.close();
        } catch (SQLException ignored) {
          // best-effort close; the original failure is more informative
        }
        throw e;
      }
    }
    return c;
  }

  /**
   * Open a new connection from a "driver;url;user;password" string
   * (missing parts fall back to Hive defaults).
   * @throws Exception
   */
  Connection openConnection(String connStr) throws Exception {
    String driver = "org.apache.hadoop.hive.jdbc.HiveDriver";
    String url = "jdbc:hive://";
    String usr = "";
    String pwd = "";
    if (connStr != null) {
      String[] c = connStr.split(";");
      if (c.length >= 1) {
        driver = c[0];
      }
      if (c.length >= 2) {
        url = c[1];
      }
      if (c.length >= 3) {
        usr = c[2];
      }
      if (c.length >= 4) {
        pwd = c[3];
      }
    }
    Class.forName(driver);
    timer.start();
    Connection conn = DriverManager.getConnection(url, usr, pwd);
    timer.stop();
    if (trace) {
      exec.trace(null, "Open connection: " + url + " (" + timer.format() + ")");
    }
    return conn;
  }

  /**
   * Get the database type by profile name
   */
  Conn.Type getTypeByProfile(String name) {
    return connTypes.get(name);
  }

  /**
   * Get the database type by connection string (defaults to HIVE)
   */
  Conn.Type getType(String connStr) {
    if (connStr.contains("hive.")) {
      return Type.HIVE;
    }
    else if (connStr.contains("db2.")) {
      return Type.DB2;
    }
    else if (connStr.contains("mysql.")) {
      return Type.MYSQL;
    }
    else if (connStr.contains("teradata.")) {
      return Type.TERADATA;
    }
    return Type.HIVE;
  }

  /**
   * Return the connection to the pool
   */
  void returnConnection(String name, Connection conn) {
    if (conn != null) {
      connections.get(name).push(conn);
    }
  }

  /**
   * Add a new connection string for a profile
   */
  public void addConnection(String name, String connStr) {
    connections.put(name, new Stack<Connection>());
    connStrings.put(name, connStr);
    connTypes.put(name, getType(connStr));
  }

  /**
   * Add initialization statements (';'-separated) for the specified connection
   */
  public void addConnectionInit(String name, String connInit) {
    ArrayList<String> a = new ArrayList<String>();
    String[] sa = connInit.split(";");
    for (String s : sa) {
      s = s.trim();
      if (!s.isEmpty()) {
        a.add(s);
      }
    }
    connInits.put(name, a);
  }

  /**
   * Add SQL statements to be executed before executing the next SQL statement (pre-SQL)
   */
  public void addPreSql(String name, ArrayList<String> sql) {
    preSql.put(name, sql);
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.hplsql; + +/** + * On-the-fly SQL Converter + */ +public class Converter { + + Exec exec; + boolean trace = false; + + Converter(Exec e) { + exec = e; + trace = exec.getTrace(); + } + + /** + * Convert a data type + */ + String dataType(HplsqlParser.DtypeContext type, HplsqlParser.Dtype_lenContext len) { + String d = null; + if (type.T_VARCHAR2() != null) { + d = "STRING"; + } + else if (type.T_NUMBER() != null) { + d = "DECIMAL"; + if (len != null) { + d += exec.getText(len); + } + } + if (d != null) { + return d; + } + else if (len != null) { + return exec.getText(type, type.getStart(), len.getStop()); + } + return exec.getText(type, type.getStart(), type.getStop()); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/052643cb/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java ---------------------------------------------------------------------- diff --git a/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java b/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java new file mode 100644 index 0000000..30b98ca --- /dev/null +++ b/hplsql/src/main/java/org/apache/hive/hplsql/Copy.java @@ -0,0 +1,426 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hive.hplsql.Var;
import org.antlr.v4.runtime.ParserRuleContext;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang3.tuple.Pair;

/**
 * Implementation of the COPY and COPY FROM LOCAL statements: copies query
 * results to another JDBC table or to a local file, and uploads local files
 * to a Hadoop file system.
 */
public class Copy {

  Exec exec;
  Timer timer = new Timer();
  boolean trace = false;
  boolean info = false;

  // Total size of the local source files, accumulated by createLocalFileList()
  long srcSizeInBytes = 0;

  // COPY options (reset per statement via initOptions/initFileOptions)
  String delimiter = "\t";
  boolean sqlInsert = false;
  String sqlInsertName;
  String targetConn;
  int batchSize = 1000;

  boolean overwrite = false;
  boolean delete = false;
  boolean ignore = false;

  Copy(Exec e) {
    exec = e;
    trace = exec.getTrace();
    info = exec.getInfo();
  }

  /**
   * Run COPY command.
   * Executes the source query (either SELECT * FROM table or an explicit
   * SELECT) and dispatches to copyToTable() or copyToFile() depending on
   * whether a target connection was specified via the AT option.
   * @return 0 on success, 1 on failure (a signal is raised via exec)
   */
  Integer run(HplsqlParser.Copy_stmtContext ctx) {
    trace(ctx, "COPY");
    initOptions(ctx);
    StringBuilder sql = new StringBuilder();
    String conn = null;
    if (ctx.table_name() != null) {
      String table = evalPop(ctx.table_name()).toString();
      conn = exec.getObjectConnection(ctx.table_name().getText());
      sql.append("SELECT * FROM ");
      sql.append(table);
    }
    else {
      sql.append(evalPop(ctx.select_stmt()).toString());
      conn = exec.getStatementConnection();
      if (trace) {
        trace(ctx, "Statement:\n" + sql);
      }
    }
    Query query = exec.executeQuery(ctx, sql.toString(), conn);
    if (query.error()) {
      exec.signal(query);
      return 1;
    }
    exec.setSqlSuccess();
    try {
      if (targetConn != null) {
        copyToTable(ctx, query);
      }
      else {
        copyToFile(ctx, query);
      }
    }
    catch (Exception e) {
      exec.signal(e);
      return 1;
    }
    finally {
      exec.closeQuery(query, conn);
    }
    return 0;
  }

  /**
   * Copy the query results to another table.
   * Rows are inserted through a JDBC PreparedStatement; inserts are batched
   * when batchSize is greater than 1.
   * @throws Exception
   */
  void copyToTable(HplsqlParser.Copy_stmtContext ctx, Query query) throws Exception {
    ResultSet rs = query.getResultSet();
    if (rs == null) {
      return;
    }
    ResultSetMetaData rm = rs.getMetaData();
    int cols = rm.getColumnCount();
    int rows = 0;
    if (trace) {
      trace(ctx, "SELECT executed: " + cols + " columns");
    }
    Connection conn = exec.getConnection(targetConn);
    StringBuilder sql = new StringBuilder();
    sql.append("INSERT INTO " + sqlInsertName + " VALUES (");
    for (int i = 0; i < cols; i++) {
      sql.append("?");
      if (i + 1 < cols) {
        sql.append(",");
      }
    }
    sql.append(")");
    long start = timer.start();
    long prev = start;
    try {
      PreparedStatement ps = conn.prepareStatement(sql.toString());
      try {
        boolean batchOpen = false;
        while (rs.next()) {
          for (int i = 1; i <= cols; i++) {
            ps.setObject(i, rs.getObject(i));
          }
          rows++;
          if (batchSize > 1) {
            ps.addBatch();
            batchOpen = true;
            if (rows % batchSize == 0) {
              ps.executeBatch();
              batchOpen = false;
            }
          }
          else {
            ps.executeUpdate();
          }
          if (trace && rows % 100 == 0) {
            long cur = timer.current();
            if (cur - prev > 10000) {
              // Math.max guards against division by zero for very fast copies
              trace(ctx, "Copying rows: " + rows + " (" +
                  rows * 1000 / Math.max(cur - start, 1) + " rows/sec)");
              prev = cur;
            }
          }
        }
        if (batchOpen) {
          // Flush the last, partially filled batch
          ps.executeBatch();
        }
      }
      finally {
        // Close the statement even when an insert fails mid-copy
        ps.close();
      }
    }
    finally {
      // Return the connection to the pool on both success and failure
      exec.returnConnection(targetConn, conn);
    }
    exec.setRowCount(rows);
    long elapsed = timer.stop();
    if (trace) {
      // rows * 1000 / max(elapsed, 1) avoids ArithmeticException when the
      // copy completed in under one second (elapsed / 1000 == 0)
      trace(ctx, "COPY completed: " + rows + " row(s), " + timer.format() + ", " +
          rows * 1000 / Math.max(elapsed, 1) + " rows/sec");
    }
  }

  /**
   * Copy the query results to a file.
   * Output is either delimiter-separated values or SQL INSERT statements
   * when the SQLINSERT option is set.
   * @throws Exception
   */
  void copyToFile(HplsqlParser.Copy_stmtContext ctx, Query query) throws Exception {
    ResultSet rs = query.getResultSet();
    if (rs == null) {
      return;
    }
    ResultSetMetaData rm = rs.getMetaData();
    String filename = null;
    if (ctx.copy_target().expr() != null) {
      filename = evalPop(ctx.copy_target().expr()).toString();
    }
    else {
      filename = ctx.copy_target().getText();
    }
    // NOTE(review): getBytes() uses the platform default charset here —
    // confirm whether UTF-8 should be forced for portable output
    byte[] del = delimiter.getBytes();
    byte[] rowdel = "\n".getBytes();
    byte[] nullstr = "NULL".getBytes();
    int cols = rm.getColumnCount();
    int rows = 0;
    if (trace) {
      trace(ctx, "SELECT executed: " + cols + " columns, output file: " + filename);
    }
    java.io.File file = new java.io.File(filename);
    FileOutputStream out = null;
    try {
      if (!file.exists()) {
        file.createNewFile();
      }
      out = new FileOutputStream(file, false /*append*/);
      String col;
      String sql = "";
      if (sqlInsert) {
        sql = "INSERT INTO " + sqlInsertName + " VALUES (";
        rowdel = ");\n".getBytes();
      }
      while (rs.next()) {
        if (sqlInsert) {
          out.write(sql.getBytes());
        }
        for (int i = 1; i <= cols; i++) {
          if (i > 1) {
            out.write(del);
          }
          col = rs.getString(i);
          if (col != null) {
            if (sqlInsert) {
              col = Utils.quoteString(col);
            }
            out.write(col.getBytes());
          }
          else if (sqlInsert) {
            // Emit SQL NULL literal; in delimited mode nulls become empty fields
            out.write(nullstr);
          }
        }
        out.write(rowdel);
        rows++;
      }
      exec.setRowCount(rows);
    }
    finally {
      if (out != null) {
        out.close();
      }
    }
    if (trace) {
      trace(ctx, "COPY rows: " + rows);
    }
  }

  /**
   * Run COPY FROM LOCAL statement.
   * Recursively collects local files and uploads them via
   * FileSystem.copyFromLocalFile(), honoring OVERWRITE, DELETE and IGNORE
   * options. Sets the host code to 0 when all files succeed, 1 otherwise.
   */
  public Integer runFromLocal(HplsqlParser.Copy_from_local_stmtContext ctx) {
    trace(ctx, "COPY FROM LOCAL");
    initFileOptions(ctx.copy_file_option());
    // Key: absolute source path; value: (relative subdirectory or null, file size)
    HashMap<String, Pair<String, Long>> src = new HashMap<String, Pair<String, Long>>();
    int cnt = ctx.copy_source().size();
    for (int i = 0; i < cnt; i++) {
      createLocalFileList(src, evalPop(ctx.copy_source(i)).toString(), null);
    }
    String dest = evalPop(ctx.copy_target()).toString();
    if (info) {
      info(ctx, "Files to copy: " + src.size() + " (" + Utils.formatSizeInBytes(srcSizeInBytes) + ")");
    }
    timer.start();
    File file = new File();
    FileSystem fs = null;
    int succeed = 0;
    int failed = 0;
    long copiedSize = 0;
    try {
      fs = file.createFs();
      boolean multi = false;
      if (src.size() > 1) {
        multi = true;
      }
      for (Map.Entry<String, Pair<String, Long>> i : src.entrySet()) {
        try {
          Path s = new Path(i.getKey());
          Path d = null;
          if (multi) {
            // Preserve the source directory structure under the destination
            String relativePath = i.getValue().getLeft();
            if (relativePath == null) {
              d = new Path(dest, s.getName());
            }
            else {
              d = new Path(dest, relativePath + java.io.File.separator + s.getName());
            }
          }
          else {
            // Single source: the destination is used as-is
            d = new Path(dest);
          }
          fs.copyFromLocalFile(delete, overwrite, s, d);
          succeed++;
          long size = i.getValue().getRight();
          copiedSize += size;
          if (info) {
            info(ctx, "Copied: " + file.resolvePath(d) + " (" + Utils.formatSizeInBytes(size) + ")");
          }
        }
        catch(IOException e) {
          failed++;
          if (!ignore) {
            throw e;
          }
        }
      }
    }
    catch(IOException e) {
      exec.signal(e);
      exec.setHostCode(1);
      return 1;
    }
    finally {
      long elapsed = timer.stop();
      if (info) {
        info(ctx, "COPY completed: " + succeed + " succeed, " + failed + " failed, " +
            timer.format() + ", " + Utils.formatSizeInBytes(copiedSize) + ", " +
            Utils.formatBytesPerSec(copiedSize, elapsed));
      }
      if (failed == 0) {
        exec.setHostCode(0);
      }
      else {
        exec.setHostCode(1);
      }
      file.close();
    }
    return 0;
  }

  /**
   * Create the list of local files for the specified path (including subdirectories).
   * Adds each regular file to the list keyed by absolute path, paired with its
   * relative subdirectory (null for top-level files) and size, and accumulates
   * srcSizeInBytes.
   */
  void createLocalFileList(HashMap<String, Pair<String, Long>> list, String path, String relativePath) {
    java.io.File file = new java.io.File(path);
    if (file.exists()) {
      if (file.isDirectory()) {
        // listFiles() can return null on an I/O error; skip silently in that case
        java.io.File[] children = file.listFiles();
        if (children == null) {
          return;
        }
        for (java.io.File i : children) {
          if (i.isDirectory()) {
            String rel = null;
            if (relativePath == null) {
              rel = i.getName();
            }
            else {
              rel = relativePath + java.io.File.separator + i.getName();
            }
            createLocalFileList(list, i.getAbsolutePath(), rel);
          }
          else {
            long size = i.length();
            list.put(i.getAbsolutePath(), Pair.of(relativePath, size));
            srcSizeInBytes += size;
          }
        }
      }
      else {
        long size = file.length();
        list.put(file.getAbsolutePath(), Pair.of(relativePath, size));
        srcSizeInBytes += size;
      }
    }
  }

  /**
   * Initialize COPY command options (DELIMITER, SQLINSERT, AT, BATCHSIZE).
   */
  void initOptions(HplsqlParser.Copy_stmtContext ctx) {
    int cnt = ctx.copy_option().size();
    for (int i = 0; i < cnt; i++) {
      HplsqlParser.Copy_optionContext option = ctx.copy_option(i);
      if (option.T_DELIMITER() != null) {
        // Unescape so that e.g. "\t" in the statement becomes a real tab
        delimiter = StringEscapeUtils.unescapeJava(evalPop(option.expr()).toString());
      }
      else if (option.T_SQLINSERT() != null) {
        sqlInsert = true;
        delimiter = ", ";
        if (option.ident() != null) {
          sqlInsertName = option.ident().getText();
        }
      }
      else if (option.T_AT() != null) {
        targetConn = option.ident().getText();
        sqlInsertName = ctx.copy_target().ident().getText();
      }
      else if (option.T_BATCHSIZE() != null) {
        batchSize = evalPop(option.expr()).intValue();
      }
    }
  }

  /**
   * Initialize COPY FILE options (OVERWRITE, DELETE, IGNORE).
   */
  void initFileOptions(List<HplsqlParser.Copy_file_optionContext> options) {
    srcSizeInBytes = 0;
    for (HplsqlParser.Copy_file_optionContext i : options) {
      if (i.T_OVERWRITE() != null) {
        overwrite = true;
      }
      else if (i.T_DELETE() != null) {
        delete = true;
      }
      else if (i.T_IGNORE() != null) {
        ignore = true;
      }
    }
  }

  /**
   * Evaluate the expression and pop value from the stack.
   */
  Var evalPop(ParserRuleContext ctx) {
    exec.visit(ctx);
    if (!exec.stack.isEmpty()) {
      return exec.stackPop();
    }
    return Var.Empty;
  }

  /**
   * Trace and information
   */
  public void trace(ParserRuleContext ctx, String message) {
    exec.trace(ctx, message);
  }

  public void info(ParserRuleContext ctx, String message) {
    exec.info(ctx, message);
  }
}