This is an automated email from the ASF dual-hosted git repository.
duanzhengqiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/shardingsphere.git
The following commit(s) were added to refs/heads/master by this push:
new 8bab753b19c Support Hive Writing data into the filesystem from queries
statement parse (#36371)
8bab753b19c is described below
commit 8bab753b19cf87d768e204553ff2449ec3a6e68b
Author: Claire <[email protected]>
AuthorDate: Thu Aug 21 12:01:58 2025 +0800
Support Hive Writing data into the filesystem from queries statement parse
(#36371)
* support Writing data into the filesystem from queries
* update release-notes
---
RELEASE-NOTES.md | 1 +
.../hive/src/main/antlr4/imports/hive/BaseRule.g4 | 45 ++++++
.../src/main/antlr4/imports/hive/DDLStatement.g4 | 45 ------
.../src/main/antlr4/imports/hive/DMLStatement.g4 | 12 +-
.../statement/type/HiveDMLStatementVisitor.java | 50 ++++++
.../parser/src/main/resources/case/dml/insert.xml | 170 +++++++++++++++++++++
.../main/resources/sql/supported/dml/insert.xml | 9 ++
7 files changed, 286 insertions(+), 46 deletions(-)
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 9a011ec1c29..6f9d6343ebb 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -82,6 +82,7 @@
1. SQL Parser: Support Hive SHOW TRANSACTIONS & SHOW COMPACTIONS statement
parse - [#36301](https://github.com/apache/shardingsphere/pull/36301)
1. SQL Parser: Support Hive DESCRIBE statement parse -
[#36350](https://github.com/apache/shardingsphere/pull/36350)
1. SQL Parser: Support Hive Inserting data into Hive Tables from queries
statement parse - [#36320](https://github.com/apache/shardingsphere/pull/36320)
+1. SQL Parser: Support Hive Writing data into the filesystem from queries
statement parse - [#36371](https://github.com/apache/shardingsphere/pull/36371)
1. SQL Parser: Support SQL Server xml methods parse -
[#35911](https://github.com/apache/shardingsphere/pull/35911)
1. SQL Parser: Support SQL Server CHANGETABLE function parse -
[#35920](https://github.com/apache/shardingsphere/pull/35920)
1. SQL Parser: Support SQL Server AI_GENERATE_EMBEDDINGS function parse -
[#35922](https://github.com/apache/shardingsphere/pull/35922)
diff --git a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
index 8cb7f68163d..2c91c97baa5 100644
--- a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
+++ b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
@@ -1336,3 +1336,48 @@ noWriteToBinLog
channelOption
: FOR CHANNEL string_
;
+
+propertyListCommonClause
+ : LP_ propertyList RP_
+ ;
+
+propertyList
+ : property (COMMA_ property)*
+ ;
+
+property
+ : string_ EQ_ string_
+ ;
+
+rowFormat
+ : ROW FORMAT rowFormatType
+ ;
+
+rowFormatType
+ : DELIMITED rowFormatDelimited
+ | SERDE string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
+ ;
+
+rowFormatDelimited
+ : (COLUMNS TERMINATED BY string_ (ESCAPED BY string_)?)?
+ (COLLECTION ITEMS TERMINATED BY string_)?
+ (MAP KEYS TERMINATED BY string_)?
+ (LINES TERMINATED BY string_)?
+ (NULL DEFINED AS string_)?
+ ;
+
+storedClause
+ : STORED AS fileFormat
+ | STORED BY string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
+ ;
+
+fileFormat
+ : SEQUENCEFILE
+ | TEXTFILE
+ | RCFILE
+ | ORC
+ | PARQUET
+ | AVRO
+ | JSONFILE
+ | INPUTFORMAT string_ OUTPUTFORMAT string_
+ ;
diff --git
a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
index acf2eda6c46..f807ba107ea 100644
--- a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
+++ b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
@@ -338,39 +338,6 @@ skewedValue
| literals
;
-rowFormat
- : ROW FORMAT rowFormatType
- ;
-
-rowFormatType
- : DELIMITED rowFormatDelimited
- | SERDE string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
- ;
-
-rowFormatDelimited
- : (COLUMNS TERMINATED BY string_ (ESCAPED BY string_)?)?
- (COLLECTION ITEMS TERMINATED BY string_)?
- (MAP KEYS TERMINATED BY string_)?
- (LINES TERMINATED BY string_)?
- (NULL DEFINED AS string_)?
- ;
-
-storedClause
- : STORED AS fileFormat
- | STORED BY string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
- ;
-
-fileFormat
- : SEQUENCEFILE
- | TEXTFILE
- | RCFILE
- | ORC
- | PARQUET
- | AVRO
- | JSONFILE
- | INPUTFORMAT string_ OUTPUTFORMAT string_
- ;
-
storageLocation
: LOCATION string_
;
@@ -387,14 +354,6 @@ tblProperties
: TBLPROPERTIES propertyListCommonClause
;
-propertyList
- : property (COMMA_ property)*
- ;
-
-property
- : string_ EQ_ string_
- ;
-
addConstraint
: ADD CONSTRAINT constraintName
;
@@ -495,10 +454,6 @@ columnNamesCommonClause
: LP_ columnNames RP_
;
-propertyListCommonClause
- : LP_ propertyList RP_
- ;
-
macroParameterList
: macroParameter (COMMA_ macroParameter)*
;
diff --git
a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
index d33cdab7424..6875c2e2ffb 100644
--- a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
+++ b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
@@ -22,6 +22,7 @@ import BaseRule;
insert
: INSERT insertSpecification INTO? tableName partitionNames?
(insertValuesClause | setAssignmentsClause | insertSelectClause)
onDuplicateKeyClause?
| insertDataIntoTablesFromQueries
+ | writingDataIntoFileSystem
;
insertSpecification
@@ -368,7 +369,7 @@ dynamicPartitionInserts
;
hiveMultipleInserts
- : hiveInsertStatement (hiveInsertStatement)*
+ : hiveInsertStatement+
;
hiveInsertStatement
@@ -387,3 +388,12 @@ dynamicPartitionKey
partitionClause
: partitionSpec ifNotExists?
;
+
+writingDataIntoFileSystem
+ : insertOverwriteStandardSyntax
+ | fromClause insertOverwriteStandardSyntax+
+ ;
+
+insertOverwriteStandardSyntax
+ : INSERT OVERWRITE LOCAL? DIRECTORY string_ rowFormat? storedClause? select
+ ;
diff --git
a/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
b/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
index 99cfda8d859..ae2e2dab0ce 100644
---
a/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
+++
b/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
@@ -123,6 +123,8 @@ import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.Multiple
import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionInsertsContext;
import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.HiveMultipleInsertsContext;
import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.HiveInsertStatementContext;
+import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.WritingDataIntoFileSystemContext;
+import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.InsertOverwriteStandardSyntaxContext;
import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionClauseContext;
import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionKeyContext;
import
org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.TableNameContext;
@@ -873,6 +875,9 @@ public final class HiveDMLStatementVisitor extends
HiveStatementVisitor implemen
if (null != ctx.insertDataIntoTablesFromQueries()) {
return visit(ctx.insertDataIntoTablesFromQueries());
}
+ if (null != ctx.writingDataIntoFileSystem()) {
+ return visit(ctx.writingDataIntoFileSystem());
+ }
InsertStatement result;
if (null != ctx.insertValuesClause()) {
result = (InsertStatement) visit(ctx.insertValuesClause());
@@ -990,6 +995,51 @@ public final class HiveDMLStatementVisitor extends
HiveStatementVisitor implemen
return new PartitionSegment(ctx.getStart().getStartIndex(),
ctx.getStop().getStopIndex(), new IdentifierValue(ctx.identifier().getText()));
}
+ @Override
+ public ASTNode visitWritingDataIntoFileSystem(final
WritingDataIntoFileSystemContext ctx) {
+ List<InsertOverwriteStandardSyntaxContext> statements =
ctx.insertOverwriteStandardSyntax();
+ if (1 == statements.size() && null == ctx.fromClause()) {
+ return visit(statements.get(0));
+ }
+ final TableSegment sourceTable = null != ctx.fromClause() ?
(TableSegment) visit(ctx.fromClause()) : null;
+ if (1 == statements.size()) {
+ InsertStatement single = (InsertStatement)
visit(statements.get(0));
+ if (null != sourceTable) {
+ single.getInsertSelect().ifPresent(subquery ->
setFromForSelect(subquery, sourceTable));
+ }
+ single.addParameterMarkers(getParameterMarkerSegments());
+ return single;
+ }
+ InsertStatement result = new InsertStatement(getDatabaseType());
+ result.setMultiTableInsertType(MultiTableInsertType.ALL);
+ MultiTableInsertIntoSegment multiTableInsertInto = new
MultiTableInsertIntoSegment(
+ ctx.getStart().getStartIndex(), ctx.getStop().getStopIndex());
+ for (InsertOverwriteStandardSyntaxContext each : statements) {
+ InsertStatement insertStmt = (InsertStatement) visit(each);
+ if (null != sourceTable) {
+ insertStmt.getInsertSelect().ifPresent(subquery ->
setFromForSelect(subquery, sourceTable));
+ }
+ insertStmt.addParameterMarkers(getParameterMarkerSegments());
+ multiTableInsertInto.getInsertStatements().add(insertStmt);
+ }
+ result.setMultiTableInsertInto(multiTableInsertInto);
+ result.addParameterMarkers(getParameterMarkerSegments());
+ return result;
+ }
+
+ @Override
+ public ASTNode visitInsertOverwriteStandardSyntax(final
InsertOverwriteStandardSyntaxContext ctx) {
+ return createHiveInsertStatementForDirectory(ctx.select(),
ctx.start.getStartIndex());
+ }
+
+ private InsertStatement createHiveInsertStatementForDirectory(final
SelectContext select, final int startIndex) {
+ InsertStatement result = new InsertStatement(getDatabaseType());
+ result.setInsertColumns(new InsertColumnsSegment(startIndex,
startIndex, Collections.emptyList()));
+ result.setInsertSelect(createInsertSelectSegment(select));
+ result.addParameterMarkers(getParameterMarkerSegments());
+ return result;
+ }
+
private SubquerySegment createInsertSelectSegment(final SelectContext ctx)
{
SelectStatement selectStatement = (SelectStatement) visit(ctx);
return new SubquerySegment(ctx.start.getStartIndex(),
ctx.stop.getStopIndex(), selectStatement, getOriginalText(ctx));
diff --git a/test/it/parser/src/main/resources/case/dml/insert.xml
b/test/it/parser/src/main/resources/case/dml/insert.xml
index b11d3e2b1a9..b6b9c5880a2 100644
--- a/test/it/parser/src/main/resources/case/dml/insert.xml
+++ b/test/it/parser/src/main/resources/case/dml/insert.xml
@@ -5123,4 +5123,174 @@
</where>
</select>
</insert>
+
+ <insert sql-case-id="insert_overwrite_directory_basic">
+ <columns start-index="0" stop-index="0" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="68" stop-index="75"
/>
+ </from>
+ <projections start-index="54" stop-index="61">
+ <column-projection name="id" start-index="54" stop-index="55"
/>
+ <column-projection name="name" start-index="58"
stop-index="61" />
+ </projections>
+ </select>
+ </insert>
+
+ <insert sql-case-id="insert_overwrite_local_directory">
+ <columns start-index="0" stop-index="0" />
+ <select>
+ <from>
+ <simple-table name="sales" start-index="74" stop-index="78" />
+ </from>
+ <projections start-index="60" stop-index="67">
+ <column-projection name="order_id" start-index="60"
stop-index="67" />
+ </projections>
+ </select>
+ </insert>
+
+ <insert sql-case-id="insert_overwrite_directory_with_row_format">
+ <columns start-index="0" stop-index="0" />
+ <select>
+ <from>
+ <simple-table name="products" start-index="121"
stop-index="128" />
+ </from>
+ <projections start-index="95" stop-index="114">
+ <column-projection name="product_id" start-index="95"
stop-index="104" />
+ <column-projection name="category" start-index="107"
stop-index="114" />
+ </projections>
+ </select>
+ </insert>
+
+ <insert sql-case-id="insert_overwrite_directory_with_file_format">
+ <columns start-index="0" stop-index="0" />
+ <select>
+ <from>
+ <simple-table name="system_logs" start-index="91"
stop-index="101" />
+ </from>
+ <projections start-index="68" stop-index="84">
+ <column-projection name="log_date" start-index="68"
stop-index="75" />
+ <column-projection name="message" start-index="78"
stop-index="84" />
+ </projections>
+ </select>
+ </insert>
+
+ <insert
sql-case-id="insert_overwrite_local_with_row_format_and_file_format">
+ <columns start-index="0" stop-index="0" />
+ <select>
+ <from>
+ <simple-table name="user_activity" start-index="135"
stop-index="147" />
+ </from>
+ <projections start-index="119" stop-index="128">
+ <column-projection name="login_time" start-index="119"
stop-index="128" />
+ </projections>
+ </select>
+ </insert>
+
+ <insert sql-case-id="single_insert_overwrite_directory">
+ <columns start-index="14" stop-index="14" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5" stop-index="12"
/>
+ </from>
+ <projections start-index="72" stop-index="73">
+ <column-projection name="id" start-index="72" stop-index="73"
/>
+ </projections>
+ </select>
+ </insert>
+
+ <insert sql-case-id="two_insert_overwrite_directory">
+ <multi-table-insert-type value="ALL" />
+ <multi-table-insert-into start-index="0" stop-index="136"
literal-start-index="0" literal-stop-index="136">
+ <insert-statement>
+ <columns start-index="14" stop-index="14" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="72" stop-index="73">
+ <column-projection name="id" start-index="72"
stop-index="73" />
+ </projections>
+ </select>
+ </insert-statement>
+ <insert-statement>
+ <columns start-index="75" stop-index="75" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="133" stop-index="136">
+ <column-projection name="name" start-index="133"
stop-index="136" />
+ </projections>
+ </select>
+ </insert-statement>
+ </multi-table-insert-into>
+ </insert>
+
+ <insert sql-case-id="two_insert_overwrite_directory_with_local">
+ <multi-table-insert-type value="ALL" />
+ <multi-table-insert-into start-index="0" stop-index="148"
literal-start-index="0" literal-stop-index="148">
+ <insert-statement>
+ <columns start-index="14" stop-index="14" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="78" stop-index="79">
+ <column-projection name="id" start-index="78"
stop-index="79" />
+ </projections>
+ </select>
+ </insert-statement>
+ <insert-statement>
+ <columns start-index="81" stop-index="81" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="145" stop-index="148">
+ <column-projection name="name" start-index="145"
stop-index="148" />
+ </projections>
+ </select>
+ </insert-statement>
+ </multi-table-insert-into>
+ </insert>
+
+ <insert sql-case-id="three_insert_overwrite_directory">
+ <multi-table-insert-type value="ALL" />
+ <multi-table-insert-into start-index="0" stop-index="199"
literal-start-index="0" literal-stop-index="199">
+ <insert-statement>
+ <columns start-index="14" stop-index="14" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="72" stop-index="73">
+ <column-projection name="id" start-index="72"
stop-index="73" />
+ </projections>
+ </select>
+ </insert-statement>
+ <insert-statement>
+ <columns start-index="75" stop-index="75" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="133" stop-index="136">
+ <column-projection name="name" start-index="133"
stop-index="136" />
+ </projections>
+ </select>
+ </insert-statement>
+ <insert-statement>
+ <columns start-index="138" stop-index="138" />
+ <select>
+ <from>
+ <simple-table name="employee" start-index="5"
stop-index="12" />
+ </from>
+ <projections start-index="196" stop-index="199">
+ <column-projection name="user" start-index="196"
stop-index="199" />
+ </projections>
+ </select>
+ </insert-statement>
+ </multi-table-insert-into>
+ </insert>
</sql-parser-test-cases>
diff --git a/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
b/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
index 8506a6e9c65..7f717609979 100644
--- a/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
+++ b/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
@@ -180,4 +180,13 @@
<sql-case id="dynamic_partition_into_basic" value="INSERT INTO TABLE sales
PARTITION (region, year) select id FROM raw_sales;" db-types="Hive" />
<sql-case id="dynamic_partition_into_mixed" value="INSERT INTO TABLE
user_logs PARTITION (country='CN', province) select user_id FROM all_logs where
country='CN';" db-types="Hive" />
<sql-case id="dynamic_partition_into_filtered" value="INSERT INTO TABLE
order_stats PARTITION (month='August', status='active') select order_id FROM
order_details where amount > 100;" db-types="Hive" />
+ <sql-case id="insert_overwrite_directory_basic" value="INSERT OVERWRITE
DIRECTORY '/user/hive/output' SELECT id, name FROM employee;" db-types="Hive" />
+ <sql-case id="insert_overwrite_local_directory" value="INSERT OVERWRITE
LOCAL DIRECTORY '/tmp/local_output' SELECT order_id FROM sales;"
db-types="Hive" />
+ <sql-case id="insert_overwrite_directory_with_row_format" value="INSERT
OVERWRITE DIRECTORY '/data/export' ROW FORMAT DELIMITED FIELDS TERMINATED BY
',' SELECT product_id, category FROM products;" db-types="Hive" />
+ <sql-case id="insert_overwrite_directory_with_file_format" value="INSERT
OVERWRITE DIRECTORY '/archive/logs' STORED AS PARQUET SELECT log_date, message
FROM system_logs;" db-types="Hive" />
+ <sql-case id="insert_overwrite_local_with_row_format_and_file_format"
value="INSERT OVERWRITE LOCAL DIRECTORY '/tmp/report' ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',' STORED AS TEXTFILE SELECT login_time FROM
user_activity;" db-types="Hive" />
+ <sql-case id="single_insert_overwrite_directory" value="FROM employee
INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT id;" db-types="Hive"
/>
+ <sql-case id="two_insert_overwrite_directory" value="FROM employee INSERT
OVERWRITE DIRECTORY '/user/output/emp_data' SELECT id INSERT OVERWRITE
DIRECTORY '/user/output/emp_data' SELECT name;" db-types="Hive" />
+ <sql-case id="two_insert_overwrite_directory_with_local" value="FROM
employee INSERT OVERWRITE LOCAL DIRECTORY '/user/output/emp_data' SELECT id
INSERT OVERWRITE LOCAL DIRECTORY '/user/output/emp_data' SELECT name;"
db-types="Hive" />
+ <sql-case id="three_insert_overwrite_directory" value="FROM employee
INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT id INSERT OVERWRITE
DIRECTORY '/user/output/emp_data' SELECT name INSERT OVERWRITE DIRECTORY
'/user/output/emp_data' SELECT user;" db-types="Hive" />
</sql-cases>