This is an automated email from the ASF dual-hosted git repository.

duanzhengqiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/shardingsphere.git


The following commit(s) were added to refs/heads/master by this push:
     new 8bab753b19c Support Hive Writing data into the filesystem from queries statement parse (#36371)
8bab753b19c is described below

commit 8bab753b19cf87d768e204553ff2449ec3a6e68b
Author: Claire <[email protected]>
AuthorDate: Thu Aug 21 12:01:58 2025 +0800

    Support Hive Writing data into the filesystem from queries statement parse (#36371)
    
    * support Writing data into the filesystem from queries
    
    * update release-notes
---
 RELEASE-NOTES.md                                   |   1 +
 .../hive/src/main/antlr4/imports/hive/BaseRule.g4  |  45 ++++++
 .../src/main/antlr4/imports/hive/DDLStatement.g4   |  45 ------
 .../src/main/antlr4/imports/hive/DMLStatement.g4   |  12 +-
 .../statement/type/HiveDMLStatementVisitor.java    |  50 ++++++
 .../parser/src/main/resources/case/dml/insert.xml  | 170 +++++++++++++++++++++
 .../main/resources/sql/supported/dml/insert.xml    |   9 ++
 7 files changed, 286 insertions(+), 46 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 9a011ec1c29..6f9d6343ebb 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -82,6 +82,7 @@
 1. SQL Parser: Support Hive SHOW TRANSACTIONS & SHOW COMPACTIONS statement parse - [#36301](https://github.com/apache/shardingsphere/pull/36301)
 1. SQL Parser: Support Hive DESCRIBE statement parse - [#36350](https://github.com/apache/shardingsphere/pull/36350)
 1. SQL Parser: Support Hive Inserting data into Hive Tables from queries statement parse - [#36320](https://github.com/apache/shardingsphere/pull/36320)
+1. SQL Parser: Support Hive Writing data into the filesystem from queries statement parse - [#36371](https://github.com/apache/shardingsphere/pull/36371)
 1. SQL Parser: Support SQL Server xml methods parse - [#35911](https://github.com/apache/shardingsphere/pull/35911)
 1. SQL Parser: Support SQL Server CHANGETABLE function parse - [#35920](https://github.com/apache/shardingsphere/pull/35920)
 1. SQL Parser: Support SQL Server AI_GENERATE_EMBEDDINGS function parse - [#35922](https://github.com/apache/shardingsphere/pull/35922)
diff --git a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4 
b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
index 8cb7f68163d..2c91c97baa5 100644
--- a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
+++ b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/BaseRule.g4
@@ -1336,3 +1336,48 @@ noWriteToBinLog
 channelOption
     : FOR CHANNEL string_
     ;
+
+propertyListCommonClause
+    : LP_ propertyList RP_
+    ;
+
+propertyList
+    : property (COMMA_ property)*
+    ;
+
+property
+    : string_ EQ_ string_
+    ;
+
+rowFormat
+    : ROW FORMAT rowFormatType
+    ;
+
+rowFormatType
+    : DELIMITED rowFormatDelimited
+    | SERDE string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
+    ;
+
+rowFormatDelimited
+    : (COLUMNS TERMINATED BY string_ (ESCAPED BY string_)?)?
+      (COLLECTION ITEMS TERMINATED BY string_)?
+      (MAP KEYS TERMINATED BY string_)?
+      (LINES TERMINATED BY string_)?
+      (NULL DEFINED AS string_)?
+    ;
+
+storedClause
+    : STORED AS fileFormat
+    | STORED BY string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
+    ;
+
+fileFormat
+    : SEQUENCEFILE
+    | TEXTFILE
+    | RCFILE
+    | ORC
+    | PARQUET
+    | AVRO
+    | JSONFILE
+    | INPUTFORMAT string_ OUTPUTFORMAT string_
+    ;
diff --git a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4 b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
index acf2eda6c46..f807ba107ea 100644
--- a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
+++ b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DDLStatement.g4
@@ -338,39 +338,6 @@ skewedValue
     | literals
     ;
 
-rowFormat
-    : ROW FORMAT rowFormatType
-    ;
-
-rowFormatType
-    : DELIMITED rowFormatDelimited
-    | SERDE string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
-    ;
-
-rowFormatDelimited
-    : (COLUMNS TERMINATED BY string_ (ESCAPED BY string_)?)?
-      (COLLECTION ITEMS TERMINATED BY string_)?
-      (MAP KEYS TERMINATED BY string_)?
-      (LINES TERMINATED BY string_)?
-      (NULL DEFINED AS string_)?
-    ;
-
-storedClause
-    : STORED AS fileFormat
-    | STORED BY string_ (WITH SERDEPROPERTIES propertyListCommonClause)?
-    ;
-
-fileFormat
-    : SEQUENCEFILE
-    | TEXTFILE
-    | RCFILE
-    | ORC
-    | PARQUET
-    | AVRO
-    | JSONFILE
-    | INPUTFORMAT string_ OUTPUTFORMAT string_
-    ;
-
 storageLocation
     : LOCATION string_
     ;
@@ -387,14 +354,6 @@ tblProperties
     : TBLPROPERTIES propertyListCommonClause
     ;
 
-propertyList
-    : property (COMMA_ property)*
-    ;
-
-property
-    : string_ EQ_ string_
-    ;
-
 addConstraint
     : ADD CONSTRAINT constraintName
     ;
@@ -495,10 +454,6 @@ columnNamesCommonClause
     : LP_ columnNames RP_
     ;
 
-propertyListCommonClause
-    : LP_ propertyList RP_
-    ;
-
 macroParameterList
     : macroParameter (COMMA_ macroParameter)*
     ;
diff --git a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4 b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
index d33cdab7424..6875c2e2ffb 100644
--- a/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
+++ b/parser/sql/dialect/hive/src/main/antlr4/imports/hive/DMLStatement.g4
@@ -22,6 +22,7 @@ import BaseRule;
 insert
     : INSERT insertSpecification INTO? tableName partitionNames? (insertValuesClause | setAssignmentsClause | insertSelectClause) onDuplicateKeyClause?
     | insertDataIntoTablesFromQueries
+    | writingDataIntoFileSystem
     ;
 
 insertSpecification
@@ -368,7 +369,7 @@ dynamicPartitionInserts
     ;
 
 hiveMultipleInserts
-    : hiveInsertStatement (hiveInsertStatement)*
+    : hiveInsertStatement+
     ;
 
 hiveInsertStatement
@@ -387,3 +388,12 @@ dynamicPartitionKey
 partitionClause
     : partitionSpec ifNotExists?
     ;
+
+writingDataIntoFileSystem
+    : insertOverwriteStandardSyntax
+    | fromClause insertOverwriteStandardSyntax+
+    ;
+
+insertOverwriteStandardSyntax
+    : INSERT OVERWRITE LOCAL? DIRECTORY string_ rowFormat? storedClause? select
+    ;
diff --git a/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java b/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
index 99cfda8d859..ae2e2dab0ce 100644
--- a/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
+++ b/parser/sql/dialect/hive/src/main/java/org/apache/shardingsphere/sql/parser/hive/visitor/statement/type/HiveDMLStatementVisitor.java
@@ -123,6 +123,8 @@ import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.Multiple
 import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionInsertsContext;
 import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.HiveMultipleInsertsContext;
 import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.HiveInsertStatementContext;
+import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.WritingDataIntoFileSystemContext;
+import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.InsertOverwriteStandardSyntaxContext;
 import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionClauseContext;
 import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.DynamicPartitionKeyContext;
 import org.apache.shardingsphere.sql.parser.autogen.HiveStatementParser.TableNameContext;
@@ -873,6 +875,9 @@ public final class HiveDMLStatementVisitor extends HiveStatementVisitor implemen
         if (null != ctx.insertDataIntoTablesFromQueries()) {
             return visit(ctx.insertDataIntoTablesFromQueries());
         }
+        if (null != ctx.writingDataIntoFileSystem()) {
+            return visit(ctx.writingDataIntoFileSystem());
+        }
         InsertStatement result;
         if (null != ctx.insertValuesClause()) {
             result = (InsertStatement) visit(ctx.insertValuesClause());
@@ -990,6 +995,51 @@ public final class HiveDMLStatementVisitor extends HiveStatementVisitor implemen
         return new PartitionSegment(ctx.getStart().getStartIndex(), ctx.getStop().getStopIndex(), new IdentifierValue(ctx.identifier().getText()));
     }
     
+    @Override
+    public ASTNode visitWritingDataIntoFileSystem(final WritingDataIntoFileSystemContext ctx) {
+        List<InsertOverwriteStandardSyntaxContext> statements = ctx.insertOverwriteStandardSyntax();
+        if (1 == statements.size() && null == ctx.fromClause()) {
+            return visit(statements.get(0));
+        }
+        final TableSegment sourceTable = null != ctx.fromClause() ? (TableSegment) visit(ctx.fromClause()) : null;
+        if (1 == statements.size()) {
+            InsertStatement single = (InsertStatement) visit(statements.get(0));
+            if (null != sourceTable) {
+                single.getInsertSelect().ifPresent(subquery -> setFromForSelect(subquery, sourceTable));
+            }
+            single.addParameterMarkers(getParameterMarkerSegments());
+            return single;
+        }
+        InsertStatement result = new InsertStatement(getDatabaseType());
+        result.setMultiTableInsertType(MultiTableInsertType.ALL);
+        MultiTableInsertIntoSegment multiTableInsertInto = new MultiTableInsertIntoSegment(
+                ctx.getStart().getStartIndex(), ctx.getStop().getStopIndex());
+        for (InsertOverwriteStandardSyntaxContext each : statements) {
+            InsertStatement insertStmt = (InsertStatement) visit(each);
+            if (null != sourceTable) {
+                insertStmt.getInsertSelect().ifPresent(subquery -> setFromForSelect(subquery, sourceTable));
+            }
+            insertStmt.addParameterMarkers(getParameterMarkerSegments());
+            multiTableInsertInto.getInsertStatements().add(insertStmt);
+        }
+        result.setMultiTableInsertInto(multiTableInsertInto);
+        result.addParameterMarkers(getParameterMarkerSegments());
+        return result;
+    }
+    
+    @Override
+    public ASTNode visitInsertOverwriteStandardSyntax(final InsertOverwriteStandardSyntaxContext ctx) {
+        return createHiveInsertStatementForDirectory(ctx.select(), ctx.start.getStartIndex());
+    }
+    
+    private InsertStatement createHiveInsertStatementForDirectory(final SelectContext select, final int startIndex) {
+        InsertStatement result = new InsertStatement(getDatabaseType());
+        result.setInsertColumns(new InsertColumnsSegment(startIndex, startIndex, Collections.emptyList()));
+        result.setInsertSelect(createInsertSelectSegment(select));
+        result.addParameterMarkers(getParameterMarkerSegments());
+        return result;
+    }
+    
     private SubquerySegment createInsertSelectSegment(final SelectContext ctx) {
         SelectStatement selectStatement = (SelectStatement) visit(ctx);
         return new SubquerySegment(ctx.start.getStartIndex(), ctx.stop.getStopIndex(), selectStatement, getOriginalText(ctx));
diff --git a/test/it/parser/src/main/resources/case/dml/insert.xml b/test/it/parser/src/main/resources/case/dml/insert.xml
index b11d3e2b1a9..b6b9c5880a2 100644
--- a/test/it/parser/src/main/resources/case/dml/insert.xml
+++ b/test/it/parser/src/main/resources/case/dml/insert.xml
@@ -5123,4 +5123,174 @@
             </where>
         </select>
     </insert>
+
+    <insert sql-case-id="insert_overwrite_directory_basic">
+        <columns start-index="0" stop-index="0" />
+        <select>
+            <from>
+                <simple-table name="employee" start-index="68" stop-index="75" />
+            </from>
+            <projections start-index="54" stop-index="61">
+                <column-projection name="id" start-index="54" stop-index="55" />
+                <column-projection name="name" start-index="58" stop-index="61" />
+            </projections>
+        </select>
+    </insert>
+
+    <insert sql-case-id="insert_overwrite_local_directory">
+        <columns start-index="0" stop-index="0" />
+        <select>
+            <from>
+                <simple-table name="sales" start-index="74" stop-index="78" />
+            </from>
+            <projections start-index="60" stop-index="67">
+                <column-projection name="order_id" start-index="60" stop-index="67" />
+            </projections>
+        </select>
+    </insert>
+
+    <insert sql-case-id="insert_overwrite_directory_with_row_format">
+        <columns start-index="0" stop-index="0" />
+        <select>
+            <from>
+                <simple-table name="products" start-index="121" stop-index="128" />
+            </from>
+            <projections start-index="95" stop-index="114">
+                <column-projection name="product_id" start-index="95" stop-index="104" />
+                <column-projection name="category" start-index="107" stop-index="114" />
+            </projections>
+        </select>
+    </insert>
+
+    <insert sql-case-id="insert_overwrite_directory_with_file_format">
+        <columns start-index="0" stop-index="0" />
+        <select>
+            <from>
+                <simple-table name="system_logs" start-index="91" stop-index="101" />
+            </from>
+            <projections start-index="68" stop-index="84">
+                <column-projection name="log_date" start-index="68" stop-index="75" />
+                <column-projection name="message" start-index="78" stop-index="84" />
+            </projections>
+        </select>
+    </insert>
+
+    <insert sql-case-id="insert_overwrite_local_with_row_format_and_file_format">
+        <columns start-index="0" stop-index="0" />
+        <select>
+            <from>
+                <simple-table name="user_activity" start-index="135" stop-index="147" />
+            </from>
+            <projections start-index="119" stop-index="128">
+                <column-projection name="login_time" start-index="119" stop-index="128" />
+            </projections>
+        </select>
+    </insert>
+
+    <insert sql-case-id="single_insert_overwrite_directory">
+        <columns start-index="14" stop-index="14" />
+        <select>
+            <from>
+                <simple-table name="employee" start-index="5" stop-index="12" />
+            </from>
+            <projections start-index="72" stop-index="73">
+                <column-projection name="id" start-index="72" stop-index="73" />
+            </projections>
+        </select>
+    </insert>
+
+    <insert sql-case-id="two_insert_overwrite_directory">
+        <multi-table-insert-type value="ALL" />
+        <multi-table-insert-into start-index="0" stop-index="136" literal-start-index="0" literal-stop-index="136">
+            <insert-statement>
+                <columns start-index="14" stop-index="14" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="72" stop-index="73">
+                        <column-projection name="id" start-index="72" stop-index="73" />
+                    </projections>
+                </select>
+            </insert-statement>
+            <insert-statement>
+                <columns start-index="75" stop-index="75" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="133" stop-index="136">
+                        <column-projection name="name" start-index="133" stop-index="136" />
+                    </projections>
+                </select>
+            </insert-statement>
+        </multi-table-insert-into>
+    </insert>
+
+    <insert sql-case-id="two_insert_overwrite_directory_with_local">
+        <multi-table-insert-type value="ALL" />
+        <multi-table-insert-into start-index="0" stop-index="148" literal-start-index="0" literal-stop-index="148">
+            <insert-statement>
+                <columns start-index="14" stop-index="14" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="78" stop-index="79">
+                        <column-projection name="id" start-index="78" stop-index="79" />
+                    </projections>
+                </select>
+            </insert-statement>
+            <insert-statement>
+                <columns start-index="81" stop-index="81" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="145" stop-index="148">
+                        <column-projection name="name" start-index="145" stop-index="148" />
+                    </projections>
+                </select>
+            </insert-statement>
+        </multi-table-insert-into>
+    </insert>
+
+    <insert sql-case-id="three_insert_overwrite_directory">
+        <multi-table-insert-type value="ALL" />
+        <multi-table-insert-into start-index="0" stop-index="199" literal-start-index="0" literal-stop-index="199">
+            <insert-statement>
+                <columns start-index="14" stop-index="14" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="72" stop-index="73">
+                        <column-projection name="id" start-index="72" stop-index="73" />
+                    </projections>
+                </select>
+            </insert-statement>
+            <insert-statement>
+                <columns start-index="75" stop-index="75" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="133" stop-index="136">
+                        <column-projection name="name" start-index="133" stop-index="136" />
+                    </projections>
+                </select>
+            </insert-statement>
+            <insert-statement>
+                <columns start-index="138" stop-index="138" />
+                <select>
+                    <from>
+                        <simple-table name="employee" start-index="5" stop-index="12" />
+                    </from>
+                    <projections start-index="196" stop-index="199">
+                        <column-projection name="user" start-index="196" stop-index="199" />
+                    </projections>
+                </select>
+            </insert-statement>
+        </multi-table-insert-into>
+    </insert>
 </sql-parser-test-cases>
diff --git a/test/it/parser/src/main/resources/sql/supported/dml/insert.xml b/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
index 8506a6e9c65..7f717609979 100644
--- a/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
+++ b/test/it/parser/src/main/resources/sql/supported/dml/insert.xml
@@ -180,4 +180,13 @@
     <sql-case id="dynamic_partition_into_basic" value="INSERT INTO TABLE sales PARTITION (region, year) select id FROM raw_sales;" db-types="Hive" />
     <sql-case id="dynamic_partition_into_mixed" value="INSERT INTO TABLE user_logs PARTITION (country='CN', province) select user_id FROM all_logs where country='CN';" db-types="Hive" />
     <sql-case id="dynamic_partition_into_filtered" value="INSERT INTO TABLE order_stats PARTITION (month='August', status='active') select order_id FROM order_details where amount > 100;" db-types="Hive" />
+    <sql-case id="insert_overwrite_directory_basic" value="INSERT OVERWRITE DIRECTORY '/user/hive/output' SELECT id, name FROM employee;" db-types="Hive" />
+    <sql-case id="insert_overwrite_local_directory" value="INSERT OVERWRITE LOCAL DIRECTORY '/tmp/local_output' SELECT order_id FROM sales;" db-types="Hive" />
+    <sql-case id="insert_overwrite_directory_with_row_format" value="INSERT OVERWRITE DIRECTORY '/data/export' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' SELECT product_id, category FROM products;" db-types="Hive" />
+    <sql-case id="insert_overwrite_directory_with_file_format" value="INSERT OVERWRITE DIRECTORY '/archive/logs' STORED AS PARQUET SELECT log_date, message FROM system_logs;" db-types="Hive" />
+    <sql-case id="insert_overwrite_local_with_row_format_and_file_format" value="INSERT OVERWRITE LOCAL DIRECTORY '/tmp/report' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE SELECT login_time FROM user_activity;" db-types="Hive" />
+    <sql-case id="single_insert_overwrite_directory" value="FROM employee INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT id;" db-types="Hive" />
+    <sql-case id="two_insert_overwrite_directory" value="FROM employee INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT id INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT name;" db-types="Hive" />
+    <sql-case id="two_insert_overwrite_directory_with_local" value="FROM employee INSERT OVERWRITE LOCAL DIRECTORY '/user/output/emp_data' SELECT id INSERT OVERWRITE LOCAL DIRECTORY '/user/output/emp_data' SELECT name;" db-types="Hive" />
+    <sql-case id="three_insert_overwrite_directory" value="FROM employee INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT id INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT name INSERT OVERWRITE DIRECTORY '/user/output/emp_data' SELECT user;" db-types="Hive" />
 </sql-cases>

Reply via email to