This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 15d698eaf96 branch-3.0: [enhance](hive) support skip.header.line.count #49929 (#49975)
15d698eaf96 is described below

commit 15d698eaf96569d82434ecca948ca78037edfc66
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Apr 25 09:31:53 2025 +0800

    branch-3.0: [enhance](hive) support skip.header.line.count #49929 (#49975)
    
    Cherry-picked from #49929
    
    Co-authored-by: Socrates <[email protected]>
---
 .../scripts/create_preinstalled_scripts/run76.hql  |  21 ++++++++++
 .../text_table_compressed_skip_header/000000_0.gz  | Bin 0 -> 149 bytes
 .../text/text_table_normal_skip_header/000000_0    |  20 +++++++++
 .../doris/datasource/hive/HiveProperties.java      |  22 +++++++++-
 .../doris/datasource/hive/source/HiveScanNode.java |   3 ++
 .../hive/test_text_skip_header.out                 | Bin 0 -> 755 bytes
 .../hive/test_text_skip_header.groovy              |  45 +++++++++++++++++++++
 7 files changed, 110 insertions(+), 1 deletion(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run76.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run76.hql
new file mode 100755
index 00000000000..451f74482e3
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run76.hql
@@ -0,0 +1,21 @@
+use multi_catalog;
+
+CREATE TABLE text_table_normal_skip_header (
+  id INT,
+  name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+LOCATION '/user/doris/preinstalled_data/text/text_table_normal_skip_header'
+TBLPROPERTIES ("skip.header.line.count"="2");
+
+CREATE TABLE text_table_compressed_skip_header (
+  id INT,
+  name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+LOCATION '/user/doris/preinstalled_data/text/text_table_compressed_skip_header'
+TBLPROPERTIES ("skip.header.line.count"="5");
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/text_table_compressed_skip_header/000000_0.gz
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/text_table_compressed_skip_header/000000_0.gz
new file mode 100644
index 00000000000..049b58f3c6c
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/text_table_compressed_skip_header/000000_0.gz
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/text_table_normal_skip_header/000000_0
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/text_table_normal_skip_header/000000_0
new file mode 100644
index 00000000000..5c4e04c09fe
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/text_table_normal_skip_header/000000_0
@@ -0,0 +1,20 @@
+1      Alice
+2      Bob
+3      Charlie
+4      David
+5      Eva
+6      Frank
+7      Grace
+8      Henry
+9      Ivy
+10     Jack
+11     Kathy
+12     Leo
+13     Mona
+14     Nate
+15     Olive
+16     Paul
+17     Queen
+18     Ray
+19     Susan
+20     Tom
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
index 74f3dcc1a9d..7c4d5279969 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
@@ -47,6 +47,12 @@ public class HiveProperties {
     public static final String PROP_NULL_FORMAT = "serialization.null.format";
     public static final String DEFAULT_NULL_FORMAT = "\\N";
 
+    public static final String PROP_SKIP_HEADER_COUNT = 
"skip.header.line.count";
+    public static final String DEFAULT_SKIP_HEADER_COUNT = "0";
+
+    public static final String PROP_SKIP_FOOTER_COUNT = 
"skip.footer.line.count";
+    public static final String DEFAULT_SKIP_FOOTER_COUNT = "0";
+
     // The following properties are used for OpenCsvSerde.
     public static final String PROP_SEPARATOR_CHAR = 
OpenCSVSerde.SEPARATORCHAR;
     public static final String DEFAULT_SEPARATOR_CHAR = ",";
@@ -66,7 +72,9 @@ public class HiveProperties {
             PROP_MAP_KV_DELIMITER,
             PROP_ESCAPE_DELIMITER,
             PROP_ESCAPE_CHAR,
-            PROP_NULL_FORMAT);
+            PROP_NULL_FORMAT,
+            PROP_SKIP_HEADER_COUNT,
+            PROP_SKIP_FOOTER_COUNT);
 
     public static String getFieldDelimiter(Table table) {
         // This method is used for text format.
@@ -131,6 +139,18 @@ public class HiveProperties {
         return 
HiveMetaStoreClientHelper.firstPresentOrDefault(DEFAULT_ESCAPE_CHAR, 
escapeChar);
     }
 
+    public static int getSkipHeaderCount(Table table) {
+        Optional<String> skipHeaderCount = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SKIP_HEADER_COUNT);
+        return Integer
+                
.parseInt(HiveMetaStoreClientHelper.firstPresentOrDefault(DEFAULT_SKIP_HEADER_COUNT,
 skipHeaderCount));
+    }
+
+    public static int getSkipFooterCount(Table table) {
+        Optional<String> skipFooterCount = 
HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SKIP_FOOTER_COUNT);
+        return Integer
+                
.parseInt(HiveMetaStoreClientHelper.firstPresentOrDefault(DEFAULT_SKIP_FOOTER_COUNT,
 skipFooterCount));
+    }
+
     // Set properties to table
     public static void setTableProperties(Table table, Map<String, String> 
properties) {
         HashMap<String, String> serdeProps = new HashMap<>();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index b14dfbf02f4..80a7e64f643 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -425,6 +425,9 @@ public class HiveScanNode extends FileQueryScanNode {
     protected TFileAttributes getFileAttributes() throws UserException {
         TFileAttributes fileAttributes = new TFileAttributes();
         Table table = hmsTable.getRemoteTable();
+        // set skip header count
+        // TODO: support skip footer count
+        fileAttributes.setSkipLines(HiveProperties.getSkipHeaderCount(table));
         // TODO: separate hive text table and OpenCsv table
         String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
         if 
(serDeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) {
diff --git 
a/regression-test/data/external_table_p0/hive/test_text_skip_header.out 
b/regression-test/data/external_table_p0/hive/test_text_skip_header.out
new file mode 100644
index 00000000000..d8115ed9c85
Binary files /dev/null and 
b/regression-test/data/external_table_p0/hive/test_text_skip_header.out differ
diff --git 
a/regression-test/suites/external_table_p0/hive/test_text_skip_header.groovy 
b/regression-test/suites/external_table_p0/hive/test_text_skip_header.groovy
new file mode 100644
index 00000000000..375190f6414
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_text_skip_header.groovy
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_hive_text_skip_header","p0,external,tvf,hive,external_docker,external_docker_hive")
 {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2","hive3"]) {
+    
+        String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String catalog_name = "${hivePrefix}_test_utf8_check"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        def hdfsUserName = "doris"
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + 
"HdfsPort")
+        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+        );"""
+        sql """use `${catalog_name}`.`multi_catalog`"""
+
+        qt_text_skip_header_normal """select * from 
text_table_normal_skip_header order by id;"""
+        qt_text_skip_header_compressed """select * from 
text_table_compressed_skip_header order by id;"""
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to