This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a1d02f36ac [feature](table-valued-function) support `hdfs()` tvf (#14213)
a1d02f36ac is described below
commit a1d02f36acdabd947911194260ab5f2ef8feda79
Author: Tiewei Fang <[email protected]>
AuthorDate: Fri Nov 18 14:17:02 2022 +0800
[feature](table-valued-function) support `hdfs()` tvf (#14213)
This pr does two things:
1. support `hdfs()` table valued function.
2. add regression test
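For reference, the new TVF is exercised like this in the regression test added
below (a sketch based on that test; host, port, and file path are illustrative,
not defaults):

    SELECT * FROM HDFS(
        "uri" = "hdfs://127.0.0.1:8020/user/doris/csv_format_test/student.csv",
        "fs.defaultFS" = "hdfs://127.0.0.1:8020",
        "hadoop.username" = "doris",
        "format" = "csv");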
---
.licenserc.yaml | 1 +
.../hive/scripts/csv_format_test/all_types.csv | 120 +++++++++
.../scripts/csv_format_test/array_malformat.csv | 5 +
.../hive/scripts/csv_format_test/array_normal.csv | 9 +
.../hive/scripts/csv_format_test/student.csv | 10 +
.../scripts/csv_format_test/student_with_names.csv | 11 +
.../student_with_names_and_types.csv | 12 +
.../docker-compose/hive/scripts/hive-metastore.sh | 4 +-
.../scripts/json_format_test/one_array_json.json | 52 ++++
.../json_format_test/simple_object_json.json | 4 +-
.../java/org/apache/doris/backup/S3Storage.java | 4 +-
.../doris/planner/external/QueryScanProvider.java | 17 +-
.../doris/planner/external/TVFScanProvider.java | 4 +
.../ExternalFileTableValuedFunction.java | 72 +++--
.../tablefunction/HdfsTableValuedFunction.java | 116 +++++++++
.../doris/tablefunction/S3TableValuedFunction.java | 58 ++++-
.../doris/tablefunction/TableValuedFunctionIf.java | 2 +
.../table_valued_function/test_hdfs_tvf.out | 289 +++++++++++++++++++++
.../load_p0/stream_load/test_hdfs_json_load.out | 28 +-
.../table_valued_function/test_hdfs_tvf.groovy | 199 ++++++++++++++
20 files changed, 962 insertions(+), 55 deletions(-)
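Note: the JSON options threaded through ExternalFileTableValuedFunction in this
change (strip_outer_array, read_json_by_line, num_as_string, fuzzy_parse) are
passed the same way. A sketch mirroring the test suite (the path is
illustrative):

    SELECT * FROM HDFS(
        "uri" = "hdfs://127.0.0.1:8020/user/doris/json_format_test/simple_object_json.json",
        "fs.defaultFS" = "hdfs://127.0.0.1:8020",
        "hadoop.username" = "doris",
        "format" = "json",
        "strip_outer_array" = "false",
        "read_json_by_line" = "true");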
diff --git a/.licenserc.yaml b/.licenserc.yaml
index 1d1d35a65a..d6554af369 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -27,6 +27,7 @@ header:
- "**/test_data/**"
- "**/jmockit/**"
- "**/*.json"
+ - "**/*.csv"
- "**/*.dat"
- "**/*.svg"
- "**/*.md5"
diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/all_types.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/all_types.csv
new file mode 100644
index 0000000000..9a5e34b270
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/all_types.csv
@@ -0,0 +1,120 @@
+0,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+1,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+2,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+3,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+4,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+5,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+6,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+7,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+8,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+9,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+10,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+11,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+12,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+13,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+14,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+15,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+16,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+17,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+18,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+19,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+20,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+21,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+22,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+23,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+24,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+25,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+26,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+27,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+28,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+29,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+30,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+31,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+32,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+33,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+34,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+35,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+36,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+37,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+38,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+39,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+40,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+41,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+42,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+43,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+44,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+45,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+46,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+47,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+48,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+49,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+50,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+51,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+52,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+53,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+54,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+55,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+56,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+57,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+58,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+59,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+60,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+61,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+62,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+63,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+64,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+65,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+66,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+67,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+68,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+69,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+70,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+71,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+72,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+73,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+74,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+75,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+76,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+77,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+78,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+79,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+80,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+81,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+82,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+83,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+84,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+85,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+86,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+87,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+88,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+89,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+90,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+91,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+92,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+93,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+94,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+95,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+96,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+97,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+98,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+99,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+100,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+101,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+102,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+103,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+104,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+105,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+106,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+107,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+108,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+109,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+110,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+111,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+112,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+113,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+114,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+115,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+116,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+117,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+118,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
+119,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_malformat.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_malformat.csv
new file mode 100644
index 0000000000..3fbc5a50f5
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_malformat.csv
@@ -0,0 +1,5 @@
+1|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|["1991-01-01", "1992-02-02", "1993-03-03"]|["1991-01-01 00:00:00"]|[0.33,0.67]|[3.1415926,0.878787878]|[1,1.2,1.3]
+2|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|['1991-01-01', '1992-02-02', '1993-03-03']|\N|\N|\N|[1,\N,1.3]
+3|\N|\N|\N|\N|\N|\N|\N|\N|\N|\N
+4|1,2,3,4,5|\N|\N|\N|\N|\N|\N|\N|\N|\N
+5|[1,2,3,4,5|\N|\N|\N|\N|\N|\N|\N|\N|\N
diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_normal.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_normal.csv
new file mode 100644
index 0000000000..b4b3a716a7
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_normal.csv
@@ -0,0 +1,9 @@
+1|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|["1991-01-01", "1992-02-02", "1993-03-03"]|["1991-01-01 00:00:00"]|[0.33,0.67]|[3.1415926,0.878787878]|[1,1.2,1.3]
+2|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|['1991-01-01', '1992-02-02', '1993-03-03']|\N|\N|\N|[1,\N,1.3]
+3|\N|\N|\N|\N|\N|\N|\N|\N|\N|\N
+4|[]|[]|[]|[]|[]|[]|[]|[]|[]|[]
+5|[null]|[null]|[null]|[null]|[null]|[null]|[null]|[null]|[null]|[null]
+6|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null,null,null,null,null]
+6|[null,null]|[null,null]|[null,null]|[null,null]|[null,"null"]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null,null,null,null,null]
+7|[1,2,3,4,5]|\N|\N|\N|\N|\N|\N|\N|\N|\N
+8|[1,2,3,4,5]|\N|\N|\N|\N|\N|[]]|]]|[[]|[[
diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student.csv
new file mode 100644
index 0000000000..3a7d6c5d6f
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student.csv
@@ -0,0 +1,10 @@
+1,alice,18
+2,bob,20
+3,jack,24
+4,jackson,19
+5,liming,18
+6,luffy,20
+7,zoro,22
+8,sanzi,26
+9,wusuopu,21
+10,nami,18
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names.csv
new file mode 100644
index 0000000000..62d32e39f4
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names.csv
@@ -0,0 +1,11 @@
+id,name,age
+1,alice,18
+2,bob,20
+3,jack,24
+4,jackson,19
+5,liming,18
+6,luffy,20
+7,zoro,22
+8,sanzi,26
+9,wusuopu,21
+10,nami,18
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names_and_types.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names_and_types.csv
new file mode 100644
index 0000000000..4e88aef6d8
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names_and_types.csv
@@ -0,0 +1,12 @@
+id,name,age
+INT,STRING,INT
+1,alice,18
+2,bob,20
+3,jack,24
+4,jackson,19
+5,liming,18
+6,luffy,20
+7,zoro,22
+8,sanzi,26
+9,wusuopu,21
+10,nami,18
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index 6d97471cc5..884684f2ad 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -27,12 +27,14 @@ echo "hadoop fs -mkdir /user/doris/"
hadoop fs -mkdir -p /user/doris/
echo "hadoop fs -put /mnt/scripts/tpch1.db /user/doris/"
hadoop fs -put /mnt/scripts/tpch1.db /user/doris/
-echo "hadoop fs -put /mnt/scripts/json_format_test.db /user/doris/"
+echo "hadoop fs -put /mnt/scripts/json_format_test /user/doris/"
hadoop fs -put /mnt/scripts/json_format_test /user/doris/
echo "hadoop fs -put /mnt/scripts/parquet /user/doris/"
hadoop fs -put /mnt/scripts/parquet /user/doris/
echo "hadoop fs -put /mnt/scripts/orc /user/doris/"
hadoop fs -put /mnt/scripts/orc /user/doris/
+echo "hadoop fs -put /mnt/scripts/csv_format_test /user/doris/"
+hadoop fs -put /mnt/scripts/csv_format_test /user/doris/
echo "hive -f /mnt/scripts/create.hql"
hive -f /mnt/scripts/create.hql
diff --git a/docker/thirdparties/docker-compose/hive/scripts/json_format_test/one_array_json.json b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/one_array_json.json
new file mode 100644
index 0000000000..042db8c884
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/one_array_json.json
@@ -0,0 +1,52 @@
+[
+ {
+ "id": 1,
+ "city": "beijing",
+ "code": 1454547
+ },
+ {
+ "id": 2,
+ "city": "shanghai",
+ "code": 1244264
+ },
+ {
+ "id": 3,
+ "city": "guangzhou",
+ "code": 528369
+ },
+ {
+ "id": 4,
+ "city": "shenzhen",
+ "code": 594201
+ },
+ {
+ "id": 5,
+ "city": "hangzhou",
+ "code": 594201
+ },
+ {
+ "id": 6,
+ "city": "nanjing",
+ "code": 2345672
+ },
+ {
+ "id": 7,
+ "city": "wuhan",
+ "code": 2345673
+ },
+ {
+ "id": 8,
+ "city": "chengdu",
+ "code": 2345674
+ },
+ {
+ "id": 9,
+ "city": "xian",
+ "code": 2345675
+ },
+ {
+ "id": 10,
+ "city": "hefei",
+ "code": 2345676
+ }
+]
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json
index a7912466fd..5c3a9c07e9 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json
+++ b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json
@@ -8,5 +8,5 @@
{"id": 8, "city": "chengdu", "code": 2345678}
{"id": 9, "city": "xian", "code": 2345679}
{"id": 10, "city": "hefei", "code": 23456710}
-{"id": 10, "city": null, "code": 23456711}
-{"id": 10, "city": "hefei", "code": null}
+{"id": 11, "city": null, "code": 23456711}
+{"id": 12, "city": "hefei", "code": null}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java
index ae89175ebd..d1e250e779 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java
@@ -97,14 +97,14 @@ public class S3Storage extends BlobStorage {
public void setProperties(Map<String, String> properties) {
super.setProperties(properties);
caseInsensitiveProperties.putAll(properties);
- // Virtual hosted-sytle is recommended in the s3 protocol.
+ // Virtual hosted-style is recommended in the s3 protocol.
// The path-style has been abandoned, but for some unexplainable reasons,
// the s3 client will determine whether the endpiont starts with `s3`
// when generating a virtual hosted-sytle request.
// If not, it will not be converted (https://github.com/aws/aws-sdk-java-v2/pull/763),
// but the endpoints of many cloud service providers for object storage do not start with s3,
// so they cannot be converted to virtual hosted-sytle.
- // Some of them, such as aliyun's oss, only support virtual hosted-sytle,
+ // Some of them, such as aliyun's oss, only support virtual hosted-style,
// and some of them(ceph) may only support
// path-style, so we need to do some additional conversion.
//
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
index c38d1b967a..9cf2255f64 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
@@ -64,12 +64,6 @@ public abstract class QueryScanProvider implements FileScanProviderIf {
return;
}
InputSplit inputSplit = inputSplits.get(0);
- String fullPath = ((FileSplit) inputSplit).getPath().toUri().toString();
- String filePath = ((FileSplit) inputSplit).getPath().toUri().getPath();
- // eg:
- // hdfs://namenode
- // s3://buckets
- String fsName = fullPath.replace(filePath, "");
TFileType locationType = getLocationType();
context.params.setFileType(locationType);
TFileFormatType fileFormatType = getFileFormatType();
@@ -84,6 +78,17 @@ public abstract class QueryScanProvider implements FileScanProviderIf {
// set hdfs params for hdfs file type.
Map<String, String> locationProperties = getLocationProperties();
if (locationType == TFileType.FILE_HDFS) {
+ String fsName = "";
+ if (this instanceof TVFScanProvider) {
+ fsName = ((TVFScanProvider) this).getFsName();
+ } else {
+ String fullPath = ((FileSplit) inputSplit).getPath().toUri().toString();
+ String filePath = ((FileSplit) inputSplit).getPath().toUri().getPath();
+ // eg:
+ // hdfs://namenode
+ // s3://buckets
+ fsName = fullPath.replace(filePath, "");
+ }
THdfsParams tHdfsParams = BrokerUtil.generateHdfsParam(locationProperties);
tHdfsParams.setFsName(fsName);
context.params.setHdfsParams(tHdfsParams);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java
index 8c8bdf9d30..954d271a94 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java
@@ -58,6 +58,10 @@ public class TVFScanProvider extends QueryScanProvider {
this.tableValuedFunction = tableValuedFunction;
}
+ public String getFsName() {
+ return tableValuedFunction.getFsName();
+ }
+
// =========== implement abstract methods of QueryScanProvider =================
@Override
public TFileAttributes getFileAttributes() throws UserException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index ac69cad8de..17b3364294 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -45,10 +45,12 @@ import org.apache.doris.thrift.TFileScanRange;
import org.apache.doris.thrift.TFileScanRangeParams;
import org.apache.doris.thrift.TFileTextScanRangeParams;
import org.apache.doris.thrift.TFileType;
+import org.apache.doris.thrift.THdfsParams;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TPrimitiveType;
import org.apache.doris.thrift.TStatusCode;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
import org.apache.log4j.LogManager;
@@ -75,20 +77,37 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
protected static final String JSON_PATHS = "jsonpaths";
protected static final String STRIP_OUTER_ARRAY = "strip_outer_array";
protected static final String READ_JSON_BY_LINE = "read_json_by_line";
+ protected static final String NUM_AS_STRING = "num_as_string";
+ protected static final String FUZZY_PARSE = "fuzzy_parse";
+
+ protected static final ImmutableSet<String> FILE_FORMAT_PROPERTIES = new ImmutableSet.Builder<String>()
+ .add(FORMAT)
+ .add(JSON_ROOT)
+ .add(JSON_PATHS)
+ .add(STRIP_OUTER_ARRAY)
+ .add(READ_JSON_BY_LINE)
+ .add(NUM_AS_STRING)
+ .add(FUZZY_PARSE)
+ .add(COLUMN_SEPARATOR)
+ .add(LINE_DELIMITER)
+ .build();
+
protected List<Column> columns = null;
protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
protected Map<String, String> locationProperties;
- protected TFileFormatType fileFormatType;
- protected String headerType = "";
+ private TFileFormatType fileFormatType;
+ private String headerType = "";
- protected String columnSeparator = DEFAULT_COLUMN_SEPARATOR;
- protected String lineDelimiter = DEFAULT_LINE_DELIMITER;
- protected String jsonRoot = "";
- protected String jsonPaths = "";
- protected String stripOuterArray = "";
- protected String readJsonByLine = "";
+ private String columnSeparator = DEFAULT_COLUMN_SEPARATOR;
+ private String lineDelimiter = DEFAULT_LINE_DELIMITER;
+ private String jsonRoot = "";
+ private String jsonPaths = "";
+ private boolean stripOuterArray;
+ private boolean readJsonByLine;
+ private boolean numAsString;
+ private boolean fuzzyParse;
public abstract TFileType getTFileType();
@@ -105,6 +124,16 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
return locationProperties;
}
+ public String getFsName() {
+ TFileType fileType = getTFileType();
+ if (fileType == TFileType.FILE_HDFS) {
+ return locationProperties.get(HdfsTableValuedFunction.HADOOP_FS_NAME);
+ } else if (fileType == TFileType.FILE_S3) {
+ return locationProperties.get(S3TableValuedFunction.S3_ENDPOINT);
+ }
+ return "";
+ }
+
protected void parseFile() throws UserException {
String path = getFilePath();
BrokerDesc brokerDesc = getBrokerDesc();
@@ -142,8 +171,10 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
lineDelimiter = validParams.getOrDefault(LINE_DELIMITER, DEFAULT_LINE_DELIMITER);
jsonRoot = validParams.getOrDefault(JSON_ROOT, "");
jsonPaths = validParams.getOrDefault(JSON_PATHS, "");
- stripOuterArray = validParams.getOrDefault(STRIP_OUTER_ARRAY, "false").toLowerCase();
- readJsonByLine = validParams.getOrDefault(READ_JSON_BY_LINE, "true").toLowerCase();
+ readJsonByLine = Boolean.valueOf(validParams.get(READ_JSON_BY_LINE)).booleanValue();
+ stripOuterArray = Boolean.valueOf(validParams.get(STRIP_OUTER_ARRAY)).booleanValue();
+ numAsString = Boolean.valueOf(validParams.get(NUM_AS_STRING)).booleanValue();
+ fuzzyParse = Boolean.valueOf(validParams.get(FUZZY_PARSE)).booleanValue();
}
public List<TBrokerFileStatus> getFileStatuses() {
@@ -161,17 +192,10 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
} else if (this.fileFormatType == TFileFormatType.FORMAT_JSON) {
fileAttributes.setJsonRoot(jsonRoot);
fileAttributes.setJsonpaths(jsonPaths);
- if (readJsonByLine.equalsIgnoreCase("true")) {
- fileAttributes.setReadJsonByLine(true);
- } else {
- fileAttributes.setReadJsonByLine(false);
- }
- if (stripOuterArray.equalsIgnoreCase("true")) {
- fileAttributes.setStripOuterArray(true);
- } else {
- fileAttributes.setStripOuterArray(false);
- }
- // TODO(ftw): num_as_string/fuzzy_parser?
+ fileAttributes.setReadJsonByLine(readJsonByLine);
+ fileAttributes.setStripOuterArray(stripOuterArray);
+ fileAttributes.setNumAsString(numAsString);
+ fileAttributes.setFuzzyParse(fuzzyParse);
}
return fileAttributes;
}
@@ -254,6 +278,12 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
fileScanRangeParams.setFormatType(fileFormatType);
fileScanRangeParams.setProperties(locationProperties);
fileScanRangeParams.setFileAttributes(getFileAttributes());
+ if (getTFileType() == TFileType.FILE_HDFS) {
+ THdfsParams tHdfsParams = BrokerUtil.generateHdfsParam(locationProperties);
+ String fsName = getLocationProperties().get(HdfsTableValuedFunction.HADOOP_FS_NAME);
+ tHdfsParams.setFsName(fsName);
+ fileScanRangeParams.setHdfsParams(tHdfsParams);
+ }
// get first file, used to parse table schema
TBrokerFileStatus firstFile = null;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
new file mode 100644
index 0000000000..175c9e501a
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.tablefunction;
+
+import org.apache.doris.analysis.BrokerDesc;
+import org.apache.doris.analysis.ExportStmt;
+import org.apache.doris.analysis.StorageBackend.StorageType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.URI;
+import org.apache.doris.thrift.TFileType;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
+import org.apache.commons.collections.map.CaseInsensitiveMap;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.util.Map;
+
+/**
+ * The implementation of the table-valued function
+ * hdfs("uri" = "xxx", "hadoop.username" = "xx", "FORMAT" = "csv").
+ */
+public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction {
public static final Logger LOG = LogManager.getLogger(HdfsTableValuedFunction.class);
+
+ public static final String NAME = "hdfs";
+ public static final String HDFS_URI = "uri";
+ public static String HADOOP_FS_NAME = "fs.defaultFS";
+ // simple or kerberos
public static String HADOOP_SECURITY_AUTHENTICATION = "hadoop.security.authentication";
+ public static String HADOOP_USER_NAME = "hadoop.username";
public static String HADOOP_KERBEROS_PRINCIPAL = "hadoop.kerberos.principal";
+ public static String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab";
+ public static String HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit";
+ public static String HADOOP_SOCKET_PATH = "dfs.domain.socket.path";
+
+ private static final ImmutableSet<String> LOCATION_PROPERTIES = new ImmutableSet.Builder<String>()
+ .add(HDFS_URI)
+ .add(HADOOP_SECURITY_AUTHENTICATION)
+ .add(HADOOP_FS_NAME)
+ .add(HADOOP_USER_NAME)
+ .add(HADOOP_KERBEROS_PRINCIPAL)
+ .add(HADOOP_KERBEROS_KEYTAB)
+ .add(HADOOP_SHORT_CIRCUIT)
+ .add(HADOOP_SOCKET_PATH)
+ .build();
+
+ private URI hdfsUri;
+ private String filePath;
+
+ public HdfsTableValuedFunction(Map<String, String> params) throws UserException {
+ Map<String, String> fileFormatParams = new CaseInsensitiveMap();
+ locationProperties = Maps.newHashMap();
+ for (String key : params.keySet()) {
+ if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
+ fileFormatParams.put(key, params.get(key));
+ } else if (LOCATION_PROPERTIES.contains(key.toLowerCase()) || HADOOP_FS_NAME.equalsIgnoreCase(key)) {
+ // because HADOOP_FS_NAME contains upper and lower case
+ if (HADOOP_FS_NAME.equalsIgnoreCase(key)) {
+ locationProperties.put(HADOOP_FS_NAME, params.get(key));
+ } else {
+ locationProperties.put(key.toLowerCase(), params.get(key));
+ }
+ } else {
+ throw new AnalysisException(key + " is invalid property");
+ }
+ }
+
+ ExportStmt.checkPath(locationProperties.get(HDFS_URI), StorageType.HDFS);
+ hdfsUri = URI.create(locationProperties.get(HDFS_URI));
+ filePath = locationProperties.get(HADOOP_FS_NAME) + hdfsUri.getPath();
+
+ parseProperties(fileFormatParams);
+ parseFile();
+ }
+
+ // =========== implement abstract methods of ExternalFileTableValuedFunction =================
+ @Override
+ public TFileType getTFileType() {
+ return TFileType.FILE_HDFS;
+ }
+
+ @Override
+ public String getFilePath() {
+ // must be "hdfs://namenode/filepath"
+ return filePath;
+ }
+
+ @Override
+ public BrokerDesc getBrokerDesc() {
+ return new BrokerDesc("S3TvfBroker", StorageType.HDFS, locationProperties);
+ }
+
+ // =========== implement abstract methods of TableValuedFunctionIf
=================
+ @Override
+ public String getTableName() {
+ return "HDFSTableValuedFunction";
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index 784a75accc..6af05f3374 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -26,6 +26,7 @@ import org.apache.doris.thrift.TFileType;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
+import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -46,44 +47,71 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
private static final String AK = "access_key";
private static final String SK = "secret_key";
- public static final String USE_PATH_STYLE = "use_path_style";
+ private static final String USE_PATH_STYLE = "use_path_style";
+ private static final String REGION = "region";
private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
.add(S3_URI)
.add(AK)
.add(SK)
- .add(FORMAT)
- .add(JSON_ROOT)
- .add(JSON_PATHS)
- .add(STRIP_OUTER_ARRAY)
- .add(READ_JSON_BY_LINE)
+ .add(USE_PATH_STYLE)
+ .add(REGION)
.build();
private S3URI s3uri;
private String s3AK;
private String s3SK;
+ private String endPoint;
+ private String virtualBucket;
+ private boolean forceVirtualHosted;
public S3TableValuedFunction(Map<String, String> params) throws UserException {
- Map<String, String> validParams = Maps.newHashMap();
+ Map<String, String> validParams = new CaseInsensitiveMap();
for (String key : params.keySet()) {
- if (!PROPERTIES_SET.contains(key.toLowerCase())) {
+ if (!PROPERTIES_SET.contains(key.toLowerCase()) && !FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) {
throw new AnalysisException(key + " is invalid property");
}
- validParams.put(key.toLowerCase(), params.get(key));
+ validParams.put(key, params.get(key));
}
- s3uri = S3URI.create(validParams.get(S3_URI));
+ String originUri = validParams.getOrDefault(S3_URI, "");
+ if (originUri.toLowerCase().startsWith("s3")) {
+ // s3 protocol
+ forceVirtualHosted = false;
+ } else {
+ // not s3 protocol, forceVirtualHosted is determined by USE_PATH_STYLE.
+ forceVirtualHosted = !Boolean.valueOf(validParams.get(USE_PATH_STYLE)).booleanValue();
+ }
+
+ s3uri = S3URI.create(validParams.get(S3_URI), forceVirtualHosted);
+ if (forceVirtualHosted) {
+ // s3uri.getVirtualBucket() is: virtualBucket.endpoint, Eg:
+ //     uri: http://my_bucket.cos.ap-beijing.myqcloud.com/file.txt
+ // s3uri.getVirtualBucket() = my_bucket.cos.ap-beijing.myqcloud.com,
+ // so we need to separate virtualBucket and endpoint.
+ String[] fields = s3uri.getVirtualBucket().split("\\.", 2);
+ virtualBucket = fields[0];
+ if (fields.length > 1) {
+ endPoint = fields[1];
+ } else {
+ throw new AnalysisException("can not parse endpoint, please check uri.");
+ }
+ } else {
+ endPoint = s3uri.getBucketScheme();
+ }
s3AK = validParams.getOrDefault(AK, "");
s3SK = validParams.getOrDefault(SK, "");
+ String usePathStyle = validParams.getOrDefault(USE_PATH_STYLE, "false");
parseProperties(validParams);
// set S3 location properties
+ // these five properties are necessary; none of them can be omitted.
locationProperties = Maps.newHashMap();
- locationProperties.put(S3_ENDPOINT, s3uri.getBucketScheme());
+ locationProperties.put(S3_ENDPOINT, endPoint);
locationProperties.put(S3_AK, s3AK);
locationProperties.put(S3_SK, s3SK);
- locationProperties.put(S3_REGION, "");
- locationProperties.put(USE_PATH_STYLE, "true");
+ locationProperties.put(S3_REGION, validParams.getOrDefault(REGION, ""));
+ locationProperties.put(USE_PATH_STYLE, usePathStyle);
parseFile();
}
@@ -97,6 +125,10 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
@Override
public String getFilePath() {
// must be "s3://..."
+ if (forceVirtualHosted) {
+ return NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM
+ + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey();
+ }
return NAME + S3URI.SCHEME_DELIM + s3uri.getKey();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java
index 3063880c92..56167d1b87 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java
@@ -48,6 +48,8 @@ public abstract class TableValuedFunctionIf {
return new NumbersTableValuedFunction(params);
case S3TableValuedFunction.NAME:
return new S3TableValuedFunction(params);
+ case HdfsTableValuedFunction.NAME:
+ return new HdfsTableValuedFunction(params);
default:
throw new UserException("Could not find table function " + funcName);
}
diff --git a/regression-test/data/correctness_p0/table_valued_function/test_hdfs_tvf.out b/regression-test/data/correctness_p0/table_valued_function/test_hdfs_tvf.out
new file mode 100644
index 0000000000..6ed17594b9
--- /dev/null
+++ b/regression-test/data/correctness_p0/table_valued_function/test_hdfs_tvf.out
@@ -0,0 +1,289 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !csv_all_types --
+0 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+1 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+10 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+100 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+101 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+102 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+103 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+104 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+105 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+106 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+107 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+108 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+109 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+11 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+110 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+111 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+112 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+113 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+114 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+115 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+116 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+117 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+118 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+119 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+12 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+13 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+14 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+15 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+16 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+17 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+18 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+19 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+2 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+20 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+21 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+22 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+23 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+24 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+25 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+26 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+27 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+28 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+29 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+3 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+30 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+31 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+32 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+33 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+34 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+35 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+36 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+37 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+38 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+39 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+4 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+40 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+41 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+42 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+43 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+44 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+45 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+46 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+47 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+48 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+49 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+5 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+50 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+51 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+52 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+53 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+54 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+55 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+56 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+57 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+58 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+59 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+6 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+60 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+61 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+62 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+63 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+64 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+65 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+66 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+67 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+68 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+69 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+7 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+70 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+71 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+72 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+73 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+74 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+75 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+76 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+77 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+78 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+79 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+8 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+80 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+81 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+82 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+83 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+84 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+85 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+86 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+87 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+88 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+89 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+9 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+90 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+91 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+92 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+93 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+94 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+95 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+96 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+97 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+98 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+99 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59
+
+-- !csv_student --
+1 alice 18
+2 bob 20
+3 jack 24
+4 jackson 19
+5 liming 18
+6 luffy 20
+7 zoro 22
+8 sanzi 26
+9 wusuopu 21
+10 nami 18
+
+-- !csv_array_malformat --
+1 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ["1991-01-01", "1992-02-02", "1993-03-03"] ["1991-01-01 00:00:00"] [0.33,0.67] [3.1415926,0.878787878] [1,1.2,1.3]
+2 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ['1991-01-01', '1992-02-02', '1993-03-03'] \N \N \N [1,\\N,1.3]
+3 \N \N \N \N \N \N \N \N \N \N
+4 1,2,3,4,5 \N \N \N \N \N \N \N \N \N
+5 [1,2,3,4,5 \N \N \N \N \N \N \N \N \N
+
+-- !csv_array_normal --
+1 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ["1991-01-01", "1992-02-02", "1993-03-03"] ["1991-01-01 00:00:00"] [0.33,0.67] [3.1415926,0.878787878] [1,1.2,1.3]
+2 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ['1991-01-01', '1992-02-02', '1993-03-03'] \N \N \N [1,\\N,1.3]
+3 \N \N \N \N \N \N \N \N \N \N
+4 [] [] [] [] [] [] [] [] [] []
+5 [null] [null] [null] [null] [null] [null] [null] [null] [null] [null]
+6 [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null,null,null,null,null]
+6 [null,null] [null,null] [null,null] [null,null] [null,"null"] [null,null] [null,null] [null,null] [null,null] [null,null,null,null,null,null]
+7 [1,2,3,4,5] \N \N \N \N \N \N \N \N \N
+8 [1,2,3,4,5] \N \N \N \N \N []] ]] [[] [[
+
+-- !csv_names --
+1 alice 18
+2 bob 20
+3 jack 24
+4 jackson 19
+5 liming 18
+6 luffy 20
+7 zoro 22
+8 sanzi 26
+9 wusuopu 21
+10 nami 18
+
+-- !csv_names_types --
+1 alice 18
+2 bob 20
+3 jack 24
+4 jackson 19
+5 liming 18
+6 luffy 20
+7 zoro 22
+8 sanzi 26
+9 wusuopu 21
+10 nami 18
+
+-- !parquet --
+1 Supplier#000000001 N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ 17 27-918-335-1736 5755 each slyly above the careful
+2 Supplier#000000002 89eJ5ksX3ImxJQBvxObC, 5 15-679-861-2259 4032 slyly bold instructions. idle dependen
+3 Supplier#000000003 q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3 1 11-383-516-1199 4192 blithely silent requests after the express dependencies are sl
+4 Supplier#000000004 Bk7ah4CK8SYQTepEmvMkkgMwg 15 25-843-787-7479 4641 riously even requests above the exp
+5 Supplier#000000005 Gcdm2rJRzl5qlTVzc 11 21-151-690-3663 -283 . slyly regular pinto bea
+6 Supplier#000000006 tQxuVm7s7CnK 14 24-696-997-4969 1365 final accounts. regular dolphins use against the furiously ironic decoys.
+7 Supplier#000000007 s,4TicNGB4uO6PaSqNBUq 23 33-990-965-2201 6820 s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit
+8 Supplier#000000008 9Sq4bBH2FQEmaFOocY45sRTxo6yuoG 17 27-498-742-3860 7627 al pinto beans. asymptotes haggl
+9 Supplier#000000009 1KhUgZegwM3ua7dsYmekYBsK 10 20-403-398-8662 5302 s. unusual, even requests along the furiously regular pac
+10 Supplier#000000010 Saygah3gYWMp72i PY 24 34-852-489-8585 3891 ing waters. regular requests ar
+11 Supplier#000000011 JfwTs,LZrV, M,9C 18 28-613-996-1505 3393 y ironic packages. slyly ironic accounts affix furiously; ironically unusual excuses across the flu
+12 Supplier#000000012 aLIW q0HYd 8 18-179-925-7181 1432 al packages nag alongside of the bold instructions. express, daring accounts
+13 Supplier#000000013 HK71HQyWoqRWOX8GI FpgAifW,2PoH 3 13-727-620-7813 9107 requests engage regularly instructions. furiously special requests ar
+14 Supplier#000000014 EXsnO5pTNj4iZRm 15 25-656-247-5058 9189 l accounts boost. fluffily bold warhorses wake
+15 Supplier#000000015 olXVbNBfVzRqgokr1T,Ie 8 18-453-357-6394 308 across the furiously regular platelets wake even deposits. quickly express she
+16 Supplier#000000016 YjP5C55zHDXL7LalK27zfQnwejdpin4AMpvh 22 32-822-502-4215 2972 ously express ideas haggle quickly dugouts? fu
+17 Supplier#000000017 c2d,ESHRSkK3WYnxpgw6aOqN0q 19 29-601-884-9219 1687 eep against the furiously bold ideas. fluffily bold packa
+18 Supplier#000000018 PGGVE5PWAMwKDZw 16 26-729-551-1115 7040 accounts snooze slyly furiously bold
+19 Supplier#000000019 edZT3es,nBFD8lBXTGeTl 24 34-278-310-2731 6150 refully final foxes across the dogged theodolites sleep slyly abou
+20 Supplier#000000020 iybAE,RmTymrZVYaFZva2SH,j 3 13-715-945-6730 530 n, ironic ideas would nag blithely about the slyly regular accounts. silent, expr
+
+-- !orc --
+1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901 ly. slyly ironi
+2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902 lar accounts amo
+3 spring green yellow purple cornsilk Manufacturer#4 Brand#42 STANDARD POLISHED BRASS 21 WRAP CASE 903 egular deposits hag
+4 cornflower chocolate smoke green pink Manufacturer#3 Brand#34 SMALL PLATED BRASS 14 MED DRUM 904 p furiously r
+5 forest brown coral puff cream Manufacturer#3 Brand#32 STANDARD POLISHED TIN 15 SM PKG 905 wake carefully
+6 bisque cornflower lawn forest magenta Manufacturer#2 Brand#24 PROMO PLATED STEEL 4 MED BAG 906 sual a
+7 moccasin green thistle khaki floral Manufacturer#1 Brand#11 SMALL PLATED COPPER 45 SM BAG 907 lyly. ex
+8 misty lace thistle snow royal Manufacturer#4 Brand#44 PROMO BURNISHED TIN 41 LG DRUM 908 eposi
+9 thistle dim navajo dark gainsboro Manufacturer#4 Brand#43 SMALL BURNISHED STEEL 12 WRAP CASE 909 ironic foxe
+10 linen pink saddle puff powder Manufacturer#5 Brand#54 LARGE BURNISHED STEEL 44 LG CAN 910 ithely final deposit
+11 spring maroon seashell almond orchid Manufacturer#2 Brand#25 STANDARD BURNISHED NICKEL 43 WRAP BOX 911 ng gr
+12 cornflower wheat orange maroon ghost Manufacturer#3 Brand#33 MEDIUM ANODIZED STEEL 25 JUMBO CASE 912 quickly
+13 ghost olive orange rosy thistle Manufacturer#5 Brand#55 MEDIUM BURNISHED NICKEL 1 JUMBO PACK 913 osits.
+14 khaki seashell rose cornsilk navajo Manufacturer#1 Brand#13 SMALL POLISHED STEEL 28 JUMBO BOX 914 kages c
+15 blanched honeydew sky turquoise medium Manufacturer#1 Brand#15 LARGE ANODIZED BRASS 45 LG CASE 915 usual ac
+16 deep sky turquoise drab peach Manufacturer#3 Brand#32 PROMO PLATED TIN 2 MED PACK 916 unts a
+17 indian navy coral pink deep Manufacturer#4 Brand#43 ECONOMY BRUSHED STEEL 16 LG BOX 917 regular accounts
+18 turquoise indian lemon lavender misty Manufacturer#1 Brand#11 SMALL BURNISHED STEEL 42 JUMBO PACK 918 s cajole slyly a
+19 chocolate navy tan deep brown Manufacturer#2 Brand#23 SMALL ANODIZED NICKEL 33 WRAP BOX 919 pending acc
+20 ivory navy honeydew sandy midnight Manufacturer#1 Brand#12 LARGE POLISHED NICKEL 48 MED BAG 920 are across the asympt
+
+-- !json --
+1 beijing 2345671
+10 hefei 23456710
+11 \N 23456711
+12 hefei \N
+2 shanghai 2345672
+3 guangzhou 2345673
+4 shenzhen 2345674
+5 hangzhou 2345675
+6 nanjing 2345676
+7 wuhan 2345677
+8 chengdu 2345678
+9 xian 2345679
+
+-- !json_root --
+1 beijing 2345671
+2 shanghai 2345672
+3 hangzhou 2345673
+4 shenzhen 2345674
+5 guangzhou 2345675
+
+-- !json_paths --
+1 2345671
+2 2345672
+3 2345673
+4 2345674
+5 2345675
+6 2345676
+7 2345677
+8 2345678
+9 2345679
+10 23456710
+11 23456711
+12 \N
+
+-- !one_array --
+1 beijing 1454547
+2 shanghai 1244264
+3 guangzhou 528369
+4 shenzhen 594201
+5 hangzhou 594201
+6 nanjing 2345672
+7 wuhan 2345673
+8 chengdu 2345674
+9 xian 2345675
+10 hefei 2345676
+
+-- !cast --
+1 beijing 2345671
+2 shanghai 2345672
+3 guangzhou 2345673
+4 shenzhen 2345674
+5 hangzhou 2345675
+6 nanjing 2345676
+7 wuhan 2345677
+8 chengdu 2345678
+9 xian 2345679
+10 hefei 23456710
+11 \N 23456711
+12 hefei \N
+
+-- !insert --
+1 beijing 2345671
+2 shanghai 2345672
+3 hangzhou 2345673
+4 shenzhen 2345674
+5 guangzhou 2345675
+
diff --git a/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out b/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out
index 594d2ec60a..f55edaaa7e 100644
--- a/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out
@@ -9,8 +9,9 @@
7 wuhan 2345677
8 chengdu 2345678
9 xian 2345679
-10 \N 23456711
10 hefei 23456710
+11 \N 23456711
+12 hefei \N
200 changsha 3456789
-- !select1 --
@@ -23,8 +24,9 @@
7 wuhan 2345677
8 chengdu 2345678
9 xian 2345679
-10 \N 23456711
10 hefei 23456710
+11 \N 23456711
+12 hefei \N
200 changsha 3456789
-- !select2 --
@@ -37,8 +39,9 @@
70 wuhan 2345677
80 chengdu 2345678
90 xian 2345679
-100 \N 23456711
100 hefei 23456710
+110 \N 23456711
+120 hefei \N
200 changsha 3456789
-- !select2 --
@@ -51,8 +54,9 @@
70 wuhan 2345677
80 chengdu 2345678
90 xian 2345679
-100 \N 23456711
100 hefei 23456710
+110 \N 23456711
+120 hefei \N
200 changsha 3456789
-- !select3 --
@@ -65,9 +69,9 @@
7 2345677 \N
8 2345678 \N
9 2345679 \N
-10 \N \N
10 23456710 \N
-10 23456711 \N
+11 23456711 \N
+12 \N \N
200 changsha 3456789
-- !select3 --
@@ -80,9 +84,9 @@
7 2345677 \N
8 2345678 \N
9 2345679 \N
-10 \N \N
10 23456710 \N
-10 23456711 \N
+11 23456711 \N
+12 \N \N
200 changsha 3456789
-- !select4 --
@@ -95,7 +99,9 @@
7 \N 270
8 \N 280
9 \N 290
-10 \N 900
+10 \N 300
+11 \N 310
+12 \N 320
200 changsha 3456789
-- !select4 --
@@ -108,7 +114,9 @@
7 \N 270
8 \N 280
9 \N 290
-10 \N 900
+10 \N 300
+11 \N 310
+12 \N 320
200 changsha 3456789
-- !select5 --
diff --git a/regression-test/suites/correctness_p0/table_valued_function/test_hdfs_tvf.groovy b/regression-test/suites/correctness_p0/table_valued_function/test_hdfs_tvf.groovy
new file mode 100644
index 0000000000..a11a4160fa
--- /dev/null
+++ b/regression-test/suites/correctness_p0/table_valued_function/test_hdfs_tvf.groovy
@@ -0,0 +1,199 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hdfs_tvf") {
+ String hdfs_port = context.config.otherConfigs.get("hdfs_port")
+ // It's okay to use a random `hdfsUser`, but it cannot be empty.
+ def hdfsUserName = "doris"
+ def format = "csv"
+ def defaultFS = "hdfs://127.0.0.1:${hdfs_port}"
+ def uri = ""
+
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ try {
+ sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "true");"""
+
+ // test csv format
+ uri = "${defaultFS}" + "/user/doris/csv_format_test/all_types.csv"
+ format = "csv"
+ qt_csv_all_types """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}") order by c1; """
+
+
+ uri = "${defaultFS}" + "/user/doris/csv_format_test/student.csv"
+ format = "csv"
+ qt_csv_student """ select cast(c1 as INT) as id, c2 as name, c3 as age from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}") order by id; """
+
+ uri = "${defaultFS}" + "/user/doris/csv_format_test/array_malformat.csv"
+ format = "csv"
+ qt_csv_array_malformat """ select * from HDFS(
+ "URI" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "csv",
+ "column_separator" = "|") order by c1;
"""
+
+ uri = "${defaultFS}" + "/user/doris/csv_format_test/array_normal.csv"
+ format = "csv"
+ qt_csv_array_normal """ select * from HDFS("URI" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "csv",
+ "column_separator" = "|") order by c1; """
+
+ // test csv_with_names file format
+ uri = "${defaultFS}" + "/user/doris/csv_format_test/student_with_names.csv"
+ format = "csv_with_names"
+ qt_csv_names """ select cast(id as INT) as id, name, age from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}") order by id; """
+
+ // test csv_with_names_and_types file format
+ uri = "${defaultFS}" + "/user/doris/csv_format_test/student_with_names_and_types.csv"
+ format = "csv_with_names_and_types"
+ qt_csv_names_types """ select cast(id as INT) as id, name, age from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}") order by id; """
+
+
+ // test parquet
+ uri = "${defaultFS}" + "/user/doris/tpch1.db/hdfs_tvf/test_parquet.snappy.parquet"
+ format = "parquet"
+ qt_parquet """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}") order by s_suppkey limit
20; """
+
+ // test orc
+ uri = "${defaultFS}" + "/user/doris/tpch1.db/hdfs_tvf/test_orc.snappy.orc"
+ format = "orc"
+ qt_orc """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}") order by p_partkey limit
20; """
+
+
+ // test json format
+ uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json"
+ format = "json"
+ qt_json """ select * from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true") order by id; """
+
+ // test json root
+ uri = "${defaultFS}" + "/user/doris/json_format_test/nest_json.json"
+ format = "json"
+ qt_json_root """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true",
+ "json_root" = "\$.item") order by id; """
+
+ // test json paths
+ uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json"
+ format = "json"
+ qt_json_paths """ select cast(id as INT) as id, cast(code as INT) as code from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true",
+ "jsonpaths" = "[\\"\$.id\\", \\"\$.code\\"]") order by
id; """
+
+ // test without read_json_by_line
+ uri = "${defaultFS}" + "/user/doris/json_format_test/one_array_json.json"
+ format = "json"
+ qt_one_array """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "true",
+ "read_json_by_line" = "false") order by id; """
+
+
+ // test cast to int
+ uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json"
+ format = "json"
+ qt_cast """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true") order by id; """
+
+ // test insert into select
+ def testTable = "test_hdfs_tvf"
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ def result1 = sql """ CREATE TABLE IF NOT EXISTS ${testTable}
+ id int,
+ city varchar(50),
+ code int
+ )
+ COMMENT "test hdfs tvf table"
+ DISTRIBUTED BY HASH(id) BUCKETS 32
+ PROPERTIES("replication_num" = "1"); """
+
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+ uri = "${defaultFS}" + "/user/doris/json_format_test/nest_json.json"
+ format = "json"
+ def result2 = sql """ insert into ${testTable}(id,city,code)
+ select cast (id as INT) as id, city, cast (code as INT) as code
+ from HDFS(
+ "uri" = "${uri}",
+ "fs.defaultFS"= "${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "${format}",
+ "strip_outer_array" = "false",
+ "read_json_by_line" = "true",
+ "json_root" = "\$.item") """
+
+ sql "sync"
+ assertTrue(result2[0][0] == 5, "Insert should update 5 rows")
+ qt_insert """ select * from test_hdfs_tvf order by id; """
+ } finally {
+ sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "false");"""
+ }
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]