This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new c8316d14a0 [cherry-pick][branch-2.0](load) support line delimiter for
old broker load in 2.0 (#22225)
c8316d14a0 is described below
commit c8316d14a0da50d95ee25659a1e7d99fac74232c
Author: Siyang Tang <[email protected]>
AuthorDate: Thu Jul 27 09:48:45 2023 +0800
[cherry-pick][branch-2.0](load) support line delimiter for old broker load
in 2.0 (#22225)
---
.../Load/BROKER-LOAD.md | 5 +
.../Load/BROKER-LOAD.md | 5 +
fe/fe-core/src/main/cup/sql_parser.cup | 3 +-
.../org/apache/doris/analysis/DataDescription.java | 26 +++
.../org/apache/doris/analysis/S3TvfLoadStmt.java | 23 +--
.../ddl/csv_s3_case_line_delimiter_create.sql | 25 +++
.../ddl/csv_s3_case_line_delimiter_drop.sql | 1 +
.../load_p2/broker_load/test_broker_load.groovy | 180 +++++++++++----------
.../broker_load/test_tvf_based_broker_load.groovy | 133 +++------------
9 files changed, 200 insertions(+), 201 deletions(-)
diff --git
a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
index 50b859113a..59956abc76 100644
---
a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
+++
b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
@@ -65,6 +65,7 @@ WITH BROKER broker_name
INTO TABLE `table_name`
[PARTITION (p1, p2, ...)]
[COLUMNS TERMINATED BY "column_separator"]
+ [LINES TERMINATED BY "line_delimiter"]
[FORMAT AS "file_type"]
[(column_list)]
[COLUMNS FROM PATH AS (c1, c2, ...)]
@@ -96,6 +97,10 @@ WITH BROKER broker_name
Specifies the column separator. Only valid in CSV format. Only single-byte
delimiters can be specified.
+ - `LINES TERMINATED BY`
+
+ Specifies the line delimiter. Only valid in CSV format. Only single-byte
delimiters can be specified.
+
- `FORMAT AS`
Specifies the file type, CSV, PARQUET and ORC formats are supported.
Default is CSV.
diff --git
a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
index 73387d12da..eb294dcacf 100644
---
a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
+++
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md
@@ -66,6 +66,7 @@ WITH BROKER broker_name
[PARTITION (p1, p2, ...)]
[COLUMNS TERMINATED BY "column_separator"]
+ [LINES TERMINATED BY "line_delimiter"]
[FORMAT AS "file_type"]
[(column_list)]
[COLUMNS FROM PATH AS (c1, c2, ...)]
[SET (column_mapping)]
@@ -96,6 +97,10 @@ WITH BROKER broker_name
指定列分隔符。仅在 CSV 格式下有效。仅能指定单字节分隔符。
+ - `LINES TERMINATED BY`
+
+ 指定行分隔符。仅在 CSV 格式下有效。仅能指定单字节分隔符。
+
- `FORMAT AS`
指定文件类型,支持 CSV、PARQUET 和 ORC 格式。默认为 CSV。
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index 86d2dc97f7..a15bbc5a3c 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -2398,6 +2398,7 @@ data_desc ::=
KW_INTO KW_TABLE ident:tableName
opt_partition_names:partitionNames
opt_field_term:colSep
+ opt_line_term:lineDelimiter
opt_file_format:fileFormat
opt_col_list:colList
opt_columns_from_path:columnsFromPath
@@ -2408,7 +2409,7 @@ data_desc ::=
sequence_col_clause:sequenceColName
opt_properties:properties
{:
- RESULT = new DataDescription(tableName, partitionNames, files,
colList, colSep, fileFormat,
+ RESULT = new DataDescription(tableName, partitionNames, files,
colList, colSep, lineDelimiter, fileFormat,
columnsFromPath, isNeg, colMappingList, preFilterExpr, whereExpr,
mergeType, deleteExpr, sequenceColName, properties);
:}
| opt_merge_type:mergeType KW_DATA KW_FROM KW_TABLE ident:srcTableName
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java
index 658605abc5..88044394fe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java
@@ -168,11 +168,32 @@ public class DataDescription implements
InsertStmt.DataDesc {
isNegative, columnMappingList, null, null,
LoadTask.MergeType.APPEND, null, null, null);
}
+ public DataDescription(String tableName,
+ PartitionNames partitionNames,
+ List<String> filePaths,
+ List<String> columns,
+ Separator columnSeparator,
+ String fileFormat,
+ List<String> columnsFromPath,
+ boolean isNegative,
+ List<Expr> columnMappingList,
+ Expr fileFilterExpr,
+ Expr whereExpr,
+ LoadTask.MergeType mergeType,
+ Expr deleteCondition,
+ String sequenceColName,
+ Map<String, String> properties) {
+ this(tableName, partitionNames, filePaths, columns, columnSeparator,
null,
+ fileFormat, columnsFromPath, isNegative, columnMappingList,
fileFilterExpr, whereExpr,
+ mergeType, deleteCondition, sequenceColName, properties);
+ }
+
public DataDescription(String tableName,
PartitionNames partitionNames,
List<String> filePaths,
List<String> columns,
Separator columnSeparator,
+ Separator lineDelimiter,
String fileFormat,
List<String> columnsFromPath,
boolean isNegative,
@@ -188,6 +209,7 @@ public class DataDescription implements InsertStmt.DataDesc
{
this.filePaths = filePaths;
this.fileFieldNames = columns;
this.columnSeparator = columnSeparator;
+ this.lineDelimiter = lineDelimiter;
this.fileFormat = fileFormat;
this.columnsFromPath = columnsFromPath;
this.isNegative = isNegative;
@@ -597,6 +619,10 @@ public class DataDescription implements
InsertStmt.DataDesc {
return lineDelimiter.getSeparator();
}
+ public Separator getLineDelimiterObj() {
+ return lineDelimiter;
+ }
+
public void setLineDelimiter(Separator lineDelimiter) {
this.lineDelimiter = lineDelimiter;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java
index 7c5eec7729..99b23fcc61 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java
@@ -155,15 +155,8 @@ public class S3TvfLoadStmt extends NativeInsertStmt {
final String format =
Optional.ofNullable(dataDescription.getFileFormat()).orElse(DEFAULT_FORMAT);
params.put(ExternalFileTableValuedFunction.FORMAT, format);
if (isCsvFormat(format)) {
- final Separator separator =
dataDescription.getColumnSeparatorObj();
- if (separator != null) {
- try {
- separator.analyze();
- } catch (AnalysisException e) {
- throw new DdlException("failed to create s3 tvf ref", e);
- }
- params.put(ExternalFileTableValuedFunction.COLUMN_SEPARATOR,
dataDescription.getColumnSeparator());
- }
+ parseSeparator(dataDescription.getColumnSeparatorObj(),
ExternalFileTableValuedFunction.COLUMN_SEPARATOR, params);
+ parseSeparator(dataDescription.getLineDelimiterObj(),
ExternalFileTableValuedFunction.LINE_DELIMITER, params);
}
Preconditions.checkState(!brokerDesc.isMultiLoadBroker(), "do not
support multi broker load currently");
@@ -181,6 +174,18 @@ public class S3TvfLoadStmt extends NativeInsertStmt {
}
}
+ private static void parseSeparator(Separator separator, String paramKey,
Map<String, String> tvfParams) throws DdlException {
+ if (separator == null) { // no separator supplied: set nothing
+ return;
+ }
+ try {
+ separator.analyze();
+ } catch (AnalysisException e) {
+ throw new DdlException(String.format("failed to parse
separator:%s", separator), e);
+ }
+ tvfParams.put(paramKey, separator.getSeparator()); // each separator kind goes under its own TVF property key
+ }
+
private static boolean isCsvFormat(String format) {
return Strings.isNullOrEmpty(format) ||
StringUtils.equalsIgnoreCase(format, FORMAT_CSV);
}
diff --git
a/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql
b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql
new file mode 100644
index 0000000000..12d9fd92fc
--- /dev/null
+++
b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql
@@ -0,0 +1,25 @@
+CREATE TABLE csv_s3_case_line_delimiter (
+ l_shipdate DATE NOT NULL,
+ l_orderkey bigint NOT NULL,
+ l_linenumber int not null,
+ l_partkey int NOT NULL,
+ l_suppkey int not null,
+ l_quantity decimal(15, 2) NOT
NULL,
+ l_extendedprice decimal(15, 2)
NOT NULL,
+ l_discount decimal(15, 2) NOT
NULL,
+ l_tax decimal(15, 2) NOT
NULL,
+ l_returnflag VARCHAR(1) NOT NULL,
+ l_linestatus VARCHAR(1) NOT NULL,
+ l_commitdate DATE NOT NULL,
+ l_receiptdate DATE NOT NULL,
+ l_shipinstruct VARCHAR(25) NOT
NULL,
+ l_shipmode VARCHAR(10) NOT
NULL,
+ l_comment VARCHAR(44) NOT NULL
+)ENGINE=OLAP
+DUPLICATE KEY(`l_shipdate`, `l_orderkey`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96
+PROPERTIES (
+ "replication_num" = "1",
+ "colocate_with" = "lineitem_orders"
+);
\ No newline at end of file
diff --git
a/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql
b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql
new file mode 100644
index 0000000000..f3ab15d05e
--- /dev/null
+++
b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS csv_s3_case_line_delimiter
\ No newline at end of file
diff --git a/regression-test/suites/load_p2/broker_load/test_broker_load.groovy
b/regression-test/suites/load_p2/broker_load/test_broker_load.groovy
index d9b4cfea37..fb29639908 100644
--- a/regression-test/suites/load_p2/broker_load/test_broker_load.groovy
+++ b/regression-test/suites/load_p2/broker_load/test_broker_load.groovy
@@ -48,7 +48,8 @@ suite("test_broker_load_p2", "p2") {
"orc_s3_case7", // table column uppercase * load column
lowercase * orc file lowercase
"orc_s3_case8", // table column uppercase * load column
uppercase * orc file uppercase
"orc_s3_case9", // table column uppercase * load column
lowercase * orc file uppercase
- ]
+ "csv_s3_case_line_delimiter" // csv format table with
special line delimiter
+ ]
def paths = ["s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
@@ -80,51 +81,53 @@ suite("test_broker_load_p2", "p2") {
"s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_lowercase.orc",
"s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc",
"s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc",
+
"s3://doris-build-1308700295/regression/line_delimiter/lineitem_0x7.csv.gz"
]
def columns_list = ["""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """p_partkey, p_name, p_size""",
- """p_partkey""",
- """p_partkey""",
- """p_partkey, p_size""",
- """p_partkey""",
- """p_partkey, p_size""",
- """p_partkey, p_size""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, col1""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, x1""",
- """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- """col1, col2, col3, col4""",
- """p_partkey, p_name, p_mfgr, x1""",
- """p_partkey, p_name, p_mfgr, p_brand""",
- """p_partkey, p_name, p_mfgr, p_brand""",
- """p_name, p_mfgr""",
- """""",
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
- //TODO: comment blow 8 rows after jibing fix
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
-
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid
[...]
- //TODO: uncomment blow 8 rows after jibing fix
- //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin
[...]
- //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN
[...]
- //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin
[...]
- //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN
[...]
- //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN
[...]
- //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin
[...]
- //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGIN
[...]
- //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengin
[...]
- ]
- def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "",
"COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", ""]
- def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "",
"preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", ""]
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
+ """p_partkey, p_name, p_size""",
+ """p_partkey""",
+ """p_partkey""",
+ """p_partkey, p_size""",
+ """p_partkey""",
+ """p_partkey, p_size""",
+ """p_partkey, p_size""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, col1""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, x1""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
+ """col1, col2, col3, col4""",
+ """p_partkey, p_name, p_mfgr, x1""",
+ """p_partkey, p_name, p_mfgr, p_brand""",
+ """p_partkey, p_name, p_mfgr, p_brand""",
+ """p_name, p_mfgr""",
+ """""",
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+ //TODO: comment blow 8 rows after jibing fix
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+ """"""
+ //TODO: uncomment blow 8 rows after jibing fix
+ //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,
[...]
+ //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,
[...]
+ //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,
[...]
+ //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,
[...]
+ //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,
[...]
+ //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,
[...]
+ //
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,
[...]
+ //
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,
[...]
+ ]
+ def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "",
"COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", ""]
+ def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "",
"preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", ""]
def set_values = ["",
"",
"SET(comment=p_comment, retailprice=p_retailprice,
container=p_container, size=p_size, type=p_type, brand=p_brand, mfgr=p_mfgr,
name=p_name, partkey=p_partkey)",
@@ -155,9 +158,12 @@ suite("test_broker_load_p2", "p2") {
"",
"",
"",
+ "",
""
]
- def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where
p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", ""]
+ def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where
p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", ""]
+
+ def line_delimiters = ["", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"\u0007"]
def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
"unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
@@ -190,7 +196,8 @@ suite("test_broker_load_p2", "p2") {
"unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
"unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
"unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- ]
+ "\\N"
+ ]
def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
"cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
@@ -223,50 +230,59 @@ suite("test_broker_load_p2", "p2") {
"cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
"cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
"cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
+ "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0"
]
def error_msg = ["",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "[INTERNAL_ERROR]failed to find default value expr for
slot: x1",
- "",
- "",
- "[INTERNAL_ERROR]failed to find default value expr for
slot: x1",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- ]
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "[INTERNAL_ERROR]failed to find default value expr for
slot: x1",
+ "",
+ "",
+ "[INTERNAL_ERROR]failed to find default value expr for
slot: x1",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ""
+ ]
String ak = getS3AK()
String sk = getS3SK()
String enabled = context.config.otherConfigs.get("enableBrokerLoad")
def do_load_job = {uuid, path, table, columns, column_in_path,
preceding_filter,
- set_value, where_expr ->
- String columns_str = ("$columns" != "") ? "($columns)" : "";
- String format_str = table.startsWith("orc_s3_case") ? "ORC" : "PARQUET"
+ set_value, where_expr, line_delimiter ->
+ String columns_str = ("$columns" != "") ? "($columns)" : "";
+ String format_str
+ if (table.startsWith("orc_s3_case")) {
+ format_str = "ORC"
+ } else if (table.startsWith("csv")) {
+ format_str = "CSV"
+ } else {
+ format_str = "PARQUET"
+ }
sql """
- LOAD LABEL $uuid (
+ LOAD LABEL $uuid (
DATA INFILE("$path")
INTO TABLE $table
FORMAT AS $format_str
@@ -275,6 +291,7 @@ suite("test_broker_load_p2", "p2") {
$preceding_filter
$set_value
$where_expr
+ $line_delimiter
)
WITH S3 (
"AWS_ACCESS_KEY" = "$ak",
@@ -300,7 +317,7 @@ suite("test_broker_load_p2", "p2") {
def uuid = UUID.randomUUID().toString().replace("-", "0")
uuids.add(uuid)
do_load_job.call(uuid, paths[i], table, columns_list[i],
column_in_paths[i], preceding_filters[i],
- set_values[i], where_exprs[i])
+ set_values[i], where_exprs[i], line_delimiters[i])
i++
}
@@ -351,4 +368,3 @@ suite("test_broker_load_p2", "p2") {
}
}
}
-
diff --git
a/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy
b/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy
index 964e2c926a..665730d3d5 100644
---
a/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy
+++
b/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy
@@ -29,7 +29,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"set7",
"null_default",
"filter",
- // "path_column",
+ "path_column",
"parquet_s3_case1", // col1 not in file but in table, will
load default value for it.
"parquet_s3_case2", // x1 not in file, not in table, will
throw "col not found" error.
"parquet_s3_case3", // p_comment not in table but in file,
load normally.
@@ -48,6 +48,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"orc_s3_case7", // table column uppercase * load column
lowercase * orc file lowercase
"orc_s3_case8", // table column uppercase * load column
uppercase * orc file uppercase
"orc_s3_case9", // table column uppercase * load column
lowercase * orc file uppercase
+ "csv_s3_case_line_delimiter" // csv format table with
special line delimiter
]
def paths = ["s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
@@ -61,7 +62,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
- //
"s3://doris-build-1308700295/regression/load/data/path/*/part*",
+
"s3://doris-build-1308700295/regression/load/data/path/*/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
"s3://doris-build-1308700295/regression/load/data/part*",
@@ -80,6 +81,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_lowercase.orc",
"s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc",
"s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc",
+
"s3://doris-build-1308700295/regression/line_delimiter/lineitem_0x7.csv.gz"
]
def columns_list = ["""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
"""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
@@ -93,7 +95,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"""p_partkey, p_size""",
"""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
"""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
- // """p_partkey, p_name, p_mfgr, p_brand, p_type,
p_size, p_container, p_retailprice, p_comment""",
+ """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
"""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, col1""",
"""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment, x1""",
"""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size,
p_container, p_retailprice, p_comment""",
@@ -113,6 +115,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,adveng
[...]
+ """"""
//TODO: uncomment blow 8 rows after jibing fix
//
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,
[...]
//
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,
[...]
@@ -123,8 +126,8 @@ suite("test_tvf_based_broker_load_p2", "p2") {
//
"""WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,
[...]
//
"""watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,
[...]
]
- def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", ""/*,
"COLUMNS FROM PATH AS (city)"*/, "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", ""]
- def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "",
"preceding filter p_size < 10"/*, ""*/, "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", ""]
+ def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "",
"COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", ""]
+ def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "",
"preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", ""]
def set_values = ["",
"",
"SET(comment=p_comment, retailprice=p_retailprice,
container=p_container, size=p_size, type=p_type, brand=p_brand, mfgr=p_mfgr,
name=p_name, partkey=p_partkey)",
@@ -137,7 +140,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"set(partsize = p_partkey + p_size)",
"",
"",
- // "",
+ "",
"",
"",
"",
@@ -155,116 +158,27 @@ suite("test_tvf_based_broker_load_p2", "p2") {
"",
"",
"",
+ "",
""
]
- def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where
p_partkey>10"/*, ""*/, "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", ""]
-
- def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=163706; dpp.abnorm.ALL=0;
dpp.norm.ALL=36294",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "\\N",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "\\N",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=4096",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=100000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000",
- ]
-
- def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400;
max_filter_ratio:0.0",
- ]
-
- def error_msg = ["",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "[INTERNAL_ERROR]failed to find default value expr for
slot: x1",
- "",
- "",
- "[INTERNAL_ERROR]failed to find default value expr for
slot: x1",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- ]
+ def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where
p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "",""]
+ def line_delimiters = ["", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"\u0007"]
String ak = getS3AK()
String sk = getS3SK()
String enabled = context.config.otherConfigs.get("enableBrokerLoad")
def do_load_job = { uuid, path, table, columns, column_in_path,
preceding_filter,
- set_value, where_expr ->
+ set_value, where_expr, line_delimiter ->
String columns_str = ("$columns" != "") ? "($columns)" : "";
- String format_str = table.startsWith("orc_s3_case") ? "ORC" : "PARQUET"
+ String format_str
+ if (table.startsWith("orc_s3_case")) {
+ format_str = "ORC"
+ } else if (table.startsWith("csv")) {
+ format_str = "CSV"
+ } else {
+ format_str = "PARQUET"
+ }
sql """
LOAD LABEL $uuid (
DATA INFILE("$path")
@@ -275,6 +189,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
$preceding_filter
$set_value
$where_expr
+ $line_delimiter
)
WITH S3 (
"AWS_ACCESS_KEY" = "$ak",
@@ -299,7 +214,7 @@ suite("test_tvf_based_broker_load_p2", "p2") {
def uuid = UUID.randomUUID().toString().replace("-", "0")
uuids.add(uuid)
do_load_job.call(uuid, paths[i], table, columns_list[i],
column_in_paths[i], preceding_filters[i],
- set_values[i], where_exprs[i])
+ set_values[i], where_exprs[i], line_delimiters[i])
i++
}
@@ -325,4 +240,4 @@ suite("test_tvf_based_broker_load_p2", "p2") {
sql """ set enable_unified_load=false; """
}
}
-}
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]