This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 31603089082 Limit the max string length to 1024 while collecting
column stats to control BE memory usage. (#32470) (#33084)
31603089082 is described below
commit 31603089082ca2a511227473727a6756f9b85e1d
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Apr 1 12:01:23 2024 +0800
Limit the max string length to 1024 while collecting column stats to
control BE memory usage. (#32470) (#33084)
---
.../apache/doris/statistics/BaseAnalysisTask.java | 24 ++++++++++++++++++++++
.../apache/doris/statistics/HMSAnalysisTask.java | 6 +++++-
.../apache/doris/statistics/OlapAnalysisTask.java | 6 +++++-
.../doris/statistics/OlapAnalysisTaskTest.java | 12 +++++------
4 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 68767843507..f871e8761a5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -82,6 +82,30 @@ public abstract class BaseAnalysisTask {
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index}
${sampleHints} ${limit}";
+ protected static final String DUJ1_ANALYZE_STRING_TEMPLATE = "SELECT "
+ + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
+ + "${catalogId} AS `catalog_id`, "
+ + "${dbId} AS `db_id`, "
+ + "${tblId} AS `tbl_id`, "
+ + "${idxId} AS `idx_id`, "
+ + "'${colId}' AS `col_id`, "
+ + "NULL AS `part_id`, "
+ + "${rowCount} AS `row_count`, "
+ + "${ndvFunction} as `ndv`, "
+ + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0)
* ${scaleFactor} as `null_count`, "
+ + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ + "NOW() "
+ + "FROM ( "
+ + " SELECT t0.`colValue` as `column_key`, COUNT(1) as `count` "
+ + " FROM "
+ + " (SELECT SUBSTRING(CAST(`${colName}` AS STRING), 1, 1024) AS
`colValue` "
+ + " FROM `${catalogName}`.`${dbName}`.`${tblName}`
${index} "
+ + " ${sampleHints} ${limit}) as `t0` "
+ + " GROUP BY `t0`.`colValue` "
+ + ") as `t1` ";
+
protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT "
+ "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
+ "${catalogId} AS `catalog_id`, "
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 1fe827420c9..a90308a8000 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -148,7 +148,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
params.put("ndvFunction", "ROUND(NDV(`${colName}`) *
${scaleFactor})");
params.put("rowCount", "ROUND(count(1) * ${scaleFactor})");
} else {
- sb.append(DUJ1_ANALYZE_TEMPLATE);
+ if (col.getType().isStringType()) {
+ sb.append(DUJ1_ANALYZE_STRING_TEMPLATE);
+ } else {
+ sb.append(DUJ1_ANALYZE_TEMPLATE);
+ }
params.put("dataSizeFunction", getDataSizeFunction(col, true));
params.put("ndvFunction", getNdvFunction("ROUND(SUM(t1.count)
* ${scaleFactor})"));
params.put("rowCount", "ROUND(SUM(t1.count) *
${scaleFactor})");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index e6dd46e9fc7..60a3528afc8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -163,7 +163,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
sql = stringSubstitutor.replace(LINEAR_ANALYZE_TEMPLATE);
} else {
params.put("dataSizeFunction", getDataSizeFunction(col, true));
- sql = stringSubstitutor.replace(DUJ1_ANALYZE_TEMPLATE);
+ if (col.getType().isStringType()) {
+ sql =
stringSubstitutor.replace(DUJ1_ANALYZE_STRING_TEMPLATE);
+ } else {
+ sql = stringSubstitutor.replace(DUJ1_ANALYZE_TEMPLATE);
+ }
}
LOG.info("Sample for column [{}]. Total rows [{}], rows to sample
[{}], scale factor [{}], "
+ "limited [{}], distribute column [{}], partition column
[{}], key column [{}], "
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 5d9d57406a3..75506b1c85a 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -159,11 +159,10 @@ public class OlapAnalysisTaskTest {
+ " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as
`null_count`, "
+ "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`,"
+ " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
- + "SUM(LENGTH(`column_key`) * count) * 5.0 AS
`data_size`, NOW() "
+ + "SUM(t1.count) * 4 * 5.0 AS `data_size`, NOW() "
+ "FROM ( SELECT t0.`${colName}` as `column_key`,
COUNT(1) "
- + "as `count` FROM (SELECT `${colName}` FROM "
- + "`catalogName`.`${dbName}`.`${tblName}` "
- + " limit 100) as `t0` GROUP BY `t0`.`${colName}`
) as `t1` ", sql);
+ + "as `count` FROM (SELECT `${colName}` FROM
`catalogName`.`${dbName}`.`${tblName}`"
+ + " limit 100) as `t0` GROUP BY
`t0`.`${colName}` ) as `t1` ", sql);
return;
}
};
@@ -183,7 +182,7 @@ public class OlapAnalysisTaskTest {
};
OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask();
- olapAnalysisTask.col = new Column("test", PrimitiveType.STRING);
+ olapAnalysisTask.col = new Column("test", PrimitiveType.INT);
olapAnalysisTask.tbl = tableIf;
AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
@@ -322,7 +321,8 @@ public class OlapAnalysisTaskTest {
+ "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, "
+ "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
+ "SUM(LENGTH(`column_key`) * count) * 5.0 AS
`data_size`, NOW() "
- + "FROM ( SELECT t0.`${colName}` as `column_key`,
COUNT(1) as `count` FROM (SELECT `${colName}` FROM
`catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY
`t0`.`${colName}` ) as `t1` ", sql);
+ + "FROM ( SELECT t0.`colValue` as `column_key`,
COUNT(1) as `count` FROM "
+ + "(SELECT SUBSTRING(CAST(`${colName}` AS STRING), 1,
1024) AS `colValue` FROM `catalogName`.`${dbName}`.`${tblName}`
limit 100) as `t0` GROUP BY `t0`.`colValue` ) as `t1` ", sql);
return;
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]