This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d2efc619b0 [Enhancement](statistics) Show histogram statistics, show
specified column statistics (#18657)
d2efc619b0 is described below
commit d2efc619b0aa4c18c290044bc53a5ef83d37f6b1
Author: ElvinWei <[email protected]>
AuthorDate: Fri Apr 14 22:36:40 2023 +0800
[Enhancement](statistics) Show histogram statistics, show specified column
statistics (#18657)
---
fe/fe-core/src/main/cup/sql_parser.cup | 9 ++-
...olumnStatsStmt.java => ShowColumnHistStmt.java} | 83 +++++++++++++---------
.../apache/doris/analysis/ShowColumnStatsStmt.java | 41 +++++++++--
.../java/org/apache/doris/qe/ShowExecutor.java | 30 +++++---
.../java/org/apache/doris/statistics/Bucket.java | 51 +++++--------
.../org/apache/doris/statistics/Histogram.java | 26 +++++--
.../apache/doris/statistics/HistogramBuilder.java | 2 +-
.../doris/statistics/StatisticsRepository.java | 26 ++++++-
.../org/apache/doris/statistics/HistogramTest.java | 28 ++++----
9 files changed, 190 insertions(+), 106 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index 9fdeaab511..a860b74d4f 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -3970,9 +3970,14 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show column stats */
- | KW_COLUMN KW_STATS table_name:tbl opt_partition_names:partitionNames
+ | KW_COLUMN KW_STATS table_name:tbl opt_col_list:cols
opt_partition_names:partitionNames
{:
- RESULT = new ShowColumnStatsStmt(tbl, partitionNames);
+ RESULT = new ShowColumnStatsStmt(tbl, cols, partitionNames);
+ :}
+ /* show column histogram */
+ | KW_COLUMN KW_HISTOGRAM table_name:tbl opt_col_list:cols
+ {:
+ RESULT = new ShowColumnHistStmt(tbl, cols);
:}
/* show table creation statement */
| KW_TABLE KW_CREATION opt_db:db opt_wild_where
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnHistStmt.java
similarity index 61%
copy from
fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
copy to
fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnHistStmt.java
index 557a5a8590..20b5dbbd0d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnHistStmt.java
@@ -22,46 +22,49 @@ import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
-import org.apache.doris.common.AnalysisException;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.mysql.privilege.PrivPredicate;
+import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.qe.ShowResultSetMetaData;
-import org.apache.doris.statistics.ColumnStatistic;
+import org.apache.doris.statistics.Histogram;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
-public class ShowColumnStatsStmt extends ShowStmt {
+public class ShowColumnHistStmt extends ShowStmt {
private static final ImmutableList<String> TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("column_name")
- .add("count")
- .add("ndv")
- .add("num_null")
- .add("data_size")
- .add("avg_size_byte")
- .add("min")
- .add("max")
- .add("min_expr")
- .add("max_expr")
+ .add("data_type")
+ .add("sample_rate")
+ .add("num_buckets")
+ .add("buckets")
.build();
private final TableName tableName;
- private final PartitionNames partitionNames;
+ private final List<String> columnNames;
private TableIf table;
- public ShowColumnStatsStmt(TableName tableName, PartitionNames
partitionNames) {
+ public ShowColumnHistStmt(TableName tableName, List<String> columnNames) {
this.tableName = tableName;
- this.partitionNames = partitionNames;
+ this.columnNames = columnNames;
}
public TableName getTableName() {
@@ -72,12 +75,7 @@ public class ShowColumnStatsStmt extends ShowStmt {
public void analyze(Analyzer analyzer) throws UserException {
super.analyze(analyzer);
tableName.analyze(analyzer);
- if (partitionNames != null) {
- partitionNames.analyze(analyzer);
- if (partitionNames.getPartitionNames().size() > 1) {
- throw new AnalysisException("Only one partition name could be
specified");
- }
- }
+
// disallow external catalog
Util.prohibitExternalCatalog(tableName.getCtl(),
this.getClass().getSimpleName());
CatalogIf<DatabaseIf> catalog =
Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl());
@@ -92,6 +90,23 @@ public class ShowColumnStatsStmt extends ShowStmt {
if (table == null) {
ErrorReport.reportAnalysisException("Table: {} not exists",
tableName.getTbl());
}
+
+ if (!Env.getCurrentEnv().getAccessManager()
+ .checkTblPriv(ConnectContext.get(), tableName.getDb(),
tableName.getTbl(), PrivPredicate.SHOW)) {
+
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
"Permission denied",
+ ConnectContext.get().getQualifiedUser(),
ConnectContext.get().getRemoteIP(),
+ tableName.getDb() + ": " + tableName.getTbl());
+ }
+
+ if (columnNames != null) {
+ Optional<Column> nullColumn = columnNames.stream()
+ .map(table::getColumn)
+ .filter(Objects::isNull)
+ .findFirst();
+ if (nullColumn.isPresent()) {
+ ErrorReport.reportAnalysisException("Column: {} not exists",
nullColumn.get());
+ }
+ }
}
@Override
@@ -108,29 +123,29 @@ public class ShowColumnStatsStmt extends ShowStmt {
return table;
}
- public ShowResultSet constructResultSet(List<Pair<String,
ColumnStatistic>> columnStatistics) {
+ public ShowResultSet constructResultSet(List<Pair<String, Histogram>>
columnStatistics) {
List<List<String>> result = Lists.newArrayList();
columnStatistics.forEach(p -> {
- if (p.second == ColumnStatistic.UNKNOWN) {
+ if (p.second == null || p.second.dataType == Type.NULL) {
return;
}
List<String> row = Lists.newArrayList();
row.add(p.first);
- row.add(String.valueOf(p.second.count));
- row.add(String.valueOf(p.second.ndv));
- row.add(String.valueOf(p.second.numNulls));
- row.add(String.valueOf(p.second.dataSize));
- row.add(String.valueOf(p.second.avgSizeByte));
- row.add(String.valueOf(p.second.minValue));
- row.add(String.valueOf(p.second.maxValue));
- row.add(String.valueOf(p.second.minExpr == null ? "N/A" :
p.second.minExpr.toSql()));
- row.add(String.valueOf(p.second.maxExpr == null ? "N/A" :
p.second.maxExpr.toSql()));
+ row.add(String.valueOf(p.second.dataType));
+ row.add(String.valueOf(p.second.sampleRate));
+ row.add(String.valueOf(p.second.numBuckets));
+ row.add(Histogram.getBucketsJson(p.second.buckets).toString());
result.add(row);
});
+
return new ShowResultSet(getMetaData(), result);
}
- public PartitionNames getPartitionNames() {
- return partitionNames;
+ public Set<String> getColumnNames() {
+ if (columnNames != null) {
+ return Sets.newHashSet(columnNames);
+ }
+ return table.getColumns().stream()
+ .map(Column::getName).collect(Collectors.toSet());
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index 557a5a8590..fd67316df4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -23,19 +23,27 @@ import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.mysql.privilege.PrivPredicate;
+import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.ColumnStatistic;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
public class ShowColumnStatsStmt extends ShowStmt {
@@ -49,18 +57,18 @@ public class ShowColumnStatsStmt extends ShowStmt {
.add("avg_size_byte")
.add("min")
.add("max")
- .add("min_expr")
- .add("max_expr")
.build();
private final TableName tableName;
+ private final List<String> columnNames;
private final PartitionNames partitionNames;
private TableIf table;
- public ShowColumnStatsStmt(TableName tableName, PartitionNames
partitionNames) {
+ public ShowColumnStatsStmt(TableName tableName, List<String> columnNames,
PartitionNames partitionNames) {
this.tableName = tableName;
+ this.columnNames = columnNames;
this.partitionNames = partitionNames;
}
@@ -92,6 +100,23 @@ public class ShowColumnStatsStmt extends ShowStmt {
if (table == null) {
ErrorReport.reportAnalysisException("Table: {} not exists",
tableName.getTbl());
}
+
+ if (!Env.getCurrentEnv().getAccessManager()
+ .checkTblPriv(ConnectContext.get(), tableName.getDb(),
tableName.getTbl(), PrivPredicate.SHOW)) {
+
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
"Permission denied",
+ ConnectContext.get().getQualifiedUser(),
ConnectContext.get().getRemoteIP(),
+ tableName.getDb() + ": " + tableName.getTbl());
+ }
+
+ if (columnNames != null) {
+ Optional<Column> nullColumn = columnNames.stream()
+ .map(table::getColumn)
+ .filter(Objects::isNull)
+ .findFirst();
+ if (nullColumn.isPresent()) {
+ ErrorReport.reportAnalysisException("Column: {} not exists",
nullColumn.get());
+ }
+ }
}
@Override
@@ -121,8 +146,6 @@ public class ShowColumnStatsStmt extends ShowStmt {
row.add(String.valueOf(p.second.numNulls));
row.add(String.valueOf(p.second.dataSize));
row.add(String.valueOf(p.second.avgSizeByte));
- row.add(String.valueOf(p.second.minValue));
- row.add(String.valueOf(p.second.maxValue));
row.add(String.valueOf(p.second.minExpr == null ? "N/A" :
p.second.minExpr.toSql()));
row.add(String.valueOf(p.second.maxExpr == null ? "N/A" :
p.second.maxExpr.toSql()));
result.add(row);
@@ -133,4 +156,12 @@ public class ShowColumnStatsStmt extends ShowStmt {
public PartitionNames getPartitionNames() {
return partitionNames;
}
+
+ public Set<String> getColumnNames() {
+ if (columnNames != null) {
+ return Sets.newHashSet(columnNames);
+ }
+ return table.getColumns().stream()
+ .map(Column::getName).collect(Collectors.toSet());
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 0c240e525f..4c9757616e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -36,6 +36,7 @@ import org.apache.doris.analysis.ShowCatalogRecycleBinStmt;
import org.apache.doris.analysis.ShowCatalogStmt;
import org.apache.doris.analysis.ShowClusterStmt;
import org.apache.doris.analysis.ShowCollationStmt;
+import org.apache.doris.analysis.ShowColumnHistStmt;
import org.apache.doris.analysis.ShowColumnStatsStmt;
import org.apache.doris.analysis.ShowColumnStmt;
import org.apache.doris.analysis.ShowCreateCatalogStmt;
@@ -185,6 +186,7 @@ import org.apache.doris.mtmv.metadata.MTMVJob;
import org.apache.doris.mtmv.metadata.MTMVTask;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.statistics.ColumnStatistic;
+import org.apache.doris.statistics.Histogram;
import org.apache.doris.statistics.StatisticsRepository;
import org.apache.doris.system.Backend;
import org.apache.doris.system.Diagnoser;
@@ -378,6 +380,8 @@ public class ShowExecutor {
handleShowSqlBlockRule();
} else if (stmt instanceof ShowColumnStatsStmt) {
handleShowColumnStats();
+ } else if (stmt instanceof ShowColumnHistStmt) {
+ handleShowColumnHist();
} else if (stmt instanceof ShowTableCreationStmt) {
handleShowTableCreation();
} else if (stmt instanceof ShowLastInsertStmt) {
@@ -2301,20 +2305,15 @@ public class ShowExecutor {
ShowColumnStatsStmt showColumnStatsStmt = (ShowColumnStatsStmt) stmt;
TableName tableName = showColumnStatsStmt.getTableName();
TableIf tableIf = showColumnStatsStmt.getTable();
- if (!Env.getCurrentEnv().getAccessManager()
- .checkTblPriv(ConnectContext.get(), tableName.getDb(),
tableName.getTbl(), PrivPredicate.SHOW)) {
-
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
"Permission denied",
- ConnectContext.get().getQualifiedUser(),
ConnectContext.get().getRemoteIP(),
- tableName.getDb() + ": " + tableName.getTbl());
- }
List<Pair<String, ColumnStatistic>> columnStatistics = new
ArrayList<>();
+ Set<String> columnNames = showColumnStatsStmt.getColumnNames();
PartitionNames partitionNames =
showColumnStatsStmt.getPartitionNames();
- for (Column column : tableIf.getColumns()) {
- String colName = column.getName();
+
+ for (String colName : columnNames) {
if (partitionNames == null) {
ColumnStatistic columnStatistic =
StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), colName);
- columnStatistics.add(Pair.of(column.getName(),
columnStatistic));
+ columnStatistics.add(Pair.of(colName, columnStatistic));
} else {
columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName,
colName,
showColumnStatsStmt.getPartitionNames().getPartitionNames())
@@ -2326,6 +2325,19 @@ public class ShowExecutor {
resultSet = showColumnStatsStmt.constructResultSet(columnStatistics);
}
+ public void handleShowColumnHist() {
+ ShowColumnHistStmt showColumnHistStmt = (ShowColumnHistStmt) stmt;
+ TableIf tableIf = showColumnHistStmt.getTable();
+ Set<String> columnNames = showColumnHistStmt.getColumnNames();
+
+ List<Pair<String, Histogram>> columnStatistics = columnNames.stream()
+ .map(colName -> Pair.of(colName,
+
StatisticsRepository.queryColumnHistogramByName(tableIf.getId(), colName)))
+ .collect(Collectors.toList());
+
+ resultSet = showColumnHistStmt.constructResultSet(columnStatistics);
+ }
+
public void handleShowSqlBlockRule() throws AnalysisException {
ShowSqlBlockRuleStmt showStmt = (ShowSqlBlockRuleStmt) stmt;
List<List<String>> rows = Lists.newArrayList();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
index 8a63b4b31d..3137ffbde2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
@@ -17,6 +17,7 @@
package org.apache.doris.statistics;
+import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;
@@ -31,6 +32,10 @@ public class Bucket {
public double preSum;
public double ndv;
+ // For display only.
+ public LiteralExpr lowerExpr;
+ public LiteralExpr upperExpr;
+
public Bucket() {
}
@@ -42,44 +47,15 @@ public class Bucket {
this.ndv = ndv;
}
- public double getLower() {
- return lower;
- }
-
- public void setLower(double lower) {
+ public Bucket(double lower, double upper, double count, double preSum,
double ndv,
+ LiteralExpr lowerExpr, LiteralExpr upperExpr) {
this.lower = lower;
- }
-
- public double getUpper() {
- return upper;
- }
-
- public void setUpper(double upper) {
this.upper = upper;
- }
-
- public double getCount() {
- return count;
- }
-
- public void setCount(int count) {
this.count = count;
- }
-
- public double getPreSum() {
- return preSum;
- }
-
- public void setPreSum(int preSum) {
this.preSum = preSum;
- }
-
- public double getNdv() {
- return ndv;
- }
-
- public void setNdv(int ndv) {
this.ndv = ndv;
+ this.lowerExpr = lowerExpr;
+ this.upperExpr = upperExpr;
}
public static Bucket deserializeFromJson(Type datatype, String json)
throws AnalysisException {
@@ -90,6 +66,11 @@ public class Bucket {
bucket.count = bucketJson.get("count").getAsInt();
bucket.preSum = bucketJson.get("pre_sum").getAsInt();
bucket.ndv = bucketJson.get("ndv").getAsInt();
+
+ // LowerExpr and upperExpr for display only.
+ bucket.lowerExpr = StatisticsUtil.readableValue(datatype,
bucketJson.get("lower").getAsString());
+ bucket.upperExpr = StatisticsUtil.readableValue(datatype,
bucketJson.get("upper").getAsString());
+
return bucket;
}
@@ -99,8 +80,8 @@ public class Bucket {
}
JsonObject bucketJson = new JsonObject();
- bucketJson.addProperty("upper", bucket.upper);
- bucketJson.addProperty("lower", bucket.lower);
+ bucketJson.addProperty("lower_expr",
bucket.lowerExpr.getStringValue());
+ bucketJson.addProperty("upper_expr",
bucket.upperExpr.getStringValue());
bucketJson.addProperty("count", bucket.count);
bucketJson.addProperty("pre_sum", bucket.preSum);
bucketJson.addProperty("ndv", bucket.ndv);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
index d9324975d4..0592a241e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
@@ -33,6 +33,7 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.parquet.Strings;
+import java.util.Collections;
import java.util.List;
public class Histogram {
@@ -44,13 +45,18 @@ public class Histogram {
public final List<Bucket> buckets;
- public Histogram(Type dataType, double sampleRate, List<Bucket> buckets) {
+ public final int numBuckets;
+
+ public Histogram(Type dataType, double sampleRate, int numBuckets,
List<Bucket> buckets) {
this.dataType = dataType;
this.sampleRate = sampleRate;
+ this.numBuckets = numBuckets;
this.buckets = buckets;
}
-
+ public static Histogram UNKNOWN = new
HistogramBuilder().setDataType(Type.NULL)
+
.setSampleRate(0).setNumBuckets(0).setBuckets(Collections.emptyList())
+ .build();
// TODO: use thrift
public static Histogram fromResultRow(ResultRow resultRow) {
@@ -151,18 +157,26 @@ public class Histogram {
histogramJson.addProperty("sample_rate", histogram.sampleRate);
histogramJson.addProperty("num_buckets", histogram.buckets.size());
- JsonArray bucketsJsonArray = new JsonArray();
-
histogram.buckets.stream().map(Bucket::serializeToJsonObj).forEach(bucketsJsonArray::add);
- histogramJson.add("buckets", bucketsJsonArray);
+ JsonArray bucketsJson = getBucketsJson(histogram.buckets);
+ histogramJson.add("buckets", bucketsJson);
return histogramJson.toString();
}
+ public static JsonArray getBucketsJson(List<Bucket> buckets) {
+ if (buckets == null) {
+ return null;
+ }
+ JsonArray bucketsJsonArray = new JsonArray();
+
buckets.stream().map(Bucket::serializeToJsonObj).forEach(bucketsJsonArray::add);
+ return bucketsJsonArray;
+ }
+
public double size() {
if (CollectionUtils.isEmpty(buckets)) {
return 0;
}
Bucket lastBucket = buckets.get(buckets.size() - 1);
- return lastBucket.getPreSum() + lastBucket.getCount();
+ return lastBucket.preSum + lastBucket.count;
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java
index 41ce66b94d..3ffc79bfb1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java
@@ -64,6 +64,6 @@ public class HistogramBuilder {
}
public Histogram build() {
- return new Histogram(dataType, sampleRate, buckets);
+ return new Histogram(dataType, sampleRate, numBuckets, buckets);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 865dd3ca2c..4ce673a063 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -56,6 +56,9 @@ public class StatisticsRepository {
private static final String FULL_QUALIFIED_COLUMN_STATISTICS_NAME =
FULL_QUALIFIED_DB_NAME + "."
+ "`" + StatisticConstants.STATISTIC_TBL_NAME + "`";
+ private static final String FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME =
FULL_QUALIFIED_DB_NAME + "."
+ + "`" + StatisticConstants.HISTOGRAM_TBL_NAME + "`";
+
private static final String FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME =
FULL_QUALIFIED_DB_NAME + "."
+ "`" + StatisticConstants.ANALYSIS_JOB_TABLE + "`";
@@ -67,6 +70,10 @@ public class StatisticsRepository {
+ FULL_QUALIFIED_COLUMN_STATISTICS_NAME
+ " WHERE `id` IN (${idList})";
+ private static final String FETCH_COLUMN_HISTOGRAM_TEMPLATE = "SELECT *
FROM "
+ + FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME
+ + " WHERE `id` = '${id}'";
+
private static final String PERSIST_ANALYSIS_TASK_SQL_TEMPLATE = "INSERT
INTO "
+ FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME + " VALUES(${jobId},
${taskId}, '${catalogName}', '${dbName}',"
+ "'${tblName}','${colName}', '${indexId}','${jobType}',
'${analysisType}', "
@@ -121,10 +128,19 @@ public class StatisticsRepository {
}
public static ResultRow queryColumnStatisticById(long tblId, String
colName) {
+ return queryColumnStatisticById(tblId, colName, false);
+ }
+
+ public static ResultRow queryColumnHistogramById(long tblId, String
colName) {
+ return queryColumnStatisticById(tblId, colName, true);
+ }
+
+ private static ResultRow queryColumnStatisticById(long tblId, String
colName, boolean isHistogram) {
Map<String, String> map = new HashMap<>();
String id = constructId(tblId, -1, colName);
map.put("id", id);
- List<ResultRow> rows =
StatisticsUtil.executeQuery(FETCH_COLUMN_STATISTIC_TEMPLATE, map);
+ List<ResultRow> rows = isHistogram ?
StatisticsUtil.executeQuery(FETCH_COLUMN_HISTOGRAM_TEMPLATE, map) :
+ StatisticsUtil.executeQuery(FETCH_COLUMN_STATISTIC_TEMPLATE,
map);
int size = rows.size();
if (size > 1) {
throw new IllegalStateException(String.format("id: %s should be
unique, but return more than one row", id));
@@ -143,6 +159,14 @@ public class StatisticsRepository {
return rows == null ? Collections.emptyList() : rows;
}
+ public static Histogram queryColumnHistogramByName(long tableId, String
colName) {
+ ResultRow resultRow = queryColumnHistogramById(tableId, colName);
+ if (resultRow == null) {
+ return Histogram.UNKNOWN;
+ }
+ return Histogram.fromResultRow(resultRow);
+ }
+
private static String constructId(Object... params) {
StringJoiner stringJoiner = new StringJoiner("-");
for (Object param : params) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
index 515e3c0d3f..b5ca8d8095 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
@@ -21,11 +21,11 @@ import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
+import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
-import org.apache.commons.math3.util.Precision;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -69,18 +69,17 @@ class HistogramTest {
List<Bucket> buckets = histogramUnderTest.buckets;
Assertions.assertEquals(5, buckets.size());
- double expectedLower = LiteralExpr.create("2022-09-21 17:30:29",
-
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))).getDoubleValue();
- double expectedUpper = LiteralExpr.create("2022-09-21 22:30:29",
-
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))).getDoubleValue();
+ LiteralExpr expectedLower = LiteralExpr.create("2022-09-21 17:30:29",
+
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
+ LiteralExpr expectedUpper = LiteralExpr.create("2022-09-21 22:30:29",
+
Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME)));
boolean flag = false;
for (Bucket bucket : buckets) {
- double lower = bucket.getLower();
- double upper = bucket.getUpper();
- if (Precision.equals(expectedLower, lower, 0.01)
- && Precision.equals(expectedUpper, upper, 0.01)) {
+ LiteralExpr lower = bucket.lowerExpr;
+ LiteralExpr upper = bucket.upperExpr;
+ if (expectedLower.equals(lower) && expectedUpper.equals(upper)) {
flag = true;
break;
}
@@ -96,6 +95,8 @@ class HistogramTest {
String typeStr = histogramJson.get("data_type").getAsString();
Assertions.assertEquals("DATETIME", typeStr);
+ Type datatype = Type.fromPrimitiveType(PrimitiveType.valueOf(typeStr));
+ Assertions.assertNotNull(datatype);
int numBuckets = histogramJson.get("num_buckets").getAsInt();
Assertions.assertEquals(5, numBuckets);
@@ -116,13 +117,14 @@ class HistogramTest {
for (int i = 0; i < jsonArray.size(); i++) {
JsonObject bucketJson = jsonArray.get(i).getAsJsonObject();
- double lower = bucketJson.get("lower").getAsDouble();
- double upper = bucketJson.get("upper").getAsDouble();
+ LiteralExpr lower = StatisticsUtil.readableValue(datatype,
+ bucketJson.get("lower_expr").getAsString());
+ LiteralExpr upper = StatisticsUtil.readableValue(datatype,
+ bucketJson.get("upper_expr").getAsString());
int count = bucketJson.get("count").getAsInt();
int preSum = bucketJson.get("pre_sum").getAsInt();
int ndv = bucketJson.get("ndv").getAsInt();
- if (Precision.equals(expectedLower.getDoubleValue(), lower, 0.01)
- && Precision.equals(expectedUpper.getDoubleValue(), upper,
0.01)
+ if (expectedLower.equals(lower) && expectedUpper.equals(upper)
&& count == 9 && preSum == 0 && ndv == 1) {
flag = true;
break;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]