This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new bf0d060578f branch-3.0: [improvement](statistics)Support auto analyze
columns that haven't been analyzed for a long time. #42399 (#45281)
bf0d060578f is described below
commit bf0d060578fc910a82096e19d6c0e7ac930d5828
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 11 16:48:25 2024 +0800
branch-3.0: [improvement](statistics)Support auto analyze columns that
haven't been analyzed for a long time. #42399 (#45281)
Cherry-picked from #42399
Co-authored-by: James <[email protected]>
---
.../main/java/org/apache/doris/common/Config.java | 2 +-
.../doris/analysis/ShowAutoAnalyzeJobsStmt.java | 4 +-
.../apache/doris/analysis/ShowColumnStatsStmt.java | 2 +
.../apache/doris/analysis/ShowTableStatsStmt.java | 9 +-
.../java/org/apache/doris/catalog/OlapTable.java | 11 --
.../org/apache/doris/statistics/AnalysisInfo.java | 9 +-
.../doris/statistics/AnalysisInfoBuilder.java | 9 +-
.../apache/doris/statistics/AnalysisManager.java | 7 +-
.../org/apache/doris/statistics/ColStatsMeta.java | 9 +-
.../org/apache/doris/statistics/JobPriority.java | 1 +
.../doris/statistics/StatisticsAutoCollector.java | 40 +++--
.../doris/statistics/StatisticsJobAppender.java | 66 +++++---
.../doris/statistics/StatisticsRepository.java | 3 +-
.../apache/doris/statistics/TableStatsMeta.java | 10 +-
.../doris/statistics/util/StatisticsUtil.java | 52 +++++-
.../statistics/StatisticsJobAppenderTest.java | 175 ++++++++++++++++-----
.../doris/statistics/util/StatisticsUtilTest.java | 168 ++++++++++++++++----
.../suites/statistics/analyze_stats.groovy | 33 +++-
18 files changed, 472 insertions(+), 138 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 568cd7c817e..fa98487d19f 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2903,7 +2903,7 @@ public class Config extends ConfigBase {
"Columns that have not been collected within the specified
interval will trigger automatic analyze. "
+ "0 means not trigger."
})
- public static long auto_analyze_interval_seconds = 0;
+ public static long auto_analyze_interval_seconds = 86400; // 24 hours.
//==========================================================================
// begin of cloud config
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java
index 9b07796df78..7cff1e2b949 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java
@@ -40,7 +40,7 @@ import com.google.common.collect.ImmutableList;
* [TABLE]
* [
* WHERE
- * [PRIORITY = ["HIGH"|"MID"|"LOW"]]
+ * [PRIORITY = ["HIGH"|"MID"|"LOW"|"VERY_LOW"]]
* ]
*/
public class ShowAutoAnalyzeJobsStmt extends ShowStmt implements
NotFallbackInParser {
@@ -175,7 +175,7 @@ public class ShowAutoAnalyzeJobsStmt extends ShowStmt
implements NotFallbackInPa
if (!valid) {
throw new AnalysisException("Where clause should looks like: "
- + "PRIORITY = \"HIGH|MID|LOW\"");
+ + "PRIORITY = \"HIGH|MID|LOW|VERY_LOW\"");
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index 354b57bc55c..d180ef0d807 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -70,6 +70,7 @@ public class ShowColumnStatsStmt extends ShowStmt implements
NotFallbackInParser
.add("updated_time")
.add("update_rows")
.add("last_analyze_row_count")
+ .add("last_analyze_version")
.build();
private static final ImmutableList<String> PARTITION_COLUMN_TITLE_NAMES =
@@ -185,6 +186,7 @@ public class ShowColumnStatsStmt extends ShowStmt
implements NotFallbackInParser
row.add(String.valueOf(p.second.updatedTime));
row.add(String.valueOf(colStatsMeta == null ? "N/A" :
colStatsMeta.updatedRows));
row.add(String.valueOf(colStatsMeta == null ? "N/A" :
colStatsMeta.rowCount));
+ row.add(String.valueOf(colStatsMeta == null ? "N/A" :
colStatsMeta.tableVersion));
result.add(row);
});
return new ShowResultSet(getMetaData(), result);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index ccffee3086d..ea9b96d0afe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -62,6 +62,7 @@ public class ShowTableStatsStmt extends ShowStmt implements
NotFallbackInParser
.add("new_partition")
.add("user_inject")
.add("enable_auto_analyze")
+ .add("last_analyze_time")
.build();
private static final ImmutableList<String> PARTITION_TITLE_NAMES =
@@ -230,6 +231,7 @@ public class ShowTableStatsStmt extends ShowStmt implements
NotFallbackInParser
row.add("");
row.add("");
row.add(String.valueOf(table.autoAnalyzeEnabled()));
+ row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
@@ -242,13 +244,16 @@ public class ShowTableStatsStmt extends ShowStmt
implements NotFallbackInParser
LocalDateTime dateTime =
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
java.time.ZoneId.systemDefault());
- String formattedDateTime = dateTime.format(formatter);
- row.add(formattedDateTime);
+ LocalDateTime lastAnalyzeTime =
+
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.lastAnalyzeTime),
+ java.time.ZoneId.systemDefault());
+ row.add(dateTime.format(formatter));
row.add(tableStatistic.analyzeColumns().toString());
row.add(tableStatistic.jobType.toString());
row.add(String.valueOf(tableStatistic.partitionChanged.get()));
row.add(String.valueOf(tableStatistic.userInjected));
row.add(table == null ? "N/A" :
String.valueOf(table.autoAnalyzeEnabled()));
+ row.add(lastAnalyzeTime.format(formatter));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index b940cc08156..de0adc82d17 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -874,17 +874,6 @@ public class OlapTable extends Table implements
MTMVRelatedTableIf, GsonPostProc
return columns;
}
- public List<Column> getMvColumns(boolean full) {
- List<Column> columns = Lists.newArrayList();
- for (Long indexId : indexIdToMeta.keySet()) {
- if (indexId == baseIndexId) {
- continue;
- }
- columns.addAll(getSchemaByIndexId(indexId, full));
- }
- return columns;
- }
-
public List<Column> getBaseSchemaKeyColumns() {
return getKeyColumnsByIndexId(baseIndexId);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 6ec413821ea..58b2c3e3d1f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -183,6 +183,9 @@ public class AnalysisInfo implements Writable {
@SerializedName("updateRows")
public final long updateRows;
+ @SerializedName("tv")
+ public final long tableVersion;
+
public final Map<Long, Long> partitionUpdateRows = new
ConcurrentHashMap<>();
@SerializedName("tblUpdateTime")
@@ -206,8 +209,8 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state,
ScheduleType scheduleType,
boolean partitionOnly, boolean samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression
cronExpression, boolean forceFull,
- boolean usingSqlForExternalTable, long tblUpdateTime, long
rowCount, boolean userInject,
- long updateRows, JobPriority priority, Map<Long, Long>
partitionUpdateRows, boolean enablePartition) {
+ boolean usingSqlForExternalTable, long tblUpdateTime, long
rowCount, boolean userInject, long updateRows,
+ long tableVersion, JobPriority priority, Map<Long, Long>
partitionUpdateRows, boolean enablePartition) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@@ -244,6 +247,7 @@ public class AnalysisInfo implements Writable {
this.rowCount = rowCount;
this.userInject = userInject;
this.updateRows = updateRows;
+ this.tableVersion = tableVersion;
this.priority = priority;
if (partitionUpdateRows != null) {
this.partitionUpdateRows.putAll(partitionUpdateRows);
@@ -292,6 +296,7 @@ public class AnalysisInfo implements Writable {
sj.add("rowCount: " + rowCount);
sj.add("userInject: " + userInject);
sj.add("updateRows: " + updateRows);
+ sj.add("tableVersion: " + tableVersion);
sj.add("priority: " + priority.name());
sj.add("enablePartition: " + enablePartition);
return sj.toString();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 73817363ef1..bbd0d616495 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -63,6 +63,7 @@ public class AnalysisInfoBuilder {
private long rowCount;
private boolean userInject = false;
private long updateRows;
+ private long tableVersion;
private JobPriority priority;
private Map<Long, Long> partitionUpdateRows;
private boolean enablePartition;
@@ -104,6 +105,7 @@ public class AnalysisInfoBuilder {
rowCount = info.rowCount;
userInject = info.userInject;
updateRows = info.updateRows;
+ tableVersion = info.tableVersion;
priority = info.priority;
partitionUpdateRows = info.partitionUpdateRows;
enablePartition = info.enablePartition;
@@ -274,6 +276,11 @@ public class AnalysisInfoBuilder {
return this;
}
+ public AnalysisInfoBuilder setTableVersion(long tableVersion) {
+ this.tableVersion = tableVersion;
+ return this;
+ }
+
public AnalysisInfoBuilder setPriority(JobPriority priority) {
this.priority = priority;
return this;
@@ -295,7 +302,7 @@ public class AnalysisInfoBuilder {
sampleRows, maxBucketNum, periodTimeInMs, message,
lastExecTimeInMs, timeCostInMs, state, scheduleType,
partitionOnly, samplingPartition, isAllPartition,
partitionCount,
cronExpression, forceFull, usingSqlForExternalTable,
tblUpdateTime, rowCount, userInject, updateRows,
- priority, partitionUpdateRows, enablePartition);
+ tableVersion, priority, partitionUpdateRows, enablePartition);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index c2b20707f13..ece9daf2520 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -129,6 +129,7 @@ public class AnalysisManager implements Writable {
public final Map<TableName, Set<Pair<String, String>>> highPriorityJobs =
new LinkedHashMap<>();
public final Map<TableName, Set<Pair<String, String>>> midPriorityJobs =
new LinkedHashMap<>();
public final Map<TableName, Set<Pair<String, String>>> lowPriorityJobs =
new LinkedHashMap<>();
+ public final Map<TableName, Set<Pair<String, String>>> veryLowPriorityJobs
= new LinkedHashMap<>();
// Tracking running manually submitted async tasks, keep in mem only
protected final ConcurrentMap<Long, Map<Long, BaseAnalysisTask>>
analysisJobIdToTaskMap = new ConcurrentHashMap<>();
@@ -381,7 +382,7 @@ public class AnalysisManager implements Writable {
}
infoBuilder.setColName(stringJoiner.toString());
infoBuilder.setTaskIds(Lists.newArrayList());
- infoBuilder.setTblUpdateTime(System.currentTimeMillis());
+ infoBuilder.setTblUpdateTime(table.getUpdateTime());
// Empty table row count is 0. Call fetchRowCount() when getRowCount()
returns <= 0,
// because getRowCount may return <= 0 if cached is not loaded. This
is mainly for external table.
long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0
:
@@ -389,6 +390,7 @@ public class AnalysisManager implements Writable {
infoBuilder.setRowCount(rowCount);
TableStatsMeta tableStatsStatus = findTableStatsStatus(table.getId());
infoBuilder.setUpdateRows(tableStatsStatus == null ? 0 :
tableStatsStatus.updatedRows.get());
+ infoBuilder.setTableVersion(table instanceof OlapTable ? ((OlapTable)
table).getVisibleVersion() : 0);
infoBuilder.setPriority(JobPriority.MANUAL);
infoBuilder.setPartitionUpdateRows(tableStatsStatus == null ? null :
tableStatsStatus.partitionUpdateRows);
infoBuilder.setEnablePartition(StatisticsUtil.enablePartitionAnalyze());
@@ -547,12 +549,15 @@ public class AnalysisManager implements Writable {
result.addAll(getPendingJobs(highPriorityJobs, JobPriority.HIGH,
tblName));
result.addAll(getPendingJobs(midPriorityJobs, JobPriority.MID,
tblName));
result.addAll(getPendingJobs(lowPriorityJobs, JobPriority.LOW,
tblName));
+ result.addAll(getPendingJobs(veryLowPriorityJobs,
JobPriority.VERY_LOW, tblName));
} else if (priority.equals(JobPriority.HIGH.name())) {
result.addAll(getPendingJobs(highPriorityJobs, JobPriority.HIGH,
tblName));
} else if (priority.equals(JobPriority.MID.name())) {
result.addAll(getPendingJobs(midPriorityJobs, JobPriority.MID,
tblName));
} else if (priority.equals(JobPriority.LOW.name())) {
result.addAll(getPendingJobs(lowPriorityJobs, JobPriority.LOW,
tblName));
+ } else if (priority.equals(JobPriority.VERY_LOW.name())) {
+ result.addAll(getPendingJobs(veryLowPriorityJobs,
JobPriority.VERY_LOW, tblName));
}
return result;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java
index 6cb2ced9286..78f51c2ac0c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java
@@ -52,11 +52,15 @@ public class ColStatsMeta {
@SerializedName("rowCount")
public long rowCount;
+ @SerializedName("tv")
+ public long tableVersion;
+
@SerializedName("pur")
public ConcurrentMap<Long, Long> partitionUpdateRows = new
ConcurrentHashMap<>();
- public ColStatsMeta(long updatedTime, AnalysisMethod analysisMethod,
AnalysisType analysisType, JobType jobType,
- long queriedTimes, long rowCount, long updatedRows, Map<Long,
Long> partitionUpdateRows) {
+ public ColStatsMeta(long updatedTime, AnalysisMethod analysisMethod,
AnalysisType analysisType,
+ JobType jobType, long queriedTimes, long rowCount,
long updatedRows,
+ long tableVersion, Map<Long, Long>
partitionUpdateRows) {
this.updatedTime = updatedTime;
this.analysisMethod = analysisMethod;
this.analysisType = analysisType;
@@ -64,6 +68,7 @@ public class ColStatsMeta {
this.queriedTimes.addAndGet(queriedTimes);
this.updatedRows = updatedRows;
this.rowCount = rowCount;
+ this.tableVersion = tableVersion;
if (partitionUpdateRows != null) {
this.partitionUpdateRows.putAll(partitionUpdateRows);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java
index c3656b92927..df95b3cbede 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java
@@ -21,5 +21,6 @@ public enum JobPriority {
HIGH,
MID,
LOW,
+ VERY_LOW,
MANUAL;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 38af43bef73..6c1fd1ba959 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -26,7 +26,6 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.MasterDaemon;
-import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.JobType;
@@ -37,7 +36,6 @@ import org.apache.hudi.common.util.VisibleForTesting;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
-import java.time.LocalTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -92,10 +90,11 @@ public class StatisticsAutoCollector extends MasterDaemon {
}
protected void collect() {
- while (canCollect()) {
+ while (StatisticsUtil.canCollect()) {
Pair<Entry<TableName, Set<Pair<String, String>>>, JobPriority> job
= getJob();
if (job == null) {
// No more job to process, break and sleep.
+ LOG.info("No auto analyze jobs to process.");
break;
}
try {
@@ -112,11 +111,6 @@ public class StatisticsAutoCollector extends MasterDaemon {
}
}
- protected boolean canCollect() {
- return StatisticsUtil.enableAutoAnalyze()
- &&
StatisticsUtil.inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId()));
- }
-
protected Pair<Entry<TableName, Set<Pair<String, String>>>, JobPriority>
getJob() {
AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
Optional<Entry<TableName, Set<Pair<String, String>>>> job =
fetchJobFromMap(manager.highPriorityJobs);
@@ -128,7 +122,11 @@ public class StatisticsAutoCollector extends MasterDaemon {
return Pair.of(job.get(), JobPriority.MID);
}
job = fetchJobFromMap(manager.lowPriorityJobs);
- return job.map(entry -> Pair.of(entry, JobPriority.LOW)).orElse(null);
+ if (job.isPresent()) {
+ return Pair.of(job.get(), JobPriority.LOW);
+ }
+ job = fetchJobFromMap(manager.veryLowPriorityJobs);
+ return job.map(tableNameSetEntry -> Pair.of(tableNameSetEntry,
JobPriority.VERY_LOW)).orElse(null);
}
protected Optional<Map.Entry<TableName, Set<Pair<String, String>>>>
fetchJobFromMap(
@@ -142,9 +140,13 @@ public class StatisticsAutoCollector extends MasterDaemon {
protected void processOneJob(TableIf table, Set<Pair<String, String>>
columns,
JobPriority priority) throws DdlException {
- // appendMvColumn(table, columns);
appendAllColumns(table, columns);
- columns = columns.stream().filter(c ->
StatisticsUtil.needAnalyzeColumn(table, c)).collect(Collectors.toSet());
+ columns = columns.stream().filter(
+ c -> StatisticsUtil.needAnalyzeColumn(table, c) ||
StatisticsUtil.isLongTimeColumn(table, c))
+ .collect(Collectors.toSet());
+ if (columns.isEmpty()) {
+ return;
+ }
AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns,
priority);
if (analyzeJob == null) {
return;
@@ -178,15 +180,6 @@ public class StatisticsAutoCollector extends MasterDaemon {
}
}
- protected void appendMvColumn(TableIf table, Set<String> columns) {
- if (!(table instanceof OlapTable)) {
- return;
- }
- OlapTable olapTable = (OlapTable) table;
- Set<String> mvColumns =
olapTable.getMvColumns(false).stream().map(Column::getName).collect(Collectors.toSet());
- columns.addAll(mvColumns);
- }
-
protected boolean supportAutoAnalyze(TableIf tableIf) {
if (tableIf == null) {
return false;
@@ -248,9 +241,10 @@ public class StatisticsAutoCollector extends MasterDaemon {
.setTaskIds(new ArrayList<>())
.setLastExecTimeInMs(System.currentTimeMillis())
.setJobType(JobType.SYSTEM)
- .setTblUpdateTime(System.currentTimeMillis())
+ .setTblUpdateTime(table.getUpdateTime())
.setRowCount(rowCount)
.setUpdateRows(tableStatsStatus == null ? 0 :
tableStatsStatus.updatedRows.get())
+ .setTableVersion(table instanceof OlapTable ? ((OlapTable)
table).getVisibleVersion() : 0)
.setPriority(priority)
.setPartitionUpdateRows(tableStatsStatus == null ? null :
tableStatsStatus.partitionUpdateRows)
.setEnablePartition(StatisticsUtil.enablePartitionAnalyze())
@@ -275,4 +269,8 @@ public class StatisticsAutoCollector extends MasterDaemon {
future.get();
}
}
+
+ public boolean isReady() {
+ return waited;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java
index b67d1cf947c..4a3e93550fc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java
@@ -29,6 +29,7 @@ import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.util.StatisticsUtil;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -46,7 +47,7 @@ public class StatisticsJobAppender extends MasterDaemon {
private static final Logger LOG =
LogManager.getLogger(StatisticsJobAppender.class);
public static final long INTERVAL = 1000;
- public static final int JOB_MAP_SIZE = 1000;
+ public static final int JOB_MAP_SIZE = 100;
public static final int TABLE_BATCH_SIZE = 100;
private long currentDbId = 0;
@@ -70,6 +71,11 @@ public class StatisticsJobAppender extends MasterDaemon {
LOG.info("Stats table not available, skip");
return;
}
+ if (Env.getCurrentEnv().getStatisticsAutoCollector() == null
+ ||
!Env.getCurrentEnv().getStatisticsAutoCollector().isReady()) {
+ LOG.info("Statistics auto collector not ready, skip");
+ return;
+ }
if (Env.isCheckpointThread()) {
return;
}
@@ -81,7 +87,7 @@ public class StatisticsJobAppender extends MasterDaemon {
appendColumnsToJobs(manager.highPriorityColumns,
manager.highPriorityJobs);
appendColumnsToJobs(manager.midPriorityColumns,
manager.midPriorityJobs);
if (StatisticsUtil.enableAutoAnalyzeInternalCatalog()) {
- appendToLowJobs(manager.lowPriorityJobs);
+ appendToLowJobs(manager.lowPriorityJobs,
manager.veryLowPriorityJobs);
}
}
@@ -136,7 +142,8 @@ public class StatisticsJobAppender extends MasterDaemon {
}
}
- protected void appendToLowJobs(Map<TableName, Set<Pair<String, String>>>
jobs) {
+ protected void appendToLowJobs(Map<TableName, Set<Pair<String, String>>>
lowPriorityJobs,
+ Map<TableName, Set<Pair<String, String>>>
veryLowPriorityJobs) {
if (System.currentTimeMillis() - lastRoundFinishTime <
lowJobIntervalMs) {
return;
}
@@ -162,27 +169,33 @@ public class StatisticsJobAppender extends MasterDaemon {
if (t.getBaseSchema().size() >
StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) {
continue;
}
- Set<Pair<String, String>> columnIndexPairs =
t.getColumnIndexPairs(
- t.getSchemaAllIndexes(false).stream()
- .filter(c ->
!StatisticsUtil.isUnsupportedType(c.getType()))
-
.map(Column::getName).collect(Collectors.toSet()))
+ Set<String> columns = t.getSchemaAllIndexes(false).stream()
+ .filter(c ->
!StatisticsUtil.isUnsupportedType(c.getType()))
+ .map(Column::getName).collect(Collectors.toSet());
+ Set<Pair<String, String>> columnIndexPairs =
t.getColumnIndexPairs(columns)
.stream().filter(p ->
StatisticsUtil.needAnalyzeColumn(t, p))
.collect(Collectors.toSet());
- if (columnIndexPairs.isEmpty()) {
- continue;
- }
TableName tableName = new
TableName(t.getDatabase().getCatalog().getName(),
t.getDatabase().getFullName(), t.getName());
- synchronized (jobs) {
- // If job map reach the upper limit, stop adding new jobs.
- if (!jobs.containsKey(tableName) && jobs.size() >=
JOB_MAP_SIZE) {
- LOG.info("Low job map full.");
+ // Append to low job map first.
+ if (!columnIndexPairs.isEmpty()) {
+ boolean appended = doAppend(lowPriorityJobs,
columnIndexPairs, tableName);
+ // If low job map is full, stop this iteration.
+ if (!appended) {
+ LOG.debug("Low Priority job map is full.");
return;
}
- if (jobs.containsKey(tableName)) {
- jobs.get(tableName).addAll(columnIndexPairs);
- } else {
- jobs.put(tableName, columnIndexPairs);
+ } else {
+ // Append to very low job map.
+ columnIndexPairs = t.getColumnIndexPairs(columns)
+ .stream().filter(p ->
StatisticsUtil.isLongTimeColumn(t, p))
+ .collect(Collectors.toSet());
+ if (!columnIndexPairs.isEmpty()) {
+ boolean appended = doAppend(veryLowPriorityJobs,
columnIndexPairs, tableName);
+ // If very low job map is full, simply ignore it and
go to the next table.
+ if (!appended) {
+ LOG.debug("Very low Priority job map is full.");
+ }
}
}
currentTableId = t.getId();
@@ -200,6 +213,23 @@ public class StatisticsJobAppender extends MasterDaemon {
lastRoundFinishTime = System.currentTimeMillis();
}
+ @VisibleForTesting
+ public boolean doAppend(Map<TableName, Set<Pair<String, String>>> jobMap,
+ Set<Pair<String, String>> columnIndexPairs,
+ TableName tableName) {
+ synchronized (jobMap) {
+ if (!jobMap.containsKey(tableName) && jobMap.size() >=
JOB_MAP_SIZE) {
+ return false;
+ }
+ if (jobMap.containsKey(tableName)) {
+ jobMap.get(tableName).addAll(columnIndexPairs);
+ } else {
+ jobMap.put(tableName, columnIndexPairs);
+ }
+ }
+ return true;
+ }
+
// For unit test only.
public void setLastRoundFinishTime(long value) {
lastRoundFinishTime = value;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index ba23ab84dc7..ac4704b54c6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -379,9 +379,8 @@ public class StatisticsRepository {
objects.catalog.getId(), objects.db.getId(),
objects.table.getId(), indexId, colName,
null, columnStatistic);
Env.getCurrentEnv().getStatisticsCache().syncColStats(data);
- long timestamp = System.currentTimeMillis();
AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
- .setTblUpdateTime(timestamp)
+ .setTblUpdateTime(objects.table.getUpdateTime())
.setColName("")
.setRowCount((long) Double.parseDouble(rowCount))
.setJobColumns(Sets.newHashSet())
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index 6a7f2933996..4ebdb019f27 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -80,6 +80,9 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
@SerializedName("updateTime")
public long updatedTime;
+ @SerializedName("lat")
+ public long lastAnalyzeTime;
+
@SerializedName("colNameToColStatsMeta")
private ConcurrentMap<String, ColStatsMeta>
deprecatedColNameToColStatsMeta = new ConcurrentHashMap<>();
@@ -160,6 +163,7 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
updatedTime = analyzedJob.tblUpdateTime;
+ lastAnalyzeTime = analyzedJob.createTime;
if (analyzedJob.userInject) {
userInjected = true;
}
@@ -168,14 +172,16 @@ public class TableStatsMeta implements Writable,
GsonPostProcessable {
if (colStatsMeta == null) {
colToColStatsMeta.put(colPair, new
ColStatsMeta(analyzedJob.createTime, analyzedJob.analysisMethod,
analyzedJob.analysisType, analyzedJob.jobType, 0,
analyzedJob.rowCount,
- analyzedJob.updateRows, analyzedJob.enablePartition ?
analyzedJob.partitionUpdateRows : null));
+ analyzedJob.updateRows, analyzedJob.tableVersion,
+ analyzedJob.enablePartition ?
analyzedJob.partitionUpdateRows : null));
} else {
- colStatsMeta.updatedTime = analyzedJob.tblUpdateTime;
+ colStatsMeta.updatedTime = analyzedJob.createTime;
colStatsMeta.analysisType = analyzedJob.analysisType;
colStatsMeta.analysisMethod = analyzedJob.analysisMethod;
colStatsMeta.jobType = analyzedJob.jobType;
colStatsMeta.updatedRows = analyzedJob.updateRows;
colStatsMeta.rowCount = analyzedJob.rowCount;
+ colStatsMeta.tableVersion = analyzedJob.tableVersion;
if (analyzedJob.enablePartition) {
if (colStatsMeta.partitionUpdateRows == null) {
colStatsMeta.partitionUpdateRows = new
ConcurrentHashMap<>();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index a9c1612eb48..dd037617ba6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -53,6 +53,7 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.InternalCatalog;
@@ -1012,12 +1013,6 @@ public class StatisticsUtil {
if (columnStatsMeta == null) {
return true;
}
- // Column hasn't been analyzed for longer than config interval.
- if (Config.auto_analyze_interval_seconds > 0
- && System.currentTimeMillis() - columnStatsMeta.updatedTime
- > Config.auto_analyze_interval_seconds * 1000) {
- return true;
- }
// Partition table partition stats never been collected.
if (StatisticsUtil.enablePartitionAnalyze() &&
table.isPartitionedTable()
&& columnStatsMeta.partitionUpdateRows == null) {
@@ -1072,7 +1067,7 @@ public class StatisticsUtil {
}
// External is hard to calculate change rate, use time interval to
control analyze frequency.
return System.currentTimeMillis()
- - tableStatsStatus.updatedTime >
StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis();
+ - tableStatsStatus.lastAnalyzeTime >
StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis();
}
}
@@ -1127,4 +1122,47 @@ public class StatisticsUtil {
}
return false;
}
+
+ // This function return true means the column hasn't been analyzed for
longer than the configured time.
+ public static boolean isLongTimeColumn(TableIf table, Pair<String, String>
column) {
+ if (column == null) {
+ return false;
+ }
+ if (!table.autoAnalyzeEnabled()) {
+ return false;
+ }
+ if (!(table instanceof OlapTable)) {
+ return false;
+ }
+ AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
+ TableStatsMeta tblStats = manager.findTableStatsStatus(table.getId());
+ // Table never been analyzed, skip it for higher priority jobs.
+ if (tblStats == null) {
+ LOG.warn("Table stats is null.");
+ return false;
+ }
+ ColStatsMeta columnStats = tblStats.findColumnStatsMeta(column.first,
column.second);
+ if (columnStats == null) {
+ // Column never been analyzed, skip it for higher priority jobs.
+ return false;
+ }
+ // User injected column stats, don't do auto analyze, avoid overwrite
user injected stats.
+ if (tblStats.userInjected) {
+ return false;
+ }
+ boolean isLongTime = Config.auto_analyze_interval_seconds > 0
+ && System.currentTimeMillis() - columnStats.updatedTime >
Config.auto_analyze_interval_seconds * 1000;
+ if (!isLongTime) {
+ return false;
+ }
+ // For olap table, if the table visible version and row count doesn't
change since last analyze,
+ // we don't need to analyze it because its data is not changed.
+ OlapTable olapTable = (OlapTable) table;
+ return olapTable.getVisibleVersion() != columnStats.tableVersion
+ || olapTable.getRowCount() != columnStats.rowCount;
+ }
+
+ public static boolean canCollect() {
+ return enableAutoAnalyze() &&
inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId()));
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java
index e3255ab23a0..5b890795f01 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java
@@ -33,6 +33,8 @@ import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
import mockit.Mock;
import mockit.MockUp;
import org.junit.jupiter.api.Assertions;
@@ -178,13 +180,21 @@ public class StatisticsJobAppenderTest {
}
};
- Map<TableName, Set<Pair<String, String>>> testMap = new HashMap<>();
+ new MockUp<StatisticsUtil>() {
+ @Mock
+ public boolean needAnalyzeColumn(TableIf table, Pair<String,
String> column) {
+ return true;
+ }
+ };
+
+ Map<TableName, Set<Pair<String, String>>> testLowMap = new HashMap<>();
+ Map<TableName, Set<Pair<String, String>>> testVeryLowMap = new
HashMap<>();
StatisticsJobAppender appender = new StatisticsJobAppender();
- appender.appendToLowJobs(testMap);
- Assertions.assertEquals(100, testMap.size());
- testMap.clear();
- appender.appendToLowJobs(testMap);
- Assertions.assertEquals(40, testMap.size());
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(100, testLowMap.size());
+ testLowMap.clear();
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(40, testLowMap.size());
for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE; i++) {
Database db = new Database(id++, "testDb" + i);
@@ -198,38 +208,93 @@ public class StatisticsJobAppenderTest {
db.createTableWithLock(table2, true, false);
}
- testMap.clear();
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
+ testLowMap.clear();
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE,
testLowMap.size());
+ }
+
+ @Test
+ public void testAppendQueryColumnToVeryLowJobMap() throws DdlException {
+ InternalCatalog testCatalog = new InternalCatalog();
+ int id = 10;
+ for (int i = 0; i < 70; i++) {
+ Database db = new Database(id++, "testDb" + i);
+ testCatalog.unprotectCreateDb(db);
+ Column column1 = new Column("placeholder", PrimitiveType.INT);
+ List<Column> schema = new ArrayList<>();
+ schema.add(column1);
+ OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1",
schema, null, null, null);
+ OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1",
schema, null, null, null);
+ db.createTableWithLock(table1, true, false);
+ db.createTableWithLock(table2, true, false);
+ }
+
+ new MockUp<Env>() {
+ @Mock
+ public InternalCatalog getCurrentInternalCatalog() {
+ return testCatalog;
+ }
+ };
+
+ new MockUp<OlapTable>() {
+ @Mock
+ public List<Column> getBaseSchema() {
+ return Lists.newArrayList();
+ }
+
+ @Mock
+ public Set<Pair<String, String>> getColumnIndexPairs(Set<String>
columns) {
+ return Collections.singleton(Pair.of("mockIndex",
"mockColumn"));
+ }
+ };
+
+ new MockUp<StatisticsUtil>() {
+ @Mock
+ public boolean needAnalyzeColumn(TableIf table, Pair<String,
String> column) {
+ return false;
+ }
+
+ @Mock
+ public boolean isLongTimeColumn(TableIf table, Pair<String,
String> column) {
+ return true;
+ }
+ };
+
+ Map<TableName, Set<Pair<String, String>>> testLowMap = new HashMap<>();
+ Map<TableName, Set<Pair<String, String>>> testVeryLowMap = new
HashMap<>();
+ StatisticsJobAppender appender = new StatisticsJobAppender();
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(100, testVeryLowMap.size());
+ testVeryLowMap.clear();
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(40, testVeryLowMap.size());
+
+ for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE; i++) {
+ Database db = new Database(id++, "testDb" + i);
+ testCatalog.unprotectCreateDb(db);
+ Column column1 = new Column("placeholder", PrimitiveType.INT);
+ List<Column> schema = new ArrayList<>();
+ schema.add(column1);
+ OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1",
schema, null, null, null);
+ OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1",
schema, null, null, null);
+ db.createTableWithLock(table1, true, false);
+ db.createTableWithLock(table2, true, false);
+ }
+
+ testLowMap.clear();
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE,
testMap.size());
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(0, testLowMap.size());
+ Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE,
testVeryLowMap.size());
}
@Test
@@ -270,12 +335,48 @@ public class StatisticsJobAppenderTest {
return thresholds[count++];
}
};
- Map<TableName, Set<Pair<String, String>>> testMap = new HashMap<>();
+ Map<TableName, Set<Pair<String, String>>> testLowMap = new HashMap<>();
+ Map<TableName, Set<Pair<String, String>>> testVeryLowMap = new
HashMap<>();
StatisticsJobAppender appender = new StatisticsJobAppender();
- appender.appendToLowJobs(testMap);
- Assertions.assertEquals(0, testMap.size());
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(0, testLowMap.size());
appender.setLastRoundFinishTime(0);
- appender.appendToLowJobs(testMap);
- Assertions.assertEquals(1, testMap.size());
+ appender.appendToLowJobs(testLowMap, testVeryLowMap);
+ Assertions.assertEquals(1, testLowMap.size());
+ }
+
+ @Test
+ public void testDoAppend() {
+ Map<TableName, Set<Pair<String, String>>> jobMap = Maps.newHashMap();
+ Set<Pair<String, String>> columnIndexPairs1 = Sets.newHashSet();
+ Set<Pair<String, String>> columnIndexPairs2 = Sets.newHashSet();
+ TableName tableName1 = new TableName("catalog1", "db1", "table1");
+ TableName tableName2 = new TableName("catalog2", "db2", "table2");
+ Pair<String, String> pair1 = Pair.of("index1", "col1");
+ columnIndexPairs1.add(pair1);
+
+ StatisticsJobAppender appender = new StatisticsJobAppender();
+ Assertions.assertTrue(appender.doAppend(jobMap, columnIndexPairs1,
tableName1));
+ Assertions.assertEquals(1, jobMap.size());
+ Assertions.assertTrue(jobMap.containsKey(tableName1));
+ Assertions.assertEquals(1, jobMap.get(tableName1).size());
+ Assertions.assertTrue(jobMap.get(tableName1).contains(pair1));
+
+ Pair<String, String> pair2 = Pair.of("index2", "col2");
+ columnIndexPairs1.add(pair2);
+ Assertions.assertTrue(appender.doAppend(jobMap, columnIndexPairs1,
tableName1));
+ Assertions.assertEquals(1, jobMap.size());
+ Assertions.assertTrue(jobMap.containsKey(tableName1));
+ Assertions.assertEquals(2, jobMap.get(tableName1).size());
+ Assertions.assertTrue(jobMap.get(tableName1).contains(pair1));
+ Assertions.assertTrue(jobMap.get(tableName1).contains(pair2));
+
+ Pair<String, String> pair3 = Pair.of("index3", "col3");
+ columnIndexPairs2.add(pair3);
+ Assertions.assertTrue(appender.doAppend(jobMap, columnIndexPairs2,
tableName2));
+ Assertions.assertEquals(2, jobMap.size());
+ Assertions.assertTrue(jobMap.containsKey(tableName2));
+ Assertions.assertEquals(1, jobMap.get(tableName2).size());
+ Assertions.assertTrue(jobMap.get(tableName2).contains(pair3));
}
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
index fbac718e421..ef1e9ca0297 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
@@ -32,6 +32,7 @@ import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HMSExternalTable.DLAType;
+import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.datasource.jdbc.JdbcExternalCatalog;
import org.apache.doris.datasource.jdbc.JdbcExternalTable;
import org.apache.doris.qe.SessionVariable;
@@ -208,12 +209,6 @@ class StatisticsUtilTest {
Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table,
Pair.of("index", column.getName())));
// Test external table auto analyze enabled.
- new MockUp<AnalysisManager>() {
- @Mock
- public TableStatsMeta findTableStatsStatus(long tblId) {
- return null;
- }
- };
externalCatalog.getCatalogProperty().addProperty(ExternalCatalog.ENABLE_AUTO_ANALYZE,
"false");
HMSExternalTable hmsTable1 = new HMSExternalTable(1, "name", "dbName",
externalCatalog);
externalCatalog.setAutoAnalyzePolicy("dbName", "name", "enable");
@@ -238,27 +233,10 @@ class StatisticsUtilTest {
tableMeta.userInjected = false;
Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table,
Pair.of("index", column.getName())));
- // Test column hasn't been analyzed for longer than 1 day.
new MockUp<TableStatsMeta>() {
@Mock
public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
- return new ColStatsMeta(0, null, null, null, 0, 100, 0, null);
- }
- };
- new MockUp<OlapTable>() {
- @Mock
- public long getRowCount() {
- return 100;
- }
- };
- Config.auto_analyze_interval_seconds = 60 * 60 * 24;
- Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table,
Pair.of("index", column.getName())));
- Config.auto_analyze_interval_seconds = 0;
-
- new MockUp<TableStatsMeta>() {
- @Mock
- public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
- return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 0, 0, null);
+ return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 0, 0, 0, null);
}
};
@@ -312,7 +290,7 @@ class StatisticsUtilTest {
new MockUp<TableStatsMeta>() {
@Mock
public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
- return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 100, 0, null);
+ return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 100, 0, 0, null);
}
};
tableMeta.partitionChanged.set(false);
@@ -322,7 +300,7 @@ class StatisticsUtilTest {
new MockUp<TableStatsMeta>() {
@Mock
public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
- return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 0, 0, null);
+ return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 0, 0, 0, null);
}
};
tableMeta.partitionChanged.set(false);
@@ -338,7 +316,7 @@ class StatisticsUtilTest {
new MockUp<TableStatsMeta>() {
@Mock
public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
- return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 500, 0, null);
+ return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 500, 0, 0, null);
}
};
tableMeta.partitionChanged.set(false);
@@ -354,7 +332,7 @@ class StatisticsUtilTest {
new MockUp<TableStatsMeta>() {
@Mock
public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
- return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 100, 80, null);
+ return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 100, 80, 0, null);
}
};
tableMeta.partitionChanged.set(false);
@@ -382,6 +360,140 @@ class StatisticsUtilTest {
tableMeta.partitionChanged.set(false);
tableMeta.updatedRows.set(85);
Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table,
Pair.of("index", column.getName())));
+ }
+
+ @Test
+ void testLongTimeNoAnalyze() {
+ Column column = new Column("testColumn", PrimitiveType.INT);
+ List<Column> schema = new ArrayList<>();
+ schema.add(column);
+ OlapTable table = new OlapTable(200, "testTable", schema, null, null,
null);
+ // Test column is null
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, null));
+
+ // Test table auto analyze is disabled.
+ new MockUp<OlapTable>() {
+ @Mock
+ public boolean autoAnalyzeEnabled() {
+ return false;
+ }
+ };
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+ new MockUp<OlapTable>() {
+ @Mock
+ public boolean autoAnalyzeEnabled() {
+ return true;
+ }
+ };
+
+ // Test external table
+ new MockUp<ExternalTable>() {
+ @Mock
+ public boolean autoAnalyzeEnabled() {
+ return true;
+ }
+ };
+ IcebergExternalTable icebergTable = new IcebergExternalTable(0, "",
"", null);
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(icebergTable,
Pair.of("index", column.getName())));
+
+ // Test table stats meta is null.
+ new MockUp<AnalysisManager>() {
+ @Mock
+ public TableStatsMeta findTableStatsStatus(long tblId) {
+ return null;
+ }
+ };
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+
+ // Test column stats meta is null
+ TableStatsMeta tableMeta = new TableStatsMeta();
+ new MockUp<AnalysisManager>() {
+ @Mock
+ public TableStatsMeta findTableStatsStatus(long tblId) {
+ return tableMeta;
+ }
+ };
+ new MockUp<TableStatsMeta>() {
+ @Mock
+ public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
+ return null;
+ }
+ };
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+ new MockUp<TableStatsMeta>() {
+ @Mock
+ public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
+ return new ColStatsMeta(System.currentTimeMillis(), null,
null, null, 0, 100, 0, 0, null);
+ }
+ };
+
+ // Test table stats is user injected
+ tableMeta.userInjected = true;
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+ tableMeta.userInjected = false;
+
+ // Test Config.auto_analyze_interval_seconds == 0
+ Config.auto_analyze_interval_seconds = 0;
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+
+ // Test column analyzed within the time interval
+ Config.auto_analyze_interval_seconds = 86400;
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+
+ // Test column hasn't been analyzed for longer than the time interval, but version and row count don't change
+ new MockUp<TableStatsMeta>() {
+ @Mock
+ public ColStatsMeta findColumnStatsMeta(String indexName, String
colName) {
+ ColStatsMeta ret = new
ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 20, 10,
null);
+ try {
+ Thread.sleep(1500);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ return ret;
+ }
+ };
+ new MockUp<OlapTable>() {
+ @Mock
+ public long getVisibleVersion() {
+ return 10;
+ }
+
+ @Mock
+ public long fetchRowCount() {
+ return 100;
+ }
+ };
+ Config.auto_analyze_interval_seconds = 1;
+ Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+
+ // Test column hasn't been analyzed for longer than the time interval, and the version changes
+ new MockUp<OlapTable>() {
+ @Mock
+ public long getVisibleVersion() {
+ return 11;
+ }
+
+ @Mock
+ public long fetchRowCount() {
+ return 100;
+ }
+ };
+ Assertions.assertTrue(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
+
+ // Test column hasn't been analyzed for longer than the time interval, and the row count changes
+ new MockUp<OlapTable>() {
+ @Mock
+ public long getVisibleVersion() {
+ return 10;
+ }
+
+ @Mock
+ public long fetchRowCount() {
+ return 101;
+ }
+ };
+ Assertions.assertTrue(StatisticsUtil.isLongTimeColumn(table,
Pair.of("index", column.getName())));
}
}
diff --git a/regression-test/suites/statistics/analyze_stats.groovy
b/regression-test/suites/statistics/analyze_stats.groovy
index 55074e995fe..69360da6911 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -2903,7 +2903,38 @@ PARTITION `p599` VALUES IN (599)
assertEquals("521779.0", alter_result[0][5])
assertEquals("7.142863009760572", alter_result[0][6])
-
sql """DROP DATABASE IF EXISTS trigger"""
+
+ // Test show last analyze table version
+ sql """create database if not exists test_version"""
+ sql """use test_version"""
+ sql """drop table if exists region"""
+ sql """
+ CREATE TABLE region (
+ r_regionkey int NOT NULL,
+ r_name VARCHAR(25) NOT NULL,
+ r_comment VARCHAR(152)
+ )ENGINE=OLAP
+ DUPLICATE KEY(`r_regionkey`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """analyze table region with sync"""
+ def versionResult = sql """show column stats region"""
+ assertEquals(versionResult[0][16], "1")
+ assertEquals(versionResult[1][16], "1")
+ assertEquals(versionResult[2][16], "1")
+
+ sql """insert into region values (1, "1", "1")"""
+ sql """analyze table region with sync"""
+ versionResult = sql """show column stats region"""
+ assertEquals(versionResult[0][16], "2")
+ assertEquals(versionResult[1][16], "2")
+ assertEquals(versionResult[2][16], "2")
+
+ sql """drop database if exists test_version"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]