This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 971b35c6c2 [Improvement](statistics)Improve show analyze performance. (#22484) (#22882)
971b35c6c2 is described below
commit 971b35c6c27831fa074d2fe9470da72fcbeae473
Author: Jibing-Li <[email protected]>
AuthorDate: Fri Aug 11 17:50:18 2023 +0800
[Improvement](statistics)Improve show analyze performance. (#22484) (#22882)
---
.../java/org/apache/doris/statistics/AnalysisInfo.java | 17 ++++++++++++++++-
.../apache/doris/statistics/AnalysisInfoBuilder.java | 11 ++++++++++-
.../org/apache/doris/statistics/AnalysisManager.java | 18 +++++++++++++++++-
3 files changed, 43 insertions(+), 3 deletions(-)
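In short: a job-level AnalysisInfo now carries the ids of its own tasks (the new taskIds field), and each task-creation path registers its task id on the job via addTaskId(). SHOW ANALYZE can then resolve a job's tasks with direct map lookups instead of scanning every known task. A minimal sketch of the old versus new lookup, assuming the id-keyed maps that appear in the diff below (the "before" version is an illustrative reconstruction, not the exact removed code):

    // Before (roughly): scan every task to find the ones belonging to this job.
    public List<AnalysisInfo> findTasks(long jobId) {
        return analysisTaskInfoMap.values().stream()
                .filter(info -> info.jobId == jobId)
                .collect(Collectors.toList());
    }

    // After: the job already knows its task ids, so look each one up directly.
    public List<AnalysisInfo> findTasksByTaskIds(long jobId) {
        AnalysisInfo jobInfo = analysisJobInfoMap.get(jobId);
        if (jobInfo != null && jobInfo.taskIds != null) {
            return jobInfo.taskIds.stream()
                    .map(analysisTaskInfoMap::get)
                    .collect(Collectors.toList());
        }
        return null; // caller (getJobProgress) reports "N/A" in this case
    }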
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 592fb4a99b..ee39582aac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -37,6 +37,7 @@ import java.io.IOException;
import java.lang.reflect.Type;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;
@@ -77,9 +78,14 @@ public class AnalysisInfo implements Writable {
@SerializedName("jobId")
public final long jobId;
+ // When this AnalysisInfo represents a task, this is the task id for it.
@SerializedName("taskId")
public final long taskId;
+ // When this AnalysisInfo represents a job, this is the list of task ids belonging to this job.
+ @SerializedName("taskIds")
+ public final List<Long> taskIds;
+
@SerializedName("catalogName")
public final String catalogName;
@@ -153,7 +159,11 @@ public class AnalysisInfo implements Writable {
@SerializedName("samplingPartition")
public boolean samplingPartition;
- public AnalysisInfo(long jobId, long taskId, String catalogName, String dbName, String tblName,
+ // For serialize
+ @SerializedName("cronExpr")
+ public String cronExprStr;
+
+ public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, String catalogName, String dbName, String tblName,
Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
int samplePercent, int sampleRows, int maxBucketNum, long periodTimeInMs, String message,
@@ -161,6 +171,7 @@ public class AnalysisInfo implements Writable {
boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition) {
this.jobId = jobId;
this.taskId = taskId;
+ this.taskIds = taskIds;
this.catalogName = catalogName;
this.dbName = dbName;
this.tblName = tblName;
@@ -231,6 +242,10 @@ public class AnalysisInfo implements Writable {
return taskId == -1;
}
+ public void addTaskId(long taskId) {
+ taskIds.add(taskId);
+ }
+
// TODO: use thrift
public static AnalysisInfo fromResultRow(ResultRow resultRow) {
try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 7d8b99502c..2fd0e25d72 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -23,12 +23,14 @@ import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
+import java.util.List;
import java.util.Map;
import java.util.Set;
public class AnalysisInfoBuilder {
private long jobId;
private long taskId;
+ private List<Long> taskIds;
private String catalogName;
private String dbName;
private String tblName;
@@ -59,6 +61,7 @@ public class AnalysisInfoBuilder {
public AnalysisInfoBuilder(AnalysisInfo info) {
jobId = info.jobId;
taskId = info.taskId;
+ taskIds = info.taskIds;
catalogName = info.catalogName;
dbName = info.dbName;
tblName = info.tblName;
@@ -94,6 +97,11 @@ public class AnalysisInfoBuilder {
return this;
}
+ public AnalysisInfoBuilder setTaskIds(List<Long> taskIds) {
+ this.taskIds = taskIds;
+ return this;
+ }
+
public AnalysisInfoBuilder setCatalogName(String catalogName) {
this.catalogName = catalogName;
return this;
@@ -210,7 +218,7 @@ public class AnalysisInfoBuilder {
}
public AnalysisInfo build() {
- return new AnalysisInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions, partitionNames,
+ return new AnalysisInfo(jobId, taskId, taskIds, catalogName, dbName, tblName, colToPartitions, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition);
@@ -220,6 +228,7 @@ public class AnalysisInfoBuilder {
return new AnalysisInfoBuilder()
.setJobId(jobId)
.setTaskId(taskId)
+ .setTaskIds(taskIds)
.setCatalogName(catalogName)
.setDbName(dbName)
.setTblName(tblName)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index a7e545f98c..e549bd6b0b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -58,6 +58,7 @@ import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.logging.log4j.LogManager;
@@ -439,6 +440,7 @@ public class AnalysisManager extends Daemon implements Writable {
Map<String, Set<String>> colToPartitions = validateAndGetPartitions(table, columnNames,
partitionNames, analysisType, analysisMode);
taskInfoBuilder.setColToPartitions(colToPartitions);
+ taskInfoBuilder.setTaskIds(Lists.newArrayList());
return taskInfoBuilder.build();
}
@@ -511,6 +513,7 @@ public class AnalysisManager extends Daemon implements Writable {
AnalysisInfoBuilder indexTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo);
AnalysisInfo analysisInfo = indexTaskInfoBuilder.setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
+ jobInfo.addTaskId(taskId);
if (isSync) {
return;
}
@@ -537,6 +540,7 @@ public class AnalysisManager extends Daemon implements Writable {
AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();
analysisTasks.put(taskId, createTask(analysisInfo));
+ jobInfo.addTaskId(taskId);
if (isSync) {
continue;
}
@@ -580,6 +584,7 @@ public class AnalysisManager extends Daemon implements Writable {
AnalysisInfo analysisInfo = colTaskInfoBuilder.setIndexId(-1L).setLastExecTimeInMs(System.currentTimeMillis())
.setTaskId(taskId).setColName("TableRowCount").setExternalTableLevelTask(true).build();
analysisTasks.put(taskId, createTask(analysisInfo));
+ jobInfo.addTaskId(taskId);
if (isSync) {
// For sync job, don't need to persist, return here and execute it immediately.
return;
@@ -708,7 +713,10 @@ public class AnalysisManager extends Daemon implements Writable {
}
public String getJobProgress(long jobId) {
- List<AnalysisInfo> tasks = findTasks(jobId);
+ List<AnalysisInfo> tasks = findTasksByTaskIds(jobId);
+ if (tasks == null) {
+ return "N/A";
+ }
int finished = 0;
int failed = 0;
int inProgress = 0;
@@ -921,6 +929,14 @@ public class AnalysisManager extends Daemon implements Writable {
}
}
+ public List<AnalysisInfo> findTasksByTaskIds(long jobId) {
+ AnalysisInfo jobInfo = analysisJobInfoMap.get(jobId);
+ if (jobInfo != null && jobInfo.taskIds != null) {
+ return jobInfo.taskIds.stream().map(id -> analysisTaskInfoMap.get(id)).collect(Collectors.toList());
+ }
+ return null;
+ }
+
public void removeAll(List<AnalysisInfo> analysisInfos) {
for (AnalysisInfo analysisInfo : analysisInfos) {
analysisTaskInfoMap.remove(analysisInfo.taskId);
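For context, this is roughly how the new lookup feeds the progress report: each created task registers itself on its job via jobInfo.addTaskId(taskId), and getJobProgress then counts states over that per-job task list rather than over all tasks. A hedged sketch of the counting loop; the AnalysisState values and the formatting of the returned string are assumptions for illustration and are not shown in this diff:

    // Illustrative only: aggregate task states for one job inside getJobProgress.
    List<AnalysisInfo> tasks = findTasksByTaskIds(jobId);
    if (tasks == null) {
        return "N/A";
    }
    int finished = 0;
    int failed = 0;
    int inProgress = 0;
    for (AnalysisInfo task : tasks) {
        if (task == null) {            // task id registered but task info no longer present
            continue;
        }
        switch (task.state) {          // AnalysisState values assumed here
            case FINISHED:  finished++;   break;
            case FAILED:    failed++;     break;
            default:        inProgress++; break;
        }
    }
    // Assumed output shape, e.g. "3 Finished | 0 Failed | 1 In Progress"
    return finished + " Finished | " + failed + " Failed | " + inProgress + " In Progress";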
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]