This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 50b078eff0a [opt](paimon) add split num and partition num #32597
(#32621)
50b078eff0a is described below
commit 50b078eff0ac7808a43c4cf8d73df4dae4a597fa
Author: Mingyu Chen <[email protected]>
AuthorDate: Thu Mar 21 19:49:58 2024 +0800
[opt](paimon) add split num and partition num #32597 (#32621)
---
.../functions/table/TableValuedFunction.java | 4 +++-
.../planner/external/paimon/PaimonScanNode.java | 26 +++++++++++++++++++---
2 files changed, 26 insertions(+), 4 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java
index c0969362ed6..e3e25481691 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java
@@ -53,7 +53,9 @@ public abstract class TableValuedFunction extends
BoundFunction implements Unary
} catch (AnalysisException e) {
throw e;
} catch (Throwable t) {
- throw new AnalysisException("Can not build FunctionGenTable by " +
this + ": " + t.getMessage(), t);
+ // Do not print the whole stmt, it is too long and may contain
sensitive information
+ throw new AnalysisException(
+ "Can not build FunctionGenTable '" + this.getName() + "'.
error: " + t.getMessage(), t);
}
});
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
index 07de5aeda79..37c3660d068 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
@@ -34,6 +34,7 @@ import org.apache.doris.planner.external.FileQueryScanNode;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.spi.Split;
import org.apache.doris.statistics.StatisticalType;
+import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TFileAttributes;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileRangeDesc;
@@ -43,9 +44,11 @@ import org.apache.doris.thrift.TScanRangeLocations;
import org.apache.doris.thrift.TTableFormatFileDesc;
import avro.shaded.com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.table.AbstractFileStoreTable;
import org.apache.paimon.table.source.DataSplit;
@@ -67,6 +70,8 @@ public class PaimonScanNode extends FileQueryScanNode {
private static final Logger LOG =
LogManager.getLogger(PaimonScanNode.class);
private PaimonSource source = null;
private List<Predicate> predicates;
+ private int rawFileSplitNum = 0;
+ private int paimonSplitNum = 0;
public PaimonScanNode(PlanNodeId id, TupleDescriptor desc, boolean
needCheckColumnPriv) {
super(id, desc, "PAIMON_SCAN_NODE", StatisticalType.PAIMON_SCAN_NODE,
needCheckColumnPriv);
@@ -145,12 +150,16 @@ public class PaimonScanNode extends FileQueryScanNode {
.withProjection(projected)
.newScan().plan().splits();
boolean supportNative = supportNativeReader();
+ // Just for counting the number of selected partitions for this paimon
table
+ Set<BinaryRow> selectedPartitionValues = Sets.newHashSet();
for (org.apache.paimon.table.source.Split split : paimonSplits) {
if (!forceJniScanner && supportNative && split instanceof
DataSplit) {
DataSplit dataSplit = (DataSplit) split;
- Optional<List<RawFile>> optRowFiles =
dataSplit.convertToRawFiles();
- if (optRowFiles.isPresent()) {
- List<RawFile> rawFiles = optRowFiles.get();
+ BinaryRow partitionValue = dataSplit.partition();
+ selectedPartitionValues.add(partitionValue);
+ Optional<List<RawFile>> optRawFiles =
dataSplit.convertToRawFiles();
+ if (optRawFiles.isPresent()) {
+ List<RawFile> rawFiles = optRawFiles.get();
for (RawFile file : rawFiles) {
LocationPath locationPath = new
LocationPath(file.path(), source.getCatalog().getProperties());
Path finalDataFilePath =
locationPath.toScanRangeLocation();
@@ -165,17 +174,22 @@ public class PaimonScanNode extends FileQueryScanNode {
true,
null,
PaimonSplit.PaimonSplitCreator.DEFAULT));
+ ++rawFileSplitNum;
} catch (IOException e) {
throw new UserException("Paimon error to split
file: " + e.getMessage(), e);
}
}
} else {
splits.add(new PaimonSplit(split));
+ ++paimonSplitNum;
}
} else {
splits.add(new PaimonSplit(split));
+ ++paimonSplitNum;
}
}
+ this.readPartitionNum = selectedPartitionValues.size();
+ // TODO: get total partition number
return splits;
}
@@ -250,4 +264,10 @@ public class PaimonScanNode extends FileQueryScanNode {
return map;
}
+ @Override
+ public String getNodeExplainString(String prefix, TExplainLevel
detailLevel) {
+ return super.getNodeExplainString(prefix, detailLevel)
+ + String.format("%spaimonNativeReadSplits=%d/%d\n",
+ prefix, rawFileSplitNum, (paimonSplitNum + rawFileSplitNum));
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]