This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new badbec6a15 [fix](spark load) not setting the file format causes a null pointer exception (#16202)
badbec6a15 is described below
commit badbec6a15bfab0d2b76c5d6f6a6856bf61209c6
Author: gnehil <[email protected]>
AuthorDate: Tue Sep 5 12:14:07 2023 +0800
[fix](spark load) not setting the file format causes a null pointer exception (#16202)
---
.../src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java | 2 +-
.../main/java/org/apache/doris/statistics/StatisticsJobScheduler.java | 2 +-
.../src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
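The root cause is visible in the SparkDpp hunks below: when a load job leaves the file format unset, fileGroup.fileFormat is null, so calling equalsIgnoreCase on it throws a NullPointerException; flipping the comparison so the non-null string literal receives the call makes a null format compare as false instead. A minimal, self-contained sketch of the two patterns (not part of the commit; FileGroupStub is a hypothetical stand-in for the real fileGroup object):

// Sketch only: demonstrates why the comparison order matters.
public class FormatCheckDemo {
    static class FileGroupStub {
        String fileFormat; // remains null when the load job sets no format
    }

    public static void main(String[] args) {
        FileGroupStub fileGroup = new FileGroupStub();

        // Old pattern: dereferences fileFormat first, so an unset format throws.
        try {
            boolean isParquet = fileGroup.fileFormat.equalsIgnoreCase("parquet");
            System.out.println("old pattern: isParquet = " + isParquet);
        } catch (NullPointerException e) {
            System.out.println("old pattern: NPE when fileFormat is unset");
        }

        // Fixed pattern: the non-null literal receives the call,
        // so a null fileFormat simply compares as false.
        boolean isParquet = "parquet".equalsIgnoreCase(fileGroup.fileFormat);
        System.out.println("fixed pattern: isParquet = " + isParquet);
    }
}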
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
index b900085ca7..952ea5fa06 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
@@ -115,7 +115,7 @@ public class MetaStatisticsTask extends StatisticsTask {
                 break;
             case PARTITION:
                 Partition partition = getNotNullPartition(granularity, table);
-                long partitionSize = partition.getBaseIndex().getDataSize();
+                long partitionSize = partition.getBaseIndex().getDataSize(false);
                 result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(partitionSize));
                 break;
             case TABLET:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
index 4e492d6e30..87e8c09678 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
@@ -330,7 +330,7 @@ public class StatisticsJobScheduler extends MasterDaemon {
                 LOG.info("Partition {} not found in the table {}", partitionName, table.getName());
                 continue;
             }
-            if (partition.getDataSize() == 0) {
+            if (partition.getDataSize(false) == 0) {
                 LOG.info("Do not collect statistics for empty partition {} in the table {}",
                         partitionName, table.getName());
                 continue;
diff --git a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
index e6c9bf5528..33ca13cb0e 100644
--- a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
+++ b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
@@ -627,7 +627,7 @@ public final class SparkDpp implements java.io.Serializable {
             srcColumnsWithColumnsFromPath.addAll(fileGroup.columnsFromPath);
         }
-        if (fileGroup.fileFormat.equalsIgnoreCase("parquet")) {
+        if ("parquet".equalsIgnoreCase(fileGroup.fileFormat)) {
             // parquet had its own schema, just use it; perhaps we could add some validation in future.
             Dataset<Row> dataFrame = spark.read().parquet(fileUrl);
             if (!CollectionUtils.isEmpty(columnValueFromPath)) {
@@ -639,7 +639,7 @@ public final class SparkDpp implements java.io.Serializable {
             return dataFrame;
         }
-        if (fileGroup.fileFormat.equalsIgnoreCase("orc")) {
+        if ("orc".equalsIgnoreCase(fileGroup.fileFormat)) {
             Dataset<Row> dataFrame = spark.read().orc(fileUrl);
             if (!CollectionUtils.isEmpty(columnValueFromPath)) {
                 for (int k = 0; k < columnValueFromPath.size(); k++) {
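For readers adapting similar code: the constant-first idiom used in these hunks is one fix; another null-safe option is org.apache.commons.lang3.StringUtils.equalsIgnoreCase, which treats a null argument as a non-match rather than throwing. A small sketch, assuming commons-lang3 is on the classpath (an assumption; the hunks above only show commons-collections' CollectionUtils in use):

import org.apache.commons.lang3.StringUtils;

public class NullSafeCompareDemo {
    public static void main(String[] args) {
        String fileFormat = null; // format left unset by the load job
        // equalsIgnoreCase here is null-safe: a null argument yields false
        // (and two nulls yield true) instead of throwing.
        System.out.println(StringUtils.equalsIgnoreCase(fileFormat, "orc")); // false
        System.out.println(StringUtils.equalsIgnoreCase("ORC", "orc"));      // true
    }
}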
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]