This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new badbec6a15 [fix](spark load) not setting the file format causes a null pointer exception (#16202)
badbec6a15 is described below
commit badbec6a15bfab0d2b76c5d6f6a6856bf61209c6
Author: gnehil <[email protected]>
AuthorDate: Tue Sep 5 12:14:07 2023 +0800
[fix](spark load) not setting the file format causes a null pointer exception (#16202)
---
.../src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java | 2 +-
.../main/java/org/apache/doris/statistics/StatisticsJobScheduler.java | 2 +-
.../src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
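The root cause is visible in the SparkDpp hunks below: when a load job leaves the file format unset, fileGroup.fileFormat is null, so calling equalsIgnoreCase on it throws a NullPointerException; flipping the comparison so the non-null string literal receives the call makes a null format compare as false instead. A minimal, self-contained sketch of the two patterns (not part of the commit; FileGroupStub is a hypothetical stand-in for the real fileGroup object):

// Sketch only: demonstrates why the comparison order matters.
public class FormatCheckDemo {
    static class FileGroupStub {
        String fileFormat; // remains null when the load job sets no format
    }

    public static void main(String[] args) {
        FileGroupStub fileGroup = new FileGroupStub();

        // Old pattern: dereferences fileFormat first, so an unset format throws.
        try {
            boolean isParquet = fileGroup.fileFormat.equalsIgnoreCase("parquet");
            System.out.println("old pattern: isParquet = " + isParquet);
        } catch (NullPointerException e) {
            System.out.println("old pattern: NPE when fileFormat is unset");
        }

        // Fixed pattern: the non-null literal receives the call,
        // so a null fileFormat simply compares as false.
        boolean isParquet = "parquet".equalsIgnoreCase(fileGroup.fileFormat);
        System.out.println("fixed pattern: isParquet = " + isParquet);
    }
}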
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
index b900085ca7..952ea5fa06 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
@@ -115,7 +115,7 @@ public class MetaStatisticsTask extends StatisticsTask {
                 break;
             case PARTITION:
                 Partition partition = getNotNullPartition(granularity, table);
-                long partitionSize = partition.getBaseIndex().getDataSize();
+                long partitionSize = partition.getBaseIndex().getDataSize(false);
                 result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(partitionSize));
                 break;
             case TABLET:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
index 4e492d6e30..87e8c09678 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
@@ -330,7 +330,7 @@ public class StatisticsJobScheduler extends MasterDaemon {
                 LOG.info("Partition {} not found in the table {}", partitionName, table.getName());
                 continue;
             }
-            if (partition.getDataSize() == 0) {
+            if (partition.getDataSize(false) == 0) {
                 LOG.info("Do not collect statistics for empty partition {} in the table {}",
                         partitionName, table.getName());
                 continue;
diff --git a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
index e6c9bf5528..33ca13cb0e 100644
--- a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
+++ b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
@@ -627,7 +627,7 @@ public final class SparkDpp implements java.io.Serializable {
             srcColumnsWithColumnsFromPath.addAll(fileGroup.columnsFromPath);
         }
-        if (fileGroup.fileFormat.equalsIgnoreCase("parquet")) {
+        if ("parquet".equalsIgnoreCase(fileGroup.fileFormat)) {
             // parquet had its own schema, just use it; perhaps we could add some validation in future.
             Dataset<Row> dataFrame = spark.read().parquet(fileUrl);
             if (!CollectionUtils.isEmpty(columnValueFromPath)) {
@@ -639,7 +639,7 @@ public final class SparkDpp implements java.io.Serializable {
             return dataFrame;
         }
-        if (fileGroup.fileFormat.equalsIgnoreCase("orc")) {
+        if ("orc".equalsIgnoreCase(fileGroup.fileFormat)) {
             Dataset<Row> dataFrame = spark.read().orc(fileUrl);
             if (!CollectionUtils.isEmpty(columnValueFromPath)) {
                 for (int k = 0; k < columnValueFromPath.size(); k++) {
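For readers adapting similar code: the constant-first idiom used in these hunks is one fix; another null-safe option is org.apache.commons.lang3.StringUtils.equalsIgnoreCase, which treats a null argument as a non-match rather than throwing. A small sketch, assuming commons-lang3 is on the classpath (an assumption; the hunks above only show commons-collections' CollectionUtils in use):

import org.apache.commons.lang3.StringUtils;

public class NullSafeCompareDemo {
    public static void main(String[] args) {
        String fileFormat = null; // format left unset by the load job
        // equalsIgnoreCase here is null-safe: a null argument yields false
        // (and two nulls yield true) instead of throwing.
        System.out.println(StringUtils.equalsIgnoreCase(fileFormat, "orc")); // false
        System.out.println(StringUtils.equalsIgnoreCase("ORC", "orc"));      // true
    }
}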
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]