This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push:
     new 6544143  KYLIN-3644 Fix SparkFactDistinct step NumberFormatException
6544143 is described below

commit 65441434e0aff410050009b4538ef3bf9e7af93f
Author: chao long <wayn...@qq.com>
AuthorDate: Thu Nov 1 16:43:11 2018 +0800

    KYLIN-3644 Fix SparkFactDistinct step NumberFormatException
---
 .../src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java  | 2 +-
 .../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java
index 0ba4fd8..0636a5c 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/KeyValueBuilder.java
@@ -56,7 +56,7 @@ public class KeyValueBuilder implements Serializable {
         }
     }
 
-    protected boolean isNull(String v) {
+    public boolean isNull(String v) {
         return nullStrs.contains(v);
     }
 
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index 5cfd2d7..cdd0ac2 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -63,6 +63,7 @@ import org.apache.kylin.cube.DimensionRangeInfo;
 import org.apache.kylin.cube.cuboid.CuboidUtil;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.cube.model.CubeJoinedFlatTableEnrich;
+import org.apache.kylin.cube.util.KeyValueBuilder;
 import org.apache.kylin.dict.DictionaryGenerator;
 import org.apache.kylin.dict.IDictionaryBuilder;
 import org.apache.kylin.engine.EngineFactory;
@@ -250,6 +251,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
         private Map<Integer, DimensionRangeInfo> dimensionRangeInfoMap;
         private transient ByteBuffer tmpbuf;
         private LongAccumulator bytesWritten;
+        private KeyValueBuilder keyValueBuilder;
 
         public FlatOutputFucntion(String cubeName, String segmentId, String metaurl, SerializableConfiguration conf,
                 int samplingPercent, LongAccumulator bytesWritten) {
@@ -272,6 +274,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
             CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(
                     EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
 
+            keyValueBuilder = new KeyValueBuilder(intermediateTableDesc);
             reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
             tmpbuf = ByteBuffer.allocate(4096);
 
@@ -317,7 +320,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
 
             for (int i = 0; i < allCols.size(); i++) {
                 String fieldValue = row[columnIndex[i]];
-                if (fieldValue == null)
+                if (fieldValue == null || keyValueBuilder.isNull(fieldValue))
                     continue;
 
                 final DataType type = allCols.get(i).getType();