This is an automated email from the ASF dual-hosted git repository.
shaofengshi pushed a commit to branch 2.5.x in repository https://gitbox.apache.org/repos/asf/kylin.git
commit d90bb046d191dd06f194e7380da13698772e001f
Author: chao long <wayn...@qq.com>
AuthorDate: Wed Sep 5 09:55:44 2018 +0800

    KYLIN-3534 Don't compress fact distinct output file
---
 .../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index 77ebd69..213cdfd 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -198,19 +198,22 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
         MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class);
         FileOutputFormat.setOutputPath(job, new Path(outputPath));
+        FileOutputFormat.setCompressOutput(job, false);
         // prevent to create zero-sized default output
         LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
+
         MultipleOutputsRDD multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD(outputRDD);
         multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());
-        logger.info("Map input records={}", recordRDD.count());
+        long recordCount = recordRDD.count();
+        logger.info("Map input records={}", recordCount);
         logger.info("HDFS Read: {} HDFS Write", bytesWritten.value());
         Map<String, String> counterMap = Maps.newHashMap();
-        counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordRDD.count()));
+        counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
         counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten.value()));
         // save counter to hdfs