This is an automated email from the ASF dual-hosted git repository. zivanfi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push: new 97a880c Experiment. 97a880c is described below commit 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df Author: Zoltan Ivanfi <z...@cloudera.com> AuthorDate: Fri Oct 26 15:08:18 2018 +0200 Experiment. --- parquet-benchmarks/run.sh | 4 ++- .../apache/parquet/benchmarks/WriteBenchmarks.java | 16 +++++----- ...enchmarks.java => WriteBenchmarksParquet1.java} | 36 +++++++++++----------- .../hadoop/InternalParquetRecordWriter.java | 2 +- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index cfaddae..dfb0297 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -22,7 +22,9 @@ SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P ) echo "Starting WRITE benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@" +java -XX:+PreserveFramePointer -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* -wi 0 -i 1 -f 3 -tu s -bm ss -rf json +exit 0 + echo "Generating test data" java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate echo "Data generated, starting READ benchmarks" diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java index 5c26a84..265c5ec 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java @@ -55,7 +55,7 @@ public class WriteBenchmarks { PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -69,7 +69,7 @@ public class WriteBenchmarks { PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -83,7 +83,7 @@ public class WriteBenchmarks { PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -97,7 +97,7 @@ public class WriteBenchmarks { PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -111,7 +111,7 @@ public class WriteBenchmarks { PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } //TODO how to handle lzo jar? @@ -126,7 +126,7 @@ public class WriteBenchmarks { // PAGE_SIZE_DEFAULT, // FIXED_LEN_BYTEARRAY_SIZE, // LZO, -// ONE_MILLION); +// 50 * ONE_MILLION); // } @Benchmark @@ -140,7 +140,7 @@ public class WriteBenchmarks { PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -154,6 +154,6 @@ public class WriteBenchmarks { PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, - ONE_MILLION); + 50 * ONE_MILLION); } } diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java similarity index 86% copy from parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java copy to parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java index 5c26a84..edd87ba 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java @@ -29,13 +29,13 @@ import static org.apache.parquet.benchmarks.BenchmarkFiles.*; import java.io.IOException; -import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0; +import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED; @State(Thread) -public class WriteBenchmarks { +public class WriteBenchmarksParquet1 { private DataGenerator dataGenerator = new DataGenerator(); @Setup(Level.Iteration) @@ -50,12 +50,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -64,12 +64,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS256M_PS4M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_256M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -78,12 +78,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS256M_PS8M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_256M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -92,12 +92,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS512M_PS4M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_512M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -106,12 +106,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS512M_PS8M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_512M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } //TODO how to handle lzo jar? @@ -121,12 +121,12 @@ public class WriteBenchmarks { // { // dataGenerator.generateData(parquetFile_1M_LZO, // configuration, -// WriterVersion.PARQUET_2_0, +// WriterVersion.PARQUET_1_0, // BLOCK_SIZE_DEFAULT, // PAGE_SIZE_DEFAULT, // FIXED_LEN_BYTEARRAY_SIZE, // LZO, -// ONE_MILLION); +// 50 * ONE_MILLION); // } @Benchmark @@ -135,12 +135,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_SNAPPY, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -149,11 +149,11 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_GZIP, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, - ONE_MILLION); + 50 * ONE_MILLION); } } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java index d8af379..8b79dca 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java @@ -43,7 +43,7 @@ class InternalParquetRecordWriter<T> { private static final Logger LOG = LoggerFactory.getLogger(InternalParquetRecordWriter.class); private static final int MINIMUM_RECORD_COUNT_FOR_CHECK = 100; - private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000; + private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 100; private final ParquetFileWriter parquetFileWriter; private final WriteSupport<T> writeSupport;