Repository: parquet-mr Updated Branches: refs/heads/master 4d996d1ba -> c6764c4a0
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 4df45dd..ee92d46 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -22,7 +22,9 @@ import static java.util.Collections.emptyList; import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.apache.parquet.format.CompressionCodec.UNCOMPRESSED; import static org.apache.parquet.format.Type.INT32; @@ -34,6 +36,8 @@ import static org.apache.parquet.format.converter.ParquetMetadataConverter.getOf import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.math.BigInteger; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -48,6 +52,7 @@ import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; import org.apache.parquet.Version; import org.apache.parquet.bytes.BytesUtils; +import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.statistics.BinaryStatistics; import org.apache.parquet.column.statistics.BooleanStatistics; import org.apache.parquet.column.statistics.DoubleStatistics; @@ -61,9 +66,9 @@ import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.PrimitiveType; import org.junit.Assert; import org.junit.Test; - import org.apache.parquet.example.Paper; import org.apache.parquet.format.ColumnChunk; import org.apache.parquet.format.ColumnMetaData; @@ -75,6 +80,7 @@ import org.apache.parquet.format.PageType; import org.apache.parquet.format.RowGroup; import org.apache.parquet.format.SchemaElement; import org.apache.parquet.format.Type; +import org.apache.parquet.schema.ColumnOrder; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; @@ -101,7 +107,7 @@ public class TestParquetMetadataConverter { public void testSchemaConverter() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(Paper.schema); - MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema); + MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); assertEquals(Paper.schema, schema); } @@ -370,7 +376,16 @@ public class TestParquetMetadataConverter { } @Test - public void testBinaryStats() { + public void testBinaryStatsV1() { + testBinaryStats(StatsHelper.V1); + } + + @Test + public void testBinaryStatsV2() { + testBinaryStats(StatsHelper.V2); + } + + private void testBinaryStats(StatsHelper helper) { // make fake stats and verify the size check BinaryStatistics stats = new BinaryStatistics(); stats.incrementNumNulls(3004); @@ -384,33 +399,47 @@ public class TestParquetMetadataConverter { Assert.assertTrue("Should be smaller than min + max size + 1", stats.isSmallerThan(totalLen + 1)); - org.apache.parquet.format.Statistics formatStats = - ParquetMetadataConverter.toParquetStatistics(stats); + org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats); - Assert.assertArrayEquals("Min should match", min, formatStats.getMin()); - Assert.assertArrayEquals("Max should match", max, formatStats.getMax()); + assertFalse("Min should not be set", formatStats.isSetMin()); + assertFalse("Max should not be set", formatStats.isSetMax()); + if (helper == StatsHelper.V2) { + Assert.assertArrayEquals("Min_value should match", min, formatStats.getMin_value()); + Assert.assertArrayEquals("Max_value should match", max, formatStats.getMax_value()); + } Assert.assertEquals("Num nulls should match", 3004, formatStats.getNull_count()); // convert to empty stats because the values are too large stats.setMinMaxFromBytes(max, max); - formatStats = ParquetMetadataConverter.toParquetStatistics(stats); + formatStats = helper.toParquetStatistics(stats); Assert.assertFalse("Min should not be set", formatStats.isSetMin()); Assert.assertFalse("Max should not be set", formatStats.isSetMax()); + Assert.assertFalse("Min_value should not be set", formatStats.isSetMin_value()); + Assert.assertFalse("Max_value should not be set", formatStats.isSetMax_value()); Assert.assertFalse("Num nulls should not be set", formatStats.isSetNull_count()); Statistics roundTripStats = ParquetMetadataConverter.fromParquetStatisticsInternal( - Version.FULL_VERSION, formatStats, PrimitiveTypeName.BINARY, + Version.FULL_VERSION, formatStats, new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, ""), ParquetMetadataConverter.SortOrder.SIGNED); Assert.assertTrue(roundTripStats.isEmpty()); } @Test - public void testIntegerStats() { + public void testIntegerStatsV1() { + testIntegerStats(StatsHelper.V1); + } + + @Test + public void testIntegerStatsV2() { + testIntegerStats(StatsHelper.V2); + } + + private void testIntegerStats(StatsHelper helper) { // make fake stats and verify the size check IntStatistics stats = new IntStatistics(); stats.incrementNumNulls(3004); @@ -419,8 +448,7 @@ public class TestParquetMetadataConverter { stats.updateStats(min); stats.updateStats(max); - org.apache.parquet.format.Statistics formatStats = - ParquetMetadataConverter.toParquetStatistics(stats); + org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats); Assert.assertEquals("Min should match", min, BytesUtils.bytesToInt(formatStats.getMin())); @@ -431,7 +459,16 @@ public class TestParquetMetadataConverter { } @Test - public void testLongStats() { + public void testLongStatsV1() { + testLongStats(StatsHelper.V1); + } + + @Test + public void testLongStatsV2() { + testLongStats(StatsHelper.V2); + } + + private void testLongStats(StatsHelper helper) { // make fake stats and verify the size check LongStatistics stats = new LongStatistics(); stats.incrementNumNulls(3004); @@ -440,8 +477,7 @@ public class TestParquetMetadataConverter { stats.updateStats(min); stats.updateStats(max); - org.apache.parquet.format.Statistics formatStats = - ParquetMetadataConverter.toParquetStatistics(stats); + org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats); Assert.assertEquals("Min should match", min, BytesUtils.bytesToLong(formatStats.getMin())); @@ -452,7 +488,16 @@ public class TestParquetMetadataConverter { } @Test - public void testFloatStats() { + public void testFloatStatsV1() { + testFloatStats(StatsHelper.V1); + } + + @Test + public void testFloatStatsV2() { + testFloatStats(StatsHelper.V2); + } + + private void testFloatStats(StatsHelper helper) { // make fake stats and verify the size check FloatStatistics stats = new FloatStatistics(); stats.incrementNumNulls(3004); @@ -461,8 +506,7 @@ public class TestParquetMetadataConverter { stats.updateStats(min); stats.updateStats(max); - org.apache.parquet.format.Statistics formatStats = - ParquetMetadataConverter.toParquetStatistics(stats); + org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats); Assert.assertEquals("Min should match", min, Float.intBitsToFloat(BytesUtils.bytesToInt(formatStats.getMin())), @@ -475,7 +519,16 @@ public class TestParquetMetadataConverter { } @Test - public void testDoubleStats() { + public void testDoubleStatsV1() { + testDoubleStats(StatsHelper.V1); + } + + @Test + public void testDoubleStatsV2() { + testDoubleStats(StatsHelper.V2); + } + + private void testDoubleStats(StatsHelper helper) { // make fake stats and verify the size check DoubleStatistics stats = new DoubleStatistics(); stats.incrementNumNulls(3004); @@ -484,8 +537,7 @@ public class TestParquetMetadataConverter { stats.updateStats(min); stats.updateStats(max); - org.apache.parquet.format.Statistics formatStats = - ParquetMetadataConverter.toParquetStatistics(stats); + org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats); Assert.assertEquals("Min should match", min, Double.longBitsToDouble(BytesUtils.bytesToLong(formatStats.getMin())), @@ -498,7 +550,16 @@ public class TestParquetMetadataConverter { } @Test - public void testBooleanStats() { + public void testBooleanStatsV1() { + testBooleanStats(StatsHelper.V1); + } + + @Test + public void testBooleanStatsV2() { + testBooleanStats(StatsHelper.V2); + } + + private void testBooleanStats(StatsHelper helper) { // make fake stats and verify the size check BooleanStatistics stats = new BooleanStatistics(); stats.incrementNumNulls(3004); @@ -507,8 +568,7 @@ public class TestParquetMetadataConverter { stats.updateStats(min); stats.updateStats(max); - org.apache.parquet.format.Statistics formatStats = - ParquetMetadataConverter.toParquetStatistics(stats); + org.apache.parquet.format.Statistics formatStats = helper.toParquetStatistics(stats); Assert.assertEquals("Min should match", min, BytesUtils.bytesToBool(formatStats.getMin())); @@ -528,17 +588,27 @@ public class TestParquetMetadataConverter { stats.updateStats(Binary.fromString("z")); stats.incrementNumNulls(); + PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY) + .as(OriginalType.UTF8).named("b"); Statistics convertedStats = converter.fromParquetStatistics( Version.FULL_VERSION, - ParquetMetadataConverter.toParquetStatistics(stats), - Types.required(PrimitiveTypeName.BINARY) - .as(OriginalType.UTF8).named("b")); + StatsHelper.V1.toParquetStatistics(stats), + binaryType); Assert.assertTrue("Stats should be empty: " + convertedStats, convertedStats.isEmpty()); } @Test - public void testStillUseStatsWithSignedSortOrderIfSingleValue() { + public void testStillUseStatsWithSignedSortOrderIfSingleValueV1() { + testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper.V1); + } + + @Test + public void testStillUseStatsWithSignedSortOrderIfSingleValueV2() { + testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper.V2); + } + + private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) { ParquetMetadataConverter converter = new ParquetMetadataConverter(); BinaryStatistics stats = new BinaryStatistics(); stats.incrementNumNulls(); @@ -547,18 +617,27 @@ public class TestParquetMetadataConverter { stats.updateStats(Binary.fromString("A")); stats.incrementNumNulls(); + PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b"); Statistics convertedStats = converter.fromParquetStatistics( Version.FULL_VERSION, ParquetMetadataConverter.toParquetStatistics(stats), - Types.required(PrimitiveTypeName.BINARY) - .as(OriginalType.UTF8).named("b")); + binaryType); Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty()); Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes()); } @Test - public void testUseStatsWithSignedSortOrder() { + public void testUseStatsWithSignedSortOrderV1() { + testUseStatsWithSignedSortOrder(StatsHelper.V1); + } + + @Test + public void testUseStatsWithSignedSortOrderV2() { + testUseStatsWithSignedSortOrder(StatsHelper.V2); + } + + private void testUseStatsWithSignedSortOrder(StatsHelper helper) { // override defaults and use stats that were accumulated using signed order Configuration conf = new Configuration(); conf.setBoolean("parquet.strings.signed-min-max.enabled", true); @@ -571,17 +650,213 @@ public class TestParquetMetadataConverter { stats.updateStats(Binary.fromString("z")); stats.incrementNumNulls(); + PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY) + .as(OriginalType.UTF8).named("b"); Statistics convertedStats = converter.fromParquetStatistics( Version.FULL_VERSION, - ParquetMetadataConverter.toParquetStatistics(stats), - Types.required(PrimitiveTypeName.BINARY) - .as(OriginalType.UTF8).named("b")); + helper.toParquetStatistics(stats), + binaryType); Assert.assertFalse("Stats should not be empty", convertedStats.isEmpty()); Assert.assertEquals("Should have 3 nulls", 3, convertedStats.getNumNulls()); - Assert.assertEquals("Should have correct min (unsigned sort)", - Binary.fromString("A"), convertedStats.genericGetMin()); - Assert.assertEquals("Should have correct max (unsigned sort)", - Binary.fromString("z"), convertedStats.genericGetMax()); + if (helper == StatsHelper.V1) { + assertFalse("Min-max should be null for V1 stats", convertedStats.hasNonNullValue()); + } else { + Assert.assertEquals("Should have correct min (unsigned sort)", + Binary.fromString("A"), convertedStats.genericGetMin()); + Assert.assertEquals("Should have correct max (unsigned sort)", + Binary.fromString("z"), convertedStats.genericGetMax()); + } + } + + @Test + public void testSkippedV2Stats() { + testSkippedV2Stats( + Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(12).as(OriginalType.INTERVAL).named(""), + new BigInteger("12345678"), + new BigInteger("12345679")); + testSkippedV2Stats(Types.optional(PrimitiveTypeName.INT96).named(""), + new BigInteger("-75687987"), + new BigInteger("45367657")); + } + + private void testSkippedV2Stats(PrimitiveType type, Object min, Object max) { + Statistics<?> stats = createStats(type, min, max); + org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats); + assertFalse(statistics.isSetMin()); + assertFalse(statistics.isSetMax()); + assertFalse(statistics.isSetMin_value()); + assertFalse(statistics.isSetMax_value()); + } + + @Test + public void testV2OnlyStats() { + testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""), + 0x7F, + 0x80); + testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""), + 0x7FFF, + 0x8000); + testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""), + 0x7FFFFFFF, + 0x80000000); + testV2OnlyStats(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""), + 0x7FFFFFFFFFFFFFFFL, + 0x8000000000000000L); + testV2OnlyStats(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""), + new BigInteger("-765875"), + new BigInteger("876856")); + testV2OnlyStats( + Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7) + .named(""), + new BigInteger("-6769643"), + new BigInteger("9864675")); + } + + private void testV2OnlyStats(PrimitiveType type, Object min, Object max) { + Statistics<?> stats = createStats(type, min, max); + org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats); + assertFalse(statistics.isSetMin()); + assertFalse(statistics.isSetMax()); + assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min_value); + assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max_value); + } + + @Test + public void testV2StatsEqualMinMax() { + testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""), + 93, + 93); + testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""), + -5892, + -5892); + testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""), + 234998934, + 234998934); + testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""), + -2389943895984985L, + -2389943895984985L); + testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""), + new BigInteger("823749"), + new BigInteger("823749")); + testV2StatsEqualMinMax( + Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7) + .named(""), + new BigInteger("-8752832"), + new BigInteger("-8752832")); + testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT96).named(""), + new BigInteger("81032984"), + new BigInteger("81032984")); + } + + private void testV2StatsEqualMinMax(PrimitiveType type, Object min, Object max) { + Statistics<?> stats = createStats(type, min, max); + org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats); + assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min); + assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max); + assertEquals(ByteBuffer.wrap(stats.getMinBytes()), statistics.min_value); + assertEquals(ByteBuffer.wrap(stats.getMaxBytes()), statistics.max_value); + } + + private static <T> Statistics<?> createStats(PrimitiveType type, T min, T max) { + Class<?> c = min.getClass(); + if (c == Integer.class) { + return createStatsTyped(type, (Integer) min, (Integer) max); + } else if (c == Long.class) { + return createStatsTyped(type, (Long) min, (Long) max); + } else if (c == BigInteger.class) { + return createStatsTyped(type, (BigInteger) min, (BigInteger) max); + } + fail("Not implemented"); + return null; + } + + private static Statistics<?> createStatsTyped(PrimitiveType type, int min, int max) { + Statistics<?> stats = Statistics.createStats(type); + stats.updateStats(max); + stats.updateStats(min); + assertEquals(min, stats.genericGetMin()); + assertEquals(max, stats.genericGetMax()); + return stats; + } + + private static Statistics<?> createStatsTyped(PrimitiveType type, long min, long max) { + Statistics<?> stats = Statistics.createStats(type); + stats.updateStats(max); + stats.updateStats(min); + assertEquals(min, stats.genericGetMin()); + assertEquals(max, stats.genericGetMax()); + return stats; + } + + private static Statistics<?> createStatsTyped(PrimitiveType type, BigInteger min, BigInteger max) { + Statistics<?> stats = Statistics.createStats(type); + Binary minBinary = Binary.fromConstantByteArray(min.toByteArray()); + Binary maxBinary = Binary.fromConstantByteArray(max.toByteArray()); + stats.updateStats(maxBinary); + stats.updateStats(minBinary); + assertEquals(minBinary, stats.genericGetMin()); + assertEquals(maxBinary, stats.genericGetMax()); + return stats; + } + + private enum StatsHelper { + // Only min and max are filled (min_value and max_value are not) + V1() { + @Override + public org.apache.parquet.format.Statistics toParquetStatistics(Statistics<?> stats) { + org.apache.parquet.format.Statistics statistics = ParquetMetadataConverter.toParquetStatistics(stats); + statistics.unsetMin_value(); + statistics.unsetMax_value(); + return statistics; + } + }, + // min_value and max_value are filled (min and max might be filled as well) + V2() { + @Override + public org.apache.parquet.format.Statistics toParquetStatistics(Statistics<?> stats) { + return ParquetMetadataConverter.toParquetStatistics(stats); + } + }; + public abstract org.apache.parquet.format.Statistics toParquetStatistics(Statistics<?> stats); + } + + @Test + public void testColumnOrders() throws IOException { + MessageType schema = parseMessageType("message test {" + + " optional binary binary_col;" // Normal column with type defined column order -> typeDefined + + " optional group map_col (MAP) {" + + " repeated group map (MAP_KEY_VALUE) {" + + " required binary key (UTF8);" // Key to be hacked to have unknown column order -> undefined + + " optional group list_col (LIST) {" + + " repeated group list {" + + " optional int96 array_element;" // INT96 element with type defined column order -> undefined + + " }" + + " }" + + " }" + + " }" + + "}"); + org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData = new org.apache.parquet.hadoop.metadata.FileMetaData( + schema, new HashMap<String, String>(), null); + ParquetMetadata metadata = new ParquetMetadata(fileMetaData, new ArrayList<BlockMetaData>()); + ParquetMetadataConverter converter = new ParquetMetadataConverter(); + FileMetaData formatMetadata = converter.toParquetMetadata(1, metadata); + + List<org.apache.parquet.format.ColumnOrder> columnOrders = formatMetadata.getColumn_orders(); + assertEquals(3, columnOrders.size()); + for (org.apache.parquet.format.ColumnOrder columnOrder : columnOrders) { + assertTrue(columnOrder.isSetTYPE_ORDER()); + } + + // Simulate that thrift got a union type that is not in the generated code + // (when the file contains a not-yet-supported column order) + columnOrders.get(1).clear(); + + MessageType resultSchema = converter.fromParquetMetadata(formatMetadata).getFileMetaData().getSchema(); + List<ColumnDescriptor> columns = resultSchema.getColumns(); + assertEquals(3, columns.size()); + assertEquals(ColumnOrder.typeDefined(), columns.get(0).getPrimitiveType().columnOrder()); + assertEquals(ColumnOrder.undefined(), columns.get(1).getPrimitiveType().columnOrder()); + assertEquals(ColumnOrder.undefined(), columns.get(2).getPrimitiveType().columnOrder()); } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java index 6915c86..4243e9b 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java @@ -524,12 +524,12 @@ public class TestParquetFileWriter { String str = new String(bsout.getMaxBytes()); String str2 = new String(bsout.getMinBytes()); - assertTrue(((BinaryStatistics)readFooter.getBlocks().get(0).getColumns().get(0).getStatistics()).equals(bs1)); - assertTrue(((LongStatistics)readFooter.getBlocks().get(0).getColumns().get(1).getStatistics()).equals(ls1)); + TestUtils.assertStatsValuesEqual(bs1, readFooter.getBlocks().get(0).getColumns().get(0).getStatistics()); + TestUtils.assertStatsValuesEqual(ls1, readFooter.getBlocks().get(0).getColumns().get(1).getStatistics()); } { // assert stats are correct for the second block - assertTrue(((BinaryStatistics)readFooter.getBlocks().get(1).getColumns().get(0).getStatistics()).equals(bs2)); - assertTrue(((LongStatistics)readFooter.getBlocks().get(1).getColumns().get(1).getStatistics()).equals(ls2)); + TestUtils.assertStatsValuesEqual(bs2, readFooter.getBlocks().get(1).getColumns().get(0).getStatistics()); + TestUtils.assertStatsValuesEqual(ls2, readFooter.getBlocks().get(1).getColumns().get(1).getStatistics()); } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java index e53ac78..59b4b62 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestUtils.java @@ -24,6 +24,8 @@ import java.util.concurrent.Callable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.statistics.Statistics; +import org.hamcrest.CoreMatchers; import org.junit.Assert; public class TestUtils { @@ -61,4 +63,23 @@ public class TestUtils { } } } + + public static void assertStatsValuesEqual(Statistics<?> stats1, Statistics<?> stats2) { + assertStatsValuesEqual(null, stats1, stats2); + } + + // To be used to assert that the values (min, max, num-of-nulls) equals. It might be used in cases when creating + // Statistics object for the proper Type would require too much work/code duplications etc. + public static void assertStatsValuesEqual(String message, Statistics<?> expected, Statistics<?> actual) { + if (expected == actual) { + return; + } + if (expected == null || actual == null) { + Assert.assertEquals(expected, actual); + } + Assert.assertThat(actual, CoreMatchers.instanceOf(expected.getClass())); + Assert.assertArrayEquals(message, expected.getMaxBytes(), actual.getMaxBytes()); + Assert.assertArrayEquals(message, expected.getMinBytes(), actual.getMinBytes()); + Assert.assertEquals(message, expected.getNumNulls(), actual.getNumNulls()); + } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java index cbdd935..16db5cb 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/RandomValues.java @@ -26,7 +26,7 @@ import java.util.Random; public class RandomValues { private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890"; - private static abstract class RandomValueGenerator<T extends Comparable<T>> { + static abstract class RandomValueGenerator<T extends Comparable<T>> { private final Random random; protected RandomValueGenerator(long seed) { @@ -37,8 +37,8 @@ public class RandomValues { return (random.nextInt(10) == 0); } - public int randomInt() { return randomInt(Integer.MAX_VALUE - 1); } - public int randomInt(int maximum) { + public int randomInt() { return random.nextInt(); } + public int randomPositiveInt(int maximum) { // Maximum may be a random number (which may be negative). return random.nextInt(Math.abs(maximum) + 1); } @@ -63,11 +63,11 @@ public class RandomValues { } public char randomLetter() { - return ALPHABET.charAt(randomInt() % ALPHABET.length()); + return ALPHABET.charAt(randomPositiveInt(ALPHABET.length() - 1)); } public String randomString(int maxLength) { - return randomFixedLengthString(randomInt(maxLength)); + return randomFixedLengthString(randomPositiveInt(maxLength)); } public String randomFixedLengthString(int length) { @@ -82,7 +82,7 @@ public class RandomValues { public abstract T nextValue(); } - private static abstract class RandomBinaryBase<T extends Comparable<T>> extends RandomValueGenerator<T> { + static abstract class RandomBinaryBase<T extends Comparable<T>> extends RandomValueGenerator<T> { protected final int bufferLength; protected final byte[] buffer; @@ -103,18 +103,56 @@ public class RandomValues { } public static class IntGenerator extends RandomValueGenerator<Integer> { - private final RandomRange<Integer> randomRange = new RandomRange<Integer>(randomInt(), randomInt()); - private final int minimum = randomRange.minimum(); - private final int maximum = randomRange.maximum(); - private final int range = (maximum - minimum); + private final int minimum; + private final int range; public IntGenerator(long seed) { super(seed); + RandomRange<Integer> randomRange = new RandomRange<>(randomInt(), randomInt()); + this.minimum = randomRange.minimum(); + this.range = (randomRange.maximum() - this.minimum); + } + + public IntGenerator(long seed, int minimum, int maximum) { + super(seed); + RandomRange<Integer> randomRange = new RandomRange<>(minimum, maximum); + this.minimum = randomRange.minimum(); + this.range = randomRange.maximum() - this.minimum; } @Override public Integer nextValue() { - return (minimum + randomInt(range)); + return (minimum + randomPositiveInt(range)); + } + } + + public static class UIntGenerator extends IntGenerator { + private final int mask; + + public UIntGenerator(long seed, byte minimum, byte maximum) { + super(seed, minimum, maximum); + mask = 0xFF; + } + + public UIntGenerator(long seed, short minimum, short maximum) { + super(seed, minimum, maximum); + mask = 0xFFFF; + } + + @Override + public Integer nextValue() { + return super.nextValue() & mask; + } + } + + public static class UnconstrainedIntGenerator extends RandomValueGenerator<Integer> { + public UnconstrainedIntGenerator(long seed) { + super(seed); + } + + @Override + public Integer nextValue() { + return randomInt(); } } @@ -134,6 +172,17 @@ public class RandomValues { } } + public static class UnconstrainedLongGenerator extends RandomValueGenerator<Long> { + public UnconstrainedLongGenerator(long seed) { + super(seed); + } + + @Override + public Long nextValue() { + return randomLong(); + } + } + public static class Int96Generator extends RandomBinaryBase<BigInteger> { private final RandomRange<BigInteger> randomRange = new RandomRange<BigInteger>(randomInt96(), randomInt96()); private final BigInteger minimum = randomRange.minimum(); @@ -173,6 +222,17 @@ public class RandomValues { } } + public static class UnconstrainedFloatGenerator extends RandomValueGenerator<Float> { + public UnconstrainedFloatGenerator(long seed) { + super(seed); + } + + @Override + public Float nextValue() { + return randomFloat(); + } + } + public static class DoubleGenerator extends RandomValueGenerator<Double> { private final RandomRange<Double> randomRange = new RandomRange<Double>(randomDouble(), randomDouble()); private final double minimum = randomRange.minimum(); @@ -189,6 +249,17 @@ public class RandomValues { } } + public static class UnconstrainedDoubleGenerator extends RandomValueGenerator<Double> { + public UnconstrainedDoubleGenerator(long seed) { + super(seed); + } + + @Override + public Double nextValue() { + return randomDouble(); + } + } + public static class StringGenerator extends RandomBinaryBase<String> { private static final int MAX_STRING_LENGTH = 16; public StringGenerator(long seed) { @@ -197,7 +268,7 @@ public class RandomValues { @Override public String nextValue() { - int stringLength = randomInt(15) + 1; + int stringLength = randomPositiveInt(15) + 1; return randomString(stringLength); } @@ -216,7 +287,7 @@ public class RandomValues { @Override public Binary nextValue() { // use a random length, but ensure it is at least a few bytes - int length = 5 + randomInt(buffer.length - 5); + int length = 5 + randomPositiveInt(buffer.length - 5); for (int index = 0; index < length; index++) { buffer[index] = (byte) randomInt(); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java index d157cc3..5a5d6d4 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java @@ -42,8 +42,13 @@ import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.PrimitiveConverter; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Types; +import org.apache.parquet.statistics.RandomValues.RandomBinaryBase; +import org.apache.parquet.statistics.RandomValues.RandomValueGenerator; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -51,7 +56,9 @@ import org.junit.rules.TemporaryFolder; import java.io.File; import java.io.IOException; +import java.math.BigInteger; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.Random; @@ -59,6 +66,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*; import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; import static org.apache.parquet.schema.Type.Repetition.REQUIRED; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; public class TestStatistics { private static final int MEGABYTE = 1 << 20; @@ -163,9 +171,11 @@ public class TestStatistics { private final boolean hasNonNull; private final T min; private final T max; + private final Comparator<T> comparator; public StatsValidator(DataPage page) { Statistics<T> stats = getStatisticsFromPageHeader(page); + this.comparator = stats.comparator(); this.hasNonNull = stats.hasNonNullValue(); if (hasNonNull) { this.min = stats.genericGetMin(); @@ -178,8 +188,8 @@ public class TestStatistics { public void validate(T value) { if (hasNonNull) { - assertTrue("min should be <= all values", min.compareTo(value) <= 0); - assertTrue("min should be >= all values", max.compareTo(value) >= 0); + assertTrue("min should be <= all values", comparator.compare(min, value) <= 0); + assertTrue("min should be >= all values", comparator.compare(max, value) >= 0); } } } @@ -280,7 +290,11 @@ public class TestStatistics { private void validateStatsForPage(DataPage page, DictionaryPage dict, ColumnDescriptor desc) { SingletonPageReader reader = new SingletonPageReader(dict, page); PrimitiveConverter converter = getValidatingConverter(page, desc.getType()); - Statistics stats = getStatisticsFromPageHeader(page); + Statistics<?> stats = getStatisticsFromPageHeader(page); + + assertEquals("Statistics does not use the proper comparator", + desc.getPrimitiveType().comparator().getClass(), + stats.comparator().getClass()); if (stats.isEmpty()) { // stats are empty if num nulls = 0 and there are no non-null values @@ -306,8 +320,8 @@ public class TestStatistics { System.err.println(String.format( "Validated stats min=%s max=%s nulls=%d for page=%s col=%s", - String.valueOf(stats.genericGetMin()), - String.valueOf(stats.genericGetMax()), stats.getNumNulls(), page, + stats.minAsString(), + stats.maxAsString(), stats.getNumNulls(), page, Arrays.toString(desc.getPath()))); } } @@ -315,92 +329,144 @@ public class TestStatistics { public static class DataContext extends DataGenerationContext.WriteContext { private static final int MAX_TOTAL_ROWS = 1000000; - private final long seed; private final Random random; private final int recordCount; - private final int fixedLength; - private final RandomValues.IntGenerator intGenerator; - private final RandomValues.LongGenerator longGenerator; - private final RandomValues.Int96Generator int96Generator; - private final RandomValues.FloatGenerator floatGenerator; - private final RandomValues.DoubleGenerator doubleGenerator; - private final RandomValues.StringGenerator stringGenerator; - private final RandomValues.BinaryGenerator binaryGenerator; - private final RandomValues.FixedGenerator fixedBinaryGenerator; + private final List<RandomValueGenerator<?>> randomGenerators; public DataContext(long seed, File path, int blockSize, int pageSize, boolean enableDictionary, ParquetProperties.WriterVersion version) throws IOException { super(path, buildSchema(seed), blockSize, pageSize, enableDictionary, true, version); - this.seed = seed; this.random = new Random(seed); this.recordCount = random.nextInt(MAX_TOTAL_ROWS); - this.fixedLength = schema.getType("fixed-binary").asPrimitiveType().getTypeLength(); - this.intGenerator = new RandomValues.IntGenerator(random.nextLong()); - this.longGenerator = new RandomValues.LongGenerator(random.nextLong()); - this.int96Generator = new RandomValues.Int96Generator(random.nextLong()); - this.floatGenerator = new RandomValues.FloatGenerator(random.nextLong()); - this.doubleGenerator = new RandomValues.DoubleGenerator(random.nextLong()); - this.stringGenerator = new RandomValues.StringGenerator(random.nextLong()); - this.binaryGenerator = new RandomValues.BinaryGenerator(random.nextLong()); - this.fixedBinaryGenerator = new RandomValues.FixedGenerator(random.nextLong(), fixedLength); + int fixedLength = schema.getType("fixed-binary").asPrimitiveType().getTypeLength(); + + randomGenerators = Arrays.<RandomValueGenerator<?>>asList( + new RandomValues.IntGenerator(random.nextLong()), + new RandomValues.LongGenerator(random.nextLong()), + new RandomValues.Int96Generator(random.nextLong()), + new RandomValues.FloatGenerator(random.nextLong()), + new RandomValues.DoubleGenerator(random.nextLong()), + new RandomValues.StringGenerator(random.nextLong()), + new RandomValues.BinaryGenerator(random.nextLong()), + new RandomValues.FixedGenerator(random.nextLong(), fixedLength), + new RandomValues.UnconstrainedIntGenerator(random.nextLong()), + new RandomValues.UnconstrainedLongGenerator(random.nextLong()), + new RandomValues.UnconstrainedFloatGenerator(random.nextLong()), + new RandomValues.UnconstrainedDoubleGenerator(random.nextLong()), + new RandomValues.IntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE), + new RandomValues.UIntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE), + new RandomValues.IntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE), + new RandomValues.UIntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE), + new RandomValues.UnconstrainedIntGenerator(random.nextLong()), + new RandomValues.UnconstrainedIntGenerator(random.nextLong()), + new RandomValues.UnconstrainedLongGenerator(random.nextLong()), + new RandomValues.UnconstrainedLongGenerator(random.nextLong()), + new RandomValues.UnconstrainedIntGenerator(random.nextLong()), + new RandomValues.UnconstrainedLongGenerator(random.nextLong()), + new RandomValues.FixedGenerator(random.nextLong(), fixedLength), + new RandomValues.BinaryGenerator(random.nextLong()), + new RandomValues.StringGenerator(random.nextLong()), + new RandomValues.StringGenerator(random.nextLong()), + new RandomValues.StringGenerator(random.nextLong()), + new RandomValues.BinaryGenerator(random.nextLong()), + new RandomValues.IntGenerator(random.nextLong()), + new RandomValues.IntGenerator(random.nextLong()), + new RandomValues.LongGenerator(random.nextLong()), + new RandomValues.LongGenerator(random.nextLong()), + new RandomValues.LongGenerator(random.nextLong()), + new RandomValues.FixedGenerator(random.nextLong(), 12) + ); } private static MessageType buildSchema(long seed) { Random random = new Random(seed); int fixedBinaryLength = random.nextInt(21) + 1; + int fixedPrecision = calculatePrecision(fixedBinaryLength); + int fixedScale = fixedPrecision / 4; + int binaryPrecision = calculatePrecision(16); + int binaryScale = binaryPrecision / 4; return new MessageType("schema", - new PrimitiveType(OPTIONAL, INT32, "i32"), - new PrimitiveType(OPTIONAL, INT64, "i64"), - new PrimitiveType(OPTIONAL, INT96, "i96"), - new PrimitiveType(OPTIONAL, FLOAT, "sngl"), - new PrimitiveType(OPTIONAL, DOUBLE, "dbl"), - new PrimitiveType(OPTIONAL, BINARY, "strings"), - new PrimitiveType(OPTIONAL, BINARY, "binary"), - new PrimitiveType(OPTIONAL, FIXED_LEN_BYTE_ARRAY, fixedBinaryLength, "fixed-binary"), - new PrimitiveType(REQUIRED, INT32, "unconstrained-i32"), - new PrimitiveType(REQUIRED, INT64, "unconstrained-i64"), - new PrimitiveType(REQUIRED, FLOAT, "unconstrained-sngl"), - new PrimitiveType(REQUIRED, DOUBLE, "unconstrained-dbl") + new PrimitiveType(OPTIONAL, INT32, "i32"), + new PrimitiveType(OPTIONAL, INT64, "i64"), + new PrimitiveType(OPTIONAL, INT96, "i96"), + new PrimitiveType(OPTIONAL, FLOAT, "sngl"), + new PrimitiveType(OPTIONAL, DOUBLE, "dbl"), + new PrimitiveType(OPTIONAL, BINARY, "strings"), + new PrimitiveType(OPTIONAL, BINARY, "binary"), + new PrimitiveType(OPTIONAL, FIXED_LEN_BYTE_ARRAY, fixedBinaryLength, "fixed-binary"), + new PrimitiveType(REQUIRED, INT32, "unconstrained-i32"), + new PrimitiveType(REQUIRED, INT64, "unconstrained-i64"), + new PrimitiveType(REQUIRED, FLOAT, "unconstrained-sngl"), + new PrimitiveType(REQUIRED, DOUBLE, "unconstrained-dbl"), + Types.optional(INT32).as(OriginalType.INT_8).named("int8"), + Types.optional(INT32).as(OriginalType.UINT_8).named("uint8"), + Types.optional(INT32).as(OriginalType.INT_16).named("int16"), + Types.optional(INT32).as(OriginalType.UINT_16).named("uint16"), + Types.optional(INT32).as(OriginalType.INT_32).named("int32"), + Types.optional(INT32).as(OriginalType.UINT_32).named("uint32"), + Types.optional(INT64).as(OriginalType.INT_64).named("int64"), + Types.optional(INT64).as(OriginalType.UINT_64).named("uint64"), + Types.optional(INT32).as(OriginalType.DECIMAL).precision(9).scale(2).named("decimal-int32"), + Types.optional(INT64).as(OriginalType.DECIMAL).precision(18).scale(4).named("decimal-int64"), + Types.optional(FIXED_LEN_BYTE_ARRAY).length(fixedBinaryLength).as(OriginalType.DECIMAL) + .precision(fixedPrecision).scale(fixedScale).named("decimal-fixed"), + Types.optional(BINARY).as(OriginalType.DECIMAL).precision(binaryPrecision).scale(binaryScale) + .named("decimal-binary"), + Types.optional(BINARY).as(OriginalType.UTF8).named("utf8"), + Types.optional(BINARY).as(OriginalType.ENUM).named("enum"), + Types.optional(BINARY).as(OriginalType.JSON).named("json"), + Types.optional(BINARY).as(OriginalType.BSON).named("bson"), + Types.optional(INT32).as(OriginalType.DATE).named("date"), + Types.optional(INT32).as(OriginalType.TIME_MILLIS).named("time-millis"), + Types.optional(INT64).as(OriginalType.TIME_MICROS).named("time-micros"), + Types.optional(INT64).as(OriginalType.TIMESTAMP_MILLIS).named("timestamp-millis"), + Types.optional(INT64).as(OriginalType.TIMESTAMP_MICROS).named("timestamp-micros"), + Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(OriginalType.INTERVAL).named("interval") ); } + private static int calculatePrecision(int byteCnt) { + String maxValue = BigInteger.valueOf(2L).pow(8 * byteCnt - 1).toString(); + return maxValue.length() - 1; + } + @Override public void write(ParquetWriter<Group> writer) throws IOException { for (int index = 0; index < recordCount; index++) { Group group = new SimpleGroup(super.schema); - if (!intGenerator.shouldGenerateNull()) { - group.append("i32", intGenerator.nextValue()); - } - if (!longGenerator.shouldGenerateNull()) { - group.append("i64", longGenerator.nextValue()); - } - if (!int96Generator.shouldGenerateNull()) { - group.append("i96", int96Generator.nextBinaryValue()); - } - if (!floatGenerator.shouldGenerateNull()) { - group.append("sngl", floatGenerator.nextValue()); - } - if (!doubleGenerator.shouldGenerateNull()) { - group.append("dbl", doubleGenerator.nextValue()); - } - if (!stringGenerator.shouldGenerateNull()) { - group.append("strings", stringGenerator.nextBinaryValue()); - } - if (!binaryGenerator.shouldGenerateNull()) { - group.append("binary", binaryGenerator.nextBinaryValue()); - } - if (!fixedBinaryGenerator.shouldGenerateNull()) { - group.append("fixed-binary", fixedBinaryGenerator.nextBinaryValue()); + for (int column = 0, columnCnt = schema.getFieldCount(); column < columnCnt; ++column) { + Type type = schema.getType(column); + RandomValueGenerator<?> generator = randomGenerators.get(column); + if (type.isRepetition(OPTIONAL) && generator.shouldGenerateNull()) { + continue; + } + switch (type.asPrimitiveType().getPrimitiveTypeName()) { + case BINARY: + case FIXED_LEN_BYTE_ARRAY: + case INT96: + group.append(type.getName(), ((RandomBinaryBase<?>) generator).nextBinaryValue()); + break; + case INT32: + group.append(type.getName(), (Integer) generator.nextValue()); + break; + case INT64: + group.append(type.getName(), (Long) generator.nextValue()); + break; + case FLOAT: + group.append(type.getName(), (Float) generator.nextValue()); + break; + case DOUBLE: + group.append(type.getName(), (Double) generator.nextValue()); + break; + case BOOLEAN: + group.append(type.getName(), (Boolean) generator.nextValue()); + break; + } } - group.append("unconstrained-i32", random.nextInt()); - group.append("unconstrained-i64", random.nextLong()); - group.append("unconstrained-sngl", random.nextFloat()); - group.append("unconstrained-dbl", random.nextDouble()); - writer.write(group); } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c6764c4a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java index 0439686..66b804c 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestThriftToParquetFileWriter.java @@ -19,8 +19,6 @@ package org.apache.parquet.hadoop.thrift; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Arrays; @@ -53,6 +51,7 @@ import org.junit.Test; import org.apache.parquet.example.data.Group; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.TestUtils; import org.apache.parquet.hadoop.example.GroupReadSupport; import org.apache.parquet.hadoop.metadata.ParquetMetadata; @@ -122,21 +121,21 @@ public class TestThriftToParquetFileWriter { for(ColumnChunkMetaData cmd: bmd.getColumns()) { switch(cmd.getType()) { case INT32: - assertTrue(intStatsSmall.equals((IntStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(intStatsSmall, cmd.getStatistics()); break; case INT64: - assertTrue(longStatsSmall.equals((LongStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(longStatsSmall, cmd.getStatistics()); break; case DOUBLE: - assertTrue(doubleStatsSmall.equals((DoubleStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(doubleStatsSmall, cmd.getStatistics()); break; case BOOLEAN: - assertTrue(boolStats.equals((BooleanStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(boolStats, cmd.getStatistics()); break; case BINARY: // there is also info_string that has no statistics if(cmd.getPath().toString() == "[test_string]") - assertTrue(binaryStatsSmall.equals((BinaryStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(binaryStatsSmall, cmd.getStatistics()); break; } } @@ -171,21 +170,21 @@ public class TestThriftToParquetFileWriter { case INT32: // testing the correct limits of an int32, there are also byte and short, tested earlier if(cmd.getPath().toString() == "[test_i32]") - assertTrue(intStatsLarge.equals((IntStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(intStatsLarge, cmd.getStatistics()); break; case INT64: - assertTrue(longStatsLarge.equals((LongStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(longStatsLarge, cmd.getStatistics()); break; case DOUBLE: - assertTrue(doubleStatsLarge.equals((DoubleStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(doubleStatsLarge, cmd.getStatistics()); break; case BOOLEAN: - assertTrue(boolStats.equals((BooleanStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(boolStats, cmd.getStatistics()); break; case BINARY: // there is also info_string that has no statistics if(cmd.getPath().toString() == "[test_string]") - assertTrue(binaryStatsLarge.equals((BinaryStatistics)cmd.getStatistics())); + TestUtils.assertStatsValuesEqual(binaryStatsLarge, cmd.getStatistics()); break; } }