emkornfield commented on a change in pull request #11591: URL: https://github.com/apache/arrow/pull/11591#discussion_r741330445
########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; + File fileNoCompression = new File("target/write_no_compression_" + len + ".arrow"); + params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, fileNoCompression}); + + CompressionCodec lz4Codec = new Lz4CompressionCodec(); + File fileLZ4Compression = new File("target/write_lz4_compression_" + len + ".arrow"); Review comment: ```suggestion File fileLz4Compression = new File("target/write_lz4_compression_" + len + ".arrow"); ``` nit: generally we use camel case for abbreviations as well ########## File path: docs/source/java/ipc.rst ########## @@ -54,6 +54,15 @@ Now, we can begin writing a stream containing some number of
these batches. For ArrowStreamWriter writer = new ArrowStreamWriter(root, /*DictionaryProvider=*/null, Channels.newChannel(out)); +Here we are not used compression option, but this could be implemented on this way thru codec option: Review comment: ```suggestion Buffer level compression is also supported by passing a codec: ``` ########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; + File fileNoCompression = new File("target/write_no_compression_" + len + ".arrow"); + params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, fileNoCompression}); + + CompressionCodec lz4Codec = new Lz4CompressionCodec(); + File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow"); + params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, fileLZ4Compression}); + + CompressionCodec zstdCodec = new ZstdCompressionCodec(); + File fileZSTDCompression = new File("target/write_zstd_compression_" + len + ".arrow"); Review comment: ```suggestion File fileZstdCompression = new File("target/write_zstd_compression_" + len + ".arrow"); ``` ########## File path: java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java ########## @@ -65,6 +66,48 @@ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, Wri super(root, provider, out, option); } + /** + * Construct an ArrowStreamWriter with an optional DictionaryProvider for the OutputStream. + * + * @param root Existing VectorSchemaRoot with vectors to be written. + * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} + * @param codec the codec for compressing data. If it is null, then no compression is needed. + * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. + * @param provider DictionaryProvider for any vectors that are dictionary encoded. + * (Optional, can be null) + * @param out OutputStream for writing. + */ + public ArrowStreamWriter(VectorSchemaRoot root, boolean includeNullCount, CompressionCodec codec, Review comment: same comment about constructor explosion as the file writer. ########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; + File fileNoCompression = new File("target/write_no_compression_" + len + ".arrow"); + params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, fileNoCompression}); + + CompressionCodec lz4Codec = new Lz4CompressionCodec(); + File fileLZ4Compression = new File("target/write_lz4_compression_" + len + ".arrow"); + params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, fileLZ4Compression}); + + CompressionCodec zstdCodec = new ZstdCompressionCodec(); + File fileZSTDCompression = new File("target/write_zstd_compression_" + len + ".arrow"); + params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, fileZSTDCompression}); + + } + return params; + } + + @Test + public void writeReadRandomAccessFile() throws IOException { + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + BitVector bitVector = new BitVector("boolean", allocator); + VarCharVector varCharVector = new VarCharVector("varchar", allocator); + for (int i = 0; i < vectorLength; i++) { + bitVector.setSafe(i, i % 2 == 0 ? 0 : 1); + varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); + } + bitVector.setValueCount(vectorLength); + varCharVector.setValueCount(vectorLength); + + List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); + List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector); + + VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors); Review comment: try-with-resources? 
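For reference, a minimal sketch of the try-with-resources shape being suggested, based on the test code above; the exact grouping of resources is an assumption, not part of the proposed change:
```java
try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
     BitVector bitVector = new BitVector("boolean", allocator);
     VarCharVector varCharVector = new VarCharVector("varchar", allocator)) {
  // populate the vectors and set their value counts as in the test body ...
  List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField());
  List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector);
  try (VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors);
       FileOutputStream fileOutputStream = new FileOutputStream(file);
       ArrowFileWriter writer = new ArrowFileWriter(schemaRootWrite, true, codec, true,
           null, fileOutputStream.getChannel())) {
    writer.start();
    writer.writeBatch();
    writer.end();
    // run the assertions here; every resource is released even if an
    // assertion or I/O call throws.
  }
}
```
Closing the allocator last (as also suggested further down for the `RootAllocator`) has the added benefit that the test fails if any buffer is leaked.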
########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; Review comment: ```suggestion CompressionCodec noCompression = NoCompressionCodec.INSTANCE; ``` dumb can have negative connotations. ########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; + File fileNoCompression = new File("target/write_no_compression_" + len + ".arrow"); + params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, fileNoCompression}); + + CompressionCodec lz4Codec = new Lz4CompressionCodec(); + File fileLZ4Compression = new File("target/write_lz4_compression_" + len + ".arrow"); + params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, fileLZ4Compression}); + + CompressionCodec zstdCodec = new ZstdCompressionCodec(); + File fileZSTDCompression = new File("target/write_zstd_compression_" + len + ".arrow"); + params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, fileZSTDCompression}); + + } + return params; + } + + @Test + public void writeReadRandomAccessFile() throws IOException { + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + BitVector bitVector = new BitVector("boolean", allocator); + VarCharVector varCharVector = new VarCharVector("varchar", allocator); + for (int i = 0; i < vectorLength; i++) { + 
bitVector.setSafe(i, i % 2 == 0 ? 0 : 1); + varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); + } + bitVector.setValueCount(vectorLength); + varCharVector.setValueCount(vectorLength); + + List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); + List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector); + + VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors); + + // write + FileOutputStream fileOutputStream = new FileOutputStream(file); + ArrowFileWriter writer = new ArrowFileWriter(schemaRootWrite, true, codec, true, + null, fileOutputStream.getChannel()); + writer.start(); + writer.writeBatch(); + writer.end(); + + // validations + Assert.assertEquals(vectorLength, schemaRootWrite.getRowCount()); Review comment: can you check style in other tests, I thought we might static import assertEquals ########## File path: java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java ########## @@ -69,6 +70,31 @@ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, Writa this.metaData = metaData; } + public ArrowFileWriter(VectorSchemaRoot root, boolean includeNullCount, CompressionCodec codec, Review comment: Instead of making an ever expanding list of constructors, I think it is likely better at this point to create an "Options" class that can be built using a builder pattern and passed in. So we can have one new constructor: ArrowFileWriter(VectorSchemaRoot root, WriteOptions options, WritableByteChannel) and then maybe one private constructor if necessary that explodes the options. ########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; + File fileNoCompression = new File("target/write_no_compression_" + len + ".arrow"); + params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, fileNoCompression}); + + CompressionCodec lz4Codec = new Lz4CompressionCodec(); + File fileLZ4Compression = new File("target/write_lz4_compression_" + len + ".arrow"); + params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, fileLZ4Compression}); + + CompressionCodec zstdCodec = new ZstdCompressionCodec(); + File fileZSTDCompression = new File("target/write_zstd_compression_" + len + ".arrow"); + params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, fileZSTDCompression}); + + } + return params; + } + + @Test + public void writeReadRandomAccessFile() throws IOException { + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); Review comment: this should be in a try-with-resources block ########## File path: java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java ########## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.compression; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.compression.CompressionCodec; +import org.apache.arrow.vector.compression.CompressionUtil; +import org.apache.arrow.vector.compression.NoCompressionCodec; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.ArrowFileWriter; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompressionCodecFile { + private final CompressionCodec codec; + private final int vectorLength; + private final File file; + + public TestCompressionCodecFile(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec, + File file) { + this.codec = codec; + this.file = file; + this.vectorLength = vectorLength; + } + + @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}") + public static Collection<Object[]> getCodecs() { + List<Object[]> params = new ArrayList<>(); + + int[] lengths = new int[]{10, 100, 1000}; + for (int len : lengths) { + CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; + File fileNoCompression = new File("target/write_no_compression_" + len + ".arrow"); + params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, fileNoCompression}); + + CompressionCodec lz4Codec = new Lz4CompressionCodec(); + File fileLZ4Compression = new File("target/write_lz4_compression_" + len + ".arrow"); + params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, fileLZ4Compression}); + + CompressionCodec zstdCodec = new ZstdCompressionCodec(); + File fileZSTDCompression = new File("target/write_zstd_compression_" + len + ".arrow"); + params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, fileZSTDCompression}); + + } + return params; + } + + @Test + public void writeReadRandomAccessFile() throws IOException { + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + BitVector bitVector = new BitVector("boolean", allocator); + VarCharVector varCharVector = new VarCharVector("varchar", allocator); + for (int i = 0; i < vectorLength; i++) { + bitVector.setSafe(i, i % 2 == 0 ? 
0 : 1); + varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); + } + bitVector.setValueCount(vectorLength); + varCharVector.setValueCount(vectorLength); + + List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); + List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector); + + VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors); + + // write + FileOutputStream fileOutputStream = new FileOutputStream(file); + ArrowFileWriter writer = new ArrowFileWriter(schemaRootWrite, true, codec, true, Review comment: please try to comment literal parameters, e.g. `schemaRootWrite, /*parameter 1 name=*/ true, /*parameter 2 name=*/` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
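To make the "Options" suggestion for ArrowFileWriter above concrete, here is a rough, hypothetical sketch of the builder shape being described; the `WriteOptions` name comes from the comment, but the fields, defaults, and method names are illustrative only and not an existing Arrow API:
```java
// Hypothetical sketch only; assumes imports of CompressionCodec and
// NoCompressionCodec from org.apache.arrow.vector.compression.
public final class WriteOptions {
  private final boolean includeNullCount;
  private final CompressionCodec codec;
  private final boolean alignBuffers;

  private WriteOptions(Builder b) {
    this.includeNullCount = b.includeNullCount;
    this.codec = b.codec;
    this.alignBuffers = b.alignBuffers;
  }

  public boolean includeNullCount() { return includeNullCount; }
  public CompressionCodec getCodec() { return codec; }
  public boolean alignBuffers() { return alignBuffers; }

  public static Builder builder() { return new Builder(); }

  public static final class Builder {
    // defaults shown here are assumptions, not the library's documented behavior
    private boolean includeNullCount = true;
    private CompressionCodec codec = NoCompressionCodec.INSTANCE;
    private boolean alignBuffers = true;

    public Builder includeNullCount(boolean v) { this.includeNullCount = v; return this; }
    public Builder codec(CompressionCodec v) { this.codec = v; return this; }
    public Builder alignBuffers(boolean v) { this.alignBuffers = v; return this; }

    public WriteOptions build() { return new WriteOptions(this); }
  }
}
```
A caller could then write something like `new ArrowFileWriter(root, WriteOptions.builder().codec(new Lz4CompressionCodec()).build(), channel)`, and future knobs could be added to the builder without growing the list of constructors.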