emkornfield commented on a change in pull request #11591:
URL: https://github.com/apache/arrow/pull/11591#discussion_r741330445



##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+      File fileNoCompression = new File("target/write_no_compression_" + len + 
".arrow");
+      params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, 
fileNoCompression});
+
+      CompressionCodec lz4Codec = new Lz4CompressionCodec();
+      File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow");

Review comment:
       ```suggestion
         File fileLz4Compression = new File("target/write_lz4_compression_" + 
len + ".arrow");
   ```
   nit: generally we use camel case for abbreviations as well

##########
File path: docs/source/java/ipc.rst
##########
@@ -54,6 +54,15 @@ Now, we can begin writing a stream containing some number of 
these batches. For
     ArrowStreamWriter writer = new ArrowStreamWriter(root, 
/*DictionaryProvider=*/null, Channels.newChannel(out));
 
 
+Here we are not used compression option, but this could be implemented on this 
way thru codec option:

Review comment:
       ```suggestion
   Buffer level compression is also supported by passing a codec:
   ```

##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+      File fileNoCompression = new File("target/write_no_compression_" + len + 
".arrow");
+      params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, 
fileNoCompression});
+
+      CompressionCodec lz4Codec = new Lz4CompressionCodec();
+      File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow");
+      params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, 
fileLZ4Compression});
+
+      CompressionCodec zstdCodec = new ZstdCompressionCodec();
+      File fileZSTDCompression = new File("target/write_zstd_compression_" + 
len + ".arrow");

Review comment:
       ```suggestion
         File fileZstdCompression = new File("target/write_zstd_compression_" + 
len + ".arrow");
   ```

##########
File path: 
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java
##########
@@ -65,6 +66,48 @@ public ArrowStreamWriter(VectorSchemaRoot root, 
DictionaryProvider provider, Wri
     super(root, provider, out, option);
   }
 
+  /**
+   * Construct an ArrowStreamWriter with an optional DictionaryProvider for 
the OutputStream.
+   *
+   * @param root Existing VectorSchemaRoot with vectors to be written.
+   * @param includeNullCount Controls whether null count is copied to the 
{@link ArrowRecordBatch}
+   * @param codec the codec for compressing data. If it is null, then no 
compression is needed.
+   * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+   * @param provider DictionaryProvider for any vectors that are dictionary 
encoded.
+   *                 (Optional, can be null)
+   * @param out OutputStream for writing.
+   */
+  public ArrowStreamWriter(VectorSchemaRoot root, boolean includeNullCount, 
CompressionCodec codec,

Review comment:
       same comment about constructor explosion as the file writer.

##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+      File fileNoCompression = new File("target/write_no_compression_" + len + 
".arrow");
+      params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, 
fileNoCompression});
+
+      CompressionCodec lz4Codec = new Lz4CompressionCodec();
+      File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow");
+      params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, 
fileLZ4Compression});
+
+      CompressionCodec zstdCodec = new ZstdCompressionCodec();
+      File fileZSTDCompression = new File("target/write_zstd_compression_" + 
len + ".arrow");
+      params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, 
fileZSTDCompression});
+
+    }
+    return params;
+  }
+
+  @Test
+  public void writeReadRandomAccessFile() throws IOException {
+    RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+    BitVector bitVector = new BitVector("boolean", allocator);
+    VarCharVector varCharVector = new VarCharVector("varchar", allocator);
+    for (int i = 0; i < vectorLength; i++) {
+      bitVector.setSafe(i, i % 2 == 0 ? 0 : 1);
+      varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8));
+    }
+    bitVector.setValueCount(vectorLength);
+    varCharVector.setValueCount(vectorLength);
+
+    List<Field> fields = Arrays.asList(bitVector.getField(), 
varCharVector.getField());
+    List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector);
+
+    VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors);

Review comment:
       try-with-resources?

##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;

Review comment:
       ```suggestion
         CompressionCodec noCompression = NoCompressionCodec.INSTANCE;
   ```
   dumb can have negative connotations.

##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+      File fileNoCompression = new File("target/write_no_compression_" + len + 
".arrow");
+      params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, 
fileNoCompression});
+
+      CompressionCodec lz4Codec = new Lz4CompressionCodec();
+      File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow");
+      params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, 
fileLZ4Compression});
+
+      CompressionCodec zstdCodec = new ZstdCompressionCodec();
+      File fileZSTDCompression = new File("target/write_zstd_compression_" + 
len + ".arrow");
+      params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, 
fileZSTDCompression});
+
+    }
+    return params;
+  }
+
+  @Test
+  public void writeReadRandomAccessFile() throws IOException {
+    RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+    BitVector bitVector = new BitVector("boolean", allocator);
+    VarCharVector varCharVector = new VarCharVector("varchar", allocator);
+    for (int i = 0; i < vectorLength; i++) {
+      bitVector.setSafe(i, i % 2 == 0 ? 0 : 1);
+      varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8));
+    }
+    bitVector.setValueCount(vectorLength);
+    varCharVector.setValueCount(vectorLength);
+
+    List<Field> fields = Arrays.asList(bitVector.getField(), 
varCharVector.getField());
+    List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector);
+
+    VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors);
+
+    // write
+    FileOutputStream fileOutputStream = new FileOutputStream(file);
+    ArrowFileWriter writer = new ArrowFileWriter(schemaRootWrite, true, codec, 
true,
+        null, fileOutputStream.getChannel());
+    writer.start();
+    writer.writeBatch();
+    writer.end();
+
+    // validations
+    Assert.assertEquals(vectorLength, schemaRootWrite.getRowCount());

Review comment:
       can you check the style in other tests? I thought we might statically 
import `assertEquals`

##########
File path: 
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java
##########
@@ -69,6 +70,31 @@ public ArrowFileWriter(VectorSchemaRoot root, 
DictionaryProvider provider, Writa
     this.metaData = metaData;
   }
 
+  public ArrowFileWriter(VectorSchemaRoot root, boolean includeNullCount, 
CompressionCodec codec,

Review comment:
       Instead of making an ever expanding list of constructors, I think it is 
likely better at this point to create an "Options"  class that can be built 
using a builder pattern and passed in.  So we can have one new constructor:
   
   ArrowFileWriter(VectorSchemaRoot root, WriteOptions options, 
WritableByteChannel) and then maybe one private constructor if necessary that 
explodes the options.

##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+      File fileNoCompression = new File("target/write_no_compression_" + len + 
".arrow");
+      params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, 
fileNoCompression});
+
+      CompressionCodec lz4Codec = new Lz4CompressionCodec();
+      File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow");
+      params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, 
fileLZ4Compression});
+
+      CompressionCodec zstdCodec = new ZstdCompressionCodec();
+      File fileZSTDCompression = new File("target/write_zstd_compression_" + 
len + ".arrow");
+      params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, 
fileZSTDCompression});
+
+    }
+    return params;
+  }
+
+  @Test
+  public void writeReadRandomAccessFile() throws IOException {
+    RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);

Review comment:
       this should be in a try-with-resources block

##########
File path: 
java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecFile.java
##########
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class TestCompressionCodecFile {
+  private final CompressionCodec codec;
+  private final int vectorLength;
+  private final File file;
+
+  public TestCompressionCodecFile(CompressionUtil.CodecType type, int 
vectorLength, CompressionCodec codec,
+                                  File file) {
+    this.codec = codec;
+    this.file = file;
+    this.vectorLength = vectorLength;
+  }
+
+  @Parameterized.Parameters(name = "codec = {0}, length = {1}, file = {2}")
+  public static Collection<Object[]> getCodecs() {
+    List<Object[]> params = new ArrayList<>();
+
+    int[] lengths = new int[]{10, 100, 1000};
+    for (int len : lengths) {
+      CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+      File fileNoCompression = new File("target/write_no_compression_" + len + 
".arrow");
+      params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec, 
fileNoCompression});
+
+      CompressionCodec lz4Codec = new Lz4CompressionCodec();
+      File fileLZ4Compression = new File("target/write_lz4_compression_" + len 
+ ".arrow");
+      params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec, 
fileLZ4Compression});
+
+      CompressionCodec zstdCodec = new ZstdCompressionCodec();
+      File fileZSTDCompression = new File("target/write_zstd_compression_" + 
len + ".arrow");
+      params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec, 
fileZSTDCompression});
+
+    }
+    return params;
+  }
+
+  @Test
+  public void writeReadRandomAccessFile() throws IOException {
+    RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+    BitVector bitVector = new BitVector("boolean", allocator);
+    VarCharVector varCharVector = new VarCharVector("varchar", allocator);
+    for (int i = 0; i < vectorLength; i++) {
+      bitVector.setSafe(i, i % 2 == 0 ? 0 : 1);
+      varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8));
+    }
+    bitVector.setValueCount(vectorLength);
+    varCharVector.setValueCount(vectorLength);
+
+    List<Field> fields = Arrays.asList(bitVector.getField(), 
varCharVector.getField());
+    List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector);
+
+    VectorSchemaRoot schemaRootWrite = new VectorSchemaRoot(fields, vectors);
+
+    // write
+    FileOutputStream fileOutputStream = new FileOutputStream(file);
+    ArrowFileWriter writer = new ArrowFileWriter(schemaRootWrite, true, codec, 
true,

Review comment:
       please try to comment literal parameters.  e.g. `schemaRootWrite, 
/*parameter 1 name=*/ true, /*parameter 2 name=*/`

##########
File path: 
java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java
##########
@@ -69,6 +70,31 @@ public ArrowFileWriter(VectorSchemaRoot root, 
DictionaryProvider provider, Writa
     this.metaData = metaData;
   }
 
+  public ArrowFileWriter(VectorSchemaRoot root, boolean includeNullCount, 
CompressionCodec codec,

Review comment:
       Instead of making an ever expanding list of constructors, I think it is 
likely better at this point to create an "Options"  class that can be built 
using a builder pattern and passed in.  So we can have one new constructor:
   
   ArrowFileWriter(VectorSchemaRoot root, WriteOptions options, 
WritableByteChannel) and then maybe one private constructor if necessary that 
explodes the options.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to