davisusanibar commented on code in PR #35570:
URL: https://github.com/apache/arrow/pull/35570#discussion_r1218750026
##########
docs/source/java/dataset.rst:
##########
@@ -158,6 +156,21 @@ Or use shortcut construtor:
Then all columns will be emitted during scanning.
+Projection (Produce New Columns) and Filters
+============================================
+
+Users can specify projections (new columns) or filters in ScanOptions. For
example:
+
+.. code-block:: Java
+
+ ByteBuffer substraitExtendedExpressions = ...; //
createExtendedExpresionMessageUsingSubstraitPOJOClasses
Review Comment:
Added comment on a new line
##########
docs/source/java/substrait.rst:
##########
@@ -102,6 +104,335 @@ Here is an example of a Java program that queries a
Parquet file using Java Subs
0 ALGERIA 0 haggle. carefully final deposits detect slyly agai
1 ARGENTINA 1 al foxes promise slyly according to the regular
accounts. bold requests alon
+Executing Projections and Filters Using Extended Expressions
+============================================================
+
+Using `Extended Expression`_, we can leverage our current Dataset operations
to
+also support Projections and Filters. To gain access to Projections and
Filters,
+those operations need to be defined using the Extended Expression Java POJO
+classes provided by the `Substrait Java`_ project.
+
+Here is an example of a Java program that queries a Parquet file to project new
+columns and also filter them based on Extended Expression definitions. This
example
+shows how to:
+
+- Load TPCH parquet file Nation.parquet.
+- Produce new Projections and apply a Filter to the dataset using an extended
expression definition.
+ - Expression 01 - CONCAT: N_NAME || ' - ' || N_COMMENT = col 1 || ' - ' ||
col 3.
+ - Expression 02 - ADD: N_REGIONKEY + 10 = col 2 + 10.
+ - Expression 03 - FILTER: N_NATIONKEY > 18 = col 0 > 18.
+
+.. code-block:: Java
+
+ import java.nio.ByteBuffer;
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.Base64;
+ import java.util.HashMap;
+ import java.util.List;
+ import java.util.Optional;
+
+ import org.apache.arrow.dataset.file.FileFormat;
+ import org.apache.arrow.dataset.file.FileSystemDatasetFactory;
+ import org.apache.arrow.dataset.jni.NativeMemoryPool;
+ import org.apache.arrow.dataset.scanner.ScanOptions;
+ import org.apache.arrow.dataset.scanner.Scanner;
+ import org.apache.arrow.dataset.source.Dataset;
+ import org.apache.arrow.dataset.source.DatasetFactory;
+ import org.apache.arrow.memory.BufferAllocator;
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.ipc.ArrowReader;
+
+ import com.google.protobuf.InvalidProtocolBufferException;
+ import com.google.protobuf.util.JsonFormat;
+
+ import io.substrait.proto.Expression;
+ import io.substrait.proto.ExpressionReference;
+ import io.substrait.proto.ExtendedExpression;
+ import io.substrait.proto.FunctionArgument;
+ import io.substrait.proto.SimpleExtensionDeclaration;
+ import io.substrait.proto.SimpleExtensionURI;
+ import io.substrait.type.NamedStruct;
+ import io.substrait.type.Type;
+ import io.substrait.type.TypeCreator;
+ import io.substrait.type.proto.TypeProtoConverter;
+
+ public class ClientSubstraitExtendedExpressions {
+ public static void main(String[] args) throws Exception {
+ // create extended expression for: project two new columns + one filter
+ String binaryExtendedExpressions =
createExtendedExpresionMessageUsingPOJOClasses();
Review Comment:
Changed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]