davisusanibar commented on code in PR #215:
URL: https://github.com/apache/arrow-cookbook/pull/215#discussion_r880858576


##########
java/source/create.rst:
##########
@@ -70,6 +70,62 @@ Array of Varchar
 
     [one, two, three]
 
+In some scenarios it may be more appropriate to use the `Dictionary-encoded Layout`_ to encode the data, since it takes much less space.
+
+.. testcode::
+
+    import org.apache.arrow.memory.BufferAllocator;
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.FieldVector;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.dictionary.Dictionary;
+    import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+    import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+    import java.nio.charset.StandardCharsets;
+
+    try (BufferAllocator root = new RootAllocator();
+         VarCharVector countries = new VarCharVector("country-dict", root);
+         VarCharVector myAppUseCountryDictionary = new 
VarCharVector("app-use-country-dict", root)

Review Comment:
   Changed



##########
java/source/create.rst:
##########
@@ -70,6 +70,62 @@ Array of Varchar
 
     [one, two, three]
 
+In some scenarios it may be more appropriate to use the `Dictionary-encoded Layout`_ to encode the data, since it takes much less space.
+
+.. testcode::
+
+    import org.apache.arrow.memory.BufferAllocator;
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.FieldVector;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.dictionary.Dictionary;
+    import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+    import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+    import java.nio.charset.StandardCharsets;
+
+    try (BufferAllocator root = new RootAllocator();
+         VarCharVector countries = new VarCharVector("country-dict", root);
+         VarCharVector myAppUseCountryDictionary = new 
VarCharVector("app-use-country-dict", root)
+    ) {
+        countries.allocateNew(10);
+        countries.set(0, "Andorra".getBytes(StandardCharsets.UTF_8));
+        countries.set(1, "Cuba".getBytes(StandardCharsets.UTF_8));
+        countries.set(2, "Grecia".getBytes(StandardCharsets.UTF_8));
+        countries.set(3, "Guinea".getBytes(StandardCharsets.UTF_8));
+        countries.set(4, "Islandia".getBytes(StandardCharsets.UTF_8));
+        countries.set(5, "Malta".getBytes(StandardCharsets.UTF_8));
+        countries.set(6, "Tailandia".getBytes(StandardCharsets.UTF_8));
+        countries.set(7, "Uganda".getBytes(StandardCharsets.UTF_8));
+        countries.set(8, "Yemen".getBytes(StandardCharsets.UTF_8));
+        countries.set(9, "Zambia".getBytes(StandardCharsets.UTF_8));
+        countries.setValueCount(10);
+
+        Dictionary myCountryDictionary = new Dictionary(countries,

Review Comment:
   Changed



##########
java/source/create.rst:
##########
@@ -70,6 +70,62 @@ Array of Varchar
 
     [one, two, three]
 
+In some scenarios it may be more appropriate to use the `Dictionary-encoded Layout`_ to encode the data, since it takes much less space.
+
+.. testcode::
+
+    import org.apache.arrow.memory.BufferAllocator;
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.FieldVector;
+    import org.apache.arrow.vector.VarCharVector;
+    import org.apache.arrow.vector.dictionary.Dictionary;
+    import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+    import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+    import java.nio.charset.StandardCharsets;
+
+    try (BufferAllocator root = new RootAllocator();
+         VarCharVector countries = new VarCharVector("country-dict", root);
+         VarCharVector myAppUseCountryDictionary = new 
VarCharVector("app-use-country-dict", root)
+    ) {
+        countries.allocateNew(10);
+        countries.set(0, "Andorra".getBytes(StandardCharsets.UTF_8));
+        countries.set(1, "Cuba".getBytes(StandardCharsets.UTF_8));
+        countries.set(2, "Grecia".getBytes(StandardCharsets.UTF_8));
+        countries.set(3, "Guinea".getBytes(StandardCharsets.UTF_8));
+        countries.set(4, "Islandia".getBytes(StandardCharsets.UTF_8));
+        countries.set(5, "Malta".getBytes(StandardCharsets.UTF_8));
+        countries.set(6, "Tailandia".getBytes(StandardCharsets.UTF_8));
+        countries.set(7, "Uganda".getBytes(StandardCharsets.UTF_8));
+        countries.set(8, "Yemen".getBytes(StandardCharsets.UTF_8));
+        countries.set(9, "Zambia".getBytes(StandardCharsets.UTF_8));
+        countries.setValueCount(10);
+
+        Dictionary myCountryDictionary = new Dictionary(countries,
+                new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, 
/*indexType=*/null));

Review Comment:
   Added



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to