This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Update_to_core2.0.0
in repository https://gitbox.apache.org/repos/asf/datasketches-pig.git

commit c3e138cbe4e189e99bcb5a474b24913c67aac8d2
Author: Lee Rhodes <[email protected]>
AuthorDate: Mon Mar 1 16:44:25 2021 -0800

    Updates to match java core 2.0.0
---
 pom.xml                                            | 55 +++++++++++++++++-----
 .../apache/datasketches/pig/cpc/DataToSketch.java  |  5 ++
 .../apache/datasketches/pig/hll/DataToSketch.java  |  5 ++
 .../apache/datasketches/pig/theta/AexcludeB.java   |  3 +-
 .../datasketches/pig/theta/DataToSketch.java       | 19 ++++++--
 .../apache/datasketches/pig/theta/Intersect.java   |  6 +--
 .../org/apache/datasketches/pig/theta/Union.java   | 10 ++--
 .../datasketches/pig/tuple/DataToSketch.java       | 12 +++--
 .../DataToSketchAlgebraicIntermediateFinal.java    |  8 ++--
 .../apache/datasketches/pig/tuple/UnionSketch.java | 12 ++---
 .../UnionSketchAlgebraicIntermediateFinal.java     |  6 +--
 .../datasketches/pig/theta/AexcludeBTest.java      | 29 ++++++------
 12 files changed, 111 insertions(+), 59 deletions(-)

diff --git a/pom.xml b/pom.xml
index ab68c9f..efe822c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -21,6 +21,7 @@ under the License.
 
 <project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+
   <modelVersion>4.0.0</modelVersion>
 
   <parent>
@@ -30,17 +31,14 @@ under the License.
   </parent>
 
   <groupId>org.apache.datasketches</groupId>
-
-  <!-- UNIQUE FOR THIS JAVA COMPONENT -->
   <artifactId>datasketches-pig</artifactId>
   <version>1.1.0-SNAPSHOT</version>
-  <description>Apache Pig adaptors for the DataSketches library.</description>
-  <!-- END: UNIQUE FOR THIS JAVA COMPONENT -->
+  <packaging>jar</packaging>
 
+  <name>${project.artifactId}</name> 
+  <description>Apache Pig adaptors for the DataSketches library.</description>
   <url>https://datasketches.apache.org/</url>
-  <name>${project.artifactId}</name>
   <inceptionYear>2015</inceptionYear>
-  <packaging>jar</packaging> <!-- jar is the default -->
 
   <mailingLists>
     <mailingList>
@@ -85,7 +83,7 @@ under the License.
 
   <properties>
     <!-- UNIQUE FOR THIS JAVA COMPONENT -->
-    <datasketches-java.version>1.3.0-incubating</datasketches-java.version>
+    <datasketches-java.version>2.0.0</datasketches-java.version>
     <pig.version>0.17.0</pig.version>
     <hadoop-common.version>2.8.5</hadoop-common.version>
     <commons-math3.version>3.6.1</commons-math3.version>
@@ -95,7 +93,7 @@ under the License.
     <testng.version>7.1.0</testng.version>
 
     <!-- System-wide properties -->
-    <maven.version>3.0.0</maven.version>
+    <maven.version>3.5.0</maven.version>
     <java.version>1.8</java.version>
     <maven.compiler.source>${java.version}</maven.compiler.source>
     <maven.compiler.target>${java.version}</maven.compiler.target>
@@ -105,8 +103,11 @@ under the License.
     
<project.build.resourceEncoding>${charset.encoding}</project.build.resourceEncoding>
     
<project.reporting.outputEncoding>${charset.encoding}</project.reporting.outputEncoding>
 
-    <!-- org.codehaus.plexus used for strict profile testing-->
-    
<plexus-compiler-javac-errorprone.version>2.8.8</plexus-compiler-javac-errorprone.version>
+    <!-- org.codehaus plugins -->
+    <!-- used for strict profile testing-->
+    
<plexus-compiler-javac-errorprone.version>2.8.5</plexus-compiler-javac-errorprone.version>
+    <versions-maven-plugin.version>2.8.1</versions-maven-plugin.version>
+
     <!--  Maven Plugins -->
     <maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <!-- 
overrides parent -->
     <maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!-- 
overrides parent -->
@@ -124,6 +125,7 @@ under the License.
     <!-- org.jacoco Maven Plugins -->
     <jacoco-maven-plugin.version>0.8.6</jacoco-maven-plugin.version>
     <!-- org.eluder Maven Plugins -->
+    <coveralls-repo-token></coveralls-repo-token>
     <coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version>
     <!-- other -->
     <lifecycle-mapping.version>1.0.0</lifecycle-mapping.version>
@@ -201,6 +203,13 @@ under the License.
   <build>
     <pluginManagement>
       <plugins>
+
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>versions-maven-plugin</artifactId>
+          <version>${versions-maven-plugin.version}</version>
+        </plugin>
+
         <plugin>
           <!-- We want to deploy the artifacts to a staging location for 
perusal -->
           <!-- Apache Parent pom: apache-release profile -->
@@ -212,12 +221,14 @@ under the License.
             <!-- see maven-install-plugin -->
           </configuration>
         </plugin>
+
         <plugin>
           <!-- Apache Parent pom, pluginManagement-->
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-release-plugin</artifactId>
           <version>${maven-release-plugin.version}</version>
         </plugin>
+
         <plugin>
           <!-- Extends Apache Parent pom, pluginManagement-->
           <groupId>org.apache.maven.plugins</groupId>
@@ -226,18 +237,21 @@ under the License.
           <executions>
             <execution>
               <id>default-jar</id>
+              <phase>package</phase>
               <goals>
                 <goal>jar</goal>
               </goals>
             </execution>
             <execution>
               <id>default-test-jar</id>
+              <phase>package</phase>
               <goals>
                 <goal>test-jar</goal>
               </goals>
             </execution>
           </executions>
         </plugin>
+
         <plugin>
           <!-- Extends Apache Parent pom, apache-release profile -->
           <groupId>org.apache.maven.plugins</groupId>
@@ -255,6 +269,7 @@ under the License.
             </execution>
           </executions>
         </plugin>
+
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-enforcer-plugin</artifactId>
@@ -285,6 +300,7 @@ under the License.
             </execution>
           </executions>
         </plugin>
+
         <plugin>
           <!-- Apache Parent pom, pluginManagement-->
           <groupId>org.apache.rat</groupId>
@@ -304,13 +320,16 @@ under the License.
             <useDefaultExcludes>true</useDefaultExcludes>
             <excludes>
               <!-- rat uses .gitignore for excludes by default -->
+              <exclude>**/*.yaml</exclude>
+              <exclude>**/*.yml</exclude>
+              <exclude>**/.*</exclude>
               <exclude>**/test/resources/**/*.txt</exclude>
-              <exclude>.asf.yaml</exclude>
               <exclude>LICENSE</exclude>
               <exclude>NOTICE</exclude>
             </excludes>
           </configuration>
         </plugin>
+
         <plugin>
           <!-- Extends Apache Parent pom, apache-release profile -->
           <groupId>org.apache.maven.plugins</groupId>
@@ -333,6 +352,7 @@ under the License.
             </execution>
           </executions>
         </plugin>
+
         <plugin>
           <!-- Apache Parent pom, pluginManagement-->
           <groupId>org.apache.maven.plugins</groupId>
@@ -344,6 +364,7 @@ under the License.
             <redirectTestOutputToFile>true</redirectTestOutputToFile>
           </configuration>
         </plugin>
+
         <plugin>
           <!-- Generates code coverage report from website. -->
           <groupId>org.jacoco</groupId>
@@ -351,22 +372,30 @@ under the License.
           <version>${jacoco-maven-plugin.version}</version>
           <executions>
             <execution>
-              <id>prepare-agent</id>
+              <id>default-prepare-agent</id>
               <goals>
                 <goal>prepare-agent</goal>
               </goals>
             </execution>
+            <execution>
+              <id>default-report</id>
+              <goals>
+                <goal>report</goal>
+              </goals>
+            </execution>  
           </executions>
         </plugin>
+
         <plugin>
           <!-- Submit code coverage report to Coveralls.io. -->
           <groupId>org.eluder.coveralls</groupId>
           <artifactId>coveralls-maven-plugin</artifactId>
           <version>${coveralls-maven-plugin.version}</version>
           <configuration>
-            <!-- Since we use Travis CI we do not have to put a Coveralls 
token here. -->
+            <repoToken>${coveralls-repo-token}</repoToken>
           </configuration>
         </plugin>
+
       </plugins>
     </pluginManagement>
     <plugins>
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java 
b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java
index 30c656c..3581702 100644
--- a/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java
@@ -104,6 +104,11 @@ public class DataToSketch extends EvalFunc<DataByteArray> 
implements Accumulator
    *   <li>DataByteArray: BYTEARRAY</li>
    * </ul>
    *
+   * <p><b>Note</b> Strings as values are normally typed as 
DataType.CHARARRAY, which will be
+   * encoded as UTF-8 prior to being submitted to the sketch. If the user 
requires a different
+   * encoding for cross-platform compatibility, it is recommended that these 
values be encoded prior
+   * to being submitted in a DataBag and then typed as a 
DataType.BYTEARRAY.</p>
+   *
    * @param inputTuple A tuple containing a single bag, containing Datum 
Tuples.
    * @return serialized CpcSketch
    * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
diff --git a/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java 
b/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java
index 6e38253..d660593 100644
--- a/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java
@@ -107,6 +107,11 @@ public class DataToSketch extends EvalFunc<DataByteArray> 
implements Accumulator
    *   <li>DataByteArray: BYTEARRAY</li>
    * </ul>
    *
+   * <p><b>Note</b> Strings as values are normally typed as 
DataType.CHARARRAY, which will be
+   * encoded as UTF-8 prior to being submitted to the sketch. If the user 
requires a different
+   * encoding for cross-platform compatibility, it is recommended that these 
values be encoded prior
+   * to being submitted and then typed as a DataType.BYTEARRAY.</p>
+   *
    * @param inputTuple A tuple containing a single bag, containing Datum 
Tuples.
    * @return serialized HllSketch
    * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
diff --git a/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java 
b/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java
index 5619d62..d54f6c4 100644
--- a/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java
@@ -134,8 +134,7 @@ public class AexcludeB extends EvalFunc<Tuple> {
     }
 
     final AnotB aNOTb = SetOperation.builder().setSeed(seed_).buildANotB();
-    aNOTb.update(sketchA, sketchB);
-    final CompactSketch compactSketch = aNOTb.getResult(true, null);
+    final CompactSketch compactSketch = aNOTb.aNotB(sketchA, sketchB, true, 
null);
     return compactOrderedSketchToTuple(compactSketch);
   }
 
diff --git a/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java 
b/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java
index 434ed1c..e414cd0 100644
--- a/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java
@@ -72,7 +72,7 @@ public class DataToSketch extends EvalFunc<Tuple> implements 
Accumulator<Tuple>,
    * </ul>
    */
   public DataToSketch() {
-    this(DEFAULT_NOMINAL_ENTRIES, (float)(1.0), DEFAULT_UPDATE_SEED);
+    this(DEFAULT_NOMINAL_ENTRIES, (float)1.0, DEFAULT_UPDATE_SEED);
   }
 
   /**
@@ -86,7 +86,7 @@ public class DataToSketch extends EvalFunc<Tuple> implements 
Accumulator<Tuple>,
    * @param nomEntriesStr <a 
href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a>
    */
   public DataToSketch(final String nomEntriesStr) {
-    this(Integer.parseInt(nomEntriesStr), (float)(1.0), DEFAULT_UPDATE_SEED);
+    this(Integer.parseInt(nomEntriesStr), (float)1.0, DEFAULT_UPDATE_SEED);
   }
 
   /**
@@ -129,7 +129,7 @@ public class DataToSketch extends EvalFunc<Tuple> 
implements Accumulator<Tuple>,
     //Catch these errors during construction, don't wait for the exec to be 
called.
     checkIfPowerOf2(nomEntries, "nomEntries");
     checkProbability(p, "p");
-    if (nomEntries < (1 << Util.MIN_LG_NOM_LONGS)) {
+    if (nomEntries < 1 << Util.MIN_LG_NOM_LONGS) {
       throw new IllegalArgumentException("NomEntries too small: " + nomEntries
           + ", required: " + (1 << Util.MIN_LG_NOM_LONGS));
     }
@@ -181,6 +181,11 @@ public class DataToSketch extends EvalFunc<Tuple> 
implements Accumulator<Tuple>,
    *   </li>
    * </ul>
    *
+   * <p><b>Note</b> Strings as values are normally typed as 
DataType.CHARARRAY, which will be
+   * encoded as UTF-8 prior to being submitted to the sketch. If the user 
requires a different
+   * encoding for cross-platform compatibility, it is recommended that these 
values be encoded prior
+   * to being submitted in a DataBag and then typed as a 
DataType.BYTEARRAY.</p>
+   *
    * <b>Sketch Tuple</b>
    * <ul>
    *   <li>Tuple: TUPLE (Contains exactly 1 field)
@@ -313,6 +318,10 @@ public class DataToSketch extends EvalFunc<Tuple> 
implements Accumulator<Tuple>,
    * Updates a union with the data from the given bag.
    *
    * @param bag A bag of tuples to insert.
+   * <p><b>Note</b> Strings as values are normally typed as 
DataType.CHARARRAY, which will be
+   * encoded as UTF-8 prior to being submitted to the sketch. If the user 
requires a different
+   * encoding for cross-platform compatibility, it is recommended that these 
values be encoded prior
+   * to being submitted in a DataBag and then typed as a 
DataType.BYTEARRAY.</p>
    * @param union the union to update
    */
   private static void updateUnion(final DataBag bag, final Union union) {
@@ -543,8 +552,8 @@ public class DataToSketch extends EvalFunc<Tuple> 
implements Accumulator<Tuple>,
           //If field 0 of a dataTuple is a DataByteArray we assume it is a 
sketch
           // due to system bagged outputs from multiple mapper Intermediate 
functions.
           // Each dataTuple.DBA:sketch will merged into the union.
-          final DataByteArray dba = ((DataByteArray) f0);
-          union.update(Memory.wrap(dba.get()));
+          final DataByteArray dba = (DataByteArray) f0;
+          union.union(Memory.wrap(dba.get()));
 
         }
         else { // we should never get here.
diff --git a/src/main/java/org/apache/datasketches/pig/theta/Intersect.java 
b/src/main/java/org/apache/datasketches/pig/theta/Intersect.java
index 424f7f0..db59eec 100644
--- a/src/main/java/org/apache/datasketches/pig/theta/Intersect.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/Intersect.java
@@ -193,7 +193,7 @@ public class Intersect extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Al
    */
   @Override
   public Tuple getValue() {
-    if ((accumIntersection_ == null) || !accumIntersection_.hasResult()) {
+    if (accumIntersection_ == null || !accumIntersection_.hasResult()) {
       throw new IllegalStateException(""
           + "The accumulate(Tuple) method must be called at least once with "
           + "a valid inputTuple.bag.SketchTuple prior to calling getValue().");
@@ -254,7 +254,7 @@ public class Intersect extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Al
       final DataByteArray dba = (DataByteArray) f0;
       final Memory srcMem = Memory.wrap(dba.get());
       final Sketch sketch = Sketch.wrap(srcMem, seed);
-      intersection.update(sketch);
+      intersection.intersect(sketch);
     }
     else {
       throw new IllegalArgumentException(
@@ -380,7 +380,7 @@ public class Intersect extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Al
           final DataByteArray dba = (DataByteArray) f0;
           final Memory srcMem = Memory.wrap(dba.get());
           final Sketch sketch = Sketch.wrap(srcMem, mySeed_);
-          intersection.update(sketch);
+          intersection.intersect(sketch);
         }
         else { // we should never get here.
           throw new IllegalArgumentException("dataTuple.Field0: Is not a 
DataByteArray: "
diff --git a/src/main/java/org/apache/datasketches/pig/theta/Union.java 
b/src/main/java/org/apache/datasketches/pig/theta/Union.java
index 3444430..a785d32 100644
--- a/src/main/java/org/apache/datasketches/pig/theta/Union.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/Union.java
@@ -71,7 +71,7 @@ public class Union extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Algebr
    * </ul>
    */
   public Union() {
-    this(DEFAULT_NOMINAL_ENTRIES, (float)(1.0), DEFAULT_UPDATE_SEED);
+    this(DEFAULT_NOMINAL_ENTRIES, (float)1.0, DEFAULT_UPDATE_SEED);
   }
 
   /**
@@ -85,7 +85,7 @@ public class Union extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Algebr
    * @param nomEntriesStr <a 
href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a>
    */
   public Union(final String nomEntriesStr) {
-    this(Integer.parseInt(nomEntriesStr), (float)(1.0), DEFAULT_UPDATE_SEED);
+    this(Integer.parseInt(nomEntriesStr), (float)1.0, DEFAULT_UPDATE_SEED);
   }
 
   /**
@@ -134,7 +134,7 @@ public class Union extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Algebr
     //Catch these errors during construction, don't wait for the exec to be 
called.
     checkIfPowerOf2(nomEntries, "nomEntries");
     checkProbability(p, "p");
-    if (nomEntries < (1 << Util.MIN_LG_NOM_LONGS)) {
+    if (nomEntries < 1 << Util.MIN_LG_NOM_LONGS) {
       throw new IllegalArgumentException("NomEntries too small: " + nomEntries
           + ", required: " + (1 << Util.MIN_LG_NOM_LONGS));
     }
@@ -308,7 +308,7 @@ public class Union extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Algebr
       if (type == DataType.BYTEARRAY) {
         final DataByteArray dba = (DataByteArray) f0;
         if (dba.size() > 0) {
-          union.update(Memory.wrap(dba.get()));
+          union.union(Memory.wrap(dba.get()));
         }
       } else {
         throw new IllegalArgumentException("Field type was not 
DataType.BYTEARRAY: " + type);
@@ -503,7 +503,7 @@ public class Union extends EvalFunc<Tuple> implements 
Accumulator<Tuple>, Algebr
           // Each dataTuple.DBA:sketch will merged into the union.
           final DataByteArray dba = (DataByteArray) f0;
           final Memory srcMem = Memory.wrap(dba.get());
-          union.update(srcMem);
+          union.union(srcMem);
 
         }
         else { // we should never get here.
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java 
b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java
index 0d26398..fe4837a 100644
--- a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java
@@ -37,7 +37,13 @@ import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
 
 /**
- * This is a generic implementation to be specialized in concrete UDFs
+ * This is a generic implementation to be specialized in concrete UDFs.
+ *
+ * <p><b>Note</b> Strings as values are normally typed as DataType.CHARARRAY, 
which will be
+ * encoded as UTF-8 prior to being submitted to the sketch. If the user 
requires a different
+ * encoding for cross-platform compatibility, it is recommended that these 
values be encoded prior
+ * to being submitted in a DataBag and then typed as a DataType.BYTEARRAY.</p>
+ *
  * @param <U> Update type
  * @param <S> Summary type
  */
@@ -80,7 +86,7 @@ public abstract class DataToSketch<U, S extends 
UpdatableSummary<U>> extends Eva
   public DataToSketch(final int sketchSize, final float samplingProbability,
       final SummaryFactory<S> summaryFactory) {
     super();
-    sketchBuilder_ = new UpdatableSketchBuilder<U, S>(summaryFactory)
+    sketchBuilder_ = new UpdatableSketchBuilder<>(summaryFactory)
         
.setNominalEntries(sketchSize).setSamplingProbability(samplingProbability);
   }
 
@@ -121,7 +127,7 @@ public abstract class DataToSketch<U, S extends 
UpdatableSummary<U>> extends Eva
       Logger.getLogger(getClass()).info("exec is used");
       isFirstCall_ = false;
     }
-    if ((inputTuple == null) || (inputTuple.size() == 0)) {
+    if (inputTuple == null || inputTuple.size() == 0) {
       return null;
     }
     if (inputTuple.size() != 1) {
diff --git 
a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
 
b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
index 749cd74..6428958 100644
--- 
a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
+++ 
b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
@@ -99,7 +99,7 @@ public abstract class 
DataToSketchAlgebraicIntermediateFinal<U, S extends Updata
     sketchSize_ = sketchSize;
     summarySetOps_ = summarySetOps;
     summaryDeserializer_ = summaryDeserializer;
-    sketchBuilder_ = new UpdatableSketchBuilder<U, S>(summaryFactory)
+    sketchBuilder_ = new UpdatableSketchBuilder<>(summaryFactory)
         
.setNominalEntries(sketchSize).setSamplingProbability(samplingProbability);
   }
 
@@ -110,7 +110,7 @@ public abstract class 
DataToSketchAlgebraicIntermediateFinal<U, S extends Updata
       Logger.getLogger(getClass()).info("algebraic is used");
       isFirstCall_ = false;
     }
-    final Union<S> union = new Union<S>(sketchSize_, summarySetOps_);
+    final Union<S> union = new Union<>(sketchSize_, summarySetOps_);
 
     final DataBag bag = (DataBag) inputTuple.get(0);
     if (bag == null) {
@@ -124,13 +124,13 @@ public abstract class 
DataToSketchAlgebraicIntermediateFinal<U, S extends Updata
         // just insert each item of the tuple into the sketch
         final UpdatableSketch<U, S> sketch = sketchBuilder_.build();
         DataToSketch.updateSketch((DataBag) item, sketch);
-        union.update(sketch);
+        union.union(sketch);
       } else if (item instanceof DataByteArray) {
         // This is a sketch from a prior call to the
         // Intermediate function. merge it with the
         // current sketch.
         final Sketch<S> incomingSketch = 
Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_);
-        union.update(incomingSketch);
+        union.union(incomingSketch);
       } else {
         // we should never get here.
         throw new IllegalArgumentException(
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java 
b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java
index 1bba6fe..4f89598 100644
--- a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java
@@ -81,11 +81,11 @@ public abstract class UnionSketch<S extends Summary> 
extends EvalFunc<Tuple> imp
       Logger.getLogger(getClass()).info("exec is used");
       isFirstCall_ = false;
     }
-    if ((inputTuple == null) || (inputTuple.size() == 0)) {
+    if (inputTuple == null || inputTuple.size() == 0) {
       return null;
     }
     final DataBag bag = (DataBag) inputTuple.get(0);
-    final Union<S> union = new Union<S>(sketchSize_, summarySetOps_);
+    final Union<S> union = new Union<>(sketchSize_, summarySetOps_);
     updateUnion(bag, union, summaryDeserializer_);
     return Util.tupleFactory.newTuple(new 
DataByteArray(union.getResult().toByteArray()));
   }
@@ -97,13 +97,13 @@ public abstract class UnionSketch<S extends Summary> 
extends EvalFunc<Tuple> imp
       Logger.getLogger(getClass()).info("accumulator is used");
       isFirstCall_ = false;
     }
-    if ((inputTuple == null) || (inputTuple.size() != 1)) {
+    if (inputTuple == null || inputTuple.size() != 1) {
       return;
     }
     final DataBag bag = (DataBag) inputTuple.get(0);
     if (bag == null || bag.size() == 0) { return; }
     if (union_ == null) {
-      union_ = new Union<S>(sketchSize_, summarySetOps_);
+      union_ = new Union<>(sketchSize_, summarySetOps_);
     }
     updateUnion(bag, union_, summaryDeserializer_);
   }
@@ -124,11 +124,11 @@ public abstract class UnionSketch<S extends Summary> 
extends EvalFunc<Tuple> imp
   private static <S extends Summary> void updateUnion(final DataBag bag, final 
Union<S> union,
       final SummaryDeserializer<S> summaryDeserializer) throws ExecException {
     for (final Tuple innerTuple: bag) {
-      if ((innerTuple.size() != 1) || (innerTuple.get(0) == null)) {
+      if (innerTuple.size() != 1 || innerTuple.get(0) == null) {
         continue;
       }
       final Sketch<S> incomingSketch = 
Util.deserializeSketchFromTuple(innerTuple, summaryDeserializer);
-      union.update(incomingSketch);
+      union.union(incomingSketch);
     }
   }
 
diff --git 
a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
 
b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
index 655dd71..ea713f2 100644
--- 
a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
+++ 
b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
@@ -81,7 +81,7 @@ public abstract class UnionSketchAlgebraicIntermediateFinal<S 
extends Summary> e
       Logger.getLogger(getClass()).info("algebraic is used");
       isFirstCall_ = false;
     }
-    final Union<S> union = new Union<S>(sketchSize_, summarySetOps_);
+    final Union<S> union = new Union<>(sketchSize_, summarySetOps_);
 
     final DataBag bag = (DataBag) inputTuple.get(0);
     if (bag == null) {
@@ -94,13 +94,13 @@ public abstract class 
UnionSketchAlgebraicIntermediateFinal<S extends Summary> e
         // this is from a prior call to the initial function, so there is a 
nested bag.
         for (Tuple innerTuple: (DataBag) item) {
           final Sketch<S> incomingSketch = 
Util.deserializeSketchFromTuple(innerTuple, summaryDeserializer_);
-          union.update(incomingSketch);
+          union.union(incomingSketch);
         }
       } else if (item instanceof DataByteArray) {
         // This is a sketch from a call to the Intermediate function
         // Add it to the current union.
         final Sketch<S> incomingSketch = 
Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_);
-        union.update(incomingSketch);
+        union.union(incomingSketch);
       } else {
         // we should never get here.
         throw new IllegalArgumentException(
diff --git a/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java 
b/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java
index 6cb6626..c0a0b27 100644
--- a/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java
+++ b/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java
@@ -23,9 +23,11 @@ import static org.apache.datasketches.pig.PigTestingUtil.LS;
 import static org.apache.datasketches.pig.PigTestingUtil.createDbaFromQssRange;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.fail;
 
 import java.io.IOException;
 
+import org.apache.datasketches.SketchesArgumentException;
 import org.apache.pig.EvalFunc;
 import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
@@ -54,29 +56,26 @@ public class AexcludeBTest {
     Double est;
     //Two nulls
     inputTuple = TupleFactory.getInstance().newTuple(2);
-    resultTuple = aNbFunc.exec(inputTuple);
-    assertNotNull(resultTuple);
-    assertEquals(resultTuple.size(), 1);
-    est = estFunc.exec(resultTuple);
-    assertEquals(est, 0.0, 0.0);
+    try {
+      resultTuple = aNbFunc.exec(inputTuple);
+      fail();
+    } catch (SketchesArgumentException e) {}
 
     //A is null
     inputTuple = TupleFactory.getInstance().newTuple(2);
     inputTuple.set(1, createDbaFromQssRange(256, 0, 128));
-    resultTuple = aNbFunc.exec(inputTuple);
-    assertNotNull(resultTuple);
-    assertEquals(resultTuple.size(), 1);
-    est = estFunc.exec(resultTuple);
-    assertEquals(est, 0.0, 0.0);
+    try {
+      resultTuple = aNbFunc.exec(inputTuple);
+      fail();
+    } catch (SketchesArgumentException e) {}
 
     //A is valid, B is null
     inputTuple = TupleFactory.getInstance().newTuple(2);
     inputTuple.set(0, createDbaFromQssRange(256, 0, 256));
-    resultTuple = aNbFunc.exec(inputTuple);
-    assertNotNull(resultTuple);
-    assertEquals(resultTuple.size(), 1);
-    est = estFunc.exec(resultTuple);
-    assertEquals(est, 256.0, 0.0);
+    try {
+      resultTuple = aNbFunc.exec(inputTuple);
+      fail();
+    } catch (SketchesArgumentException e) {}
 
     //Both valid
     inputTuple = TupleFactory.getInstance().newTuple(2);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to