This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch Update_to_core2.0.0 in repository https://gitbox.apache.org/repos/asf/datasketches-pig.git
commit c3e138cbe4e189e99bcb5a474b24913c67aac8d2 Author: Lee Rhodes <[email protected]> AuthorDate: Mon Mar 1 16:44:25 2021 -0800 Updates to match java core 2.0.0 --- pom.xml | 55 +++++++++++++++++----- .../apache/datasketches/pig/cpc/DataToSketch.java | 5 ++ .../apache/datasketches/pig/hll/DataToSketch.java | 5 ++ .../apache/datasketches/pig/theta/AexcludeB.java | 3 +- .../datasketches/pig/theta/DataToSketch.java | 19 ++++++-- .../apache/datasketches/pig/theta/Intersect.java | 6 +-- .../org/apache/datasketches/pig/theta/Union.java | 10 ++-- .../datasketches/pig/tuple/DataToSketch.java | 12 +++-- .../DataToSketchAlgebraicIntermediateFinal.java | 8 ++-- .../apache/datasketches/pig/tuple/UnionSketch.java | 12 ++--- .../UnionSketchAlgebraicIntermediateFinal.java | 6 +-- .../datasketches/pig/theta/AexcludeBTest.java | 29 ++++++------ 12 files changed, 111 insertions(+), 59 deletions(-) diff --git a/pom.xml b/pom.xml index ab68c9f..efe822c 100644 --- a/pom.xml +++ b/pom.xml @@ -21,6 +21,7 @@ under the License. <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> <parent> @@ -30,17 +31,14 @@ under the License. </parent> <groupId>org.apache.datasketches</groupId> - - <!-- UNIQUE FOR THIS JAVA COMPONENT --> <artifactId>datasketches-pig</artifactId> <version>1.1.0-SNAPSHOT</version> - <description>Apache Pig adaptors for the DataSketches library.</description> - <!-- END: UNIQUE FOR THIS JAVA COMPONENT --> + <packaging>jar</packaging> + <name>${project.artifactId}</name> + <description>Apache Pig adaptors for the DataSketches library.</description> <url>https://datasketches.apache.org/</url> - <name>${project.artifactId}</name> <inceptionYear>2015</inceptionYear> - <packaging>jar</packaging> <!-- jar is the default --> <mailingLists> <mailingList> @@ -85,7 +83,7 @@ under the License. <properties> <!-- UNIQUE FOR THIS JAVA COMPONENT --> - <datasketches-java.version>1.3.0-incubating</datasketches-java.version> + <datasketches-java.version>2.0.0</datasketches-java.version> <pig.version>0.17.0</pig.version> <hadoop-common.version>2.8.5</hadoop-common.version> <commons-math3.version>3.6.1</commons-math3.version> @@ -95,7 +93,7 @@ under the License. <testng.version>7.1.0</testng.version> <!-- System-wide properties --> - <maven.version>3.0.0</maven.version> + <maven.version>3.5.0</maven.version> <java.version>1.8</java.version> <maven.compiler.source>${java.version}</maven.compiler.source> <maven.compiler.target>${java.version}</maven.compiler.target> @@ -105,8 +103,11 @@ under the License. <project.build.resourceEncoding>${charset.encoding}</project.build.resourceEncoding> <project.reporting.outputEncoding>${charset.encoding}</project.reporting.outputEncoding> - <!-- org.codehaus.plexus used for strict profile testing--> - <plexus-compiler-javac-errorprone.version>2.8.8</plexus-compiler-javac-errorprone.version> + <!-- org.codehaus plugins --> + <!-- used for strict profile testing--> + <plexus-compiler-javac-errorprone.version>2.8.5</plexus-compiler-javac-errorprone.version> + <versions-maven-plugin.version>2.8.1</versions-maven-plugin.version> + <!-- Maven Plugins --> <maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <!-- overrides parent --> <maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!-- overrides parent --> @@ -124,6 +125,7 @@ under the License. <!-- org.jacoco Maven Plugins --> <jacoco-maven-plugin.version>0.8.6</jacoco-maven-plugin.version> <!-- org.eluder Maven Plugins --> + <coveralls-repo-token></coveralls-repo-token> <coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version> <!-- other --> <lifecycle-mapping.version>1.0.0</lifecycle-mapping.version> @@ -201,6 +203,13 @@ under the License. <build> <pluginManagement> <plugins> + + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>versions-maven-plugin</artifactId> + <version>${versions-maven-plugin.version}</version> + </plugin> + <plugin> <!-- We want to deploy the artifacts to a staging location for perusal --> <!-- Apache Parent pom: apache-release profile --> @@ -212,12 +221,14 @@ under the License. <!-- see maven-install-plugin --> </configuration> </plugin> + <plugin> <!-- Apache Parent pom, pluginManagement--> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-release-plugin</artifactId> <version>${maven-release-plugin.version}</version> </plugin> + <plugin> <!-- Extends Apache Parent pom, pluginManagement--> <groupId>org.apache.maven.plugins</groupId> @@ -226,18 +237,21 @@ under the License. <executions> <execution> <id>default-jar</id> + <phase>package</phase> <goals> <goal>jar</goal> </goals> </execution> <execution> <id>default-test-jar</id> + <phase>package</phase> <goals> <goal>test-jar</goal> </goals> </execution> </executions> </plugin> + <plugin> <!-- Extends Apache Parent pom, apache-release profile --> <groupId>org.apache.maven.plugins</groupId> @@ -255,6 +269,7 @@ under the License. </execution> </executions> </plugin> + <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-enforcer-plugin</artifactId> @@ -285,6 +300,7 @@ under the License. </execution> </executions> </plugin> + <plugin> <!-- Apache Parent pom, pluginManagement--> <groupId>org.apache.rat</groupId> @@ -304,13 +320,16 @@ under the License. <useDefaultExcludes>true</useDefaultExcludes> <excludes> <!-- rat uses .gitignore for excludes by default --> + <exclude>**/*.yaml</exclude> + <exclude>**/*.yml</exclude> + <exclude>**/.*</exclude> <exclude>**/test/resources/**/*.txt</exclude> - <exclude>.asf.yaml</exclude> <exclude>LICENSE</exclude> <exclude>NOTICE</exclude> </excludes> </configuration> </plugin> + <plugin> <!-- Extends Apache Parent pom, apache-release profile --> <groupId>org.apache.maven.plugins</groupId> @@ -333,6 +352,7 @@ under the License. </execution> </executions> </plugin> + <plugin> <!-- Apache Parent pom, pluginManagement--> <groupId>org.apache.maven.plugins</groupId> @@ -344,6 +364,7 @@ under the License. <redirectTestOutputToFile>true</redirectTestOutputToFile> </configuration> </plugin> + <plugin> <!-- Generates code coverage report from website. --> <groupId>org.jacoco</groupId> @@ -351,22 +372,30 @@ under the License. <version>${jacoco-maven-plugin.version}</version> <executions> <execution> - <id>prepare-agent</id> + <id>default-prepare-agent</id> <goals> <goal>prepare-agent</goal> </goals> </execution> + <execution> + <id>default-report</id> + <goals> + <goal>report</goal> + </goals> + </execution> </executions> </plugin> + <plugin> <!-- Submit code coverage report to Coveralls.io. --> <groupId>org.eluder.coveralls</groupId> <artifactId>coveralls-maven-plugin</artifactId> <version>${coveralls-maven-plugin.version}</version> <configuration> - <!-- Since we use Travis CI we do not have to put a Coveralls token here. --> + <repoToken>${coveralls-repo-token}</repoToken> </configuration> </plugin> + </plugins> </pluginManagement> <plugins> diff --git a/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java index 30c656c..3581702 100644 --- a/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java +++ b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java @@ -104,6 +104,11 @@ public class DataToSketch extends EvalFunc<DataByteArray> implements Accumulator * <li>DataByteArray: BYTEARRAY</li> * </ul> * + * <p><b>Note</b> Strings as values are normally typed as DataType.CHARARRAY, which will be + * encoded as UTF-8 prior to being submitted to the sketch. If the user requires a different + * encoding for cross-platform compatibility, it is recommended that these values be encoded prior + * to being submitted in a DataBag and then typed as a DataType.BYTEARRAY.</p> + * * @param inputTuple A tuple containing a single bag, containing Datum Tuples. * @return serialized CpcSketch * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)" diff --git a/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java index 6e38253..d660593 100644 --- a/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java +++ b/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java @@ -107,6 +107,11 @@ public class DataToSketch extends EvalFunc<DataByteArray> implements Accumulator * <li>DataByteArray: BYTEARRAY</li> * </ul> * + * <p><b>Note</b> Strings as values are normally typed as DataType.CHARARRAY, which will be + * encoded as UTF-8 prior to being submitted to the sketch. If the user requires a different + * encoding for cross-platform compatibility, it is recommended that these values be encoded prior + * to being submitted and then typed as a DataType.BYTEARRAY.</p> + * * @param inputTuple A tuple containing a single bag, containing Datum Tuples. * @return serialized HllSketch * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)" diff --git a/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java b/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java index 5619d62..d54f6c4 100644 --- a/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java +++ b/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java @@ -134,8 +134,7 @@ public class AexcludeB extends EvalFunc<Tuple> { } final AnotB aNOTb = SetOperation.builder().setSeed(seed_).buildANotB(); - aNOTb.update(sketchA, sketchB); - final CompactSketch compactSketch = aNOTb.getResult(true, null); + final CompactSketch compactSketch = aNOTb.aNotB(sketchA, sketchB, true, null); return compactOrderedSketchToTuple(compactSketch); } diff --git a/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java index 434ed1c..e414cd0 100644 --- a/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java +++ b/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java @@ -72,7 +72,7 @@ public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, * </ul> */ public DataToSketch() { - this(DEFAULT_NOMINAL_ENTRIES, (float)(1.0), DEFAULT_UPDATE_SEED); + this(DEFAULT_NOMINAL_ENTRIES, (float)1.0, DEFAULT_UPDATE_SEED); } /** @@ -86,7 +86,7 @@ public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, * @param nomEntriesStr <a href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a> */ public DataToSketch(final String nomEntriesStr) { - this(Integer.parseInt(nomEntriesStr), (float)(1.0), DEFAULT_UPDATE_SEED); + this(Integer.parseInt(nomEntriesStr), (float)1.0, DEFAULT_UPDATE_SEED); } /** @@ -129,7 +129,7 @@ public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, //Catch these errors during construction, don't wait for the exec to be called. checkIfPowerOf2(nomEntries, "nomEntries"); checkProbability(p, "p"); - if (nomEntries < (1 << Util.MIN_LG_NOM_LONGS)) { + if (nomEntries < 1 << Util.MIN_LG_NOM_LONGS) { throw new IllegalArgumentException("NomEntries too small: " + nomEntries + ", required: " + (1 << Util.MIN_LG_NOM_LONGS)); } @@ -181,6 +181,11 @@ public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, * </li> * </ul> * + * <p><b>Note</b> Strings as values are normally typed as DataType.CHARARRAY, which will be + * encoded as UTF-8 prior to being submitted to the sketch. If the user requires a different + * encoding for cross-platform compatibility, it is recommended that these values be encoded prior + * to being submitted in a DataBag and then typed as a DataType.BYTEARRAY.</p> + * * <b>Sketch Tuple</b> * <ul> * <li>Tuple: TUPLE (Contains exactly 1 field) @@ -313,6 +318,10 @@ public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, * Updates a union with the data from the given bag. * * @param bag A bag of tuples to insert. + * <p><b>Note</b> Strings as values are normally typed as DataType.CHARARRAY, which will be + * encoded as UTF-8 prior to being submitted to the sketch. If the user requires a different + * encoding for cross-platform compatibility, it is recommended that these values be encoded prior + * to being submitted in a DataBag and then typed as a DataType.BYTEARRAY.</p> * @param union the union to update */ private static void updateUnion(final DataBag bag, final Union union) { @@ -543,8 +552,8 @@ public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, //If field 0 of a dataTuple is a DataByteArray we assume it is a sketch // due to system bagged outputs from multiple mapper Intermediate functions. // Each dataTuple.DBA:sketch will merged into the union. - final DataByteArray dba = ((DataByteArray) f0); - union.update(Memory.wrap(dba.get())); + final DataByteArray dba = (DataByteArray) f0; + union.union(Memory.wrap(dba.get())); } else { // we should never get here. diff --git a/src/main/java/org/apache/datasketches/pig/theta/Intersect.java b/src/main/java/org/apache/datasketches/pig/theta/Intersect.java index 424f7f0..db59eec 100644 --- a/src/main/java/org/apache/datasketches/pig/theta/Intersect.java +++ b/src/main/java/org/apache/datasketches/pig/theta/Intersect.java @@ -193,7 +193,7 @@ public class Intersect extends EvalFunc<Tuple> implements Accumulator<Tuple>, Al */ @Override public Tuple getValue() { - if ((accumIntersection_ == null) || !accumIntersection_.hasResult()) { + if (accumIntersection_ == null || !accumIntersection_.hasResult()) { throw new IllegalStateException("" + "The accumulate(Tuple) method must be called at least once with " + "a valid inputTuple.bag.SketchTuple prior to calling getValue()."); @@ -254,7 +254,7 @@ public class Intersect extends EvalFunc<Tuple> implements Accumulator<Tuple>, Al final DataByteArray dba = (DataByteArray) f0; final Memory srcMem = Memory.wrap(dba.get()); final Sketch sketch = Sketch.wrap(srcMem, seed); - intersection.update(sketch); + intersection.intersect(sketch); } else { throw new IllegalArgumentException( @@ -380,7 +380,7 @@ public class Intersect extends EvalFunc<Tuple> implements Accumulator<Tuple>, Al final DataByteArray dba = (DataByteArray) f0; final Memory srcMem = Memory.wrap(dba.get()); final Sketch sketch = Sketch.wrap(srcMem, mySeed_); - intersection.update(sketch); + intersection.intersect(sketch); } else { // we should never get here. throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: " diff --git a/src/main/java/org/apache/datasketches/pig/theta/Union.java b/src/main/java/org/apache/datasketches/pig/theta/Union.java index 3444430..a785d32 100644 --- a/src/main/java/org/apache/datasketches/pig/theta/Union.java +++ b/src/main/java/org/apache/datasketches/pig/theta/Union.java @@ -71,7 +71,7 @@ public class Union extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebr * </ul> */ public Union() { - this(DEFAULT_NOMINAL_ENTRIES, (float)(1.0), DEFAULT_UPDATE_SEED); + this(DEFAULT_NOMINAL_ENTRIES, (float)1.0, DEFAULT_UPDATE_SEED); } /** @@ -85,7 +85,7 @@ public class Union extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebr * @param nomEntriesStr <a href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a> */ public Union(final String nomEntriesStr) { - this(Integer.parseInt(nomEntriesStr), (float)(1.0), DEFAULT_UPDATE_SEED); + this(Integer.parseInt(nomEntriesStr), (float)1.0, DEFAULT_UPDATE_SEED); } /** @@ -134,7 +134,7 @@ public class Union extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebr //Catch these errors during construction, don't wait for the exec to be called. checkIfPowerOf2(nomEntries, "nomEntries"); checkProbability(p, "p"); - if (nomEntries < (1 << Util.MIN_LG_NOM_LONGS)) { + if (nomEntries < 1 << Util.MIN_LG_NOM_LONGS) { throw new IllegalArgumentException("NomEntries too small: " + nomEntries + ", required: " + (1 << Util.MIN_LG_NOM_LONGS)); } @@ -308,7 +308,7 @@ public class Union extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebr if (type == DataType.BYTEARRAY) { final DataByteArray dba = (DataByteArray) f0; if (dba.size() > 0) { - union.update(Memory.wrap(dba.get())); + union.union(Memory.wrap(dba.get())); } } else { throw new IllegalArgumentException("Field type was not DataType.BYTEARRAY: " + type); @@ -503,7 +503,7 @@ public class Union extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebr // Each dataTuple.DBA:sketch will merged into the union. final DataByteArray dba = (DataByteArray) f0; final Memory srcMem = Memory.wrap(dba.get()); - union.update(srcMem); + union.union(srcMem); } else { // we should never get here. diff --git a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java index 0d26398..fe4837a 100644 --- a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java +++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java @@ -37,7 +37,13 @@ import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; /** - * This is a generic implementation to be specialized in concrete UDFs + * This is a generic implementation to be specialized in concrete UDFs. + * + * <p><b>Note</b> Strings as values are normally typed as DataType.CHARARRAY, which will be + * encoded as UTF-8 prior to being submitted to the sketch. If the user requires a different + * encoding for cross-platform compatibility, it is recommended that these values be encoded prior + * to being submitted in a DataBag and then typed as a DataType.BYTEARRAY.</p> + * * @param <U> Update type * @param <S> Summary type */ @@ -80,7 +86,7 @@ public abstract class DataToSketch<U, S extends UpdatableSummary<U>> extends Eva public DataToSketch(final int sketchSize, final float samplingProbability, final SummaryFactory<S> summaryFactory) { super(); - sketchBuilder_ = new UpdatableSketchBuilder<U, S>(summaryFactory) + sketchBuilder_ = new UpdatableSketchBuilder<>(summaryFactory) .setNominalEntries(sketchSize).setSamplingProbability(samplingProbability); } @@ -121,7 +127,7 @@ public abstract class DataToSketch<U, S extends UpdatableSummary<U>> extends Eva Logger.getLogger(getClass()).info("exec is used"); isFirstCall_ = false; } - if ((inputTuple == null) || (inputTuple.size() == 0)) { + if (inputTuple == null || inputTuple.size() == 0) { return null; } if (inputTuple.size() != 1) { diff --git a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java index 749cd74..6428958 100644 --- a/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java +++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java @@ -99,7 +99,7 @@ public abstract class DataToSketchAlgebraicIntermediateFinal<U, S extends Updata sketchSize_ = sketchSize; summarySetOps_ = summarySetOps; summaryDeserializer_ = summaryDeserializer; - sketchBuilder_ = new UpdatableSketchBuilder<U, S>(summaryFactory) + sketchBuilder_ = new UpdatableSketchBuilder<>(summaryFactory) .setNominalEntries(sketchSize).setSamplingProbability(samplingProbability); } @@ -110,7 +110,7 @@ public abstract class DataToSketchAlgebraicIntermediateFinal<U, S extends Updata Logger.getLogger(getClass()).info("algebraic is used"); isFirstCall_ = false; } - final Union<S> union = new Union<S>(sketchSize_, summarySetOps_); + final Union<S> union = new Union<>(sketchSize_, summarySetOps_); final DataBag bag = (DataBag) inputTuple.get(0); if (bag == null) { @@ -124,13 +124,13 @@ public abstract class DataToSketchAlgebraicIntermediateFinal<U, S extends Updata // just insert each item of the tuple into the sketch final UpdatableSketch<U, S> sketch = sketchBuilder_.build(); DataToSketch.updateSketch((DataBag) item, sketch); - union.update(sketch); + union.union(sketch); } else if (item instanceof DataByteArray) { // This is a sketch from a prior call to the // Intermediate function. merge it with the // current sketch. final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_); - union.update(incomingSketch); + union.union(incomingSketch); } else { // we should never get here. throw new IllegalArgumentException( diff --git a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java index 1bba6fe..4f89598 100644 --- a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java +++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java @@ -81,11 +81,11 @@ public abstract class UnionSketch<S extends Summary> extends EvalFunc<Tuple> imp Logger.getLogger(getClass()).info("exec is used"); isFirstCall_ = false; } - if ((inputTuple == null) || (inputTuple.size() == 0)) { + if (inputTuple == null || inputTuple.size() == 0) { return null; } final DataBag bag = (DataBag) inputTuple.get(0); - final Union<S> union = new Union<S>(sketchSize_, summarySetOps_); + final Union<S> union = new Union<>(sketchSize_, summarySetOps_); updateUnion(bag, union, summaryDeserializer_); return Util.tupleFactory.newTuple(new DataByteArray(union.getResult().toByteArray())); } @@ -97,13 +97,13 @@ public abstract class UnionSketch<S extends Summary> extends EvalFunc<Tuple> imp Logger.getLogger(getClass()).info("accumulator is used"); isFirstCall_ = false; } - if ((inputTuple == null) || (inputTuple.size() != 1)) { + if (inputTuple == null || inputTuple.size() != 1) { return; } final DataBag bag = (DataBag) inputTuple.get(0); if (bag == null || bag.size() == 0) { return; } if (union_ == null) { - union_ = new Union<S>(sketchSize_, summarySetOps_); + union_ = new Union<>(sketchSize_, summarySetOps_); } updateUnion(bag, union_, summaryDeserializer_); } @@ -124,11 +124,11 @@ public abstract class UnionSketch<S extends Summary> extends EvalFunc<Tuple> imp private static <S extends Summary> void updateUnion(final DataBag bag, final Union<S> union, final SummaryDeserializer<S> summaryDeserializer) throws ExecException { for (final Tuple innerTuple: bag) { - if ((innerTuple.size() != 1) || (innerTuple.get(0) == null)) { + if (innerTuple.size() != 1 || innerTuple.get(0) == null) { continue; } final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(innerTuple, summaryDeserializer); - union.update(incomingSketch); + union.union(incomingSketch); } } diff --git a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java index 655dd71..ea713f2 100644 --- a/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java +++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java @@ -81,7 +81,7 @@ public abstract class UnionSketchAlgebraicIntermediateFinal<S extends Summary> e Logger.getLogger(getClass()).info("algebraic is used"); isFirstCall_ = false; } - final Union<S> union = new Union<S>(sketchSize_, summarySetOps_); + final Union<S> union = new Union<>(sketchSize_, summarySetOps_); final DataBag bag = (DataBag) inputTuple.get(0); if (bag == null) { @@ -94,13 +94,13 @@ public abstract class UnionSketchAlgebraicIntermediateFinal<S extends Summary> e // this is from a prior call to the initial function, so there is a nested bag. for (Tuple innerTuple: (DataBag) item) { final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(innerTuple, summaryDeserializer_); - union.update(incomingSketch); + union.union(incomingSketch); } } else if (item instanceof DataByteArray) { // This is a sketch from a call to the Intermediate function // Add it to the current union. final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_); - union.update(incomingSketch); + union.union(incomingSketch); } else { // we should never get here. throw new IllegalArgumentException( diff --git a/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java b/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java index 6cb6626..c0a0b27 100644 --- a/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java +++ b/src/test/java/org/apache/datasketches/pig/theta/AexcludeBTest.java @@ -23,9 +23,11 @@ import static org.apache.datasketches.pig.PigTestingUtil.LS; import static org.apache.datasketches.pig.PigTestingUtil.createDbaFromQssRange; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.fail; import java.io.IOException; +import org.apache.datasketches.SketchesArgumentException; import org.apache.pig.EvalFunc; import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; @@ -54,29 +56,26 @@ public class AexcludeBTest { Double est; //Two nulls inputTuple = TupleFactory.getInstance().newTuple(2); - resultTuple = aNbFunc.exec(inputTuple); - assertNotNull(resultTuple); - assertEquals(resultTuple.size(), 1); - est = estFunc.exec(resultTuple); - assertEquals(est, 0.0, 0.0); + try { + resultTuple = aNbFunc.exec(inputTuple); + fail(); + } catch (SketchesArgumentException e) {} //A is null inputTuple = TupleFactory.getInstance().newTuple(2); inputTuple.set(1, createDbaFromQssRange(256, 0, 128)); - resultTuple = aNbFunc.exec(inputTuple); - assertNotNull(resultTuple); - assertEquals(resultTuple.size(), 1); - est = estFunc.exec(resultTuple); - assertEquals(est, 0.0, 0.0); + try { + resultTuple = aNbFunc.exec(inputTuple); + fail(); + } catch (SketchesArgumentException e) {} //A is valid, B is null inputTuple = TupleFactory.getInstance().newTuple(2); inputTuple.set(0, createDbaFromQssRange(256, 0, 256)); - resultTuple = aNbFunc.exec(inputTuple); - assertNotNull(resultTuple); - assertEquals(resultTuple.size(), 1); - est = estFunc.exec(resultTuple); - assertEquals(est, 256.0, 0.0); + try { + resultTuple = aNbFunc.exec(inputTuple); + fail(); + } catch (SketchesArgumentException e) {} //Both valid inputTuple = TupleFactory.getInstance().newTuple(2); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
