svn commit: r1795844 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/tez/ src
Author: szita
Date: Mon May 22 19:46:08 2017
New Revision: 1795844

URL: http://svn.apache.org/viewvc?rev=1795844&view=rev
Log:
PIG-4748: DateTimeWritable forgets Chronology (szita)

Added:
    pig/trunk/test/org/apache/pig/test/TestDateTime.java
Removed:
    pig/trunk/test/org/apache/pig/test/TestDefaultDateTimeZone.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/DateTimeWritable.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java
    pig/trunk/src/org/apache/pig/impl/PigImplConstants.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1795844&r1=1795843&r2=1795844&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon May 22 19:46:08 2017
@@ -97,6 +97,8 @@ OPTIMIZATIONS

 BUG FIXES

+PIG-4748: DateTimeWritable forgets Chronology (szita)
+
 PIG-5229: TestPigTest.testSpecificOrderOutput and testSpecificOrderOutputForAlias failing (knoguchi)

 PIG-5226: PreprocessorContext.java can deadlock forever with large stderr (jtolar via knoguchi)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/DateTimeWritable.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/DateTimeWritable.java?rev=1795844&r1=1795843&r2=1795844&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/DateTimeWritable.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/DateTimeWritable.java Mon May 22 19:46:08 2017
@@ -21,20 +21,27 @@ package org.apache.pig.backend.hadoop;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+import java.util.TreeSet;

 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;

 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.pig.impl.PigImplConstants;
+import org.apache.pig.impl.util.UDFContext;

 /**
- * Writable for Double values.
+ * Writable for DateTime values.
  */
 public class DateTimeWritable implements WritableComparable {
-
-private static final int ONE_MINUTE = 6;
+private static List availableZoneIDs = null;

 private DateTime value = null;

 public DateTimeWritable() {
@@ -46,12 +53,32 @@ public class DateTimeWritable implements
 }

 public void readFields(DataInput in) throws IOException {
-value = new DateTime(in.readLong(), DateTimeZone.forOffsetMillis(in.readShort() * ONE_MINUTE));
+retrieveAvailableZoneList();
+
+long instant = in.readLong();
+int offsetInMillis = in.readInt();
+int zoneListPos = in.readInt();
+
+DateTimeZone timeZone = null;
+if (zoneListPos != -1){
+timeZone = DateTimeZone.forID(availableZoneIDs.get(zoneListPos));
+} else {
+timeZone = DateTimeZone.forOffsetMillis(offsetInMillis);
+}
+
+value = new DateTime(instant, timeZone);
 }

 public void write(DataOutput out) throws IOException {
+retrieveAvailableZoneList();
+
+String zoneId = value.getZone().getID();
+int offsetInMillis = value.getZone().getOffset(0L);
+int zoneListPos = availableZoneIDs.indexOf(zoneId);
+
 out.writeLong(value.getMillis());
-out.writeShort(value.getZone().getOffset(value) / ONE_MINUTE);
+out.writeInt(offsetInMillis);
+out.writeInt(zoneListPos);
 }

 public void set(DateTime dt) {
@@ -62,6 +89,26 @@ public class DateTimeWritable implements
 return value;
 }

+private void retrieveAvailableZoneList() throws IOException {
+if (availableZoneIDs != null){
+return;
+}
+Properties props = UDFContext.getUDFContext().getUDFProperties(PigImplConstants.PIG_DATETIME_ZONES_LIST.getClass());
+Collection zoneList = StringUtils.getStringCollection(props.getProperty(PigImplConstants.PIG_DATETIME_ZONES_LIST));
+if (zoneList == null || zoneList.size() == 0){
+throw new IOException("Datetime zone information not set");
+}
+availableZoneIDs = new ArrayList<>(zoneList);
+}
+
+public static void setupAvailableZoneIds() {
+TreeSet sortedZoneIDs = new TreeSet<>(DateTimeZone.getAvailableIDs());
+Properties props = UDFContext.getUDFContext().getUDFProperties(
+PigImplConstants.PIG_DATETIME_ZONES_LIST.getClass());
+props.setProperty(PigImplConstants.PI
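[Editorial note] The change above can be read as: serialize the millisecond instant, the raw offset, and the position of the zone ID in a sorted list of all available zone IDs that writer and reader rebuild identically, so the full time-zone rules (chronology, DST) survive the round trip. The following standalone sketch mimics that idea with plain joda-time and without the UDFContext plumbing; the class name and the example zone are illustrative only, not part of the commit.

import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

// Illustration only -- class name and example zone are arbitrary.
public class ZoneListRoundTripSketch {
    public static void main(String[] args) {
        // Writer and reader build the same sorted list, so an int index identifies a zone ID.
        List<String> zoneIds = new ArrayList<>(new TreeSet<>(DateTimeZone.getAvailableIDs()));

        DateTime original = new DateTime(2017, 5, 22, 19, 46, DateTimeZone.forID("Europe/Budapest"));

        // "write" side: instant + raw offset + index of the zone ID in the shared list
        long instant = original.getMillis();
        int offsetInMillis = original.getZone().getOffset(0L);
        int zoneListPos = zoneIds.indexOf(original.getZone().getID());

        // "read" side: prefer the full zone ID (keeps chronology and DST rules);
        // fall back to a fixed offset only if the ID is not in the list.
        DateTimeZone zone = (zoneListPos != -1)
                ? DateTimeZone.forID(zoneIds.get(zoneListPos))
                : DateTimeZone.forOffsetMillis(offsetInMillis);
        System.out.println(new DateTime(instant, zone)); // same zone rules as 'original'
    }
}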
svn commit: r1795897 - in /pig/site: author/src/documentation/content/xdocs/ publish/
Author: szita
Date: Tue May 23 11:46:07 2017
New Revision: 1795897

URL: http://svn.apache.org/viewvc?rev=1795897&view=rev
Log:
Adding szita as committer to whoweare page

Modified:
    pig/site/author/src/documentation/content/xdocs/whoweare.xml
    pig/site/publish/about.pdf
    pig/site/publish/bylaws.pdf
    pig/site/publish/index.pdf
    pig/site/publish/issue_tracking.pdf
    pig/site/publish/linkmap.pdf
    pig/site/publish/mailing_lists.pdf
    pig/site/publish/philosophy.pdf
    pig/site/publish/privacypolicy.pdf
    pig/site/publish/releases.pdf
    pig/site/publish/version_control.pdf
    pig/site/publish/whoweare.html
    pig/site/publish/whoweare.pdf

Modified: pig/site/author/src/documentation/content/xdocs/whoweare.xml
URL: http://svn.apache.org/viewvc/pig/site/author/src/documentation/content/xdocs/whoweare.xml?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
--- pig/site/author/src/documentation/content/xdocs/whoweare.xml (original)
+++ pig/site/author/src/documentation/content/xdocs/whoweare.xml Tue May 23 11:46:07 2017
@@ -187,6 +187,14 @@

+szita
+http://people.apache.org/~szita Adam Szita
+Cloudera
+Committer
++1
+
+
+
 thejas
 http://people.apache.org/~thejas Thejas Nair
 http://hortonworks.com HortonWorks

Modified: pig/site/publish/about.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/about.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/about.pdf (original) and pig/site/publish/about.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/bylaws.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/bylaws.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/bylaws.pdf (original) and pig/site/publish/bylaws.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/index.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/index.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/index.pdf (original) and pig/site/publish/index.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/issue_tracking.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/issue_tracking.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/issue_tracking.pdf (original) and pig/site/publish/issue_tracking.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/linkmap.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/linkmap.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/linkmap.pdf (original) and pig/site/publish/linkmap.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/mailing_lists.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/mailing_lists.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/mailing_lists.pdf (original) and pig/site/publish/mailing_lists.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/philosophy.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/philosophy.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/philosophy.pdf (original) and pig/site/publish/philosophy.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/privacypolicy.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/privacypolicy.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/privacypolicy.pdf (original) and pig/site/publish/privacypolicy.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/releases.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/releases.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publish/releases.pdf (original) and pig/site/publish/releases.pdf Tue May 23 11:46:07 2017 differ

Modified: pig/site/publish/version_control.pdf
URL: http://svn.apache.org/viewvc/pig/site/publish/version_control.pdf?rev=1795897&r1=1795896&r2=1795897&view=diff
==============================================================================
Binary files pig/site/publi
svn commit: r1796139 - in /pig/trunk: CHANGES.txt ivy.xml
Author: szita
Date: Thu May 25 09:20:15 2017
New Revision: 1796139

URL: http://svn.apache.org/viewvc?rev=1796139&view=rev
Log:
PIG-3103: make mockito a test dependency (instead of compile) (nkollar via szita)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/ivy.xml

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796139&r1=1796138&r2=1796139&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu May 25 09:20:15 2017
@@ -36,6 +36,8 @@ PIG-5067: Revisit union on numeric type

 IMPROVEMENTS

+PIG-3103: make mockito a test dependency (instead of compile) (nkollar via szita)
+
 PIG-3021: Split results missing records when there is null values in the column comparison (jeffjee617, cheolsoo via daijy)

 PIG-5211: Optimize Nested Limited Sort (jins via daijy)

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1796139&r1=1796138&r2=1796139&view=diff
==============================================================================
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Thu May 25 09:20:15 2017
@@ -402,7 +402,7 @@ conf="compile->master">
-
+
svn commit: r1796140 - in /pig/trunk: CHANGES.txt test/org/apache/pig/test/TestDateTime.java test/org/apache/pig/test/TestPackage.java
Author: szita
Date: Thu May 25 09:36:14 2017
New Revision: 1796140

URL: http://svn.apache.org/viewvc?rev=1796140&view=rev
Log:
PIG-5238: Fix datetime related test issues after PIG-4748 (szita)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/test/org/apache/pig/test/TestDateTime.java
    pig/trunk/test/org/apache/pig/test/TestPackage.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796140&r1=1796139&r2=1796140&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu May 25 09:36:14 2017
@@ -101,6 +101,8 @@ OPTIMIZATIONS

 BUG FIXES

+PIG-5238: Fix datetime related test issues after PIG-4748 (szita)
+
 PIG-5185: Job name show "DefaultJobName" when running a Python script (daijy)

 PIG-4748: DateTimeWritable forgets Chronology (szita)

Modified: pig/trunk/test/org/apache/pig/test/TestDateTime.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestDateTime.java?rev=1796140&r1=1796139&r2=1796140&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestDateTime.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestDateTime.java Thu May 25 09:36:14 2017
@@ -159,11 +159,16 @@ public class TestDateTime {
  */
 @Test
 public void testDateTimeZoneOnCluster() throws Exception {
+
+String localDateTime = "2001-01-01T01:00:00.000";
+String localDateTimeDST = "2002-07-01T01:00:00.000";
+
 String inputFileName = "testDateTime-input.txt";
 String[] inputData = new String[]{
 "1\t1990-01-04T12:30:00.000+01:00",
 "2\t1990-01-04T11:30:00.000Z",
-"3\t2001-01-01T01:00:00.000",
-"4\t2017-02-02T15:19:00.000+01:00"
+"3\t"+localDateTime,
+"4\t"+localDateTimeDST,
+"5\t2017-02-02T15:19:00.000+01:00"
 };

 Util.createInputFile(cluster, inputFileName, inputData);
@@ -175,14 +180,19 @@ public class TestDateTime {

 Iterator it = pigServer.openIterator("C");

-//Should return last 3 rows from input
-String sysTZOffset = DateTimeZone.forOffsetMillis(DateTime.now().getZone().getOffset(0L)).toString();
+//Should return last 4 rows from input
+String tzOffsetForLocal = DateTimeZone.forOffsetMillis(DateTime.now().getZone().getOffset(new DateTime(localDateTime))).toString();
+String tzOffsetDSTForLocal = DateTimeZone.forOffsetMillis(DateTime.now().getZone().getOffset(new DateTime(localDateTimeDST))).toString();
+tzOffsetForLocal = tzOffsetForLocal.replaceAll("UTC","Z");
+tzOffsetDSTForLocal = tzOffsetDSTForLocal.replaceAll("UTC","Z");
+
 Util.checkQueryOutputsAfterSortRecursive(
 it,
 new String[]{
 "(1990-01-04T11:30:00.000Z,2)",
-"(2001-01-01T01:00:00.000"+sysTZOffset+",3)",
-"(2017-02-02T15:19:00.000+01:00,4)"
+"(2001-01-01T01:00:00.000"+tzOffsetForLocal+",3)",
+"(2002-07-01T01:00:00.000"+tzOffsetDSTForLocal+",4)",
+"(2017-02-02T15:19:00.000+01:00,5)"
 },
 org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("C")));
 }

Modified: pig/trunk/test/org/apache/pig/test/TestPackage.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPackage.java?rev=1796140&r1=1796139&r2=1796140&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPackage.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPackage.java Thu May 25 09:36:14 2017
@@ -34,6 +34,7 @@ import java.util.Map;
 import java.util.Random;

 import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.DateTimeWritable;
 import org.apache.pig.backend.hadoop.HDataType;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
@@ -171,6 +172,7 @@ public class TestPackage {
 runTest(r.nextLong(), inner, DataType.LONG);
 break;
 case DataType.DATETIME:
+DateTimeWritable.setupAvailableZoneIds();
 runTest(new DateTime(r.nextLong()), inner, DataType.DATETIME);
 break;
 case DataType.MAP:
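[Editorial note] The test fix above derives the expected offset per instant rather than once at epoch 0 because of daylight saving: a zone can have two different offsets over a year, so a single system-TZ offset cannot match both the January and the July row. A minimal, hypothetical illustration of that point (the zone is an assumption, not taken from the test):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

// Hypothetical demo class; "Europe/Budapest" is just an example DST zone.
public class DstOffsetDemo {
    public static void main(String[] args) {
        DateTimeZone zone = DateTimeZone.forID("Europe/Budapest");
        long winter = new DateTime("2001-01-01T01:00:00.000", zone).getMillis();
        long summer = new DateTime("2002-07-01T01:00:00.000", zone).getMillis();
        // Prints +01:00 for the winter instant and +02:00 for the summer one;
        // an offset taken at instant 0 (the old sysTZOffset) could only match one of them.
        System.out.println(DateTimeZone.forOffsetMillis(zone.getOffset(winter)));
        System.out.println(DateTimeZone.forOffsetMillis(zone.getOffset(summer)));
    }
}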
svn commit: r1796284 - /pig/branches/spark/test/e2e/pig/tests/nightly.conf
Author: szita
Date: Fri May 26 13:19:04 2017
New Revision: 1796284

URL: http://svn.apache.org/viewvc?rev=1796284&view=rev
Log:
PIG-5167: Limit_13 should test distinct and limit in Spark mode only (nkollar via szita)

Modified:
    pig/branches/spark/test/e2e/pig/tests/nightly.conf

Modified: pig/branches/spark/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/nightly.conf?rev=1796284&r1=1796283&r2=1796284&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/nightly.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/nightly.conf Fri May 26 13:19:04 2017
@@ -2305,11 +2305,11 @@ store b into ':OUTPATH:';\,
 {
 'num' => 13,
 'execonly' => 'spark', # Limit_4 failed on Spark: distinct doesn't do implicit sort like it does in MR
+'benchmark_exectype' => 'spark',
 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k';
 b = distinct a;
-c = order b by $0, $1, $2;
-d = limit c 100;
-store d into ':OUTPATH:';\,
+c = limit b 100;
+store c into ':OUTPATH:';\,
 }
 ]
 },
svn commit: r1796452 - in /pig/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/pig-index.xml
Author: szita
Date: Sat May 27 20:29:46 2017
New Revision: 1796452

URL: http://svn.apache.org/viewvc?rev=1796452&view=rev
Log:
PIG-5188: Review pig-index.xml (szita)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796452&r1=1796451&r2=1796452&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat May 27 20:29:46 2017
@@ -36,6 +36,8 @@ PIG-5067: Revisit union on numeric type

 IMPROVEMENTS

+PIG-5188: Review pig-index.xml (szita)
+
 PIG-4924: Translate failures.maxpercent MR setting to Tez (rohini)

 PIG-5236: json simple jar not included automatically with piggybank AvroStorage (satishsaley via rohini)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml?rev=1796452&r1=1796451&r2=1796452&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml Sat May 27 20:29:46 2017
@@ -152,6 +152,8 @@
 BinStorage function

+Bloom join
+
 Boolean expressions

 Boolean operators
@@ -426,6 +428,8 @@
 IMPORT (macros) operator

+IN function
+
 INDEXOF function

 installing Pig
@@ -491,6 +495,7 @@
 JOIN (outer) operator

 joins
+    bloom joins
     inner joins
     join optimizations
     merge joins
@@ -626,6 +631,8 @@
 NATIVE operator

+NonFSLoadFunc interface
+
 nested blocks (FOREACH operator)

 NOT (Boolean)
@@ -821,6 +828,8 @@
 REGEX_EXTRACT_ALL function

+REGEX_SEARCH function
+
 REGISTER statement

 regular expressions. See pattern matching
@@ -1069,8 +1078,9 @@

-Z (top)
+Z (top)

+Zeppelin
svn commit: r1796639 [5/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java Mon May 29 15:00:39 2017 @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.spark.converter; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Iterator; +import java.util.List; + +import scala.runtime.AbstractFunction1; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage; +import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.io.NullableTuple; +import org.apache.pig.impl.io.PigNullableWritable; +import org.apache.spark.rdd.RDD; + +@SuppressWarnings({ "serial" }) +public class PackageConverter implements RDDConverter { +private static final Log LOG = LogFactory.getLog(PackageConverter.class); + +@Override +public RDD convert(List> predecessors, +POPackage physicalOperator) throws IOException { +SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1); +RDD rdd = predecessors.get(0); +// package will generate the group from the result of the local +// rearrange +return rdd.map(new PackageFunction(physicalOperator), +SparkUtil.getManifest(Tuple.class)); +} + +private static class PackageFunction extends +AbstractFunction1 implements Serializable { + +private final POPackage physicalOperator; + +public PackageFunction(POPackage physicalOperator) { +this.physicalOperator = physicalOperator; +} + +@Override +public Tuple apply(final Tuple t) { +// (key, Seq:{(index, key, value without key)}) +if (LOG.isDebugEnabled()) +LOG.debug("PackageFunction in " + t); +Result result; +try { +PigNullableWritable key = new PigNullableWritable() { + +public Object getValueAsPigType() { +try { +Object keyTuple = t.get(0); +return keyTuple; +} catch (ExecException e) { +throw new RuntimeException(e); +} +} +}; +final Iterator bagIterator = (Iterator) t.get(1); +Iterator iterator = new Iterator() { +public boolean hasNext() { +return bagIterator.hasNext(); +} + 
+public NullableTuple next() { +try { +// we want the value and index only +Tuple next = bagIterator.next(); +NullableTuple nullableTuple = new NullableTuple( +(Tuple) next.get(1)); +nullableTuple.setIndex(((Number) next.get(0)) +.byteValue()); +if (LOG.isDebugEnabled()) +LOG.debug("Setting index to " + next.get(0) + +" for tuple " + (Tuple)next.get(1)); +return nullableTuple; +} catch (ExecException e) { +thro
svn commit: r1796639 [1/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Author: szita
Date: Mon May 29 15:00:39 2017
New Revision: 1796639

URL: http://svn.apache.org/viewvc?rev=1796639&view=rev
Log:
PIG-4059: Pig On Spark

Added:
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/NoopFilterRemoverUtil.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POBroadcastSpark.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoinSpark.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkEngineConf.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPOUserFuncVisitor.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigContext.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigRecordReader.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/BroadcastConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PoissonSampleConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SecondaryKeySortUtil.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SparkSampleSortConverter.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.
svn commit: r1796639 [10/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRed
Added: pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java (added) +++ pig/trunk/src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java Mon May 29 15:00:39 2017 @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.tools.pigstats.spark; + +import java.util.List; +import java.util.Map; + +import org.apache.pig.tools.pigstats.*; +import scala.Option; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.Counters; +import org.apache.pig.PigWarning; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; +import org.apache.pig.backend.hadoop.executionengine.spark.JobMetricsListener; +import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.newplan.PlanVisitor; +import org.apache.spark.executor.ShuffleReadMetrics; +import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.executor.TaskMetrics; + +import com.google.common.collect.Maps; + +public class SparkJobStats extends JobStats { + +private int jobId; +private Map stats = Maps.newLinkedHashMap(); +private boolean disableCounter; +private Counters counters = null; +public static String FS_COUNTER_GROUP = "FS_GROUP"; +private Map>> warningCounters = null; + +protected SparkJobStats(int jobId, PigStats.JobGraph plan, Configuration conf) { +this(String.valueOf(jobId), plan, conf); +this.jobId = jobId; +} + +protected SparkJobStats(String jobId, PigStats.JobGraph plan, Configuration conf) { +super(jobId, plan); +setConf(conf); +} + +public void setConf(Configuration conf) { +super.setConf(conf); +disableCounter = conf.getBoolean("pig.disable.counter", false); +initializeHadoopCounter(); +} + +public void addOutputInfo(POStore poStore, boolean success, + JobMetricsListener jobMetricsListener) { +if (!poStore.isTmpStore()) { +long bytes = getOutputSize(poStore, conf); +long recordsCount = -1; +if (disableCounter == false) { +recordsCount = SparkStatsUtil.getRecordCount(poStore); +} +OutputStats outputStats = new OutputStats(poStore.getSFile().getFileName(), +bytes, recordsCount, success); +outputStats.setPOStore(poStore); +outputStats.setConf(conf); + +outputs.add(outputStats); +} +} + +public void addInputStats(POLoad po, boolean success, + boolean singleInput) { + +long recordsCount = -1; +if (disableCounter == false) { +recordsCount = 
SparkStatsUtil.getRecordCount(po); +} +long bytesRead = -1; +if (singleInput && stats.get("BytesRead") != null) { +bytesRead = stats.get("BytesRead"); +} +InputStats inputStats = new InputStats(po.getLFile().getFileName(), +bytesRead, recordsCount, success); +inputStats.setConf(conf); + +inputs.add(inputStats); +} + +public void collectStats(JobMetricsListener jobMetricsListener) { +if (jobMetricsListener != null) { +Map> taskMetrics = jobMetricsListener.getJobMetric(jobId); +if (taskMetrics == null) { +throw new RuntimeException("No task metrics available for jobId " + jobId); +} +stats = combineTaskMetrics(taskMetrics); +} +} + +public Map getStats() { +return stats; +} + +private Map combineTaskMetrics(Map> jobMetric) { +Map results = Maps.newLinkedHashMap(); + +long executorDeserializeTime = 0
svn commit: r1796639 [3/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Mon May 29 15:00:39 2017 @@ -0,0 +1,735 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.spark; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.TransformerException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.pig.PigConfiguration; +import org.apache.pig.PigException; +import org.apache.pig.PigWarning; +import org.apache.pig.backend.BackendException; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; +import org.apache.pig.backend.hadoop.executionengine.Launcher; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POBroadcastSpark; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODistinct; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoinSpark; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach; +import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeCogroup; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeJoin; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPreCombinerLocalRearrange; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSkewedJoin; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStream; +import org.apache.pig.backend.hadoop.execution
svn commit: r1796639 [6/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java Mon May 29 15:00:39 2017 @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.spark.converter; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pig.tools.pigstats.spark.SparkCounters; +import org.apache.pig.tools.pigstats.spark.SparkPigStatusReporter; +import org.apache.pig.tools.pigstats.spark.SparkStatsUtil; +import scala.Tuple2; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; +import org.apache.pig.PigConfiguration; +import org.apache.pig.StoreFuncInterface; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; +import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.util.ObjectSerializer; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.rdd.PairRDDFunctions; +import org.apache.spark.rdd.RDD; + +import com.google.common.collect.Lists; + +/** + * Converter that takes a POStore and stores it's content. 
+ */ +@SuppressWarnings({ "serial" }) +public class StoreConverter implements +RDDConverter, POStore> { + +private static final Log LOG = LogFactory.getLog(StoreConverter.class); + +private JobConf jobConf = null; +public StoreConverter(JobConf jobConf) { +this.jobConf = jobConf; +} + +@Override +public RDD> convert(List> predecessors, +POStore op) throws IOException { +SparkUtil.assertPredecessorSize(predecessors, op, 1); +RDD rdd = predecessors.get(0); + + SparkPigStatusReporter.getInstance().createCounter(SparkStatsUtil.SPARK_STORE_COUNTER_GROUP, +SparkStatsUtil.getCounterName(op)); + +// convert back to KV pairs +JavaRDD> rddPairs = rdd.toJavaRDD().map( +buildFromTupleFunction(op)); + +PairRDDFunctions pairRDDFunctions = new PairRDDFunctions( +rddPairs.rdd(), SparkUtil.getManifest(Text.class), +SparkUtil.getManifest(Tuple.class), null); + +POStore poStore = configureStorer(jobConf, op); + +if ("true".equalsIgnoreCase(jobConf +.get(PigConfiguration.PIG_OUTPUT_LAZY))) { +Job storeJob = new Job(jobConf); +LazyOutputFormat.setOutputFormatClass(storeJob, +PigOutputFormat.class); +jobConf = (JobConf) storeJob.getConfiguration(); +jobConf.setOutputKeyClass(Text.class); +jobConf.setOutputValueClass(Tuple.class); +String fileName = poStore.getSFile().getFileName(); +Path filePath = new Path(fileName); +FileOutputFormat.setOutputPath(jobConf,filePath); +pairRDDFunctions.saveAsNewAPIHadoopDataset(jobConf); +} else { +pairRDDFunctions.saveAsNewAPIHadoopFile(poStore.getSFile() +.getFileName(), Text.class, Tuple.class, +PigOutputFormat.class, jobConf); +} + +
svn commit: r1796639 [2/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeCogroup.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeCogroup.java?rev=1796639&r1=1796638&r2=1796639&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeCogroup.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeCogroup.java Mon May 29 15:00:39 2017 @@ -122,6 +122,15 @@ public class POMergeCogroup extends Phys this.endOfRecordMark = endOfRecordMark; } +//For Spark +private transient boolean endOfInput = false; +public boolean isEndOfInput() { +return endOfInput; +} +public void setEndOfInput (boolean isEndOfInput) { +endOfInput = isEndOfInput; +} + @Override public Result getNextTuple() throws ExecException { @@ -145,7 +154,7 @@ public class POMergeCogroup extends Phys break; case POStatus.STATUS_EOP: -if(!this.parentPlan.endOfAllInput) +if(!(this.parentPlan.endOfAllInput || isEndOfInput())) return baseInp; if(lastTime) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java?rev=1796639&r1=1796638&r2=1796639&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java Mon May 29 15:00:39 2017 @@ -131,6 +131,24 @@ public class POMergeJoin extends Physica private byte endOfRecordMark = POStatus.STATUS_NULL; +// Only for Spark +// If current operator reaches at its end, flag endOfInput is set as true. +// The old flag parentPlan.endOfAllInput doesn't work in spark mode, because it is shared +// between operators in the same plan, so it could be set by preceding operators even +// current operator does not reach at its end. (see PIG-4876) +private transient boolean endOfInput = false; +public boolean isEndOfInput() { +return endOfInput; +} +public void setEndOfInput (boolean isEndOfInput) { +endOfInput = isEndOfInput; +} + +// Only for spark. +// it means that current operator reaches at its end and the last left input was +// added into 'leftTuples', ready for join. +private boolean leftInputConsumedInSpark = false; + // This serves as the default TupleFactory private transient TupleFactory mTupleFactory; @@ -352,7 +370,7 @@ public class POMergeJoin extends Physica } else if(cmpval > 0){// We got ahead on right side. Store currently read right tuple. -if(!this.parentPlan.endOfAllInput){ +if(!(this.parentPlan.endOfAllInput|| leftInputConsumedInSpark)){ prevRightKey = rightKey; prevRightInp = rightInp; // There cant be any more join on this key. @@ -413,11 +431,14 @@ public class POMergeJoin extends Physica } case POStatus.STATUS_EOP: -if(this.parentPlan.endOfAllInput){ +if(this.parentPlan.endOfAllInput || isEndOfInput()){ // We hit the end on left input. // Tuples in bag may still possibly join with right side. curJoinKey = prevLeftKey; curLeftKey = null; +if (isEndOfInput()) { +leftInputConsumedInSpark = true; +} break; } else// Fetch next left input. 
@@ -427,7 +448,9 @@ public class POMergeJoin extends Physica return curLeftInp; } -if((null != prevRightKey) && !this.parentPlan.endOfAllInput && ((Comparable)prevRightKey).compareTo(curLeftKey) >= 0){ +if((null != prevRightKey) +&& !(this.parentPlan.endOfAllInput || leftInputConsumedInSpark) +&& ((Comparable)prevRightKey).compareTo(curLeftKey) >= 0){ // This will happen when we accumulated inputs on left side and moved on, but are still behind the right side // In that case, throw away the tuples accumulated till now and add the one we read in this function call. @@ -509,7 +532,7 @@ public class POMergeJoin extend
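[Editorial note] The comments in the diff above explain why a per-operator endOfInput flag replaces the shared parentPlan.endOfAllInput in Spark mode: the plan-wide flag can be flipped by a preceding operator before the current one has actually drained its input (PIG-4876). The sketch below is an illustration of how a Spark-side driver loop might use the new flag; the class and the drain()/emit() helpers are invented for this example and are not the committed converter code.

import java.util.Iterator;

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeJoin;
import org.apache.pig.data.Tuple;

// Illustrative only: shows the intended use of setEndOfInput(), not Pig's actual converter.
class MergeJoinDriverSketch {
    static void drain(POMergeJoin join, Iterator<Tuple> leftInput) throws Exception {
        while (leftInput.hasNext()) {
            join.attachInput(leftInput.next());
            if (!leftInput.hasNext()) {
                // Signal only this operator that its own input is exhausted; the shared
                // parentPlan.endOfAllInput flag stays untouched (see PIG-4876).
                join.setEndOfInput(true);
            }
            Result r = join.getNextTuple();
            while (r.returnStatus == POStatus.STATUS_OK) {
                emit((Tuple) r.result);
                r = join.getNextTuple();
            }
        }
    }

    static void emit(Tuple t) {
        // hand the joined tuple to the next operator / RDD iterator
    }
}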
svn commit: r1796639 [8/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java Mon May 29 15:00:39 2017 @@ -0,0 +1,1565 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.spark.plan; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.pig.CollectableLoadFunc; +import org.apache.pig.FuncSpec; +import org.apache.pig.IndexableLoadFunc; +import org.apache.pig.LoadFunc; +import org.apache.pig.OrderedLoadFunc; +import org.apache.pig.PigConfiguration; +import org.apache.pig.PigException; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MergeJoinIndexer; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.ScalarPhyFinder; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.UDFFinder; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.ConstantExpression; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POBroadcastSpark; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter; +import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCross; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODistinct; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoin; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoinSpark; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POGlobalRearrange; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeCogroup; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeJoin; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PONative; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage; +import org.apache.pi
svn commit: r1796639 [12/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRed
Modified: pig/trunk/test/org/apache/pig/test/TestPigRunner.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigRunner.java?rev=1796639&r1=1796638&r2=1796639&view=diff == --- pig/trunk/test/org/apache/pig/test/TestPigRunner.java (original) +++ pig/trunk/test/org/apache/pig/test/TestPigRunner.java Mon May 29 15:00:39 2017 @@ -41,11 +41,13 @@ import org.apache.commons.lang3.ArrayUti import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.Counters; +import org.apache.pig.ExecType; import org.apache.pig.PigConfiguration; import org.apache.pig.PigRunner; import org.apache.pig.PigRunner.ReturnCode; import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; +import org.apache.pig.backend.hadoop.executionengine.spark.SparkExecType; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileLocalizer; import org.apache.pig.impl.plan.OperatorPlan; @@ -58,6 +60,7 @@ import org.apache.pig.tools.pigstats.Pig import org.apache.pig.tools.pigstats.PigStatsUtil; import org.apache.pig.tools.pigstats.mapreduce.MRJobStats; import org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil; +import org.apache.pig.tools.pigstats.spark.SparkJobStats; import org.junit.AfterClass; import org.junit.Assume; import org.junit.Before; @@ -207,12 +210,13 @@ public class TestPigRunner { PigStats stats = PigRunner.run(args, new TestNotificationListener(execType)); assertTrue(stats.isSuccessful()); -if (execType.toString().startsWith("tez")) { -assertEquals(1, stats.getNumberJobs()); -assertEquals(stats.getJobGraph().size(), 1); -} else { +if (execType.equals("mapreduce")) { assertEquals(2, stats.getNumberJobs()); assertEquals(stats.getJobGraph().size(), 2); +} else { +// Tez and Spark +assertEquals(1, stats.getNumberJobs()); +assertEquals(stats.getJobGraph().size(), 1); } Configuration conf = ConfigurationUtil.toConfiguration(stats.getPigProperties()); @@ -274,6 +278,10 @@ public class TestPigRunner { assertEquals(stats.getJobGraph().size(), 1); // 5 vertices assertEquals(stats.getJobGraph().getSources().get(0).getPlan().size(), 5); +} else if (execType.equals("spark")) { +// In spark mode,the number of spark job is calculated by the number of POStore. +// 1 POStore generates 1 spark job. +assertEquals(stats.getJobGraph().size(), 1); } else { assertEquals(stats.getJobGraph().size(), 4); } @@ -294,7 +302,12 @@ public class TestPigRunner { // Need to investigate // assertEquals("B", ((JobStats) stats.getJobGraph().getPredecessors( //js).get(0)).getAlias()); +} else if (execType.equals("spark")) { +assertEquals("A,B", ((JobStats) stats.getJobGraph().getSources().get( +0)).getAlias()); +// TODO: alias is not set for sample-aggregation/partition/sort job. } else { + assertEquals("A", ((JobStats) stats.getJobGraph().getSources().get( 0)).getAlias()); assertEquals("B", ((JobStats) stats.getJobGraph().getPredecessors( @@ -323,7 +336,14 @@ public class TestPigRunner { String[] args = { "-x", execType, PIG_FILE }; PigStats stats = PigRunner.run(args, new TestNotificationListener(execType)); assertTrue(stats.isSuccessful()); -assertTrue(stats.getJobGraph().size() == 1); +if (execType.equals("spark")) { +// In spark mode,the number of spark job is calculated by the number of POStore. +// 2 POStore generates 2 spark jobs. 
+assertTrue(stats.getJobGraph().size() == 2); +} else { +assertTrue(stats.getJobGraph().size() == 1); +} + // Each output file should include the following: // output: // 1\t2\t3\n @@ -372,7 +392,13 @@ public class TestPigRunner { String[] args = { "-x", execType, PIG_FILE }; PigStats stats = PigRunner.run(args, new TestNotificationListener(execType)); assertTrue(stats.isSuccessful()); -assertEquals(stats.getJobGraph().size(), 1); +if (execType.equals("spark")) { +// In spark mode,the number of spark job is calculated by the number of POStore. +// 2 POStore generates 2 spark jobs. +
svn commit: r1796639 [9/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java Mon May 29 15:00:39 2017 @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.spark.plan; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.impl.plan.Operator; +import org.apache.pig.impl.plan.OperatorKey; +import org.apache.pig.impl.plan.VisitorException; +import org.apache.pig.impl.util.MultiMap; + +/** + * An operator model for a Spark job. Acts as a host to the plans that will + * execute in spark. 
+ */ +public class SparkOperator extends Operator { +private static enum OPER_FEATURE { +NONE, +// Indicate if this job is a sampling job +SAMPLER, +// Indicate if this job is a merge indexer +INDEXER, +// Indicate if this job is a group by job +GROUPBY, +// Indicate if this job is a cogroup job +COGROUP, +// Indicate if this job is a regular join job +HASHJOIN, +// Indicate if this job is a union job +UNION, +// Indicate if this job is a native job +NATIVE, +// Indicate if this job is a limit job +LIMIT, +// Indicate if this job is a limit job after sort +LIMIT_AFTER_SORT; +}; + +public PhysicalPlan physicalPlan; + +public Set UDFs; + +/* Name of the Custom Partitioner used */ +public String customPartitioner = null; + +public Set scalars; + +public int requestedParallelism = -1; + +private BitSet feature = new BitSet(); + +private boolean splitter = false; + +// Name of the partition file generated by sampling process, +// Used by Skewed Join +private String skewedJoinPartitionFile; + +private boolean usingTypedComparator = false; + +private boolean combineSmallSplits = true; + +private List crossKeys = null; + +private MultiMap multiQueryOptimizeConnectionMap = new MultiMap(); + +// Indicates if a UDF comparator is used +boolean isUDFComparatorUsed = false; + +//The quantiles file name if globalSort is true +private String quantFile; + +//Indicates if this job is an order by job +private boolean globalSort = false; + +public SparkOperator(OperatorKey k) { +super(k); +physicalPlan = new PhysicalPlan(); +UDFs = new HashSet(); +scalars = new HashSet(); +} + +@Override +public boolean supportsMultipleInputs() { +return true; +} + +@Override +public boolean supportsMultipleOutputs() { +return true; +} + +@Override +public String name() { +String udfStr = getUDFsAsStr(); +StringBuilder sb = new StringBuilder("Spark" + "(" ++ requestedParallelism + (udfStr.equals("") ? "" : ",") ++ udfStr + ")" + " - " + mKey.toString()); +return sb.toString(); +} + +private String getUDFsAsStr() { +StringBuilder sb = new StringBuilder(); +if (UDFs != null && UDFs.size() > 0) { +for (String str : UDFs) { +sb.append(str.substring(str.lastIndexOf('.') + 1)); +sb.append(','); +} +sb.deleteCharAt(sb.length() - 1); +} +return sb.toString(); +} + +public void add(PhysicalOperator physicalOper) { +this.physicalPlan.add(physicalOper); +} + +@Override +public void visit(SparkOpPlanVisitor v) throws Visitor
svn commit: r1796639 [11/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRed
Added: pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java?rev=1796639&view=auto == --- pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java (added) +++ pig/trunk/test/org/apache/pig/spark/TestIndexedKey.java Mon May 29 15:00:39 2017 @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.spark; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import org.apache.pig.backend.hadoop.executionengine.spark.converter.IndexedKey; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import static org.junit.Assert.assertEquals; + +@RunWith(JUnit4.class) +public class TestIndexedKey { + +/**Case1:Compare IndexedKeys with same index value + * key1key2equal? hashCode1hashCode2 + * foo null N hashCode(foo)index + * nullfoo N indexhashCode(foo) + * foo foo Y hashCode(foo)hashCode(foo) + * nullnull Y indexindex + * (1,1) (1,1) Y hashCode((1,1)) hashCode((1,1)) + * (1,)(1,) Y hashCode((1,)) hashCode((1,)) + * (1,1) (1,2) N hashCode((1,1)) hashCode((1,2)) + */ +@Test +public void testIndexedKeyWithSameIndexValue() throws Exception { +IndexedKey a0 = new IndexedKey(new Byte("0"), "foo"); +IndexedKey a1 = new IndexedKey(new Byte("0"), null); +assertEquals(a0.equals(a1), false); +assertEquals(a0.hashCode()==a1.hashCode(),false); + +IndexedKey a2 = new IndexedKey(new Byte("0"), null); +IndexedKey a3 = new IndexedKey(new Byte("0"), "foo"); +assertEquals(a2.equals(a3),false); +assertEquals(a2.hashCode()==a3.hashCode(),false); + +IndexedKey a4 = new IndexedKey(new Byte("0"), "foo"); +IndexedKey a5 = new IndexedKey(new Byte("0"), "foo"); +assertEquals(a4.equals(a5),true); +assertEquals(a4.hashCode()==a5.hashCode(),true); + +IndexedKey a6 = new IndexedKey(new Byte("0"), null); +IndexedKey a7 = new IndexedKey(new Byte("0"), null); +assertEquals(a6.equals(a7),true); +assertEquals(a6.hashCode()==a7.hashCode(),true); + +Tuple t1 = TupleFactory.getInstance().newTuple(2); +t1.set(0,"1"); +t1.set(1,"1"); +Tuple t2 = TupleFactory.getInstance().newTuple(2); +t2.set(0,"1"); +t2.set(1,"1"); +IndexedKey a8 = new IndexedKey(new Byte("0"), t1); +IndexedKey a9 = new IndexedKey(new Byte("0"), t2); +assertEquals(a8.equals(a9),true); +assertEquals(a8.hashCode()==a9.hashCode(),true); + +Tuple t3 = TupleFactory.getInstance().newTuple(2); +t3.set(0,"1"); +t3.set(1,null); +Tuple t4 = TupleFactory.getInstance().newTuple(2); +t4.set(0,"1"); +t4.set(1,null); +IndexedKey a10 = new IndexedKey(new Byte("0"), t3); +IndexedKey a11 = new IndexedKey(new Byte("0"), t4); +assertEquals(a10.equals(a11),true); +assertEquals(a10.hashCode()==a11.hashCode(),true); + +Tuple t5 = 
TupleFactory.getInstance().newTuple(2); +t5.set(0,"1"); +t5.set(1,"1"); +Tuple t6 = TupleFactory.getInstance().newTuple(2); +t6.set(0,"1"); +t6.set(1,"2"); +IndexedKey a12 = new IndexedKey(new Byte("0"), t5); +IndexedKey a13 = new IndexedKey(new Byte("0"), t6); +assertEquals(a12.equals(a13),false); +assertEquals(a12.hashCode()==a13.hashCode(),false); +} + +/* + * Case2:Compare IndexedKeys with different index value + * key1key2equal? hashCode1hashCode2 + * foo null N hashCode(foo)index2 + * nullfoo N index1 hashCode(foo) + * foo foo Y hashCode(foo)hashCode(foo) + * nullnull N index1 index2 + * (1,1) (1,1)Y hashCode((1,1)) hashCode((1,1)) + * (1,)(1,) N h
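The comment tables in TestIndexedKey above pin down an equals/hashCode contract: a null key hashes to the join index and two null keys compare equal only under the same index, while non-null keys compare by value. A standalone sketch of that contract, covering only the scalar rows (the tuple-with-null-field rows add an extra index check that is omitted here); this is not the actual IndexedKey source:

    public class IndexedKeySketch {
        private final byte index;
        private final Object key;

        public IndexedKeySketch(byte index, Object key) {
            this.index = index;
            this.key = key;
        }

        @Override
        public boolean equals(Object o) {
            if (!(o instanceof IndexedKeySketch)) {
                return false;
            }
            IndexedKeySketch other = (IndexedKeySketch) o;
            if (key == null || other.key == null) {
                // two null keys are equal only when they carry the same index;
                // a null key never equals a non-null key
                return key == null && other.key == null && index == other.index;
            }
            return key.equals(other.key);
        }

        @Override
        public int hashCode() {
            // null keys fall back to the index, matching the "hashCode = index" rows above
            return key == null ? index : key.hashCode();
        }
    }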
svn commit: r1796639 [4/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java Mon May 29 15:00:39 2017 @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.spark.converter; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoinSpark; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoin; +import org.apache.pig.backend.hadoop.executionengine.spark.SparkPigContext; +import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil; +import org.apache.pig.data.Tuple; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.rdd.RDD; + +@SuppressWarnings("serial") +public class FRJoinConverter implements +RDDConverter { +private static final Log LOG = LogFactory.getLog(FRJoinConverter.class); + +private Set replicatedInputs; + +public RDD convert(List> predecessors, + POFRJoin poFRJoin) throws IOException { +SparkUtil.assertPredecessorSizeGreaterThan(predecessors, poFRJoin, 1); +RDD rdd = predecessors.get(0); + +attachReplicatedInputs((POFRJoinSpark) poFRJoin); + +FRJoinFunction frJoinFunction = new FRJoinFunction(poFRJoin); +return rdd.toJavaRDD().mapPartitions(frJoinFunction, true).rdd(); +} + +private void attachReplicatedInputs(POFRJoinSpark poFRJoin) { +Map> replicatedInputMap = new HashMap<>(); + +for (String replicatedInput : replicatedInputs) { +replicatedInputMap.put(replicatedInput, SparkPigContext.get().getBroadcastedVars().get(replicatedInput).value()); +} + +poFRJoin.attachInputs(replicatedInputMap); +} + +private static class FRJoinFunction implements +FlatMapFunction, Tuple>, Serializable { + +private POFRJoin poFRJoin; +private FRJoinFunction(POFRJoin poFRJoin) { +this.poFRJoin = poFRJoin; +} + +@Override +public Iterable call(final Iterator input) throws Exception { + +return new Iterable() { + +@Override +public Iterator iterator() { +return new 
OutputConsumerIterator(input) { + +@Override +protected void attach(Tuple tuple) { +poFRJoin.setInputs(null); +poFRJoin.attachInput(tuple); +} + +@Override +protected Result getNextResult() throws ExecException { +return poFRJoin.getNextTuple(); +} + +@Override +protected void endOfInput() { +} +}; +} +}; +} + +} + +public void setReplicatedInputs(Set replicatedInputs) { +this.replicatedInputs = replicatedInputs; +} +} \ No newline at end of file Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java?rev=1796639&view
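FRJoinConverter above implements Pig's fragment-replicate join on Spark: the already-broadcast small inputs are fetched from SparkPigContext and attached to POFRJoin, and the large side is streamed through mapPartitions. A self-contained sketch of that broadcast-plus-mapPartitions pattern against the Spark 1.6 Java API in use at the time of this commit; the app name, data and types are made up for illustration:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.broadcast.Broadcast;

    import scala.Tuple2;

    public class ReplicatedJoinSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("frjoin-sketch").setMaster("local"));

            // Small ("replicated") side, shipped to every executor as a broadcast variable.
            Map<Integer, String> small = new HashMap<>();
            small.put(1, "a");
            small.put(2, "b");
            final Broadcast<Map<Integer, String>> replicated = sc.broadcast(small);

            JavaRDD<Integer> big = sc.parallelize(Arrays.asList(1, 2, 3));

            // mapPartitions keeps the broadcast lookup per partition, mirroring how the
            // converter feeds whole partitions through POFRJoin.
            JavaRDD<Tuple2<Integer, String>> joined = big.mapPartitions(
                new FlatMapFunction<Iterator<Integer>, Tuple2<Integer, String>>() {
                    @Override
                    public Iterable<Tuple2<Integer, String>> call(Iterator<Integer> keys) {
                        List<Tuple2<Integer, String>> out = new ArrayList<>();
                        while (keys.hasNext()) {
                            Integer k = keys.next();
                            String v = replicated.value().get(k);
                            if (v != null) {                      // inner join: drop unmatched keys
                                out.add(new Tuple2<>(k, v));
                            }
                        }
                        return out;
                    }
                });

            System.out.println(joined.collect());                 // e.g. [(1,a), (2,b)]
            sc.stop();
        }
    }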
svn commit: r1796639 [7/12] - in /pig/trunk: ./ bin/ ivy/ src/META-INF/services/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapRedu
Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java Mon May 29 15:00:39 2017 @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.backend.hadoop.executionengine.spark.optimizer; + +import org.apache.hadoop.mapred.JobConf; +import org.apache.pig.FuncSpec; +import org.apache.pig.backend.hadoop.executionengine.spark.operator.NativeSparkOperator; +import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOpPlanVisitor; +import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperPlan; +import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator; +import org.apache.pig.impl.PigContext; +import org.apache.pig.impl.PigImplConstants; +import org.apache.pig.impl.builtin.GFCross; +import org.apache.pig.impl.plan.DependencyOrderWalker; +import org.apache.pig.impl.plan.VisitorException; + +public class ParallelismSetter extends SparkOpPlanVisitor { +private JobConf jobConf; + +public ParallelismSetter(SparkOperPlan plan, JobConf jobConf) { +super(plan, new DependencyOrderWalker(plan)); +this.jobConf = jobConf; +} + +@Override +public void visitSparkOp(SparkOperator sparkOp) throws VisitorException { +if (sparkOp instanceof NativeSparkOperator) { +return; +} + +if (sparkOp.getCrossKeys() != null) { +for (String key : sparkOp.getCrossKeys()) { +jobConf.set(PigImplConstants.PIG_CROSS_PARALLELISM + "." + key, +// TODO: Estimate parallelism. For now we are hard-coding GFCross.DEFAULT_PARALLELISM +Integer.toString(96)); +} +} +} +} \ No newline at end of file Added: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java?rev=1796639&view=auto == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java (added) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java Mon May 29 15:00:39 2017 @@ -0,0 +1,217 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.pig.backend.hadoop.executionengine.spark.optimizer; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.util.List; + +import org.apache.pig.backend.hadoop.executionengine.optimizer.SecondaryKeyOptimizer; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoc
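ParallelismSetter above publishes a hard-coded parallelism of 96, the value of GFCross.DEFAULT_PARALLELISM, into the JobConf under a per-operator key. A small sketch of that round trip, assuming only the Hadoop Configuration API; the property name below is a stand-in for PigImplConstants.PIG_CROSS_PARALLELISM and the operator key is hypothetical:

    import org.apache.hadoop.conf.Configuration;

    public class CrossParallelismSketch {
        private static final String PIG_CROSS_PARALLELISM = "pig.cross.parallelism"; // stand-in name

        public static void main(String[] args) {
            Configuration conf = new Configuration(false);
            String operatorKey = "scope-42";                       // hypothetical GFCross key
            conf.setInt(PIG_CROSS_PARALLELISM + "." + operatorKey, 96);

            // The CROSS implementation later reads the value back for its own key:
            int parallelism = conf.getInt(PIG_CROSS_PARALLELISM + "." + operatorKey, 1);
            System.out.println(parallelism);                       // 96
        }
    }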
svn commit: r1796647 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java src/org/apache/pig/builtin/HiveUDAF.java
Author: szita Date: Mon May 29 15:19:17 2017 New Revision: 1796647 URL: http://svn.apache.org/viewvc?rev=1796647&view=rev Log: PIG-5194: HiveUDF fails with Spark exec type (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796647&r1=1796646&r2=1796647&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Mon May 29 15:19:17 2017 @@ -109,6 +109,8 @@ OPTIMIZATIONS Â BUG FIXES +PIG-5194: HiveUDF fails with Spark exec type (szita) + PIG-5231: PigStorage with -schema may produce inconsistent outputs with more fields (knoguchi) PIG-5224: Extra foreach from ColumnPrune preventing Accumulator usage (knoguchi) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1796647&r1=1796646&r2=1796647&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Mon May 29 15:19:17 2017 @@ -20,6 +20,7 @@ package org.apache.pig.backend.hadoop.ex import java.io.File; import java.io.IOException; import java.io.PrintStream; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -387,8 +388,8 @@ public class SparkLauncher extends Launc for (String file : shipFiles.split(",")) { File shipFile = new File(file.trim()); if (shipFile.exists()) { -addResourceToSparkJobWorkingDirectory(shipFile, -shipFile.getName(), ResourceType.FILE); +addResourceToSparkJobWorkingDirectory(shipFile, shipFile.getName(), +shipFile.getName().endsWith(".jar") ? 
ResourceType.JAR : ResourceType.FILE ); } } } @@ -429,7 +430,7 @@ public class SparkLauncher extends Launc Set allJars = new HashSet(); LOG.info("Add default jars to Spark Job"); allJars.addAll(JarManager.getDefaultJars()); -LOG.info("Add extra jars to Spark Job"); +LOG.info("Add script jars to Spark Job"); for (String scriptJar : pigContext.scriptJars) { allJars.add(scriptJar); } @@ -448,6 +449,11 @@ public class SparkLauncher extends Launc allJars.add(scriptUDFJarFile.getAbsolutePath().toString()); } +LOG.info("Add extra jars to Spark job"); +for (URL extraJarUrl : pigContext.extraJars) { +allJars.add(extraJarUrl.getFile()); +} + //Upload all jars to spark working directory for (String jar : allJars) { File jarFile = new File(jar); Modified: pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java?rev=1796647&r1=1796646&r2=1796647&view=diff == --- pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java (original) +++ pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java Mon May 29 15:19:17 2017 @@ -135,11 +135,11 @@ public class HiveUDAF extends HiveUDFBas return; } -if (m == Mode.PARTIAL1 || m == Mode.FINAL) { +if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2 || m == Mode.FINAL) { intermediateOutputObjectInspector = evaluator.init(Mode.PARTIAL1, inputObjectInspectorAsArray); intermediateOutputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(intermediateOutputObjectInspector); -if (m == Mode.FINAL) { +if (m == Mode.PARTIAL2 || m == Mode.FINAL) { intermediateInputObjectInspector = HiveUtils.createObjectInspector(intermediateOutputTypeInfo); intermediateInputObjectInspectorAsArray = new ObjectInspector[] {intermediateInputObjectInspector}; outputObjectInspector = evaluator.init(Mode.FINAL, intermediateInputObjectInspectorAsArray); @@ -208,20 +208,41 @@ public class HiveUDAF extends HiveUDFBas } static public class Initial extends EvalFunc { + +private boolean inited = false; +private String funcName; +ConstantObjectInspectInfo constantsInfo; +private Sc
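The HiveUDAF fix above extends the ObjectInspector initialization to Mode.PARTIAL2, the partial-to-partial merge step that combiner-style execution exercises. For reference, the four evaluator modes come from Hive's own API; a minimal program to list them (requires hive-exec on the classpath):

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;

    public class HiveUdafModes {
        public static void main(String[] args) {
            for (Mode m : Mode.values()) {
                System.out.println(m);
            }
            // PARTIAL1 : raw input           -> partial aggregation  (map side)
            // PARTIAL2 : partial aggregation -> partial aggregation  (combiner-style merge)
            // FINAL    : partial aggregation -> full result          (reduce side)
            // COMPLETE : raw input           -> full result          (map-only)
        }
    }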
svn commit: r1796703 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java src/org/apache/pig/backend/hadoop/executionengine/spark/optimi
Author: szita Date: Mon May 29 21:45:33 2017 New Revision: 1796703 URL: http://svn.apache.org/viewvc?rev=1796703&view=rev Log: PIG-5207: BugFix e2e tests fail on spark (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796703&r1=1796702&r2=1796703&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Mon May 29 21:45:33 2017 @@ -109,6 +109,8 @@ OPTIMIZATIONS Â BUG FIXES +PIG-5207: BugFix e2e tests fail on spark (szita) + PIG-5194: HiveUDF fails with Spark exec type (szita) PIG-5231: PigStorage with -schema may produce inconsistent outputs with more fields (knoguchi) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java?rev=1796703&r1=1796702&r2=1796703&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java Mon May 29 21:45:33 2017 @@ -24,6 +24,7 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -40,6 +41,8 @@ import org.apache.pig.impl.plan.PlanExce import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.util.MultiMap; +import com.google.common.collect.HashBiMap; + /** * * The base class for all types of physical plans. 
@@ -304,6 +307,16 @@ public class PhysicalPlan extends Operat } } +//Fix order of edges in mToEdges lists +Map invertedMatches = HashBiMap.create(matches).inverse(); +for (PhysicalOperator newOp : clone.mToEdges.keySet()) { +List newList = clone.mToEdges.get(newOp); +if (newList.size() > 1) { +List originalList = this.mToEdges.get(invertedMatches.get(newOp)); +Collections.sort(newList, new EdgeOrderHelper(originalList,invertedMatches)); +} +} + return clone; } @@ -315,4 +328,21 @@ public class PhysicalPlan extends Operat { opmap = null; } + + +private static class EdgeOrderHelper implements Comparator { + +private final Map invertedMatches; +private final List originalList; + +public EdgeOrderHelper(List originalList, Map invertedMatches) { +this.originalList = originalList; +this.invertedMatches = invertedMatches; +} + +@Override +public int compare(PhysicalOperator o1, PhysicalOperator o2) { +return originalList.indexOf(invertedMatches.get(o1)) - originalList.indexOf(invertedMatches.get(o2)); +} +} } Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java?rev=1796703&r1=1796702&r2=1796703&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java Mon May 29 21:45:33 2017 @@ -296,8 +296,13 @@ public class CombinerOptimizer extends S ); newProj.setResultType(DataType.BAG); -PhysicalOperator udfInput = pplan.getPredecessors(combineUdf).get(0); -pplan.disconnect(udfInput, combineUdf); +for (PhysicalOperator originalUdfInput : pplan.getPredecessors(combineUdf).toArray(new PhysicalOperator[0])) { +if (pplan.getPredecessors(originalUdfInput) != null) { +pplan.trimAbove(originalUdfInput); +} +pplan.remove(originalUdfInput); +} + pplan.add(newProj); pplan.connect(newProj, combineUdf); i++;
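The PhysicalPlan change above restores deterministic successor ordering after clone(): each cloned edge list is re-sorted so its elements appear in the same relative order as their counterparts in the original plan. A simplified, self-contained illustration of that comparator idea, where strings stand in for PhysicalOperators and the clone-to-original map plays the role of HashBiMap.create(matches).inverse():

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class EdgeOrderSketch {
        public static void main(String[] args) {
            final List<String> originalList = Arrays.asList("load", "filter", "store");

            // clone -> original mapping
            final Map<String, String> invertedMatches = new HashMap<>();
            invertedMatches.put("load'", "load");
            invertedMatches.put("filter'", "filter");
            invertedMatches.put("store'", "store");

            // cloned successors came out in the wrong order
            List<String> newList = new ArrayList<>(Arrays.asList("store'", "load'", "filter'"));
            Collections.sort(newList, new Comparator<String>() {
                @Override
                public int compare(String o1, String o2) {
                    return originalList.indexOf(invertedMatches.get(o1))
                         - originalList.indexOf(invertedMatches.get(o2));
                }
            });
            System.out.println(newList); // [load', filter', store']
        }
    }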
svn commit: r1796822 - in /pig/trunk: CHANGES.txt build.xml test/excluded-tests-mr test/excluded-tests-tez test/org/apache/pig/test/TestEvalPipeline2.java
Author: szita Date: Tue May 30 09:32:31 2017 New Revision: 1796822 URL: http://svn.apache.org/viewvc?rev=1796822&view=rev Log: PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml pig/trunk/test/excluded-tests-mr pig/trunk/test/excluded-tests-tez pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796822&r1=1796821&r2=1796822&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue May 30 09:32:31 2017 @@ -122,6 +122,8 @@ OPTIMIZATIONS Â BUG FIXES +PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita) + PIG-5207: BugFix e2e tests fail on spark (szita) PIG-5194: HiveUDF fails with Spark exec type (szita) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1796822&r1=1796821&r2=1796822&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Tue May 30 09:32:31 2017 @@ -909,6 +909,9 @@ + + + Modified: pig/trunk/test/excluded-tests-mr URL: http://svn.apache.org/viewvc/pig/trunk/test/excluded-tests-mr?rev=1796822&r1=1796821&r2=1796822&view=diff == --- pig/trunk/test/excluded-tests-mr (original) +++ pig/trunk/test/excluded-tests-mr Tue May 30 09:32:31 2017 @@ -1,2 +1,2 @@ **/tez/*.java -+**/spark/*.java \ No newline at end of file +**/spark/*.java \ No newline at end of file Modified: pig/trunk/test/excluded-tests-tez URL: http://svn.apache.org/viewvc/pig/trunk/test/excluded-tests-tez?rev=1796822&r1=1796821&r2=1796822&view=diff == --- pig/trunk/test/excluded-tests-tez (original) +++ pig/trunk/test/excluded-tests-tez Tue May 30 09:32:31 2017 @@ -1,2 +1,2 @@ **/Test*MR.java -+**/spark/*.java \ No newline at end of file +**/spark/*.java \ No newline at end of file Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1796822&r1=1796821&r2=1796822&view=diff == --- pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original) +++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Tue May 30 09:32:31 2017 @@ -1597,9 +1597,8 @@ public class TestEvalPipeline2 { String[] expected = new String[] {"(1,A)", "(1,B)", "(2,C)"}; -Util.checkQueryOutputs(iter, expected, - org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened")), -Util.isSparkExecType(cluster.getExecType())); +Util.checkQueryOutputsAfterSortRecursive(iter, expected, + org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened"))); } // See PIG-2237
svn commit: r1796824 - in /pig/branches/branch-0.17: CHANGES.txt build.xml test/excluded-tests-mr test/excluded-tests-tez test/org/apache/pig/test/TestEvalPipeline2.java
Author: szita Date: Tue May 30 09:51:27 2017 New Revision: 1796824 URL: http://svn.apache.org/viewvc?rev=1796824&view=rev Log: PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita) Modified: pig/branches/branch-0.17/CHANGES.txt pig/branches/branch-0.17/build.xml pig/branches/branch-0.17/test/excluded-tests-mr pig/branches/branch-0.17/test/excluded-tests-tez pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java Modified: pig/branches/branch-0.17/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/CHANGES.txt?rev=1796824&r1=1796823&r2=1796824&view=diff == --- pig/branches/branch-0.17/CHANGES.txt (original) +++ pig/branches/branch-0.17/CHANGES.txt Tue May 30 09:51:27 2017 @@ -111,6 +111,8 @@ OPTIMIZATIONS Â BUG FIXES +PIG-5244: Several unit tests are failing in Tez mode after merging spark branch (nkollar via szita) + PIG-5207: BugFix e2e tests fail on spark (szita) PIG-5194: HiveUDF fails with Spark exec type (szita) Modified: pig/branches/branch-0.17/build.xml URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1796824&r1=1796823&r2=1796824&view=diff == --- pig/branches/branch-0.17/build.xml (original) +++ pig/branches/branch-0.17/build.xml Tue May 30 09:51:27 2017 @@ -909,6 +909,9 @@ + + + Modified: pig/branches/branch-0.17/test/excluded-tests-mr URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/excluded-tests-mr?rev=1796824&r1=1796823&r2=1796824&view=diff == --- pig/branches/branch-0.17/test/excluded-tests-mr (original) +++ pig/branches/branch-0.17/test/excluded-tests-mr Tue May 30 09:51:27 2017 @@ -1,2 +1,2 @@ **/tez/*.java -+**/spark/*.java \ No newline at end of file +**/spark/*.java \ No newline at end of file Modified: pig/branches/branch-0.17/test/excluded-tests-tez URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/excluded-tests-tez?rev=1796824&r1=1796823&r2=1796824&view=diff == --- pig/branches/branch-0.17/test/excluded-tests-tez (original) +++ pig/branches/branch-0.17/test/excluded-tests-tez Tue May 30 09:51:27 2017 @@ -1,2 +1,2 @@ **/Test*MR.java -+**/spark/*.java \ No newline at end of file +**/spark/*.java \ No newline at end of file Modified: pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1796824&r1=1796823&r2=1796824&view=diff == --- pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java (original) +++ pig/branches/branch-0.17/test/org/apache/pig/test/TestEvalPipeline2.java Tue May 30 09:51:27 2017 @@ -1597,9 +1597,8 @@ public class TestEvalPipeline2 { String[] expected = new String[] {"(1,A)", "(1,B)", "(2,C)"}; -Util.checkQueryOutputs(iter, expected, - org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened")), -Util.isSparkExecType(cluster.getExecType())); +Util.checkQueryOutputsAfterSortRecursive(iter, expected, + org.apache.pig.newplan.logical.Util.translateSchema(pigServer.dumpSchema("flattened"))); } // See PIG-2237
svn commit: r1797327 - in /pig/branches/branch-0.17: RELEASE_NOTES.txt build.xml src/docs/src/documentation/content/xdocs/perf.xml src/docs/src/documentation/content/xdocs/start.xml src/docs/src/docum
Author: szita Date: Fri Jun 2 08:29:36 2017 New Revision: 1797327 URL: http://svn.apache.org/viewvc?rev=1797327&view=rev Log: Updating release notes and version numbers for Pig 0.17 release Modified: pig/branches/branch-0.17/RELEASE_NOTES.txt pig/branches/branch-0.17/build.xml pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/start.xml pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/test.xml Modified: pig/branches/branch-0.17/RELEASE_NOTES.txt URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/RELEASE_NOTES.txt?rev=1797327&r1=1797326&r2=1797327&view=diff == --- pig/branches/branch-0.17/RELEASE_NOTES.txt (original) +++ pig/branches/branch-0.17/RELEASE_NOTES.txt Fri Jun 2 08:29:36 2017 @@ -1,26 +1,28 @@ -These notes are for Pig 0.3.0 release. +These notes are for Pig 0.17.0 release. Highlights == -The main focus of this release is multiquery support that allows to optimize -multiple queries within the same script that share a computation. +The highlights of this release includes Pig on Spark System Requirements === -1. Java 1.6.x or newer, preferably from Sun. Set JAVA_HOME to the root of your +1. Java 1.7.x or newer, preferably from Sun. Set JAVA_HOME to the root of your Java installation 2. Ant build tool: http://ant.apache.org - to build source only -3. Cygwin: http://www.cygwin.com/ - to run under Windows -4. This release is compatible with Hadoop 0.18.x releases +3. Run under Unix and Windows +4. This release is compatible with Hadoop 2.5+ releases. Note Hadoop 1.X is not + supported anymore. +5. For using Spark execution engine Spark 1.6.x is required. (Spark 2 support is + likely to come in the next release) Trying the Release == -1. Download pig-0.3.0.tar.gz -2. Unpack the file: tar -xzvf pig-0.3.0.tar.gz -3. Move into the installation directory: cd pig-0.3.0 +1. Download pig-0.17.0.tar.gz +2. Unpack the file: tar -xzvf pig-0.17.0.tar.gz +3. Move into the installation directory: cd pig-0.17.0 4. To run pig without Hadoop cluster, execute the command below. This will take you into an interactive shell called grunt that allows you to navigate the local file system and execute Pig commands against the local files @@ -36,7 +38,6 @@ export PIG_CLASSPATH=/hadoop/conf 7. To run unit tests run ant test 8. To build jar file with available user defined functions run commands below. -This currently only works with Java 1.6.x. cd contrib/piggybank/java ant 9. To build the tutorial: @@ -47,9 +48,6 @@ This currently only works with Java 1.6. 
Relevant Documentation == -Pig Language Manual(including Grunt commands): -http://wiki.apache.org/pig-data/attachments/FrontPage/attachments/plrm.htm -UDF Manual: http://wiki.apache.org/pig/UDFManual -Piggy Bank: http://wiki.apache.org/pig/PiggyBank -Pig Tutorial: http://wiki.apache.org/pig/PigTutorial -Pig Eclipse Plugin (PigPen): http://wiki.apache.org/pig/PigPen +Pig Documentation: http://pig.apache.org/docs/r0.17.0/ +Pig Wiki: https://cwiki.apache.org/confluence/display/PIG/Index +Pig Tutorial: https://cwiki.apache.org/confluence/display/PIG/PigTutorial \ No newline at end of file Modified: pig/branches/branch-0.17/build.xml URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1797327&r1=1797326&r2=1797327&view=diff == --- pig/branches/branch-0.17/build.xml (original) +++ pig/branches/branch-0.17/build.xml Fri Jun 2 08:29:36 2017 @@ -42,7 +42,7 @@ - + Modified: pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml?rev=1797327&r1=1797326&r2=1797327&view=diff == --- pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml (original) +++ pig/branches/branch-0.17/src/docs/src/documentation/content/xdocs/perf.xml Fri Jun 2 08:29:36 2017 @@ -42,7 +42,7 @@ Tez session/container reuse -One downside of MapReduce is the startup cost for a job is very high. That hurts the performance especially for small job. Tez alleviate the problem by using session and container reuse, so it is not necessary to start an application master for every job, and start a JVM for every task. By default, session/container reuse is on and we usually shall not turn it off. JVM reuse might cause some side effect if static variable is used since static variable might live across different jobs. So if static variable is used in EvalFunc/LoadFunc/S
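The container/JVM reuse caveat in the perf.xml text above is easy to hit: static state survives across jobs that land in the same reused JVM, so anything meant to be per-job must be reset explicitly rather than assumed fresh. A deliberately broken sketch of the pitfall; this UDF is hypothetical, not part of Pig:

    import java.io.IOException;

    import org.apache.pig.EvalFunc;
    import org.apache.pig.data.Tuple;

    public class RowCounter extends EvalFunc<Long> {
        // DANGER under container/JVM reuse: this value persists into the next job
        // that happens to run in the same JVM, so counts start from a stale offset.
        private static long seen = 0;

        @Override
        public Long exec(Tuple input) throws IOException {
            return ++seen;
        }
    }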
svn commit: r1797377 - /pig/branches/branch-0.17/build.xml
Author: szita Date: Fri Jun 2 11:57:07 2017 New Revision: 1797377 URL: http://svn.apache.org/viewvc?rev=1797377&view=rev Log: PIG-4923: Drop Hadoop 1.x support in Pig 0.17 (PIG-4923.srcReleaseFix.patch) Modified: pig/branches/branch-0.17/build.xml Modified: pig/branches/branch-0.17/build.xml URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1797377&r1=1797376&r2=1797377&view=diff == --- pig/branches/branch-0.17/build.xml (original) +++ pig/branches/branch-0.17/build.xml Fri Jun 2 11:57:07 2017 @@ -1177,7 +1177,8 @@ - + +
svn commit: r1797379 - /pig/trunk/build.xml
Author: szita Date: Fri Jun 2 11:58:31 2017 New Revision: 1797379 URL: http://svn.apache.org/viewvc?rev=1797379&view=rev Log: PIG-4923: Drop Hadoop 1.x support in Pig 0.17 (PIG-4923.srcReleaseFix.patch) Modified: pig/trunk/build.xml Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1797379&r1=1797378&r2=1797379&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Fri Jun 2 11:58:31 2017 @@ -1177,7 +1177,8 @@ - + +
svn commit: r1797382 - /pig/branches/branch-0.17/KEYS
Author: szita Date: Fri Jun 2 12:37:52 2017 New Revision: 1797382 URL: http://svn.apache.org/viewvc?rev=1797382&view=rev Log: Adding PGP public key for szita Modified: pig/branches/branch-0.17/KEYS Modified: pig/branches/branch-0.17/KEYS URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/KEYS?rev=1797382&r1=1797381&r2=1797382&view=diff == --- pig/branches/branch-0.17/KEYS (original) +++ pig/branches/branch-0.17/KEYS Fri Jun 2 12:37:52 2017 @@ -146,3 +146,34 @@ I/0TpEVFgcGGTT2CSqLAFUlFdL6CzpwpGXhUxzSm URdA1jknkPjIIC1wBetzMrIbfcsoZS4SLbZm9u8C+2k= =u7gH -END PGP PUBLIC KEY BLOCK- + +-BEGIN PGP PUBLIC KEY BLOCK- + +mQENBFkwEKcBCAC6vnzhuNjdAJHhiVPVmkWSzBE3GTgdWsW66DzSm6Qt4l7A0Qx1 +ksrlOrXdwdamDKCJovaIuAhD3uZa2NhpKqDyARAyY9Cf5KhIq0SslXUUuBZF9nFi +dy/dH3v0n+rPsgAo3XeTB8LkBD7BiHKwE/Jbpdj/40ck3+BxgwMB8VenWtxgceaA +413tvZsiMEDxGDVD1E3weF2BNHzT/GctwEXIaZtHGw9aa/vIVF6wDYT0ZqvjkiUe +2840KHwkU8GTi7drWcvzMaGmYMQrctp3kxEIXTGaiJq19va5zOiMVZxyfXA62OKD +cW2xNbghd47cLI9/pqH+3JUY2NW8oeRiTmtlABEBAAG0HUFkYW0gU3ppdGEgPHN6 +aXRhQGFwYWNoZS5vcmc+iQFUBBMBCAA+FiEEJ0vt2OSrLbl8GwRrxQh4dCWJOjQF +AlkwEKcCGwMFCQPCZwAFCwkIBwIGFQgJCgsCBBYCAwECHgECF4AACgkQxQh4dCWJ +OjRlrwgAh71lQRTi21NPOP9KdVJGm9PrqhSKGiuwIUyssiZlh/gnfm+eypXP+i0f +gZQdtfH0FippIn/R8/FYu3UTs9kF+f+4Zx63DeXyeNlBHZMFZWovMvqUEuV/JtVj +Y6yo3fX1Hc73YsadeupIeJhQdfTtaDpttiI+DzqTCWILBDoRceuDxiS3xtCTW65m +XP5PKiHWEhVTLyntzfrCK15DsFqnMhgnUh3UJl3UhuigeR5XEYZFDGeo3oaC78ED +JmYsxDuFHNAsdZwZq5y8FbWUbUaLL3ewSgab6W04wYlBpuBPgOxR0hdiqGz/eJEB +6ibTWPbJtVZ/Fgit1f6DbjxWYJwTkrkBDQRZMBCnAQgA1Ogl3piWDTZBZO+HQgiv +vN638MgovDlHGvhl/cuYQHLhUanCVioMO2TrLq25lyZkxXEqvoQPfo2+ow0V4OLP +riaTzHa6j+9ZC3kU6tDE4oqeRL5lVN6ztyL9H8bGtYr/ntWWMgSuWWAxqwEMmm40 +Pdb4cGB6vYv1CL559y4OuG15cPgMA/L74p7J0lE3OdtjiYygs6vXrlPPVNAfrSsA +xRkOmSLC53JnowP/BEw08V/NFFAQrVfmmuH982tpsRiSDuGS31va0vRya+I0CFPP +FBr4Pze565vWK7x4x6tGF2iZtmGh5J7y8Jdd+sqteCrBF6/13tKWycbjrAoePShT +WQARAQABiQE8BBgBCAAmFiEEJ0vt2OSrLbl8GwRrxQh4dCWJOjQFAlkwEKcCGwwF +CQPCZwAACgkQxQh4dCWJOjRmewf/S92EVjH6Pcfb8lxJhCq0QeG+PNk783dbq8sL +cIEt6D0dlULFpDPXJQOspYoSQUNgXBwH1XfoeJa3S3UrhERtxJuZbaIpa0399fOb +blu4cWMEuVxu+SDeChTw5cGUt6vg+mtSEC2F9aHn2/Hj+LyjKKBvZJEEel2pb4en +kPEysGak/d5KgmfyG0SV+yy3b2bNwg+pZKzATixKecapP/jtdthMiQMs/2tnaEZR +TCgZdWfY6or6R0PDdxxizdxUfjHavxcv2WpLp5+/6vVfiyDgAxDj34UH09MHcUrT +krWBV+tZRAYc6OUsqQ0pySEW0DywV+10qyKQCmpq5pICso3/Uw== +=vUCm +-END PGP PUBLIC KEY BLOCK-
svn commit: r1797383 - /pig/trunk/KEYS
Author: szita Date: Fri Jun 2 12:38:46 2017 New Revision: 1797383 URL: http://svn.apache.org/viewvc?rev=1797383&view=rev Log: Adding PGP public key for szita Modified: pig/trunk/KEYS Modified: pig/trunk/KEYS URL: http://svn.apache.org/viewvc/pig/trunk/KEYS?rev=1797383&r1=1797382&r2=1797383&view=diff == --- pig/trunk/KEYS (original) +++ pig/trunk/KEYS Fri Jun 2 12:38:46 2017 @@ -146,3 +146,34 @@ I/0TpEVFgcGGTT2CSqLAFUlFdL6CzpwpGXhUxzSm URdA1jknkPjIIC1wBetzMrIbfcsoZS4SLbZm9u8C+2k= =u7gH -END PGP PUBLIC KEY BLOCK- + +-BEGIN PGP PUBLIC KEY BLOCK- + +mQENBFkwEKcBCAC6vnzhuNjdAJHhiVPVmkWSzBE3GTgdWsW66DzSm6Qt4l7A0Qx1 +ksrlOrXdwdamDKCJovaIuAhD3uZa2NhpKqDyARAyY9Cf5KhIq0SslXUUuBZF9nFi +dy/dH3v0n+rPsgAo3XeTB8LkBD7BiHKwE/Jbpdj/40ck3+BxgwMB8VenWtxgceaA +413tvZsiMEDxGDVD1E3weF2BNHzT/GctwEXIaZtHGw9aa/vIVF6wDYT0ZqvjkiUe +2840KHwkU8GTi7drWcvzMaGmYMQrctp3kxEIXTGaiJq19va5zOiMVZxyfXA62OKD +cW2xNbghd47cLI9/pqH+3JUY2NW8oeRiTmtlABEBAAG0HUFkYW0gU3ppdGEgPHN6 +aXRhQGFwYWNoZS5vcmc+iQFUBBMBCAA+FiEEJ0vt2OSrLbl8GwRrxQh4dCWJOjQF +AlkwEKcCGwMFCQPCZwAFCwkIBwIGFQgJCgsCBBYCAwECHgECF4AACgkQxQh4dCWJ +OjRlrwgAh71lQRTi21NPOP9KdVJGm9PrqhSKGiuwIUyssiZlh/gnfm+eypXP+i0f +gZQdtfH0FippIn/R8/FYu3UTs9kF+f+4Zx63DeXyeNlBHZMFZWovMvqUEuV/JtVj +Y6yo3fX1Hc73YsadeupIeJhQdfTtaDpttiI+DzqTCWILBDoRceuDxiS3xtCTW65m +XP5PKiHWEhVTLyntzfrCK15DsFqnMhgnUh3UJl3UhuigeR5XEYZFDGeo3oaC78ED +JmYsxDuFHNAsdZwZq5y8FbWUbUaLL3ewSgab6W04wYlBpuBPgOxR0hdiqGz/eJEB +6ibTWPbJtVZ/Fgit1f6DbjxWYJwTkrkBDQRZMBCnAQgA1Ogl3piWDTZBZO+HQgiv +vN638MgovDlHGvhl/cuYQHLhUanCVioMO2TrLq25lyZkxXEqvoQPfo2+ow0V4OLP +riaTzHa6j+9ZC3kU6tDE4oqeRL5lVN6ztyL9H8bGtYr/ntWWMgSuWWAxqwEMmm40 +Pdb4cGB6vYv1CL559y4OuG15cPgMA/L74p7J0lE3OdtjiYygs6vXrlPPVNAfrSsA +xRkOmSLC53JnowP/BEw08V/NFFAQrVfmmuH982tpsRiSDuGS31va0vRya+I0CFPP +FBr4Pze565vWK7x4x6tGF2iZtmGh5J7y8Jdd+sqteCrBF6/13tKWycbjrAoePShT +WQARAQABiQE8BBgBCAAmFiEEJ0vt2OSrLbl8GwRrxQh4dCWJOjQFAlkwEKcCGwwF +CQPCZwAACgkQxQh4dCWJOjRmewf/S92EVjH6Pcfb8lxJhCq0QeG+PNk783dbq8sL +cIEt6D0dlULFpDPXJQOspYoSQUNgXBwH1XfoeJa3S3UrhERtxJuZbaIpa0399fOb +blu4cWMEuVxu+SDeChTw5cGUt6vg+mtSEC2F9aHn2/Hj+LyjKKBvZJEEel2pb4en +kPEysGak/d5KgmfyG0SV+yy3b2bNwg+pZKzATixKecapP/jtdthMiQMs/2tnaEZR +TCgZdWfY6or6R0PDdxxizdxUfjHavxcv2WpLp5+/6vVfiyDgAxDj34UH09MHcUrT +krWBV+tZRAYc6OUsqQ0pySEW0DywV+10qyKQCmpq5pICso3/Uw== +=vUCm +-END PGP PUBLIC KEY BLOCK-
svn commit: r1797388 - /pig/tags/release-0.17.0-rc0/
Author: szita Date: Fri Jun 2 13:24:52 2017 New Revision: 1797388 URL: http://svn.apache.org/viewvc?rev=1797388&view=rev Log: Pig 0.17.0-rc0 release. Added: pig/tags/release-0.17.0-rc0/ (props changed) - copied from r1797387, pig/branches/branch-0.17/ Propchange: pig/tags/release-0.17.0-rc0/ -- --- svn:ignore (added) +++ svn:ignore Fri Jun 2 13:24:52 2017 @@ -0,0 +1,4 @@ + +dist +depend +pig.jar Propchange: pig/tags/release-0.17.0-rc0/ -- svn:mergeinfo = /hadoop/pig/branches/multiquery:741727-770826
svn commit: r1798663 - in /pig/tags: release-0.17.0-rc0/ release-0.17.0/
Author: szita Date: Wed Jun 14 11:08:58 2017 New Revision: 1798663 URL: http://svn.apache.org/viewvc?rev=1798663&view=rev Log: Pig 0.17.0 release. Added: pig/tags/release-0.17.0/ (props changed) - copied from r1798662, pig/tags/release-0.17.0-rc0/ Removed: pig/tags/release-0.17.0-rc0/ Propchange: pig/tags/release-0.17.0/ -- --- svn:ignore (added) +++ svn:ignore Wed Jun 14 11:08:58 2017 @@ -0,0 +1,4 @@ + +dist +depend +pig.jar Propchange: pig/tags/release-0.17.0/ -- svn:mergeinfo = /hadoop/pig/branches/multiquery:741727-770826
svn commit: r20044 - in /dev/pig: ./ pig-0.17.0/
Author: szita Date: Fri Jun 16 07:29:23 2017 New Revision: 20044 Log: Pig 0.17.0 release. Added: dev/pig/HEADER.html dev/pig/KEYS dev/pig/latest (with props) dev/pig/pig-0.17.0/README.txt dev/pig/pig-0.17.0/RELEASE_NOTES.txt dev/pig/pig-0.17.0/pig-0.17.0-src.tar.gz (with props) dev/pig/pig-0.17.0/pig-0.17.0-src.tar.gz.asc dev/pig/pig-0.17.0/pig-0.17.0-src.tar.gz.md5 dev/pig/pig-0.17.0/pig-0.17.0.tar.gz (with props) dev/pig/pig-0.17.0/pig-0.17.0.tar.gz.asc dev/pig/pig-0.17.0/pig-0.17.0.tar.gz.md5 Added: dev/pig/HEADER.html == --- dev/pig/HEADER.html (added) +++ dev/pig/HEADER.html Fri Jun 16 07:29:23 2017 @@ -0,0 +1,7 @@ +http://pig.apache.org/";>Pig Releases + +Please make sure you're downloading from http://www.apache.org/dyn/closer.cgi/pig/";>a nearby +mirror site, not from www.apache.org. + +Older releases are available from the http://archive.apache.org/dist/pig/";>archives. Added: dev/pig/KEYS == --- dev/pig/KEYS (added) +++ dev/pig/KEYS Fri Jun 16 07:29:23 2017 @@ -0,0 +1,211 @@ +pub 1024D/06687D96 2008-07-14 Olga Natkovich +sig 3 06687D96 2008-07-14 Olga Natkovich +sub 1024g/B6456039 2008-07-14 +sig 06687D96 2008-07-14 Olga Natkovich + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.2.6 (GNU/Linux) + +mQGiBEz/+9QRBACtgXbyja8lZlYinW6pGW+a7A8jhaEUS1FN7IGTIqJtqwcN7pS3 +jL9x/8DpuMzI2H7/uEElqa3vUqwGsHZy1mK8xJ2IWfvDJkx8KKDBRFc/5gG4KrPz +zhEtqiYy+UYAyYTIa31G0Yi0tUtl7w4NMrk4SbAXvidWOLlc7fPbnzNwewCg29NA +d+DhtjMsGISw+9UdXskuK5kD/A2rRkkUGSXMMG3F6t8UxoEFS2hp6LFpjhPpp7bp +MEx4Dd3SaWYMgv2kX2Lw4GKY/cWMCBuRvSkoGvbUrLbyFk3eUFDxBthc9VudFmga +JpCXT/hIR1xJ8cqBJmfCc6K/z9U5kjyFfxm4lX/Y+iUL5f3QTvIAmVlawnZARAFu +H2aPA/4/s+qQDulGGYXRNtNoHzppzfO24mPPvjgfDJ4jupZwOISm3/Jc4zY/dGxi +x3/61cQIrZXJ+OBlsfIfPnz5zh1e+I4y1XzozZlvBPUaVoMKUZHUXyQsi9UlgvIz +QdU7iK0MpYEpXC+DC1KQtkXwNSdx63+a40wyh+vW5QZ583Fmz7QkT2xnYSBOYXRr +b3ZpY2ggPG9sZ2FuQHlhaG9vLWluYy5jb20+iF4EExECAB4FAkz/+9QCGwMGCwkI +BwMCAxUCAwMWAgECHgECF4AACgkQd9QuHGc7InkMSQCeKR0Q2Qst2zc6t/JA7T6y +X/ukBVIAnR5de4YI3+LsToqZJFUvPDg4uG5TuQENBEz/+9YQBADcTndyZ0Vmh46R +UjqNRmv27rcsH62fvLIE7vUE2AQaDE1hEMwawjZXhtFa6Bzh6WWRXqkdNUWnx0E8 +HIHhI6jAOJ2Zgc5zickkFOC/zLuipogCt0n1saK0ZXyKOLWOEX/ggxb9QUcHjpT2 +Rih5Sf95XQ8q5jI6kPvv6YwX0rwMhwADBQQAqSbFpnQIb+V1xo6cwqBpjizVSadM +M5lXmsVngW+QMRp5hrw2bGfzkyDo2eowItUVyleCzTl7npFFFHKtNDS2g40QOB3z +YzKXLa+dsst2BUzbZryXpHUZLQ0IynsY14m3CTjuhoBe4awQl2bZrl6Yuvad/RNy +eJYZ4TCJB3/0BnCISQQYEQIACQUCTP/71gIbDAAKCRB31C4cZzsieTujAKCfvkXU +b6o3+hGaiQ91NNa06K3eOQCffz+zGQJQ4Ab18AVUfjq69TRHEHA= +=0PGJ +-END PGP PUBLIC KEY BLOCK- + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.2.6 (GNU/Linux) + +mQGiBEnNUFYRBACdB2/nTuzObFu/B6dqTo301jF0BiD0J4Nl1qz9cP5IwrJArCzw +BQGSzN5UX0gKTpi9FWzXPWrc/On3jAk65q9FgkUTbKQqwtRK2UUjD7GpsMiryIBx +3+f1d8MEkOsFVg+bOzKqIY2VbvksM3GElCusKaWTZpgdsY27UkdHZIj/3wCg4Qq4 +ftOTc25XTEPNgAiAE8kO5qcD/27pjtQu3nzA47VdAvWGNAiNjdwsan/bWUUAvl81 +XmQ0GHlo9D0iyOd2GX9WHuguZ4/tf203f0oOHmgvYFllsF6OttBTIfd57HXyrdQd +VuI8JuKxqM0FYaQkDKNseJZH1X80d604IOWWPVcxPs0Aqdcw1F9e6e5XUPqkBXye +6IKeA/9YxJpY5QFb4EP/AyyGjIj1CsmukwBHJ+fNKribdyY5YgaX0THAjXlYLRmC +HZugmtVIF71EiDutHfq9RPiLP1O13nh3zapo9MD9CrqJqPE7SavGSQ+l1Tnedp6Y +UGWffmL7e0XBCSvB3QKf+ZxOLIK2s72Wl5Mwd6gfBEdcBxBqsLQtQWxhbiBHYXRl +cyAoTm8gY29tbWVudCkgPGdhdGVzQHlhaG9vLWluYy5jb20+iF4EExECAB4FAknN +UFYCGwMGCwkIBwMCAxUCAwMWAgECHgECF4AACgkQiL0/VwTZuDLWUACfWFyJwm6D +oAW88ITpvypdOtRakYsAoM22YGm4jla+y9lryous9eIHNu1VuQENBEnNUFgQBADK +2OL+zY/4V80Bans/v0sRf8cUzB82eW84vMgxRmMS+Kwty8CBwEV4sgWbv0vJwifo +9ZhlMLjqmBwGTR3wIXqtRQAyk8rLYod31KWFyt64vZaubbxZNDxiM5CMFO+q3xjL 
+hbsMnIC/QliKT2d0K2radTp+jNz7lOkmSvZ9iQ7/0wADBgP/TWsRYmViLcUcOa1N +4Cij8Y3c1tD2qYI5b9eDY5GiOeECss0CudJN/cIvDNstLtLa4JbX5INRpskVTVsx +Duermrsj5/tONUb9GwBnhUuzA0GW1WCkpZXJG2Z2iwKcJ8wQ5KaPj9TNdahF5h7q +outJyNeVe9TC2PytS0tfCzd2lnOISQQYEQIACQUCSc1QWAIbDAAKCRCIvT9XBNm4 +MrvDAJ4ySDj+5CSCB+DQ8PotTK5oX7fDcQCfc0d5dQrqc2ul8/4WDB/LmNcXuhI= +=B5eJ +-END PGP PUBLIC KEY BLOCK- +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.2.6 (GNU/Linux) + +mQGiBEvhqBsRBACLI1GZhvYhVqep2oyGFPKuB/xaK55TYMoWQT60dQ1iUMGgKXww +yfsQ0Jd2sn/VJ4b1WmO7jfZ+5+ijHnk22j9ogVLJnFt/FAtYNonwt3lA5El6kbsD +gU/juBRLXVnSo9tu9a5cy6l02iT3Ndq5yGuV6VBJnGEqX+D7UtS9oCBLswCgwMR3 +UWOhvo0dUQdZ7sIuko40GxsEAIRIpdh479C2Q6fGlp2FdqoRXzEUt13a3S0le/Pf +1uIZLs8Ns9L1nrCT66bq//4qRweN8PmEaFT6UZk3tK29a46Vbc+fSbmGRYtAtmiU +1P8jUTSKUZBb4MNAHq9MfICd9bxfMyBupUQhH2mlZqKafLg48D8G1jaOECnXO4YV +aR0WA/9yJ95sfC4L6MN3iIaHpQcuLStGLyIGqWp44t2ok0AhY5CJm6YaKALV0Aki ++uiGNirMLdZTSNiDZvUu2XxShzPkzTW87Xh5QkkeNB/SeSTvox2O/5TZchCr+6qF +rOd9qoxrfDzJeZkc3xY7G/bil40TWbOTbOPmK039vMI8PXe5VrQdRGFuaWVsIERh +aSA8ZGFpanljQGdtYWlsLmNvbT6IYAQTEQIAIAUCS+GoGwIbAwYLCQgHAwIEFQII +AwQWAgMBAh4BAheAAAoJEI3MXbca8D1y8/8AoLl0L3urQB584t+6skLTWIiFK1gq +AJ9Meemm8MUUiTMwq20jCra3pcM1jbkCDQRL4ahDEAg
svn commit: r1799357 - in /pig/site: author/src/documentation/content/xdocs/ publish/ publish/docs/r0.17.0/ publish/docs/r0.17.0/api/ publish/docs/r0.17.0/api/org/ publish/docs/r0.17.0/api/org/apache/
Author: szita Date: Tue Jun 20 16:52:49 2017 New Revision: 1799357 URL: http://svn.apache.org/viewvc?rev=1799357&view=rev Log: Pig release 0.17.0 [This commit notification would consist of 921 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r1799414 [1/2] - in /pig/trunk: build.xml src/docs/jdiff/pig_0.16.0.xml src/docs/jdiff/pig_0.17.0.xml
Author: szita Date: Wed Jun 21 09:29:16 2017 New Revision: 1799414 URL: http://svn.apache.org/viewvc?rev=1799414&view=rev Log: Jdiff change for 0.17.0 Added: pig/trunk/src/docs/jdiff/pig_0.17.0.xml Removed: pig/trunk/src/docs/jdiff/pig_0.16.0.xml Modified: pig/trunk/build.xml Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1799414&r1=1799413&r2=1799414&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Wed Jun 21 09:29:16 2017 @@ -286,7 +286,7 @@ - + http://hadoop.apache.org/${name}/docs/r${jdiff.stable}/api/"/>
svn commit: r1799414 [2/2] - in /pig/trunk: build.xml src/docs/jdiff/pig_0.16.0.xml src/docs/jdiff/pig_0.17.0.xml
Added: pig/trunk/src/docs/jdiff/pig_0.17.0.xml URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/jdiff/pig_0.17.0.xml?rev=1799414&view=auto == --- pig/trunk/src/docs/jdiff/pig_0.17.0.xml (added) +++ pig/trunk/src/docs/jdiff/pig_0.17.0.xml Wed Jun 21 09:29:16 2017 @@ -0,0 +1,93183 @@

svn commit: r1799421 - in /pig/trunk: CHANGES.txt build.xml
Author: szita Date: Wed Jun 21 09:59:36 2017 New Revision: 1799421 URL: http://svn.apache.org/viewvc?rev=1799421&view=rev Log: PIG-5262: Fix jdiff related issues: fail build upon error, correct xml character escaping (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1799421&r1=1799420&r2=1799421&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Wed Jun 21 09:59:36 2017 @@ -32,6 +32,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5262: Fix jdiff related issues: fail build upon error, correct xml character escaping (szita) + PIG-5225: Several unit tests are not annotated with @Test (nkollar via rohini) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1799421&r1=1799420&r2=1799421&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Wed Jun 21 09:59:36 2017 @@ -1574,6 +1574,8 @@ + + @@ -1601,6 +1603,10 @@ + + + +
svn commit: r1799423 - in /pig/branches/branch-0.17: CHANGES.txt build.xml
Author: szita Date: Wed Jun 21 10:09:51 2017 New Revision: 1799423 URL: http://svn.apache.org/viewvc?rev=1799423&view=rev Log: PIG-5262: Fix jdiff related issues: fail build upon error, correct xml character escaping (szita) Modified: pig/branches/branch-0.17/CHANGES.txt pig/branches/branch-0.17/build.xml Modified: pig/branches/branch-0.17/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/CHANGES.txt?rev=1799423&r1=1799422&r2=1799423&view=diff == --- pig/branches/branch-0.17/CHANGES.txt (original) +++ pig/branches/branch-0.17/CHANGES.txt Wed Jun 21 10:09:51 2017 @@ -111,6 +111,8 @@ OPTIMIZATIONS Â BUG FIXES +PIG-5262: Fix jdiff related issues: fail build upon error, correct xml character escaping (szita) + PIG-5248: Fix TestCombiner#testGroupByLimit after PigOnSpark merge (rohini) PIG-5245: TestGrunt.testStopOnFailure is flaky (rohini) Modified: pig/branches/branch-0.17/build.xml URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/build.xml?rev=1799423&r1=1799422&r2=1799423&view=diff == --- pig/branches/branch-0.17/build.xml (original) +++ pig/branches/branch-0.17/build.xml Wed Jun 21 10:09:51 2017 @@ -1574,6 +1574,8 @@ + + @@ -1601,6 +1603,10 @@ + + + +
svn commit: r1801128 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java
Author: szita Date: Fri Jul 7 08:38:49 2017 New Revision: 1801128 URL: http://svn.apache.org/viewvc?rev=1801128&view=rev Log: PIG-5269: MapReduceLauncher and MRJobStats imports org.python.google.common.collect.Lists instead of org.google.common.collect.Lists (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1801128&r1=1801127&r2=1801128&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Jul 7 08:38:49 2017 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-5269: MapReduceLauncher and MRJobStats imports org.python.google.common.collect.Lists instead of org.google.common.collect.Lists (nkollar via szita) + PIG-4700: Enable progress reporting for Tasks in Tez (satishsaley via rohini) PIG-5251: Bump joda-time to 2.9.9 (dbist13 via rohini) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java?rev=1801128&r1=1801127&r2=1801128&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java Fri Jul 7 08:38:49 2017 @@ -87,9 +87,6 @@ import org.apache.pig.tools.pigstats.map import org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil; import org.apache.pig.tools.pigstats.mapreduce.MRScriptState; -import org.python.google.common.collect.Lists; - - /** * Main class that launches pig for Map Reduce * Modified: pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java?rev=1801128&r1=1801127&r2=1801128&view=diff == --- pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java (original) +++ pig/trunk/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java Fri Jul 7 08:38:49 2017 @@ -26,6 +26,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -56,9 +57,6 @@ import org.apache.pig.tools.pigstats.Out import org.apache.pig.tools.pigstats.PigStats.JobGraph; import org.apache.pig.tools.pigstats.PigStats.JobGraphPrinter; -import org.python.google.common.collect.Lists; - - /** * This class encapsulates the runtime statistics of a MapReduce job. * Job statistics is collected when job is completed.
svn commit: r1801161 - in /pig/trunk: ./ test/org/apache/pig/spark/ test/org/apache/pig/test/utils/dotGraph/
Author: szita Date: Fri Jul 7 13:38:22 2017 New Revision: 1801161 URL: http://svn.apache.org/viewvc?rev=1801161&view=rev Log: PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1801161&r1=1801160&r2=1801161&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Jul 7 13:38:22 2017 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita) + PIG-5269: MapReduceLauncher and MRJobStats imports org.python.google.common.collect.Lists instead of org.google.common.collect.Lists (nkollar via szita) PIG-4700: Enable progress reporting for Tasks in Tez (satishsaley via rohini) Modified: pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java?rev=1801161&r1=1801160&r2=1801161&view=diff == --- pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java (original) +++ pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java Fri Jul 7 13:38:22 2017 @@ -24,7 +24,6 @@ import java.io.FileOutputStream; import java.io.PrintStream; import java.util.Properties; import java.util.Random; - import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; @@ -44,13 +43,15 @@ import org.apache.pig.impl.plan.NodeIdGe import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.test.Util; import org.apache.pig.test.utils.TestHelper; +import org.apache.pig.test.utils.dotGraph.DotGraph; +import org.apache.pig.test.utils.dotGraph.DotGraphReader; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Test cases to test the SparkCompiler. 
VERY IMPORTANT NOTE: The tests here @@ -74,9 +75,8 @@ public class TestSparkCompiler { public void doPrint(PrintStream ps, SparkOperPlan plan) throws VisitorException, ParserConfigurationException, TransformerException { switch (this) { case DOT: -throw new RuntimeException("Testing in DOT format not supported yet"); -//(new DotSparkPrinter(plan, ps)).dump(); -//break; +(new DotSparkPrinter(plan, ps)).dump(); +break; case XML: XMLSparkPrinter printer = new XMLSparkPrinter(ps, plan); printer.visit(); @@ -88,6 +88,19 @@ public class TestSparkCompiler { break; } } + +public boolean compare(String goldenPlan, String compiledPlan) { +switch (this) { +case DOT: +DotGraph a = DotGraphReader.load(goldenPlan); +DotGraph b = DotGraphReader.load(compiledPlan); +return a.isomorphic(b); +case XML: +case TEXT: +default: +return TestHelper.sortUDFs(Util.removeSignature(goldenPlan)).equals(TestHelper.sortUDFs(Util.removeSignature(compiledPlan))); +} +} } // If for some reason, the golden files need to be regenerated, set this to @@ -135,8 +148,7 @@ public class TestSparkCompiler { run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-text.gld", PlanPrinter.TEXT); run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-xml.gld", PlanPrinter.XML); -//TODO: enable this when DOT file comparison is supported -//run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT); +run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT); } private void run(String query, String expectedFile, PlanPrinter planPrinter) throws Exception { @@ -174,8 +186,8 @@ public class TestSparkCompiler { String goldenPlanClean = Util.standardizeNewline(goldenPlan).tri
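The switch to DotGraph.isomorphic above is what makes DOT golden-file testing viable: operator keys (scope numbers) differ from run to run, so exact text comparison would always fail, while isomorphism checks the plan's shape. A sketch of the call pattern, assuming Pig's test-utils classes are on the classpath and that the simplified DOT text below is accepted by the bundled parser:

    import org.apache.pig.test.utils.dotGraph.DotGraph;
    import org.apache.pig.test.utils.dotGraph.DotGraphReader;

    public class DotCompareSketch {
        public static void main(String[] args) {
            String golden   = "digraph plan { scope_10 -> scope_11; }";  // illustrative DOT text
            String compiled = "digraph plan { scope_73 -> scope_74; }";  // same shape, new keys
            DotGraph a = DotGraphReader.load(golden);
            DotGraph b = DotGraphReader.load(compiled);
            System.out.println(a.isomorphic(b));  // structural, not textual, comparison
        }
    }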
svn commit: r1802674 - /pig/trunk/CHANGES.txt
Author: szita Date: Sat Jul 22 11:18:59 2017 New Revision: 1802674 URL: http://svn.apache.org/viewvc?rev=1802674&view=rev Log: PIG-5157: Adding missing entry in CHANGES.txt Modified: pig/trunk/CHANGES.txt Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802674&r1=1802673&r2=1802674&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Sat Jul 22 11:18:59 2017 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-5157: Upgrade to Spark 2.0 (nkollar via liyunzhang) + PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita) PIG-5269: MapReduceLauncher and MRJobStats imports org.python.google.common.collect.Lists instead of org.google.common.collect.Lists (nkollar via szita)
svn commit: r1802675 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/spark/ src/org/apache/pig/backend/hadoop/executionengine/spark/converter/
Author: szita Date: Sat Jul 22 11:28:02 2017 New Revision: 1802675 URL: http://svn.apache.org/viewvc?rev=1802675&view=rev Log: PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark mode after PIG-5157 (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802675&r1=1802674&r2=1802675&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Sat Jul 22 11:28:02 2017 @@ -38,6 +38,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark mode after PIG-5157 (nkollar via szita) + PIG-4767: Partition filter not pushed down when filter clause references variable from another load path (knoguchi) PIG-5270: Typo in Pig Logging (FromAlaska49 via daijy) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1802675&r1=1802674&r2=1802675&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Sat Jul 22 11:28:02 2017 @@ -175,7 +175,6 @@ public class SparkLauncher extends Launc SparkPigStats sparkStats = (SparkPigStats) pigContext .getExecutionEngine().instantiatePigStats(); sparkStats.initialize(pigContext, sparkplan, jobConf); -UDFContext.getUDFContext().addJobConf(jobConf); PigStats.start(sparkStats); startSparkIfNeeded(jobConf, pigContext); Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java?rev=1802675&r1=1802674&r2=1802675&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkShims.java Sat Jul 22 11:28:02 2017 @@ -60,7 +60,12 @@ public abstract class SparkShims impleme public static SparkShims getInstance() { if (sparkShims == null) { -String sparkVersion = UDFContext.getUDFContext().getJobConf().get(SPARK_VERSION, ""); +String sparkVersion; +if (UDFContext.getUDFContext().isFrontend()) { +sparkVersion = SparkContext.getOrCreate().version(); +} else { +sparkVersion = UDFContext.getUDFContext().getJobConf().get(SPARK_VERSION, ""); +} LOG.info("Initializing SparkShims for Spark version: " + sparkVersion); String sparkMajorVersion = getSparkMajorVersion(sparkVersion); try { Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java?rev=1802675&r1=1802674&r2=1802675&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java Sat Jul 22 11:28:02 2017 @@ -232,8 +232,8 @@ public class SkewedJoinConverter impleme } return 
result; -} catch (Exception e) { -log.warn(e); +} catch (ExecException e) { +log.error(e); return null; } }
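The SparkShims change above boils down to choosing the Spark version source by execution side. A hedged sketch using the calls shown in the diff (the SPARK_VERSION constant's key string is not shown in the patch, so it is passed in as a parameter here):

import org.apache.pig.impl.util.UDFContext;
import org.apache.spark.SparkContext;

public class SparkVersionSketch {
    public static String detectSparkVersion(String sparkVersionKey) {
        if (UDFContext.getUDFContext().isFrontend()) {
            // driver side: ask the live (or newly created) SparkContext directly
            return SparkContext.getOrCreate().version();
        }
        // executor side: the launcher is expected to have put the version into the job conf
        return UDFContext.getUDFContext().getJobConf().get(sparkVersionKey, "");
    }
}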
svn commit: r1802676 - in /pig/trunk: ./ src/org/apache/pig/ src/org/apache/pig/impl/io/ test/org/apache/pig/data/ test/org/apache/pig/test/
Author: szita Date: Sat Jul 22 11:38:47 2017 New Revision: 1802676 URL: http://svn.apache.org/viewvc?rev=1802676&view=rev Log: PIG-3655: BinStorage and InterStorage approach to record markers is broken (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/PigConfiguration.java pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java pig/trunk/src/org/apache/pig/impl/io/InterRecordWriter.java pig/trunk/src/org/apache/pig/impl/io/InterStorage.java pig/trunk/test/org/apache/pig/data/TestSchemaTuple.java pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java pig/trunk/test/org/apache/pig/test/TestFRJoin2.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802676&r1=1802675&r2=1802676&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Sat Jul 22 11:38:47 2017 @@ -38,6 +38,8 @@ OPTIMIZATIONS BUG FIXES +PIG-3655: BinStorage and InterStorage approach to record markers is broken (szita) + PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark mode after PIG-5157 (nkollar via szita) PIG-4767: Partition filter not pushed down when filter clause references variable from another load path (knoguchi) Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1802676&r1=1802675&r2=1802676&view=diff == --- pig/trunk/src/org/apache/pig/PigConfiguration.java (original) +++ pig/trunk/src/org/apache/pig/PigConfiguration.java Sat Jul 22 11:38:47 2017 @@ -40,6 +40,24 @@ public class PigConfiguration { */ public static final String PIG_AUTO_LOCAL_INPUT_MAXBYTES = "pig.auto.local.input.maxbytes"; + +/** + * Sets the length of record markers in binary files produced by Pig between jobs. + * A longer byte sequence means less chance of collision with actual data, + * a shorter sequence means less overhead + */ +public static final String PIG_INTERSTORAGE_SYNCMARKER_SIZE = "pig.interstorage.syncmarker.size"; +public static final int PIG_INTERSTORAGE_SYNCMARKER_SIZE_MAX = 16; +public static final int PIG_INTERSTORAGE_SYNCMARKER_SIZE_DEFAULT = 10; +public static final int PIG_INTERSTORAGE_SYNCMARKER_SIZE_MIN = 2; + +/** + * Defines the interval (in bytes) at which a sync marker should be written into the binary file + */ +public static final String PIG_INTERSTORAGE_SYNCMARKER_INTERVAL = "pig.interstorage.syncmarker.interval"; +public static final long PIG_INTERSTORAGE_SYNCMARKER_INTERVAL_DEFAULT = 2000; + + /** * Boolean value used to enable or disable fetching without a mapreduce job for DUMP.
True by default */ Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1802676&r1=1802675&r2=1802676&view=diff == --- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original) +++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Sat Jul 22 11:38:47 2017 @@ -42,16 +42,23 @@ import org.apache.pig.data.Tuple; public class InterRecordReader extends RecordReader { private long start; - private long pos; + private long lastDataPos; private long end; private BufferedPositionedInputStream in; private Tuple value = null; - public static final int RECORD_1 = 0x01; - public static final int RECORD_2 = 0x02; - public static final int RECORD_3 = 0x03; private DataInputStream inData = null; private static InterSedes sedes = InterSedesFactory.getInterSedesInstance(); + private byte[] syncMarker; + private long lastSyncPos = -1; + private long syncMarkerInterval; + private long dataBytesSeen = 0; + + public InterRecordReader(int syncMarkerLength, long syncMarkerInterval) { + this.syncMarker = new byte[syncMarkerLength]; + this.syncMarkerInterval = syncMarkerInterval; + } + public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; @@ -60,63 +67,131 @@ public class InterRecordReader extends R end = start + split.getLength(); final Path file = split.getPath(); -// open the file and seek to the start of the split +// open the file FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); -if (start != 0) { -fileIn.seek(start); + +// read the magic byte sequence serving as record marker but only if the file is not empty +if (!(start == 0 &&a
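To make the record-marker scheme above concrete, here is an illustrative writer-side sketch (not the actual InterRecordWriter code): a random marker of configurable length is generated once and re-emitted whenever at least the configured number of data bytes has been written since the last marker.

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Random;

public class SyncMarkerWriterSketch {
    private final byte[] syncMarker;
    private final long interval;
    private long bytesSinceLastMarker = 0;

    public SyncMarkerWriterSketch(int markerLength, long interval) {
        this.syncMarker = new byte[markerLength];
        new Random().nextBytes(syncMarker); // longer random sequence => lower collision chance with data
        this.interval = interval;
    }

    /** Writes one serialized record, preceded by a sync marker if the interval has elapsed. */
    public void writeRecord(DataOutputStream out, byte[] serializedTuple) throws IOException {
        if (bytesSinceLastMarker >= interval) {
            out.write(syncMarker);
            bytesSinceLastMarker = 0;
        }
        out.write(serializedTuple);
        bytesSinceLastMarker += serializedTuple.length;
    }
}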
svn commit: r1803628 - in /pig/trunk: CHANGES.txt build.xml
Author: szita Date: Tue Aug 1 08:57:19 2017 New Revision: 1803628 URL: http://svn.apache.org/viewvc?rev=1803628&view=rev Log: building "jar" should not call "clean" (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1803628&r1=1803627&r2=1803628&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Aug 1 08:57:19 2017 @@ -40,6 +40,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5276: building "jar" should not call "clean" (nkollar via szita) + PIG-5246: Modify bin/pig about SPARK_HOME, SPARK_ASSEMBLY_JAR after upgrading spark to 2 (liyunzhang) PIG-3655: BinStorage and InterStorage approach to record markers is broken (szita) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1803628&r1=1803627&r2=1803628&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Tue Aug 1 08:57:19 2017 @@ -454,6 +454,10 @@ + + + + @@ -798,14 +802,14 @@ Compiling against Spark 2 - + Compiling against Spark 1 - +
svn commit: r1804035 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/io/InterRecordReader.java
Author: szita Date: Thu Aug 3 18:51:27 2017 New Revision: 1804035 URL: http://svn.apache.org/viewvc?rev=1804035&view=rev Log: PIG-5284: Fix flakyness introduced by PIG-3655 (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1804035&r1=1804034&r2=1804035&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Thu Aug 3 18:51:27 2017 @@ -40,6 +40,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5284: Fix flakyness introduced by PIG-3655 (szita) + PIG-5278: Unit test failures because of PIG-5264 (nkollar via rohini) PIG-5276: building "jar" should not call "clean" (nkollar via szita) Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1804035&r1=1804034&r2=1804035&view=diff == --- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original) +++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Thu Aug 3 18:51:27 2017 @@ -90,7 +90,7 @@ public class InterRecordReader extends R * @throws IOException */ private boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException { - int b = 0; + int b = Integer.MIN_VALUE; outer:while (b != -1) { if (b != syncMarker[0]) { @@ -110,6 +110,10 @@ outer:while (b != -1) { b = in.read(); if (b == -1) return false; if ((byte) b != syncMarker[i]) { + if (in.getPosition() > end) { + //Again we should not read past the split end, only if at least the first byte of marker was seen before it + return false; + } continue outer; } ++i;
svn commit: r1804497 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java
Author: szita Date: Wed Aug 9 08:41:45 2017 New Revision: 1804497 URL: http://svn.apache.org/viewvc?rev=1804497&view=rev Log: PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1804497&r1=1804496&r2=1804497&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Wed Aug 9 08:41:45 2017 @@ -40,6 +40,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita) + PIG-5284: Fix flakyness introduced by PIG-3655 (szita) PIG-5278: Unit test failures because of PIG-5264 (nkollar via rohini) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java?rev=1804497&r1=1804496&r2=1804497&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkPigSplit.java Wed Aug 9 08:41:45 2017 @@ -22,16 +22,21 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.io.Serializable; +import java.util.List; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.SplitLocationInfo; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.pig.PigConfiguration; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; +import com.google.common.collect.Lists; + /** * Wrapper class for PigSplits in Spark mode * @@ -124,11 +129,13 @@ public interface SparkPigSplit extends W @Override public void readFields(DataInput is) throws IOException { + this.getConf().readFields(is); pigSplit.readFields(is); } @Override public void write(DataOutput os) throws IOException { + SparkPigSplitsUtils.writeConfigForPigSplits(this.getConf(), os); pigSplit.write(os); } @@ -242,11 +249,13 @@ public interface SparkPigSplit extends W @Override public void readFields(DataInput is) throws IOException { +this.getConf().readFields(is); pigSplit.readFields(is); } @Override public void write(DataOutput os) throws IOException { +SparkPigSplitsUtils.writeConfigForPigSplits(this.getConf(), os); pigSplit.write(os); } @@ -301,4 +310,32 @@ public interface SparkPigSplit extends W } } +public static class SparkPigSplitsUtils { + +private static final List PIGSPLIT_CONFIG_KEYS = Lists.newArrayList( +CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, +PigConfiguration.PIG_COMPRESS_INPUT_SPLITS +); + +/** + * Writes a subset of the originalConf into the output stream os. Only keys in PIG_SPLIT_CONFIG_KEYS are + * considered due to optimization purposes. During deseralization on a Spark executor we need to take care of + * setting the configuration manually because Spark only sets an empty Configuration instance on the PigSplit. 
+ * @param originalConf + * @param os + * @throws IOException + */ +public static void writeConfigForPigSplits(Configuration originalConf, DataOutput os) throws IOException { +Configuration conf = new Configuration(false); +for (String key : PIGSPLIT_CONFIG_KEYS) { +String value = originalConf.get(key); +if (value != null) { +conf.set(key, value); +} +} +conf.write(os); +} + +} + }
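A self-contained sketch of the serialization pattern used here: only a whitelisted subset of the Hadoop Configuration travels with the split, and the executor side restores it with readFields. The key strings below are assumptions standing in for CommonConfigurationKeys.IO_SERIALIZATIONS_KEY and PigConfiguration.PIG_COMPRESS_INPUT_SPLITS.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;

public class SplitConfSketch {
    // assumed key strings; the real code references the library constants instead
    private static final List<String> KEYS =
            Arrays.asList("io.serializations", "pig.compress.input.splits");

    /** Serializes only the whitelisted keys, keeping the split payload small. */
    public static void writeSubset(Configuration original, DataOutput out) throws IOException {
        Configuration subset = new Configuration(false);   // empty, no default resources loaded
        for (String key : KEYS) {
            String value = original.get(key);
            if (value != null) {
                subset.set(key, value);
            }
        }
        subset.write(out);                                  // Configuration is a Writable
    }

    /** Restores the subset on the executor, where Spark only provides an empty Configuration. */
    public static Configuration readSubset(DataInput in) throws IOException {
        Configuration conf = new Configuration(false);
        conf.readFields(in);
        return conf;
    }
}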
svn commit: r1804929 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ src/org/apache/pig/data/
Author: szita Date: Sun Aug 13 16:09:32 2017 New Revision: 1804929 URL: http://svn.apache.org/viewvc?rev=1804929&view=rev Log: PIG-5277: Spark mode is writing nulls among tuples to the output (workaround) (szita) Added: pig/trunk/src/org/apache/pig/data/NonWritableTuple.java Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1804929&r1=1804928&r2=1804929&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Sun Aug 13 16:09:32 2017 @@ -44,6 +44,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5277: Spark mode is writing nulls among tuples to the output (workaround) (szita) + PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita) PIG-5284: Fix flakyness introduced by PIG-3655 (szita) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java?rev=1804929&r1=1804928&r2=1804929&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java Sun Aug 13 16:09:32 2017 @@ -37,6 +37,7 @@ import org.apache.pig.backend.hadoop.exe import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.StoreFuncDecorator; import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims; import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil; +import org.apache.pig.data.NonWritableTuple; import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.util.ObjectSerializer; @@ -141,7 +142,9 @@ public class PigOutputFormat extends Out public void write(WritableComparable key, Tuple value) throws IOException, InterruptedException { if(mode == Mode.SINGLE_STORE) { -storeDecorator.putNext(value); +if (!(value instanceof NonWritableTuple)) { +storeDecorator.putNext(value); +} } else { throw new IOException("Internal Error: Unexpected code path"); } Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java?rev=1804929&r1=1804928&r2=1804929&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/JoinGroupSparkConverter.java Sun Aug 13 16:09:32 2017 @@ -34,6 +34,7 @@ import org.apache.pig.backend.hadoop.exe import org.apache.pig.backend.hadoop.executionengine.spark.SparkUtil; import org.apache.pig.backend.hadoop.executionengine.spark.operator.POGlobalRearrangeSpark; import org.apache.pig.backend.hadoop.executionengine.spark.operator.POJoinGroupSpark; +import org.apache.pig.data.NonWritableTuple; import org.apache.pig.data.Tuple; import org.apache.pig.impl.io.NullableTuple; import org.apache.pig.impl.io.PigNullableWritable; @@ -209,7 +210,7 @@ public class 
JoinGroupSparkConverter imp out = (Tuple) result.result; break; case POStatus.STATUS_NULL: -out = null; +out = NonWritableTuple.INSTANCE; break; default: throw new RuntimeException( Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java?rev=1804929&r1=1804928&r2=1804929&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/c
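The shape of the workaround, reduced to a hedged sketch (NonWritableTuple itself is added by this commit but its body is not shown in the diff): converters emit a shared sentinel instead of a Java null for STATUS_NULL results, and the output format drops the sentinel before storing.

import java.io.IOException;

public class SentinelSkipSketch {
    /** Stand-in for NonWritableTuple.INSTANCE. */
    public static final Object SENTINEL = new Object();

    /** Minimal sink abstraction, only to keep the sketch self-contained. */
    interface Store {
        void putNext(Object tuple) throws IOException;
    }

    static void write(Object tuple, Store store) throws IOException {
        if (tuple != SENTINEL) {          // mirrors the instanceof NonWritableTuple check above
            store.putNext(tuple);
        }
    }
}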
svn commit: r1805427 - in /pig/trunk: CHANGES.txt build.xml
Author: szita Date: Fri Aug 18 13:34:36 2017 New Revision: 1805427 URL: http://svn.apache.org/viewvc?rev=1805427&view=rev Log: PIG-5294: Spark unit tests are always run in spark1 mode (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1805427&r1=1805426&r2=1805427&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Aug 18 13:34:36 2017 @@ -44,6 +44,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5294: Spark unit tests are always run in spark1 mode (szita) + PIG-5277: Spark mode is writing nulls among tuples to the output (workaround) (szita) PIG-5283: Configuration is not passed to SparkPigSplits on the backend (szita) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1805427&r1=1805426&r2=1805427&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Fri Aug 18 13:34:36 2017 @@ -456,6 +456,7 @@ + - + *** Building Test Sources *** *** To compile with all warnings enabled, supply -Dall.warnings=1 on command line *** *** Else, you will only be warned about deprecations *** @@ -892,32 +893,32 @@ - + Tests failed! - + Tests failed! - + Tests failed! - + Tests failed! - + Tests failed! - + Tests failed! @@ -998,7 +999,7 @@ + depends="setWindowsPath,setLinuxPath,compile-test,jar-simple,debugger.check,jackson-pig-3039-test-download">
svn commit: r1809162 - in /pig/trunk: CHANGES.txt contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java ivy.xml ivy/libraries.properties ivy/pig-template.xml iv
Author: szita Date: Thu Sep 21 13:40:47 2017 New Revision: 1809162 URL: http://svn.apache.org/viewvc?rev=1809162&view=rev Log: PIG-5298: Verify if org.mortbay.jetty is removable (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java pig/trunk/ivy.xml pig/trunk/ivy/libraries.properties pig/trunk/ivy/pig-template.xml pig/trunk/ivy/piggybank-template.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1809162&r1=1809161&r2=1809162&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Thu Sep 21 13:40:47 2017 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5298: Verify if org.mortbay.jetty is removable (nkollar via szita) + PIG-5268: Review of org.apache.pig.backend.hadoop.datastorage.HDataStorage (belugabehr via daijy) PIG-5288: Improve performance of PigTextRawBytesComparator (rohini) Modified: pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java URL: http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java?rev=1809162&r1=1809161&r2=1809162&view=diff == --- pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java (original) +++ pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestAllLoader.java Thu Sep 21 13:40:47 2017 @@ -56,9 +56,11 @@ import org.apache.pig.test.Util; import org.junit.After; import org.junit.Before; import org.junit.Test; -import org.mortbay.log.Log; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class TestAllLoader extends TestCase { +private static final Logger LOG = LoggerFactory.getLogger(TestAllLoader.class); enum TYPE { HIVERC(".rc", new HiveRCFileTestWriter()), GZIP_PLAIN(".gz", @@ -424,7 +426,7 @@ public class TestAllLoader extends TestC count++; } -Log.info("Validating expected: " + totalRowCount + " against " + count); +LOG.info("Validating expected: " + totalRowCount + " against " + count); assertEquals(totalRowCount, count); } Modified: pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1809162&r1=1809161&r2=1809162&view=diff == --- pig/trunk/ivy.xml (original) +++ pig/trunk/ivy.xml Thu Sep 21 13:40:47 2017 @@ -109,9 +109,7 @@ conf="hadoop2->master"/> - - @@ -168,12 +166,6 @@ rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - - - - http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1809162&r1=1809161&r2=1809162&view=diff == --- pig/trunk/ivy/libraries.properties (original) +++ pig/trunk/ivy/libraries.properties Thu Sep 21 13:40:47 2017 @@ -36,7 +36,6 @@ xmlenc.version=0.52 jersey.version=1.8 checkstyle.version=4.2 ivy.version=2.2.0 -jasper.version=6.1.14 groovy.version=2.4.5 guava.version=11.0 hadoop-common.version=2.7.3 @@ -55,7 +54,6 @@ jaxb-impl.version=2.2.3-1 jdeb.version=0.8 jdiff.version=1.0.9 jettison.version=1.3.4 -jetty.version=6.1.26 jline.version=2.11 joda-time.version=2.9.9 jopt.version=4.1 @@ -99,3 +97,4 @@ curator.version=2.6.0 htrace.version=3.1.0-incubating commons-lang3.version=3.1 scala-xml.version=1.0.5 +glassfish.el.version=3.0.1-b08 \ No newline at end of file Modified: pig/trunk/ivy/pig-template.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy/pig-template.xml?rev=1809162&r1=1809161&r2=1809162&view=diff == --- pig/trunk/ivy/pig-template.xml (original) +++ 
pig/trunk/ivy/pig-template.xml Thu Sep 21 13:40:47 2017 @@ -49,26 +49,6 @@ 3.1 - org.mortbay.jetty - jetty - 6.1.26 - - - org.mortbay.jetty - jetty-util - 6.1.26 - - - org.mortbay.jetty - jsp-api-2.1 - 6.1.14 - - - org.mortbay.jetty - jsp-2.1 - 6.1.14 - - commons-el commons-el 1.0 @@ -134,16 +114,16 @@ 2.9.9 + org.glassfish + javax.el + 3.0.1-b08 + + org.apache.avro avro 1.7.5 - - org.mortbay.jetty - jetty - -
svn commit: r1811322 - in /pig/trunk: CHANGES.txt build.xml src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java src/org/apache/pig/impl/util/JarManager.java test/org/apache/pig/
Author: szita Date: Fri Oct 6 12:02:22 2017 New Revision: 1811322 URL: http://svn.apache.org/viewvc?rev=1811322&view=rev Log: PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java pig/trunk/src/org/apache/pig/impl/util/JarManager.java pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1811322&r1=1811321&r2=1811322&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Oct 6 12:02:22 2017 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita) + PIG-4120: Broadcast the index file in case of POMergeCoGroup and POMergeJoin (satishsaley via rohini) PIG-5306: REGEX_EXTRACT() logs every line that doesn't match (satishsaley via rohini) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1811322&r1=1811321&r2=1811322&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Fri Oct 6 12:02:22 2017 @@ -118,6 +118,7 @@ + @@ -160,7 +161,6 @@ - @@ -278,6 +278,7 @@ value="${mvnrepo}/org/codehaus/jackson/jackson-core-asl/${jackson-pig-3039-test.version}/jackson-core-asl-${jackson-pig-3039-test.version}.jar"/> + @@ -893,32 +894,32 @@ - + Tests failed! - + Tests failed! - + Tests failed! - + Tests failed! - + Tests failed! - + Tests failed! @@ -957,6 +958,7 @@ + @@ -999,7 +1001,7 @@ + depends="setWindowsPath,setLinuxPath,compile-test,debugger.check,jackson-pig-3039-test-download"> @@ -1072,6 +1074,16 @@ + + *** Creating pigtest.jar *** + + + + + + + + Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1811322&r1=1811321&r2=1811322&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Fri Oct 6 12:02:22 2017 @@ -431,6 +431,7 @@ public class SparkLauncher extends Launc Set allJars = new HashSet(); LOG.info("Add default jars to Spark Job"); allJars.addAll(JarManager.getDefaultJars()); +JarManager.addPigTestJarIfPresent(allJars); LOG.info("Add script jars to Spark Job"); for (String scriptJar : pigContext.scriptJars) { allJars.add(scriptJar); @@ -536,23 +537,35 @@ public class SparkLauncher extends Launc return sparkPlan; } + +private static String getMaster(PigContext pc){ +String master = null; +if (pc.getExecType().isLocal()) { +master = "local"; +} else { +master = System.getenv("SPARK_MASTER"); +if (master == null) { +LOG.info("SPARK_MASTER not specified, using \"local\""); +master = "local"; +} +} +return master; +} + /** * Only one SparkContext may be active per JVM (SPARK-2243). When multiple threads start SparkLaucher, - * the static member sparkContext should be initialized only once + * the static member sparkContext should be initialized only by either local or cluster mode at a time. + * + * In case it was already initialized with a different mode than what the new pigContext instance wants, it will + * close down the existing SparkContext and re-initalize it with the new mode. 
*/ private static synchronized void startSparkIfNeeded(JobConf jobConf, PigContext pc) throws PigException { +String master = getMaster(pc); +if (sparkContext != null && !mas
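The master-selection logic described above, as a small stand-alone sketch (SPARK_MASTER is the environment variable read by the patch; the helper name and its boolean parameter are illustrative):

public class SparkMasterSketch {
    static String chooseMaster(boolean isLocalExecType) {
        if (isLocalExecType) {
            return "local";
        }
        String master = System.getenv("SPARK_MASTER");
        if (master == null) {
            // no cluster master configured: fall back to local mode, as the patch logs and does
            return "local";
        }
        return master;
    }
}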
svn commit: r1815189 - in /pig/trunk: CHANGES.txt NOTICE.txt build.xml ivy.xml ivy/libraries.properties ivy/pig-template.xml ivy/piggybank-template.xml ivy/pigunit-template.xml
Author: szita Date: Tue Nov 14 08:53:16 2017 New Revision: 1815189 URL: http://svn.apache.org/viewvc?rev=1815189&view=rev Log: PIG-5302: Remove HttpClient dependency (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/NOTICE.txt pig/trunk/build.xml pig/trunk/ivy.xml pig/trunk/ivy/libraries.properties pig/trunk/ivy/pig-template.xml pig/trunk/ivy/piggybank-template.xml pig/trunk/ivy/pigunit-template.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Nov 14 08:53:16 2017 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5302: Remove HttpClient dependency (nkollar via szita) + PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita) PIG-4120: Broadcast the index file in case of POMergeCoGroup and POMergeJoin (satishsaley via rohini) Modified: pig/trunk/NOTICE.txt URL: http://svn.apache.org/viewvc/pig/trunk/NOTICE.txt?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/NOTICE.txt (original) +++ pig/trunk/NOTICE.txt Tue Nov 14 08:53:16 2017 @@ -36,10 +36,6 @@ This product includes/uses HyperSQL (htt Copyright (c) 2001-2010, The HSQL Development Group All rights reserved. -This product includes/uses xmlenc (http://xmlenc.sourceforge.net/) -Copyright 2003-2005, Ernst de Haan -All rights reserved. - This product includes/uses Joda (http://joda-time.sourceforge.net/) Copyright 2001-2006 Stephen Colebourne Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Tue Nov 14 08:53:16 2017 @@ -776,7 +776,6 @@ - Modified: pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/ivy.xml (original) +++ pig/trunk/ivy.xml Tue Nov 14 08:53:16 2017 @@ -55,12 +55,8 @@ - - - http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/ivy/libraries.properties (original) +++ pig/trunk/ivy/libraries.properties Tue Nov 14 08:53:16 2017 @@ -26,13 +26,11 @@ commons-codec.version=1.4 commons-io.version=2.3 commons-el.version=1.0 commons-logging.version=1.1.1 -commons-lang.version=2.4 +commons-lang.version=2.6 commons-configuration.version=1.6 commons-collections.version=3.2.1 commons-collections4.version=4.0 -commons-httpclient.version=3.1 commons-math3.version=3.1.1 -xmlenc.version=0.52 jersey.version=1.8 checkstyle.version=4.2 ivy.version=2.2.0 @@ -44,7 +42,7 @@ hadoop-mapreduce.version=2.7.3 hbase1.version=1.2.4 hsqldb.version=1.8.0.10 hive.version=1.2.1 -httpcomponents.version=4.1 +httpcomponents.version=4.4 jackson.version=1.9.13 jackson-pig-3039-test.version=1.9.9 javacc.version=4.2 @@ -95,6 +93,6 @@ snappy.version=0.2 leveldbjni.version=1.8 curator.version=2.6.0 htrace.version=3.1.0-incubating -commons-lang3.version=3.1 +commons-lang3.version=3.6 scala-xml.version=1.0.5 glassfish.el.version=3.0.1-b08 \ No newline at end of file Modified: pig/trunk/ivy/pig-template.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy/pig-template.xml?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/ivy/pig-template.xml (original) +++ pig/trunk/ivy/pig-template.xml Tue Nov 14 08:53:16 2017 @@ -28,16 +28,6 @@ commons-cli 1.2 - - xmlenc - xmlenc - 0.52 - - - commons-httpclient - commons-httpclient - 3.1 - commons-codec commons-codec 
Modified: pig/trunk/ivy/piggybank-template.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy/piggybank-template.xml?rev=1815189&r1=1815188&r2=1815189&view=diff == --- pig/trunk/ivy/piggybank-template.xml (original) +++ pig/trunk/ivy/piggybank-template.xml Tue Nov 14 08:53:16 2017 @@ -26,7 +26,7 @@ commons-lang commons-lang - 2.4 + 2.6 log4j Modified: pig/trunk/ivy/pigunit-template.xml URL: http://svn.apache.o
svn commit: r1816542 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
Author: szita Date: Tue Nov 28 13:14:51 2017 New Revision: 1816542 URL: http://svn.apache.org/viewvc?rev=1816542&view=rev Log: PIG-5316: Initialize mapred.task.id property for PoS jobs (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1816542&r1=1816541&r2=1816542&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Nov 28 13:14:51 2017 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5316: Initialize mapred.task.id property for PoS jobs (nkollar via szita) + PIG-5302: Remove HttpClient dependency (nkollar via szita) PIG-5305: Enable yarn-client mode execution of tests in Spark (1) mode (szita) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1816542&r1=1816541&r2=1816542&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Tue Nov 28 13:14:51 2017 @@ -42,6 +42,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.pig.PigConfiguration; import org.apache.pig.PigException; import org.apache.pig.PigWarning; @@ -182,6 +183,7 @@ public class SparkLauncher extends Launc jobGroupID = String.format("%s-%s",sparkContext.getConf().getAppId(), UUID.randomUUID().toString()); jobConf.set(MRConfiguration.JOB_ID,jobGroupID); +jobConf.set(MRConfiguration.TASK_ID, new TaskAttemptID().toString()); sparkContext.setJobGroup(jobGroupID, "Pig query to Spark cluster", false);
svn commit: r1816554 - /pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
Author: szita Date: Tue Nov 28 15:25:15 2017 New Revision: 1816554 URL: http://svn.apache.org/viewvc?rev=1816554&view=rev Log: PIG-5316: Initialize mapred.task.id property for PoS jobs (fix) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1816554&r1=1816553&r2=1816554&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Tue Nov 28 15:25:15 2017 @@ -42,7 +42,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.pig.PigConfiguration; import org.apache.pig.PigException; import org.apache.pig.PigWarning; @@ -75,6 +74,7 @@ import org.apache.pig.backend.hadoop.exe import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStream; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion; +import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims; import org.apache.pig.backend.hadoop.executionengine.spark.converter.BroadcastConverter; import org.apache.pig.backend.hadoop.executionengine.spark.converter.CollectedGroupConverter; import org.apache.pig.backend.hadoop.executionengine.spark.converter.CounterConverter; @@ -183,7 +183,7 @@ public class SparkLauncher extends Launc jobGroupID = String.format("%s-%s",sparkContext.getConf().getAppId(), UUID.randomUUID().toString()); jobConf.set(MRConfiguration.JOB_ID,jobGroupID); -jobConf.set(MRConfiguration.TASK_ID, new TaskAttemptID().toString()); +jobConf.set(MRConfiguration.TASK_ID, HadoopShims.getNewTaskAttemptID().toString()); sparkContext.setJobGroup(jobGroupID, "Pig query to Spark cluster", false);
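Taken together, the two commits amount to populating mapred.task.id on the job configuration used by Pig-on-Spark, first with a plain TaskAttemptID and then via the HadoopShims helper. A hedged sketch of the first variant (the property key is written out literally here; the real code goes through MRConfiguration.TASK_ID):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class TaskIdInitSketch {
    /** Ensures code paths that expect a task attempt id (e.g. some loaders/UDFs) find one. */
    public static void initTaskId(JobConf jobConf) {
        jobConf.set("mapred.task.id", new TaskAttemptID().toString());
    }
}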
svn commit: r1817995 - in /pig/trunk: ./ test/org/apache/pig/test/
Author: szita Date: Wed Dec 13 10:29:18 2017 New Revision: 1817995 URL: http://svn.apache.org/viewvc?rev=1817995&view=rev Log: PIG-5318: Unit test failures on Pig on Spark with Spark 2.2 (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/test/org/apache/pig/test/TestAssert.java pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java pig/trunk/test/org/apache/pig/test/TestGrunt.java pig/trunk/test/org/apache/pig/test/TestScalarAliases.java pig/trunk/test/org/apache/pig/test/TestStoreBase.java pig/trunk/test/org/apache/pig/test/Util.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1817995&r1=1817994&r2=1817995&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Wed Dec 13 10:29:18 2017 @@ -60,6 +60,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5318: Unit test failures on Pig on Spark with Spark 2.2 (nkollar via szita) + PIG-5201: Null handling on FLATTEN (knoguchi) PIG-5315: pig.script is not set for scripts run via PigServer (satishsaley via rohini) Modified: pig/trunk/test/org/apache/pig/test/TestAssert.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestAssert.java?rev=1817995&r1=1817994&r2=1817995&view=diff == --- pig/trunk/test/org/apache/pig/test/TestAssert.java (original) +++ pig/trunk/test/org/apache/pig/test/TestAssert.java Wed Dec 13 10:29:18 2017 @@ -25,6 +25,7 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.List; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.junit.Assert; import org.apache.pig.PigServer; @@ -118,7 +119,7 @@ public class TestAssert { } catch (FrontendException fe) { if (pigServer.getPigContext().getExecType().toString().startsWith("TEZ") || pigServer.getPigContext().getExecType().toString().startsWith("SPARK")) { - Assert.assertTrue(fe.getCause().getMessage().contains( + Assert.assertTrue(ExceptionUtils.getRootCause(fe).getMessage().contains( "Assertion violated: i should be greater than 1")); } else { Assert.assertTrue(fe.getCause().getMessage().contains( @@ -150,7 +151,7 @@ public class TestAssert { } catch (FrontendException fe) { if (pigServer.getPigContext().getExecType().toString().startsWith("TEZ") || pigServer.getPigContext().getExecType().toString().startsWith("SPARK")) { - Assert.assertTrue(fe.getCause().getMessage().contains( + Assert.assertTrue(ExceptionUtils.getRootCause(fe).getMessage().contains( "Assertion violated: i should be greater than 1")); } else { Assert.assertTrue(fe.getCause().getMessage().contains( Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java?rev=1817995&r1=1817994&r2=1817995&view=diff == --- pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java (original) +++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline.java Wed Dec 13 10:29:18 2017 @@ -24,7 +24,6 @@ import java.io.PrintStream; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1817995&r1=1817994&r2=1817995&view=diff == --- pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original) +++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java 
Wed Dec 13 10:29:18 2017 @@ -34,6 +34,7 @@ import java.util.Map; import java.util.Properties; import java.util.Random; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -1459,7 +1460,8 @@ public class TestEvalPipeline2 { pigServer.openIterator("b"); Assert.fail(); } catch (Exception e) { - Assert.assertTrue(e.getMessage().contains(ArrayList.class.getName())); + Assert.assertTrue(ExceptionUtils.getRootCause(e).getMessage().contains( +"Unexpected data type " + Array
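The recurring test change is a single pattern: with Spark 2.2 the relevant message sits on the root cause rather than the immediate cause, so the assertions unwrap it first. A small sketch of that pattern:

import org.apache.commons.lang3.exception.ExceptionUtils;
import static org.junit.Assert.assertTrue;

public class RootCauseAssertSketch {
    static void assertRootCauseMentions(Throwable t, String expected) {
        Throwable root = ExceptionUtils.getRootCause(t);
        String message = (root != null ? root : t).getMessage();
        assertTrue("expected message to contain: " + expected,
                message != null && message.contains(expected));
    }
}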
svn commit: r1820631 - in /pig/trunk: CHANGES.txt src/org/apache/pig/newplan/logical/relational/LOStore.java test/org/apache/pig/test/TestSchema.java
Author: szita Date: Tue Jan 9 10:03:07 2018 New Revision: 1820631 URL: http://svn.apache.org/viewvc?rev=1820631&view=rev Log: PIG-5325: Schema disambiguation can't be turned off for nested schemas (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java pig/trunk/test/org/apache/pig/test/TestSchema.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1820631&r1=1820630&r2=1820631&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Jan 9 10:03:07 2018 @@ -64,6 +64,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5325: Schema disambiguation can't be turned off for nested schemas (szita) + PIG-5311: POReservoirSample fails for more than Integer.MAX_VALUE records (rohini) PIG-3864: ToDate(userstring, format, timezone) computes DateTime with strange handling of Daylight Saving Time with location based timezones (daijy via rohini) Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java?rev=1820631&r1=1820630&r2=1820631&view=diff == --- pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java (original) +++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LOStore.java Tue Jan 9 10:03:07 2018 @@ -63,16 +63,31 @@ public class LOStore extends LogicalRela public LogicalSchema getSchema() throws FrontendException { schema = ((LogicalRelationalOperator)plan.getPredecessors(this).get(0)).getSchema(); -if (!disambiguationEnabled && schema != null && schema.getFields() != null) { +if (!disambiguationEnabled) { //If requested try and remove parent alias substring including colon(s) +removeDisambiguation(schema); +} + +return schema; +} + +/** + * Removes schema disambiguation parts (parent alias and :) from field aliases + * @param schema + * @return + */ +private static LogicalSchema removeDisambiguation(LogicalSchema schema) { +if (schema != null && schema.getFields() != null) { for (LogicalSchema.LogicalFieldSchema field : schema.getFields()) { +if (field.schema != null) { +removeDisambiguation(field.schema); +} if (field.alias == null || !field.alias.contains(":")) { continue; } field.alias = field.alias.substring(field.alias.lastIndexOf(":") + 1); } } - return schema; } Modified: pig/trunk/test/org/apache/pig/test/TestSchema.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestSchema.java?rev=1820631&r1=1820630&r2=1820631&view=diff == --- pig/trunk/test/org/apache/pig/test/TestSchema.java (original) +++ pig/trunk/test/org/apache/pig/test/TestSchema.java Tue Jan 9 10:03:07 2018 @@ -990,6 +990,33 @@ public class TestSchema { } @Test +public void testDisabledDisambiguationContainsNoColonsForNestedSchema() throws IOException { +resetDisambiguationTestPropertyOverride(); + +String inputFileName = "testPrepend-nested-input.txt"; +String[] inputData = new String[]{"apple\t1\tred", "orange\t2\torange", "kiwi\t3\tgreen", "orange\t4\torange"}; +Util.createInputFile(cluster, inputFileName, inputData); + +String script = "A = LOAD '" + inputFileName + "' AS (fruit:chararray, foo:int, color: chararray);" + +"B = LOAD '" + inputFileName + "' AS (id:chararray, bar:int);" + +"C = JOIN A by fruit, B by id;" + +"D = GROUP C by fruit;" + +"E = LOAD '" + inputFileName + "' AS (name:chararray, qwe:int);" + +"F = JOIN E by name, D by group;"; + +Util.registerMultiLineQuery(pigServer, script); + +//Prepending should happen with default settings 
+assertEquals("{E::name: chararray,E::qwe: int,D::group: chararray,D::C: {(A::fruit: chararray,A::foo: int,A::color: chararray,B::id: chararray,B::bar: int)}}", pigServer.dumpSchema("F").toString()); + +//Override prepend property setting (check for flatten, join) + pigServer.getPigContext().getProperties().setProperty(PigConfiguration.PIG_STORE_SCHEMA_DISAMBIGUATE, "false"); +
svn commit: r1821165 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/spark/JobStatisticCollector.java
Author: szita Date: Mon Jan 15 15:11:21 2018 New Revision: 1821165 URL: http://svn.apache.org/viewvc?rev=1821165&view=rev Log: PIG-5320: TestCubeOperator#testRollupBasic is flaky on Spark 2.2 (nkollar via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobStatisticCollector.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1821165&r1=1821164&r2=1821165&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Mon Jan 15 15:11:21 2018 @@ -64,6 +64,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5320: TestCubeOperator#testRollupBasic is flaky on Spark 2.2 (nkollar via szita) + PIG-5327: Check for DAG status before trying to kill (rohini) PIG-5325: Schema disambiguation can't be turned off for nested schemas (szita) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobStatisticCollector.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobStatisticCollector.java?rev=1821165&r1=1821164&r2=1821165&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobStatisticCollector.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/spark/JobStatisticCollector.java Mon Jan 15 15:11:21 2018 @@ -52,15 +52,13 @@ public class JobStatisticCollector { } } -public boolean waitForJobToEnd(int jobId) throws InterruptedException { +public void waitForJobToEnd(int jobId) throws InterruptedException { synchronized (sparkListener) { -if (finishedJobIds.contains(jobId)) { -finishedJobIds.remove(jobId); -return true; +while (!finishedJobIds.contains(jobId)) { +sparkListener.wait(); } -sparkListener.wait(); -return false; +finishedJobIds.remove(jobId); } }
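The fix replaces a single wait() with the standard guarded-wait idiom, which tolerates spurious wakeups and notifications meant for other jobs. A self-contained sketch of the same pattern (class and field names here are illustrative):

import java.util.HashSet;
import java.util.Set;

public class JobWaiterSketch {
    private final Object lock = new Object();
    private final Set<Integer> finishedJobIds = new HashSet<>();

    public void waitForJobToEnd(int jobId) throws InterruptedException {
        synchronized (lock) {
            // loop until this particular job is done; a bare wait() could return early
            while (!finishedJobIds.contains(jobId)) {
                lock.wait();
            }
            finishedJobIds.remove(jobId);
        }
    }

    public void jobFinished(int jobId) {
        synchronized (lock) {
            finishedJobIds.add(jobId);
            lock.notifyAll();
        }
    }
}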
svn commit: r1834854 - in /pig/trunk: CHANGES.txt ivy.xml ivy/libraries.properties
Author: szita Date: Mon Jul 2 15:25:55 2018 New Revision: 1834854 URL: http://svn.apache.org/viewvc?rev=1834854&view=rev Log: PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via szita) Modified: pig/trunk/CHANGES.txt pig/trunk/ivy.xml pig/trunk/ivy/libraries.properties Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1834854&r1=1834853&r2=1834854&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Mon Jul 2 15:25:55 2018 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via szita) + PIG-4092: Predicate pushdown for Parquet (nkollar via rohini) PIG-5317: Upgrade old dependencies: commons-lang, hsqldb, commons-logging (nkollar via rohini) Modified: pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1834854&r1=1834853&r2=1834854&view=diff == --- pig/trunk/ivy.xml (original) +++ pig/trunk/ivy.xml Mon Jul 2 15:25:55 2018 @@ -74,13 +74,9 @@ - - - + conf="compile->master"> + + http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1834854&r1=1834853&r2=1834854&view=diff == --- pig/trunk/ivy/libraries.properties (original) +++ pig/trunk/ivy/libraries.properties Mon Jul 2 15:25:55 2018 @@ -19,7 +19,7 @@ apacheant.version=1.7.1 apacherat.version=0.8 automaton.version=1.11-8 avro.version=1.7.5 -basjes-httpdlog-pigloader.version=2.4 +basjes-httpdlog-pigloader.version=5.0 commons-beanutils.version=1.7.0 commons-cli.version=1.2 commons-codec.version=1.4
svn commit: r1839568 - in /pig/trunk: ./ ivy/ src/org/apache/pig/backend/hadoop/hbase/ test/org/apache/pig/test/
Author: szita Date: Wed Aug 29 11:33:28 2018 New Revision: 1839568 URL: http://svn.apache.org/viewvc?rev=1839568&view=rev Log: PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml pig/trunk/ivy.xml pig/trunk/ivy/libraries.properties pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseTableInputFormat.java pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java pig/trunk/test/org/apache/pig/test/TestJobSubmission.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1839568&r1=1839567&r2=1839568&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Wed Aug 29 11:33:28 2018 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini) + PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via szita) PIG-4092: Predicate pushdown for Parquet (nkollar via rohini) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1839568&r1=1839567&r2=1839568&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Wed Aug 29 11:33:28 2018 @@ -1713,7 +1713,7 @@ + pattern="${ivy.lib.dir.spark}/[artifact]-[revision](-[classifier]).[ext]" conf="spark${sparkversion},hbase${hbaseversion}"/> Modified: pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1839568&r1=1839567&r2=1839568&view=diff == --- pig/trunk/ivy.xml (original) +++ pig/trunk/ivy.xml Wed Aug 29 11:33:28 2018 @@ -40,6 +40,7 @@ + @@ -308,6 +309,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -316,7 +478,6 @@ - @@ -420,6 +581,7 @@ + Modified: pig/trunk/ivy/libraries.properties URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1839568&r1=1839567&r2=1839568&view=diff == --- pig/trunk/ivy/libraries.properties (original) +++ pig/trunk/ivy/libraries.properties Wed Aug 29 11:33:28 2018 @@ -39,6 +39,7 @@ hadoop-common.version=2.7.3 hadoop-hdfs.version=2.7.3 hadoop-mapreduce.version=2.7.3 hbase1.version=1.2.4 +hbase2.version=2.0.0 hsqldb.version=2.4.0 hive.version=1.2.1 httpcomponents.version=4.4 @@ -64,7 +65,7 @@ antlr.version=3.4 stringtemplate.version=4.0.4 log4j.version=1.2.16 netty.version=3.6.6.Final -netty-all.version=4.0.23.Final +netty-all.version=4.1.1.Final rats-lib.version=0.5.1 slf4j-api.version=1.6.1 slf4j-log4j12.version=1.6.1 @@ -92,6 +93,7 @@ snappy.version=0.2 leveldbjni.version=1.8 curator.version=2.6.0 htrace.version=3.1.0-incubating +htrace4.version=4.0.1-incubating commons-lang3.version=3.6 scala-xml.version=1.0.5 glassfish.el.version=3.0.1-b08 \ No newline at end of file Modified: pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java?rev=1839568&r1=1839567&r2=1839568&view=diff == --- pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java Wed Aug 29 11:33:28 2018 @@ -45,7 +45,10 @@ import
svn commit: r1840299 - in /pig/trunk: BUILDING.md CHANGES.txt build.xml dev-support/docker/Dockerfile
Author: szita
Date: Fri Sep 7 13:33:33 2018
New Revision: 1840299
URL: http://svn.apache.org/viewvc?rev=1840299&view=rev
Log: PIG-5343: Upgrade developer build environment (nielsbasjes via szita)

Modified:
    pig/trunk/BUILDING.md
    pig/trunk/CHANGES.txt
    pig/trunk/build.xml
    pig/trunk/dev-support/docker/Dockerfile

Modified: pig/trunk/BUILDING.md
URL: http://svn.apache.org/viewvc/pig/trunk/BUILDING.md?rev=1840299&r1=1840298&r2=1840299&view=diff
==
--- pig/trunk/BUILDING.md (original)
+++ pig/trunk/BUILDING.md Fri Sep 7 13:33:33 2018
@@ -3,7 +3,7 @@ ## Requirements:
 * Unix System
-* JDK 1.7+
+* JDK 1.8+
 * Ant 1.8.1+
 * Findbugs 3.x+
 * Forrest 0.9 (for building the documentation)

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1840299&r1=1840298&r2=1840299&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Sep 7 13:33:33 2018
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
 IMPROVEMENTS
+PIG-5343: Upgrade developer build environment (nielsbasjes via szita)
+
 PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini)
 PIG-5344: Update Apache HTTPD LogParser to latest version (nielsbasjes via szita)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1840299&r1=1840298&r2=1840299&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Fri Sep 7 13:33:33 2018
@@ -952,7 +952,7 @@ - +
[The build.xml hunk body was an XML element and did not survive extraction.]

Modified: pig/trunk/dev-support/docker/Dockerfile
URL: http://svn.apache.org/viewvc/pig/trunk/dev-support/docker/Dockerfile?rev=1840299&r1=1840298&r2=1840299&view=diff
==
--- pig/trunk/dev-support/docker/Dockerfile (original)
+++ pig/trunk/dev-support/docker/Dockerfile Fri Sep 7 13:33:33 2018
@@ -17,7 +17,7 @@
 # Dockerfile for installing the necessary dependencies for building Apache Pig.
 # See BUILDING.md.
-FROM ubuntu:trusty
+FROM ubuntu:bionic
 # Define working directory.
 WORKDIR /root
@@ -29,16 +29,17 @@ RUN sed -i 's/# \(.*multiverse$\)/\1/g'
 apt-get install -y build-essential && \
 apt-get install -y software-properties-common && \
 apt-get install --no-install-recommends -y \
+sudo \
 git subversion \
 byobu htop man unzip vim \
 cabal-install \
 curl wget \
-openjdk-7-jdk \
+openjdk-8-jdk \
 ant ant-contrib ant-optional make maven \
 cmake gcc g++ protobuf-compiler \
 build-essential libtool \
 zlib1g-dev pkg-config libssl-dev \
-snappy libsnappy-dev \
+ubuntu-snappy ubuntu-snappy-cli libsnappy-dev \
 bzip2 libbz2-dev \
 libjansson-dev \
 fuse libfuse-dev \
@@ -47,11 +48,7 @@ RUN sed -i 's/# \(.*multiverse$\)/\1/g'
 rm -rf /var/lib/apt/lists/*
 # Define commonly used JAVA_HOME variable
-ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64
-
-# Fixing the Apache commons / Maven dependency problem under Ubuntu:
-# See http://wiki.apache.org/commons/VfsProblems
-RUN cd /usr/share/maven/lib && ln -s ../../java/commons-lang.jar .
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
 # Avoid out of memory errors in builds
 ENV MAVEN_OPTS -Xms256m -Xmx512m
@@ -64,12 +61,9 @@ RUN mkdir -p /opt/findbugs && \
 ENV FINDBUGS_HOME /opt/findbugs
 # Install Forrest in /usr/local/apache-forrest
-# Screenscrape the download page for a local mirror URL
-RUN cd /usr/local/ && \
-curl https://forrest.apache.org/mirrors.cgi | \
-fgrep href | fgrep apache-forrest-0.9 | \
-sed 's@^.*"\(http[^"]*apache-forrest-[^"]*.tar.gz\)".*@\1@' | \
-xargs -n1 -r wget
+# Download
+RUN cd /usr/local/ && wget "http://www.apache.org/dyn/closer.lua?action=download&filename=/forrest/apache-forrest-0.9-sources.tar.gz" -O "apache-forrest-0.9-sources.tar.gz"
+RUN cd /usr/local/ && wget "http://www.apache.org/dyn/closer.lua?action=download&filename=/forrest/apache-forrest-0.9-dependencies.tar.gz" -O "apache-forrest-0.9-dependencies.tar.gz"
 # Unpack Apache Forrest
 RUN cd /usr/local/ && \
svn commit: r1841248 - in /pig/trunk: CHANGES.txt build.xml
Author: szita Date: Tue Sep 18 18:56:27 2018 New Revision: 1841248 URL: http://svn.apache.org/viewvc?rev=1841248&view=rev Log: PIG-5358: Remove hive-contrib jar from lib directory (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1841248&r1=1841247&r2=1841248&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Sep 18 18:56:27 2018 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5358: Remove hive-contrib jar from lib directory (szita) + PIG-5343: Upgrade developer build environment (nielsbasjes via szita) PIG-5191: Pig HBase 2.0.0 support (nkollar via szita, reviewed by rohini) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1841248&r1=1841247&r2=1841248&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Tue Sep 18 18:56:27 2018 @@ -731,7 +731,7 @@ - +
svn commit: r1850245 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/io/InterRecordReader.java test/org/apache/pig/test/TestBinInterSedes.java
Author: szita Date: Thu Jan 3 15:54:11 2019 New Revision: 1850245 URL: http://svn.apache.org/viewvc?rev=1850245&view=rev Log: PIG-5373: InterRecordReader might skip records if certain sync markers are used (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1850245&r1=1850244&r2=1850245&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Thu Jan 3 15:54:11 2019 @@ -88,6 +88,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5373: InterRecordReader might skip records if certain sync markers are used (szita) + PIG-5370: Union onschema + columnprune dropping used fields (knoguchi) PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash (wlauer via rohini) Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1850245&r1=1850244&r2=1850245&view=diff == --- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original) +++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Thu Jan 3 15:54:11 2019 @@ -20,6 +20,7 @@ package org.apache.pig.impl.io; import java.io.DataInputStream; import java.io.IOException; +import org.apache.commons.collections4.queue.CircularFifoQueue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -89,35 +90,34 @@ public class InterRecordReader extends R * @return true if marker was observed, false if EOF or EndOfSplit was reached * @throws IOException */ - private boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException { + public boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException { int b = Integer.MIN_VALUE; -outer:while (b != -1) { - if (b != syncMarker[0]) { + CircularFifoQueue queue = new CircularFifoQueue(syncMarker.length); + outer:while (b != -1) { + //There may be a case where we read through a whole split without a marker, then we shouldn't proceed + // because the records are from the next split which another reader would pick up too + //One exception of reading past split end is if at least the first byte of the marker was seen before split + // end. 
+ if (in.getPosition() >= (end+syncMarker.length-1)) { + return false; + } + b = in.read(); - //There may be a case where we read through a whole split without a marker, then we shouldn't proceed - // because the records are from the next split which another reader would pick up too - if (in.getPosition() >= end) { - return false; - } - b = in.read(); - if ((byte) b != syncMarker[0] && b != -1) { - continue; - } - if (b == -1) return false; + //EOF reached + if (b == -1) return false; + + queue.add(b); + if (queue.size() != queue.maxSize()) { + //Not enough bytes read yet + continue outer; } - int i = 1; - while (i < syncMarker.length) { - b = in.read(); - if (b == -1) return false; - if ((byte) b != syncMarker[i]) { - if (in.getPosition() > end) { - //Again we should not read past the split end, only if at least the first byte of marker was seen before it - return false; - } + int i = 0; + for (Integer seenByte : queue){ + if (syncMarker[i++] != seenByte.byteValue()) { continue outer; } - ++i; } + //Found marker: queue content equals sync marker lastSyncPos = in.getPosition(); return true; } Modified: pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java?rev=1850245&r1=1850244&r2=1850245&view=diff == --- pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java (original) +++ pig/trunk/test/org/apache/pig/test/TestBinInterSedes.java Thu Jan 3 15:54:11 2019 @@ -18,6 +18,7 @@ package org.apache.pig.test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.
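The reworked skipUntilMarkerOrSplitEndOrEOF() above scans the stream one byte at a time, keeps the last syncMarker.length bytes in a CircularFifoQueue, and declares a hit when the queue contents equal the marker. The stand-alone sketch below shows the same sliding-window technique; the split-end bookkeeping of InterRecordReader is deliberately left out and the class and method names are invented for the illustration.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.collections4.queue.CircularFifoQueue;

/**
 * Sliding-window search for a sync marker, in the spirit of PIG-5373: keep the last
 * marker-length bytes in a bounded FIFO and compare it to the marker after every read.
 */
public class SyncMarkerScanSketch {

    /** Returns true if the marker occurs in the remaining stream, false on EOF. */
    static boolean skipUntilMarker(InputStream in, byte[] marker) throws IOException {
        CircularFifoQueue<Integer> window = new CircularFifoQueue<>(marker.length);
        int b;
        outer:
        while ((b = in.read()) != -1) {
            window.add(b);                        // oldest byte falls out automatically
            if (window.size() != marker.length) {
                continue;                         // not enough bytes buffered yet
            }
            int i = 0;
            for (Integer seen : window) {         // iterates oldest to newest
                if (marker[i++] != seen.byteValue()) {
                    continue outer;               // mismatch: keep scanning
                }
            }
            return true;                          // window content equals the marker
        }
        return false;                             // EOF without a marker
    }

    public static void main(String[] args) throws IOException {
        byte[] marker = {0x01, 0x02, 0x03};
        byte[] data = {0x00, 0x01, 0x01, 0x02, 0x03, 0x7f};
        System.out.println(skipUntilMarker(new ByteArrayInputStream(data), marker)); // true
    }
}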
svn commit: r1850723 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/io/InterRecordReader.java
Author: szita Date: Tue Jan 8 10:37:57 2019 New Revision: 1850723 URL: http://svn.apache.org/viewvc?rev=1850723&view=rev Log: PIG-5374: Use CircularFifoBuffer in InterRecordReader (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1850723&r1=1850722&r2=1850723&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Jan 8 10:37:57 2019 @@ -88,6 +88,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5374: Use CircularFifoBuffer in InterRecordReader (szita) + PIG-5373: InterRecordReader might skip records if certain sync markers are used (szita) PIG-5370: Union onschema + columnprune dropping used fields (knoguchi) Modified: pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java?rev=1850723&r1=1850722&r2=1850723&view=diff == --- pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java (original) +++ pig/trunk/src/org/apache/pig/impl/io/InterRecordReader.java Tue Jan 8 10:37:57 2019 @@ -20,7 +20,7 @@ package org.apache.pig.impl.io; import java.io.DataInputStream; import java.io.IOException; -import org.apache.commons.collections4.queue.CircularFifoQueue; +import org.apache.commons.collections.buffer.CircularFifoBuffer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -92,7 +92,7 @@ public class InterRecordReader extends R */ public boolean skipUntilMarkerOrSplitEndOrEOF() throws IOException { int b = Integer.MIN_VALUE; - CircularFifoQueue queue = new CircularFifoQueue(syncMarker.length); + CircularFifoBuffer queue = new CircularFifoBuffer(syncMarker.length); outer:while (b != -1) { //There may be a case where we read through a whole split without a marker, then we shouldn't proceed // because the records are from the next split which another reader would pick up too @@ -107,13 +107,13 @@ public class InterRecordReader extends R if (b == -1) return false; queue.add(b); - if (queue.size() != queue.maxSize()) { + if (!queue.isFull()) { //Not enough bytes read yet continue outer; } int i = 0; - for (Integer seenByte : queue){ - if (syncMarker[i++] != seenByte.byteValue()) { + for (Object seenByte : queue){ + if (syncMarker[i++] != ((Integer)seenByte).byteValue()) { continue outer; } }
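Functionally this is the same window check; the pre-generics CircularFifoBuffer from commons-collections 3 (presumably preferred here because it is already on Pig's classpath, unlike commons-collections4) stores plain Objects, so isFull() replaces the size()/maxSize() comparison and each element has to be cast back to Integer. A minimal sketch of just that comparison, under those assumptions:

import org.apache.commons.collections.buffer.CircularFifoBuffer;

/**
 * The window-equals-marker check after PIG-5374: isFull() gates the comparison and
 * the untyped buffer forces an explicit cast per element.
 */
public class CircularFifoBufferSketch {
    static boolean windowMatches(CircularFifoBuffer window, byte[] marker) {
        if (!window.isFull()) {
            return false;                  // not enough bytes buffered yet
        }
        int i = 0;
        for (Object seen : window) {       // iterates oldest to newest
            if (marker[i++] != ((Integer) seen).byteValue()) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        CircularFifoBuffer window = new CircularFifoBuffer(3);
        for (int b : new int[]{0x01, 0x02, 0x03}) {
            window.add(b);                 // autoboxed to Integer; oldest entry evicted when full
        }
        System.out.println(windowMatches(window, new byte[]{0x01, 0x02, 0x03})); // true
    }
}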
svn commit: r1872871 - in /pig/trunk: CHANGES.txt build.xml ivy/ivysettings.xml test/e2e/pig/build.xml
Author: szita Date: Thu Jan 16 10:20:43 2020 New Revision: 1872871 URL: http://svn.apache.org/viewvc?rev=1872871&view=rev Log: PIG-5395: Pig build is failing due to maven repo access point change (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/build.xml pig/trunk/ivy/ivysettings.xml pig/trunk/test/e2e/pig/build.xml Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1872871&r1=1872870&r2=1872871&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Thu Jan 16 10:20:43 2020 @@ -96,6 +96,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5395: Pig build is failing due to maven repo access point change (szita) + PIG-5375: NullPointerException for multi-level self unions with Tez UnionOptimizer (knoguchi) PIG-5386: Pig local mode with bundled Hadoop broken (nkollar) Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1872871&r1=1872870&r2=1872871&view=diff == --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Thu Jan 16 10:20:43 2020 @@ -251,7 +251,7 @@ https://repository.apache.org"/> -http://repo2.maven.org/maven2"/> +https://repo1.maven.org/maven2"/> Modified: pig/trunk/ivy/ivysettings.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy/ivysettings.xml?rev=1872871&r1=1872870&r2=1872871&view=diff == --- pig/trunk/ivy/ivysettings.xml (original) +++ pig/trunk/ivy/ivysettings.xml Thu Jan 16 10:20:43 2020 @@ -21,7 +21,7 @@ see http://www.jayasoft.org/ivy/doc/configuration -->
svn commit: r1873947 - in /pig/trunk: ./ ivy/ shims/src/hive1/ shims/src/hive1/org/ shims/src/hive1/org/apache/ shims/src/hive1/org/apache/pig/ shims/src/hive1/org/apache/pig/hive/ shims/src/hive3/ sh
Author: szita
Date: Wed Feb 12 15:27:05 2020
New Revision: 1873947
URL: http://svn.apache.org/viewvc?rev=1873947&view=rev
Log: PIG-4764: Make Pig work with Hive 3.1 (szita)

Added:
    pig/trunk/shims/src/hive1/
    pig/trunk/shims/src/hive1/org/
    pig/trunk/shims/src/hive1/org/apache/
    pig/trunk/shims/src/hive1/org/apache/pig/
    pig/trunk/shims/src/hive1/org/apache/pig/hive/
    pig/trunk/shims/src/hive1/org/apache/pig/hive/HiveShims.java
    pig/trunk/shims/src/hive3/
    pig/trunk/shims/src/hive3/org/
    pig/trunk/shims/src/hive3/org/apache/
    pig/trunk/shims/src/hive3/org/apache/pig/
    pig/trunk/shims/src/hive3/org/apache/pig/hive/
    pig/trunk/shims/src/hive3/org/apache/pig/hive/HiveShims.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/build.xml
    pig/trunk/ivy.xml
    pig/trunk/ivy/libraries.properties
    pig/trunk/src/org/apache/pig/Expression.java
    pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java
    pig/trunk/src/org/apache/pig/builtin/HiveUDFBase.java
    pig/trunk/src/org/apache/pig/builtin/OrcStorage.java
    pig/trunk/src/org/apache/pig/impl/util/hive/HiveUtils.java
    pig/trunk/src/org/apache/pig/newplan/FilterExtractor.java
    pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java
    pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java
    pig/trunk/test/org/apache/pig/test/TestLoaderStorerShipCacheFiles.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1873947&r1=1873946&r2=1873947&view=diff
==
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Feb 12 15:27:05 2020
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley
 IMPROVEMENTS
+PIG-4764: Make Pig work with Hive 3.1 (szita)
+
 PIG-5352: Please add OWASP Dependency Check to the build ivy.xml (knoguchi)
 PIG-5385: Skip calling extra gc() before spilling large bag when unnecessary (knoguchi)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1873947&r1=1873946&r2=1873947&view=diff
==
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Wed Feb 12 15:27:05 2020
@@ -154,7 +154,7 @@ - +
@@ -241,6 +241,7 @@ +
@@ -248,6 +249,7 @@ + https://repository.apache.org"/>
@@ -353,6 +355,7 @@ +
@@ -568,8 +571,8 @@ *** Building Main Sources *** *** To compile with all warnings enabled, supply -Dall.warnings=1 on command line *** *** Else, you will only be warned about deprecations *** -*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} *** -*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ; Hive version used: ${hiveversion} *** +
@@ -734,6 +737,7 @@ +
@@ -1161,6 +1165,10 @@ + + + +
@@ -1236,6 +1244,7 @@ +
@@ -1723,7 +1732,7 @@ - *** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion} and HBase ${hbaseversion} *** + *** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion}, HBase ${hbaseversion}, Hive ${hiveversion} ***
[The build.xml hunk bodies were XML elements and mostly did not survive extraction; only the hunk headers, bare +/- markers and the echoed *** status messages remain.]

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1873947&r1=1873946&r2=1873947&view=diff
==
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Wed Feb 12 15:27:05 2020
@@ -31,7 +31,7 @@ - +
@@ -43,6 +43,8 @@ + +
@@ -525,23 +527,48 @@ - + + + + + + + + + + + + + +conf="hive3->master" /> +conf="hive3->master" /> +conf="hive3->master" /> + +conf="hive3->master" /> + + + + - + +
[The ivy.xml dependency elements were likewise lost in extraction; the surviving conf="hive3->master" fragments are the tails of the new Hive 3 dependency declarations.]

Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1873947&r1=1873946&r2=1873947&view=diff
==
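The new shims/src/hive1 and shims/src/hive3 trees each ship a class named org.apache.pig.hive.HiveShims, and the build compiles only the tree matching ${hiveversion}, so the rest of Pig can call a single API regardless of the Hive line. The sketch below illustrates that shim pattern; the method shown is hypothetical and is not the actual HiveShims interface from the patch.

// --- shims/src/hive1/org/apache/pig/hive/HiveShims.java (hypothetical body) ---
package org.apache.pig.hive;

import org.apache.hadoop.hive.serde2.io.DateWritable;

public class HiveShims {
    /** On the Hive 1.x line the serde date type is still DateWritable. */
    public static Class<?> dateWritableClass() {
        return DateWritable.class;
    }
}

// --- shims/src/hive3/org/apache/pig/hive/HiveShims.java: same fully-qualified name,
// different body, selected at build time (shown here only as a comment) ---
// package org.apache.pig.hive;
// import org.apache.hadoop.hive.serde2.io.DateWritableV2;
// public class HiveShims {
//     public static Class<?> dateWritableClass() { return DateWritableV2.class; }
// }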
svn commit: r1876880 - in /pig/trunk: CHANGES.txt src/org/apache/pig/tools/parameters/ParamLoader.jj src/org/apache/pig/tools/parameters/PigFileParser.jj
Author: szita Date: Thu Apr 23 11:25:37 2020 New Revision: 1876880 URL: http://svn.apache.org/viewvc?rev=1876880&view=rev Log: PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash addendum (szita) Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1876880&r1=1876879&r2=1876880&view=diff == --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Thu Apr 23 11:25:37 2020 @@ -100,6 +100,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash addendum (szita) + PIG-5395: Pig build is failing due to maven repo access point change (szita) PIG-5375: NullPointerException for multi-level self unions with Tez UnionOptimizer (knoguchi) Modified: pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj?rev=1876880&r1=1876879&r2=1876880&view=diff == --- pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj (original) +++ pig/trunk/src/org/apache/pig/tools/parameters/ParamLoader.jj Thu Apr 23 11:25:37 2020 @@ -43,16 +43,6 @@ public class ParamLoader { public void setContext(PreprocessorContext pc) { this.pc = pc; } - -private static String unquote(String s) -{ -if (s.charAt(0) == '\'' && s.charAt(s.length()-1) == '\'') -return s.substring(1, s.length()-1); -else if (s.charAt(0) == '"' && s.charAt(s.length()-1) == '"') -return s.substring(1, s.length()-1); -else -return s; -} } PARSER_END(ParamLoader) @@ -81,16 +71,52 @@ TOKEN : | | - -| | +} + +MORE : +{ + : DOUBLE_QUOTE +| + : SINGLE_QUOTE +} + + TOKEN : +{ + { +image.deleteCharAt(image.length()-1); +image.deleteCharAt(0); +matchedToken.image = image.toString(); +} : DEFAULT +} + TOKEN : +{ + { +image.deleteCharAt(image.length()-1); +image.deleteCharAt(0); +matchedToken.image = image.toString(); +} : DEFAULT +} + + MORE : +{ + { image.replace(image.length()-2, image.length(), "\""); } +} + + MORE : +{ + { image.replace(image.length()-2, image.length(), "'"); } } + MORE: +{ +< (~[]) > +} boolean Parse() throws IOException : @@ -117,7 +143,10 @@ boolean Parse() throws IOException : | val= { pc.processShellCmd(id.image , val.image);} | -val= { s = unquote(val.image); pc.processOrdLine(id.image , s); } +val= { pc.processOrdLine(id.image , val.image); } +| +val= { pc.processOrdLine(id.image , val.image); } + ) ) | Modified: pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj?rev=1876880&r1=1876879&r2=1876880&view=diff == --- pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj (original) +++ pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj Thu Apr 23 11:25:37 2020 @@ -223,25 +223,22 @@ TOKEN_MGR_DECLS : { }: DEFAULT } -TOKEN : +< DEFAULT, IN_DECLARE > TOKEN : { - + : DEFAULT | | -} - -// comments(single line and multi-line) -TOKEN : -{ - +> } -TOKEN: +< DEFAULT, IN_DECLARE >TOKEN: { <#LETTER : ["a"-"z", "A"-"Z"] > | @@ -254,22 +251,31 @@ TOKEN: TOKEN : { - + : IN_DECLARE | - + : IN_DECLARE +| + : IN_REGISTER +| + } - -TOKEN : +< DEFAULT, IN_DECLARE > TOKEN: { - : IN_REGISTER -| )*( | | )*> | - -| - + : DEFAULT +} + +< IN_DECLARE > MORE : +{ + : DOUBLE_QUOTE | + : SINGLE_QUOTE +} + +< DEFAULT, IN_DECLARE > TOKEN: +{ // see others() rule for use of 
OTHER and NOT_OTHER_CHAR // others() is supposed to match 'everything else'. To ensure that others() // don't swallow other(all the ones above) tokens, it uses two tokens OTHER and NOT_OTHER_CHAR @@ -281,6 +287,39 @@ TOKEN : } +< DOUBLE_QUOTE > TOKEN : +{ + { +image.deleteCharAt(image.length()-1)
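The grammar changes above (mangled here because the JavaCC token declarations were stripped as markup) replace the old unquote() helper with dedicated DOUBLE_QUOTE/SINGLE_QUOTE lexer states: the surviving actions drop the surrounding quotes from the token image and collapse an escaped quote (\" or \') into the bare quote, so any other backslash in a shell-command result passes through substitution untouched. The plain-Java rendering below is a hedged illustration of that behaviour, not the grammar itself.

/**
 * Hypothetical stand-alone equivalent of the new lexer behaviour: strip the surrounding
 * quotes and collapse only an escaped quote, preserving every other backslash.
 */
public class QuotedParamSketch {
    static String dequote(String s, char quote) {
        // assumes s starts and ends with the given quote character
        StringBuilder out = new StringBuilder();
        for (int i = 1; i < s.length() - 1; i++) {
            char c = s.charAt(i);
            if (c == '\\' && i + 1 < s.length() - 1 && s.charAt(i + 1) == quote) {
                out.append(quote);   // \" -> "  (or \' -> ')
                i++;
            } else {
                out.append(c);       // any other backslash is preserved as-is
            }
        }
        return out.toString();
    }

    public static void main(String[] args) {
        System.out.println(dequote("\"a\\\\b\"", '"'));        // prints a\\b : backslashes kept
        System.out.println(dequote("\"say \\\"hi\\\"\"", '"')); // prints say "hi"
    }
}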