svn commit: r904607 - in /hadoop/pig/branches/load-store-redesign/src/org/apache/pig: StoreFunc.java backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java impl/logicalLayer/LOStore.java impl/logicalLayer/parser/QueryParser.jjt
Author: pradeepkth Date: Fri Jan 29 19:40:17 2010 New Revision: 904607 URL: http://svn.apache.org/viewvc?rev=904607&view=rev Log: PIG-1090: additional commit to enable storeFunc interacting with metadata systems (daijy via pradeepkth) Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOStore.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java?rev=904607&r1=904606&r2=904607&view=diff == --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java Fri Jan 29 19:40:17 2010 @@ -73,7 +73,12 @@ * <b>store A into 'bla'</b> * then 'bla' is the location. This location should be either a file name * or a URI. If it does not have a URI scheme Pig will assume it is a - * filename. This will be called multiple times during execution on the backend. + * filename. + * This method will be called in the frontend and backend multiple times. Implementations + * should bear in mind that this method is called multiple times and should + * ensure there are no inconsistent side effects due to the multiple calls. + * + * @param location Location indicated in store statement. * @param job The {@link Job} object * @throws IOException if the location is not valid. 
Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=904607r1=904606r2=904607view=diff == --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Fri Jan 29 19:40:17 2010 @@ -446,6 +446,7 @@ StoreFunc sFunc = st.getStoreFunc(); if (st.getSchema()!=null) sFunc.checkSchema(new ResourceSchema(st.getSchema(), st.getSortInfo())); +sFunc.setStoreLocation(st.getSFile().getFileName(), nwJob); } for (POStore st: reduceStores) { @@ -453,6 +454,7 @@ StoreFunc sFunc = st.getStoreFunc(); if (st.getSchema()!=null) sFunc.checkSchema(new ResourceSchema(st.getSchema(), st.getSortInfo())); +sFunc.setStoreLocation(st.getSFile().getFileName(), nwJob); } // the OutputFormat we report to Hadoop is always PigOutputFormat Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOStore.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOStore.java?rev=904607r1=904606r2=904607view=diff == --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOStore.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOStore.java Fri Jan 29 19:40:17 2010 @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; +import org.apache.pig.FuncSpec; import org.apache.pig.StoreFunc; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileSpec; @@ -74,7 +75,7 @@ try { mStoreFunc = (StoreFunc) PigContext.instantiateFuncFromSpec(outputFileSpec.getFuncSpec()); 
this.mAlias = alias; - this.signature = constructSignature(mAlias, mOutputFile.getFileName(), mOutputFile.getFuncSpec().getCtorArgs()); + this.signature = constructSignature(mAlias, outputFileSpec.getFileName(), mOutputFile.getFuncSpec()); mStoreFunc.setStoreFuncUDFContextSignature(this.signature); } catch (Exception e) { IOException ioe = new IOException(e.getMessage()); @@ -83,16 +84,8 @@ } } -private String constructSignature(String alias, String filename, String[] args) { -String s = alias+"_"+filename+"_"; -if (args!=null) { -for (int
svn commit: r904700 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/executionengine/ExecJob.java src/org/apache/pig/backend/hadoop/executionengine/HJob.java
Author: hashutosh Date: Sat Jan 30 00:54:42 2010 New Revision: 904700 URL: http://svn.apache.org/viewvc?rev=904700view=rev Log: PIG-977: exit status does not account for JOB_STATUS.TERMINATED (ashutoshc) Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HJob.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=904700r1=904699r2=904700view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Jan 30 00:54:42 2010 @@ -24,6 +24,8 @@ IMPROVEMENTS +PIG-977: exit status does not account for JOB_STATUS.TERMINATED (ashutoshc) + PIG-1192: Pig 0.6 Docs fixes (chandec via olgan) PIG-1177: Pig 0.6 Docs - Zebra docs (chandec via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java?rev=904700r1=904699r2=904700view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java Sat Jan 30 00:54:42 2010 @@ -20,7 +20,6 @@ import java.util.Iterator; import java.util.Properties; -import java.util.Map; import java.io.OutputStream; import org.apache.pig.data.Tuple; @@ -35,12 +34,8 @@ public interface ExecJob { public enum JOB_STATUS { -QUEUED, -RUNNING, -SUSPENDED, -TERMINATED, FAILED, -COMPLETED, +COMPLETED } public static final String PROGRESS_KEY = job.progress; @@ -75,7 +70,7 @@ * * @return configuration information for the execution engine */ -public Properties getContiguration(); +public Properties getConfiguration(); /** * Can be information about the state (not submitted, e.g. 
the execute method Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HJob.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HJob.java?rev=904700r1=904699r2=904700view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HJob.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HJob.java Sat Jan 30 00:54:42 2010 @@ -21,7 +21,6 @@ import java.io.OutputStream; import java.io.InputStream; import java.util.Iterator; -import java.util.Map; import java.util.Properties; import org.apache.commons.logging.Log; @@ -138,7 +137,7 @@ }; } -public Properties getContiguration() { +public Properties getConfiguration() { Properties props = new Properties(); return props; }
svn commit: r904710 - in /hadoop/pig/branches/branch-0.6: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOForEach.java test/org/apache/pig/test/TestPruneColumn.java
Author: daijy Date: Sat Jan 30 02:19:27 2010 New Revision: 904710 URL: http://svn.apache.org/viewvc?rev=904710view=rev Log: PIG-1210: fieldsToRead send the same fields more than once in some cases Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOForEach.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPruneColumn.java Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=904710r1=904709r2=904710view=diff == --- hadoop/pig/branches/branch-0.6/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/CHANGES.txt Sat Jan 30 02:19:27 2010 @@ -281,6 +281,8 @@ PIG-1195: POSort should take care of sort order (daijy) +PIG-1210: fieldsToRead send the same fields more than once in some cases (daijy) + Release 0.5.0 INCOMPATIBLE CHANGES Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOForEach.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOForEach.java?rev=904710r1=904709r2=904710view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOForEach.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOForEach.java Sat Jan 30 02:19:27 2010 @@ -866,7 +866,8 @@ ArrayListPairInteger, Integer inputList = new ArrayListPairInteger, Integer(); for (LOProject project : projectFinder.getProjectSet()) { for (int inputColumn : project.getProjection()) { -inputList.add(new PairInteger, Integer(0, inputColumn)); +if (!inputList.contains(new PairInteger, Integer(0, inputColumn))) +inputList.add(new PairInteger, Integer(0, inputColumn)); } } if (inputList.size()==0) Modified: hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPruneColumn.java URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPruneColumn.java?rev=904710r1=904709r2=904710view=diff == --- hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPruneColumn.java (original) +++ hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPruneColumn.java Sat Jan 30 02:19:27 2010 @@ -33,8 +33,12 @@ import org.apache.log4j.SimpleLayout; import org.apache.pig.ExecType; import org.apache.pig.FilterFunc; +import org.apache.pig.LoadFunc; import org.apache.pig.PigServer; +import org.apache.pig.LoadFunc.RequiredFieldList; +import org.apache.pig.builtin.PigStorage; import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.optimizer.PruneColumns; import org.junit.Before; import org.junit.Test; @@ -55,8 +59,24 @@ File tmpFile9; File tmpFile10; File logFile; -Logger logger; +static public class PigStorageWithTrace extends PigStorage { + +/** + * @param delimiter + */ +public PigStorageWithTrace() { +super(); +} +@Override +public LoadFunc.RequiredFieldResponse fieldsToRead(RequiredFieldList requiredFieldList) throws FrontendException { +LoadFunc.RequiredFieldResponse response = super.fieldsToRead(requiredFieldList); +Logger logger = Logger.getLogger(this.getClass()); +logger.info(requiredFieldList); +return response; +} + +} private static final String simpleEchoStreamingCommand; static { if (System.getProperty(os.name).toUpperCase().startsWith(WINDOWS)) @@ -76,7 +96,7 @@ @Before @Override public void setUp() throws Exception{ -logger = Logger.getLogger(PruneColumns.class); +Logger logger = Logger.getLogger(PruneColumns.class); logger.removeAllAppenders(); logger.setLevel(Level.INFO); SimpleLayout layout = new SimpleLayout(); @@ -84,6 +104,10 @@ FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0); logger.addAppender(appender); +Logger pigStorageWithTraceLogger = 
Logger.getLogger(PigStorageWithTrace.class); +pigStorageWithTraceLogger.setLevel(Level.INFO); +pigStorageWithTraceLogger.addAppender(appender); + pigServer = new PigServer(local); //pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); tmpFile1 = File.createTempFile(prune, txt); @@ -1673,4 +1697,27 @@ assertTrue(checkLogFileMessage(new String[]{Columns pruned for A: $0, $2, No map keys pruned for A, No
svn commit: r904712 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOForEach.java test/org/apache/pig/test/TestPruneColumn.java
Author: daijy Date: Sat Jan 30 02:22:30 2010 New Revision: 904712 URL: http://svn.apache.org/viewvc?rev=904712view=rev Log: PIG-1210: fieldsToRead send the same fields more than once in some cases Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOForEach.java hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=904712r1=904711r2=904712view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Jan 30 02:22:30 2010 @@ -367,6 +367,8 @@ PIG-1195: POSort should take care of sort order (daijy) +PIG-1210: fieldsToRead send the same fields more than once in some cases (daijy) + Release 0.5.0 INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOForEach.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOForEach.java?rev=904712r1=904711r2=904712view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOForEach.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOForEach.java Sat Jan 30 02:22:30 2010 @@ -861,7 +861,8 @@ ArrayListPairInteger, Integer inputList = new ArrayListPairInteger, Integer(); for (LOProject project : projectFinder.getProjectSet()) { for (int inputColumn : project.getProjection()) { -inputList.add(new PairInteger, Integer(0, inputColumn)); +if (!inputList.contains(new PairInteger, Integer(0, inputColumn))) +inputList.add(new PairInteger, Integer(0, inputColumn)); } } if (inputList.size()==0) Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java?rev=904712r1=904711r2=904712view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Sat Jan 30 
02:22:30 2010 @@ -33,8 +33,12 @@ import org.apache.log4j.SimpleLayout; import org.apache.pig.ExecType; import org.apache.pig.FilterFunc; +import org.apache.pig.LoadFunc; import org.apache.pig.PigServer; +import org.apache.pig.LoadFunc.RequiredFieldList; +import org.apache.pig.builtin.PigStorage; import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.optimizer.PruneColumns; import org.junit.Before; import org.junit.Test; @@ -55,8 +59,24 @@ File tmpFile9; File tmpFile10; File logFile; -Logger logger; +static public class PigStorageWithTrace extends PigStorage { + +/** + * @param delimiter + */ +public PigStorageWithTrace() { +super(); +} +@Override +public LoadFunc.RequiredFieldResponse fieldsToRead(RequiredFieldList requiredFieldList) throws FrontendException { +LoadFunc.RequiredFieldResponse response = super.fieldsToRead(requiredFieldList); +Logger logger = Logger.getLogger(this.getClass()); +logger.info(requiredFieldList); +return response; +} + +} private static final String simpleEchoStreamingCommand; static { if (System.getProperty(os.name).toUpperCase().startsWith(WINDOWS)) @@ -76,7 +96,7 @@ @Before @Override public void setUp() throws Exception{ -logger = Logger.getLogger(PruneColumns.class); +Logger logger = Logger.getLogger(PruneColumns.class); logger.removeAllAppenders(); logger.setLevel(Level.INFO); SimpleLayout layout = new SimpleLayout(); @@ -84,6 +104,10 @@ FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0); logger.addAppender(appender); +Logger pigStorageWithTraceLogger = Logger.getLogger(PigStorageWithTrace.class); +pigStorageWithTraceLogger.setLevel(Level.INFO); +pigStorageWithTraceLogger.addAppender(appender); + pigServer = new PigServer(local); //pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); tmpFile1 = File.createTempFile(prune, txt); @@ -1799,4 +1823,27 @@ assertTrue(checkLogFileMessage(new String[]{Columns 
pruned for A: $1, No map keys pruned for A})); } + +// See PIG-1210 +@Test +public void testFieldsToReadDuplicatedEntry() throws Exception { +pigServer.registerQuery(A = load '+ Util.generateURI(tmpFile1.toString()) + ' using
svn commit: r904713 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/plan/OperatorPlan.java test/org/apache/pig/test/TestOperatorPlan.java
Author: daijy Date: Sat Jan 30 02:24:57 2010 New Revision: 904713 URL: http://svn.apache.org/viewvc?rev=904713view=rev Log: PIG-1212: LogicalPlan.replaceAndAddSucessors produce wrong result when successors are null Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/plan/OperatorPlan.java hadoop/pig/trunk/test/org/apache/pig/test/TestOperatorPlan.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=904713r1=904712r2=904713view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Jan 30 02:24:57 2010 @@ -157,6 +157,8 @@ PIG-1189: StoreFunc UDF should ship to the backend automatically without register (daijy) +PIG-1212: LogicalPlan.replaceAndAddSucessors produce wrong result when successors are null (daijy) + Release 0.6.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/impl/plan/OperatorPlan.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/plan/OperatorPlan.java?rev=904713r1=904712r2=904713view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/plan/OperatorPlan.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/plan/OperatorPlan.java Sat Jan 30 02:24:57 2010 @@ -864,7 +864,9 @@ } } mFromEdges.removeKey(node); - mFromEdges.put(node,newSuccessors); + if (!newSuccessors.isEmpty()) { + mFromEdges.put(node,newSuccessors); + } } // removes entry for predecessor in list of predecessors of node, @@ -885,7 +887,9 @@ } } mToEdges.removeKey(node); - mToEdges.put(node,newPredecessors); + if (!newPredecessors.isEmpty()) { + mToEdges.put(node,newPredecessors); + } } /** Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestOperatorPlan.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestOperatorPlan.java?rev=904713r1=904712r2=904713view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestOperatorPlan.java (original) +++ 
hadoop/pig/trunk/test/org/apache/pig/test/TestOperatorPlan.java Sat Jan 30 02:24:57 2010 @@ -17,6 +17,8 @@ */ package org.apache.pig.test; +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -2948,5 +2950,38 @@ } +// See PIG-1212 +@Test +public void testPushBefore2() throws Exception { +TPlan plan = new TPlan(); +TOperator[] ops = new TOperator[6]; + +ops[0] = new SingleOperator(Load0); +ops[1] = new SingleOperator(Load1); +ops[2] = new SingleOperator(ForEach0); +ops[3] = new SingleOperator(ForEach1); +ops[4] = new MultiOperator(Join); +ops[5] = new SingleOperator(Filter); + +for (int i=0;i6;i++) + plan.add(ops[i]); + +plan.connect(ops[0], ops[2]); +plan.connect(ops[1], ops[3]); +plan.connect(ops[2], ops[4]); +plan.connect(ops[3], ops[4]); +plan.connect(ops[4], ops[5]); + +try { +plan.pushBefore(ops[4], ops[5], 0); +ByteArrayOutputStream baos = new ByteArrayOutputStream(); +PrintStream ps = new PrintStream(baos); +PlanPrinterTOperator, TPlan planPrinter = new PlanPrinterTOperator, TPlan(ps, plan); +planPrinter.visit(); +assertFalse(baos.toString().equals()); +} catch (PlanException pe) { +} +} + }