Author: gates Date: Thu Aug 19 04:33:33 2010 New Revision: 987014 URL: http://svn.apache.org/viewvc?rev=987014&view=rev Log: PIG-1404: added PigUnit, a framework for building unit tests of Pig Latin scripts.
Added: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml hadoop/pig/trunk/test/data/ hadoop/pig/trunk/test/data/pigunit/ hadoop/pig/trunk/test/data/pigunit/top_queries.pig hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt hadoop/pig/trunk/test/org/apache/pig/pigunit/ hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java hadoop/pig/trunk/test/org/apache/pig/test/pigunit/ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java hadoop/pig/trunk/test/pigunit-tests Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/build.xml hadoop/pig/trunk/ivy.xml hadoop/pig/trunk/ivy/libraries.properties hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=987014&r1=987013&r2=987014&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Aug 19 04:33:33 2010 @@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu IMPROVEMENTS +PIG-1404: added PigUnit, a framework fo building unit tests of Pig Latin scripts (romainr via gates) + PIG-1452: to remove hadoop20.jar from lib and use hadoop from the apache maven repo. 
(rding) Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=987014&r1=987013&r2=987014&view=diff ============================================================================== --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Thu Aug 19 04:33:33 2010 @@ -84,6 +84,10 @@ <property name="test.unit.file" value="${test.src.dir}/unit-tests"/> <property name="test.smoke.file" value="${test.src.dir}/smoke-tests"/> <property name="test.all.file" value="${test.src.dir}/all-tests"/> + <property name="pigunit.jarfile" value="pigunit.jar" /> + <property name="test.pigunit.src.dir" value="${test.src.dir}/org/apache/pig/test/pigunit" /> + <property name="commons-lang.jarfile" value="commons-lang-2.4.jar" /> + <property name="test.pigunit.file" value="${test.src.dir}/pigunit-tests"/> <!-- test configuration, use ${user.home}/build.properties to configure values --> @@ -607,6 +611,7 @@ <exclude name="**/TestOrderBy2.java" /> <exclude name="**/TestPi.java" /> <exclude name="**/nightly/**" /> + <!-- <exclude name="**/pigunit/**" /> --> <exclude name="**/${exclude.testcase}.java" if="exclude.testcase" /> </fileset> </batchtest> @@ -638,6 +643,22 @@ </target> <!-- ================================================================== --> + <!-- Pigunit --> + <!-- ================================================================== --> + + <target depends="compile-test" name="pigunit-jar" description="create the pigunit jar file"> + <echo> *** Creating pigunit.jar ***</echo> + <jar destfile="${pigunit.jarfile}"> + <fileset dir="${test.build.classes}/org/apache/pig/pigunit/"/> + <zipfileset src="${ivy.lib.dir}/${commons-lang.jarfile}" /> + </jar> + </target> + + <target name="test-pigunit" depends="compile-test,jar-withouthadoop, pigunit-jar" description="Run tests that test PigUnit"> + <macro-test-runner test.file="${test.pigunit.file}" /> + </target> + + <!-- 
================================================================== --> <!-- D I S T R I B U T I O N --> <!-- ================================================================== --> <target name="package" depends="docs, api-report" description="Create a Pig release"> Modified: hadoop/pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy.xml?rev=987014&r1=987013&r2=987014&view=diff ============================================================================== --- hadoop/pig/trunk/ivy.xml (original) +++ hadoop/pig/trunk/ivy.xml Thu Aug 19 04:33:33 2010 @@ -84,8 +84,11 @@ <dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${jackson.version}" conf="compile->master"/> <dependency org="joda-time" name="joda-time" rev="${joda-time.version}" conf="compile->master"/> + <dependency org="commons-lang" name="commons-lang" rev="${commons-lang.version}" + conf="compile->master"/> <dependency org="com.google.guava" name="guava" rev="${guava.version}" conf="compile->master" /> - <dependency org="org.python" name="jython" rev="${jython.version}" conf="compile->master"/> + <dependency org="org.python" name="jython" rev="${jython.version}" conf="compile->master"/> + <!--ATM hbase, hbase-test.jar, hadoop.jar are resolved from the lib dir--> <dependency org="hsqldb" name="hsqldb" rev="${hsqldb.version}" conf="test->default" /> Modified: hadoop/pig/trunk/ivy/libraries.properties URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy/libraries.properties?rev=987014&r1=987013&r2=987014&view=diff ============================================================================== --- hadoop/pig/trunk/ivy/libraries.properties (original) +++ hadoop/pig/trunk/ivy/libraries.properties Thu Aug 19 04:33:33 2010 @@ -21,6 +21,7 @@ commons-beanutils.version=1.7.0 commons-cli.version=1.0 commons-el.version=1.0 commons-logging.version=1.0.3 +commons-lang.version=2.4 checkstyle.version=4.2 ivy.version=2.2.0-rc1 Added: 
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml (added) +++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml Thu Aug 19 04:33:33 2010 @@ -0,0 +1,271 @@ +<?xml version="1.0" encoding="UTF-8"?> + + <!-- + Copyright 2002-2004 The Apache Software Foundation Licensed under the Apache License, Version + 2.0 (the "License"); you may not use this file except in compliance with the License. You may + obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by + applicable law or agreed to in writing, software distributed under the License is distributed on + an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See + the License for the specific language governing permissions and limitations under the License. 
+ --> + +<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" + "http://forrest.apache.org/dtd/document-v20.dtd"> + +<document> + <header> + <title>PigUnit - Pig script testing simplified.</title> + </header> + <body> + + <section> + <title>Overview</title> + <p>The goal is to provide a simple xUnit framework that enables our Pig scripts to be easily: + </p> + <ol> + <li> + <p>unit tested</p> + </li> + <li> + <p>regression tested</p> + </li> + <li> + <p>quickly prototyped</p> + </li> + </ol> + + <p>No cluster set up is required.</p> + </section> + + <section> + <title>PigUnit Example</title> + <p>Computing top queries, specifying the input data and expected output of the script.</p> + <p>Java test</p> + <source> + @Test + public void testTop3Queries() { + String[] args = { + "n=3", + }; + test = new PigTest("top_queries.pig", args); + + String[] input = { + "yahoo\t10", + "twitter\t7", + "facebook\t10", + "yahoo\t15", + "facebook\t5", + .... + }; + + String[] output = { + "(yahoo,25L)", + "(facebook,15L)", + "(twitter,7L)", + }; + + test.assertOutput("data", input, "queries_limit", output); + } + </source> + <p>top_queries.pig</p> + <source> +data = + LOAD '$input' + AS (query:CHARARRAY, count:INT); + + ... + +queries_sum = + FOREACH queries_group + GENERATE + group AS query, + SUM(data.count) AS count; + + ... + +queries_limit = LIMIT queries_ordered $n; + +STORE queries_limit INTO '$output'; +</source> + + <p>You just need two jar files in your classpath:</p> + <ol> + <li>pig.jar</li> + <li>pigunit.jar</li> + </ol> + + <p> + Many examples are available in the + <a + href="http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java" + >PigUnit tests</a> + . + </p> + </section> + + <section> + <title>Cluster</title> + + <p>There are 2 main modes:</p> + <ol> + <li>LOCAL</li> + <li>MAPREDUCE</li> + </ol> + + <section> + <title>LOCAL</title> + <p> + This is using the local mode of Pig. + It will be used by default.
+ </p> + + <p>It will go fast and use your local file system as a HDFS cluster.</p> + </section> + + + <section> + <title>MAPREDUCE</title> + <p>This is using a real Hadoop cluster. + The cluster selected will be the first specified in + the CLASSPATH (same + way as the HADOOP_CONF_DIR variable works). You + can also choose to have + a test cluster automatically + starting/stopping or you can reuse an already + running cluster. + </p> + + <section> + <title>On demand cluster</title> + <p> + The default mode is using a local MiniCluster that is started at the very beginning + and + shutdown automatically at the end of the test run. + No setup needed which is really + helpful. The cluster will contain no data each time it is + started, but data can be + copied + to it as shown in the examples. + + You can select this mode by setting the Java property + <code>"pigunit.exectype.minicluster"</code> + to "true". + </p> + <p>It can be set in Java or on the command line:</p> + <ol> + <li> + <code>System.setProperty("pigunit.exectype.minicluster", "true");</code> + </li> + <li> + <code>-Dpigunit.exectype.minicluster=true</code> + </li> + </ol> + <p> + The + <code>HADOOP_CONF_DIR</code> + path will be + <code>~/pigtest/conf</code> + and it will be required in the CLASSPATH. + The path to the log directory is set by the + Java property + <code>"hadoop.log.dir"</code> + (default is "/tmp/pigunit"). + </p> + </section> + + <section> + <title>Existing cluster</title> + <p> + If + <code>"pigunit.exectype.cluster"</code> + property is set, the first xml configuration of an Hadoop cluster found in the + CLASSPATH + will be used.
+ + Notice that PigUnit comes with a standalone MiniCluster that + can be started + externally with: + </p> + <source> +java -cp .../pig.jar:.../pigunit.jar org.apache.pig.pigunit.MiniClusterRunner +</source> + <p>This is really useful when doing some prototyping in order to have a test cluster + ready.</p> + </section> + </section> + </section> + + <section> + <title>Building</title> + <p>In order to compile pigunit.jar, go in pig trunk:</p> + <source> +$pig_trunk ant compile-test +$pig_trunk ant +$pig_trunk ant pigunit-jar +</source> + </section> + + <section> + <title>Troubleshooting</title> + <p>Common problems</p> + <section> + <title>CLASSPATH in MAPREDUCE mode</title> + <p>When used in MAPREDUCE mode, do not forget the HADOOP_CONF_DIR of your cluster in + your + CLASSPATH.</p> + <p> + It is + <code>~/pigtest/conf</code> + by default + </p> + <source> +org.apache.pig.backend.executionengine.ExecException: ERROR 4010: Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath).If you plan to use local mode, please put -x local option in command line + </source> + </section> + + <section> + <title>UDF jars not found</title> + <p>This error means that you are missing some jars in your test environment.</p> + <source> +WARN util.JarManager: Couldn't find the jar for org.apache.pig.piggybank.evaluation.string.LOWER, skip it + </source> + </section> + + <section> + <title>STORING data</title> + <p>Currently pig is dropping all the STORE/DUMP commands but you can tell PigUnit to + keep + them and execute the script.</p> + <source> +test = new PigTest(PIG_SCRIPT, args); +test.unoverride("STORE"); +test.runScript(); +</source> + </section> + + <section> + <title>Cache archive</title> + <p>It works, your test environment will need to have the cache archive options + specified by + Java properties or in an additional XML configuration in its + CLASSPATH.</p> + <p>If you use a local cluster, you will need to set 
the required environment + variables before + starting it, e.g.</p> + <source>export LD_LIBRARY_PATH=/home/path/to/lib</source> + </section> + </section> + + <section> + <title>Future</title> + <p>Improvement and other components based on PigUnit that could be built later.</p> + <p>We could build on top of PigTest a PigTestCase and PigTestSuite in order to have:</p> + <ol> + <li>notion of workspaces for each test</li> + <li>removing the boiler plate code appearing when there is more than one test + methods</li> + <li>standalone utility that reads test configuration and generates a test report...</li> + </ol> + </section> + </body> +</document> Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=987014&r1=987013&r2=987014&view=diff ============================================================================== --- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml (original) +++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml Thu Aug 19 04:33:33 2010 @@ -47,6 +47,7 @@ See http://forrest.apache.org/docs/linki <plref2 label="Pig Latin 2" href="piglatin_ref2.html" /> <cookbook label="Cookbook" href="cookbook.html" /> <udf label="UDFs" href="udf.html" /> + <udf label="PigUnit" href="pigunit.html" /> </docs> <docs label="Zebra"> Added: hadoop/pig/trunk/test/data/pigunit/top_queries.pig URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries.pig?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/data/pigunit/top_queries.pig (added) +++ hadoop/pig/trunk/test/data/pigunit/top_queries.pig Thu Aug 19 04:33:33 2010 @@ -0,0 +1,27 @@ +--------------------------------------------------------------------- +-- Top N Queries. 
+--------------------------------------------------------------------- + +data = + LOAD '$input' + AS (query:CHARARRAY, count:INT); + +queries_group = + GROUP data + BY query + PARALLEL $reducers; + +queries_sum = + FOREACH queries_group + GENERATE + group AS query, + SUM(data.count) AS count; + +queries_ordered = + ORDER queries_sum + BY count DESC + PARALLEL $reducers; + +queries_limit = LIMIT queries_ordered $n; + +STORE queries_limit INTO '$output'; Added: hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt (added) +++ hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt Thu Aug 19 04:33:33 2010 @@ -0,0 +1,3 @@ +(yahoo,25) +(facebook,15) +(twitter,7) \ No newline at end of file Added: hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt (added) +++ hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt Thu Aug 19 04:33:33 2010 @@ -0,0 +1,10 @@ +yahoo 10 +twitter 7 +facebook 10 +yahoo 15 +facebook 5 +a 1 +b 2 +c 3 +d 4 +e 5 \ No newline at end of file Added: hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt (added) +++ hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt Thu Aug 19 04:33:33 2010 @@ -0,0 +1,4 @@ +n=3 +reducers=1 
+input=top_queries_input_data.txt +output=top_3_queries Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java Thu Aug 19 04:33:33 2010 @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.apache.pig.pigunit; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; +import org.apache.pig.impl.PigContext; +import org.apache.pig.test.Util; + +/** + * Encapsulates all the file system operations. + * + * <p>Mainly used for copying data to the test cluster. 
+ */ +public class Cluster { + private final Configuration configuration; + + public Cluster(PigContext context) { + configuration = ConfigurationUtil.toConfiguration(context.getProperties()); + } + + public boolean exists(Path destination) throws IOException { + FileSystem fs = destination.getFileSystem(configuration); + return fs.exists(destination); + } + + /** + * If file size has changed, or if destination does not exist yet, copy it. + * + * @param local Path to the local file to copy to the cluster. + * @param destination Destination path on the cluster. + * @throws IOException If the copy failed. + */ + public void update(Path local, Path destination) throws IOException { + if (! exists(destination) || ! sameSize(local, destination)) { + copyFromLocalFile(local, destination, true); + } + } + + public void copyFromLocalFile(Path local, Path destination) throws IOException { + copyFromLocalFile(local, destination, true); + } + + public void copyFromLocalFile(Path local, Path destination, boolean overwrite) + throws IOException { + FileSystem fs = destination.getFileSystem(configuration); /* copyFromLocalFile() reads from the local disk and writes into the FileSystem it is invoked on, so resolve that FileSystem from the destination path */ + fs.copyFromLocalFile(false, overwrite, local, destination); + } + + public void copyFromLocalFile(String[] content, String destination) throws IOException { + copyFromLocalFile(content, destination, true); + } + + public void copyFromLocalFile(String[] content, String destination, boolean overwrite) + throws IOException { + Path file = new Path(destination); + FileSystem fs = file.getFileSystem(configuration); + + if (overwrite && fs.exists(file)) { + fs.delete(file, true); + } + + Util.createInputFile(fs, destination, content); + } + + public void copyFromLocalFile(String[][] data) throws IOException { + copyFromLocalFile(data, false); + } + + public void copyFromLocalFile(String[][] data, boolean overwrite) throws IOException { + for (int i = 0; i < data.length; i++) { + copyFromLocalFile(new Path(data[i][0]), new Path(data[i][1]), overwrite); + } + } + + public FileStatus[]
listStatus(Path path) throws IOException { + FileSystem fs = path.getFileSystem(configuration); + return fs.listStatus(path); + } + + public boolean delete(Path path) throws IOException { + FileSystem fs = path.getFileSystem(configuration); + return fs.delete(path, true); + } + + private boolean sameSize(Path local, Path destination) throws IOException { + FileSystem fs1 = FileSystem.getLocal(configuration); + FileSystem fs2 = destination.getFileSystem(configuration); + + return fs1.getFileStatus(local).getLen() == fs2.getFileStatus(destination).getLen(); + } +} Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java Thu Aug 19 04:33:33 2010 @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.apache.pig.pigunit; + +import org.apache.pig.test.MiniCluster; + + +/** + * Starts an on-demand mini cluster that requires no set up. 
+ * + * <p>It can be useful if you don't want to restart the cluster between each run of test and don't + * want to set up a real cluster. + * + * <p>CLASSPATH needs to contain: pig.jar and piggybank.jar + * <pre> + * export CLASSPATH=/path/pig.jar:/path/piggybank.jar + * java org.apache.pig.pigunit.MiniClusterRunner + * </pre> + * + * <p>Possible improvements + * <ul> + * <li>add a main in MiniCluster</li> + * <li>make MiniCluster configurable (number of maps...)</li> + * <li>make MiniCluster use a default property for choosing the hadoop conf dir + * (e.g. minicluster.conf.dir) instead of always using + * System.getProperty("user.home"), "pigtest/conf/"</li> + * <li>use CLI option</li> + * <li>make a shell wrapper</li> + * </ul> + */ +public class MiniClusterRunner { + public static void main(String[] args) { + System.setProperty("hadoop.log.dir", System.getProperty("hadoop.log.dir", "/tmp/pigunit")); /* keep a log dir supplied with -Dhadoop.log.dir; "/tmp/pigunit" is only the default */ + MiniCluster.buildCluster(); + } +} Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java Thu Aug 19 04:33:33 2010 @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.apache.pig.pigunit; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringReader; +import java.io.StringWriter; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.Charset; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import junit.framework.Assert; + +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.apache.pig.ExecType; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.pigunit.pig.PigServer; +import org.apache.pig.test.MiniCluster; +import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor; +import org.apache.pig.tools.parameters.ParseException; + +/** + * Pig Unit + * + * <p>Equivalent of xUnit for testing Pig. + * + * <p>Call {@link PigTest#getCluster()} then construct a test and call an assert method. + * Have a look at the tests of this class for more examples. + */ +public class PigTest { + /** The text of the Pig script to test with no substitution or change. */ + private final String originalTextPigScript; + /** The list of arguments of the script. */ + private final String[] args; + /** The list of file arguments of the script. */ + private final String[] argFiles; + /** The list of aliases to override in the script.
*/ + private final Map<String, String> aliasOverrides; + + private static PigServer pig; + private static Cluster cluster; + private static final Logger LOG = Logger.getLogger(PigTest.class); + private static final String EXEC_MINI_CLUSTER = "pigunit.exectype.minicluster"; + private static final String EXEC_CLUSTER = "pigunit.exectype.cluster"; + + /** + * Initializes the Pig test. + * + * @param args The list of arguments of the script. + * @param argFiles The list of file arguments of the script. + * @param pigTextScript The text of the Pig script to test with no substitution or change. + */ + @SuppressWarnings("serial") + PigTest(String[] args, String[] argFiles, String pigTextScript) { + this.originalTextPigScript = pigTextScript; + this.args = args; + this.argFiles = argFiles; + this.aliasOverrides = new HashMap<String, String>() {{ + put("STORE", ""); + put("DUMP", ""); + }}; + } + + public PigTest(String scriptPath) throws IOException { + this(null, null, readFile(scriptPath)); + } + + public PigTest(String[] script) { + this(null, null, StringUtils.join(script, "\n")); + } + + public PigTest(String scriptPath, String[] args) throws IOException { + this(args, null, readFile(scriptPath)); + } + + public PigTest(String[] script, String[] args) { + this(args, null, StringUtils.join(script, "\n")); + } + + public PigTest(String[] script, String[] args, String[] argsFile) { + this(args, argsFile, StringUtils.join(script, "\n")); + } + + public PigTest(String scriptPath, String[] args, String[] argFiles) throws IOException { + this(args, argFiles, readFile(scriptPath)); + } + + public PigTest(String scriptPath, String[] args, PigServer pig, Cluster cluster) + throws IOException { + this(args, null, readFile(scriptPath)); + PigTest.pig = pig; + PigTest.cluster = cluster; + } + + /** + * Connects and starts if needed the PigServer. + * + * @return The cluster where input files can be copied. + * @throws ExecException If the PigServer can't be started. 
+ */ + public static Cluster getCluster() throws ExecException { + if (cluster == null) { + if (System.getProperties().containsKey(EXEC_MINI_CLUSTER)) { + LOG.info("Using mini cluster mode"); /* logged only once this mode has actually been selected */ + if (! System.getProperties().containsKey("hadoop.log.dir")) { + System.setProperty("hadoop.log.dir", "/tmp/pigunit"); + } + MiniCluster.buildCluster(); + pig = new PigServer(ExecType.MAPREDUCE); + } else if (System.getProperties().containsKey(EXEC_CLUSTER)) { + LOG.info("Using cluster mode"); + pig = new PigServer(ExecType.MAPREDUCE); + } else { + LOG.info("Using default local mode"); + pig = new PigServer(ExecType.LOCAL); + } + + cluster = new Cluster(pig.getPigContext()); + } + + return cluster; + } + + /** + * Registers a pig script with its variables substituted. + * + * @throws IOException If a temp file containing the pig script could not be created. + * @throws ParseException The pig script could not have all its variables substituted. + */ + protected void registerScript() throws IOException, ParseException { + BufferedReader pigIStream = new BufferedReader(new StringReader(this.originalTextPigScript)); + StringWriter pigOStream = new StringWriter(); + + ParameterSubstitutionPreprocessor ps = new ParameterSubstitutionPreprocessor(50); + ps.genSubstitutedFile(pigIStream, pigOStream, args, argFiles); + + String substitutedPig = pigOStream.toString(); + System.out.println(substitutedPig); + + File f = File.createTempFile("tmp", "pigunit"); + PrintWriter pw = new PrintWriter(f); + pw.println(substitutedPig); + pw.close(); + + String pigSubstitutedFile = f.getCanonicalPath(); + pig.registerScript(pigSubstitutedFile, aliasOverrides); + } + + /** + * Executes the Pig script with its current overrides. + * + * @throws IOException If a temp file containing the pig script could not be created. + * @throws ParseException The pig script could not have all its variables substituted.
+ */ + public void runScript() throws IOException, ParseException { + registerScript(); + } + + /** + * Gets an iterator on the content of one alias of the script. + * + * <p>For now use a giant String in order to display all the differences in one time. It might not + * work with giant expected output. + * @throws ParseException If the Pig script could not be parsed. + * @throws IOException If the Pig script could not be executed correctly. + */ + public Iterator<Tuple> getAlias(String alias) throws IOException, ParseException { + registerScript(); + return pig.openIterator(alias); + } + + /** + * Gets an iterator on the content of the latest STORE alias of the script. + * + * @throws ParseException If the Pig script could not be parsed. + * @throws IOException If the Pig script could not be executed correctly. + */ + public Iterator<Tuple> getAlias() throws IOException, ParseException { + registerScript(); + String alias = aliasOverrides.get("LAST_STORE_ALIAS"); + + return getAlias(alias); + } + + /** + * Replaces the query of an aliases by another query. + * + * <p>For example: + * + * <pre> + * B = FILTER A BY count > 5; + * overridden with: + * <B, B = FILTER A BY name == 'Pig';> + * becomes + * B = FILTER A BY name == 'Pig'; + * </pre> + * + * @param alias The alias to override. + * @param query The new value of the alias. 
+   */
+  public void override(String alias, String query) {
+    aliasOverrides.put(alias, query);
+  }
+
+  /**
+   * Removes the override registered for an alias, if any.
+   *
+   * @param alias The alias whose override is removed.
+   */
+  public void unoverride(String alias) {
+    aliasOverrides.remove(alias);
+  }
+
+  /**
+   * Asserts that the content of the last stored alias equals the expected rows.
+   *
+   * <p>Both sides are joined with newlines and compared as a single String. The alias name is
+   * read from the "LAST_STORE_ALIAS" entry of the overrides after the script is registered.
+   *
+   * @param expected The expected rows, one String per tuple.
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the Pig script could not be executed correctly.
+   */
+  public void assertOutput(String[] expected) throws IOException, ParseException {
+    registerScript();
+    String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+    assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  /**
+   * Asserts that the content of the given alias equals the expected rows.
+   *
+   * <p>NOTE(review): the script is registered here and again inside getAlias(String); confirm
+   * whether the first registration is needed.
+   *
+   * @param alias The alias to check.
+   * @param expected The expected rows, one String per tuple.
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the Pig script could not be executed correctly.
+   */
+  public void assertOutput(String alias, String[] expected) throws IOException, ParseException {
+    registerScript();
+
+    assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  /**
+   * Asserts that the content of the last stored alias equals the content of a file.
+   *
+   * <p>The file is compared verbatim against the tuples joined with newlines.
+   *
+   * @param expected The file holding the expected output.
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the script could not be executed or the file could not be read.
+   */
+  public void assertOutput(File expected) throws IOException, ParseException {
+    registerScript();
+    String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+    assertEquals(readFile(expected), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  /**
+   * Asserts that the content of the given alias equals the content of a file.
+   *
+   * @param alias The alias to check.
+   * @param expected The file holding the expected output.
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the script could not be executed or the file could not be read.
+   */
+  public void assertOutput(String alias, File expected) throws IOException, ParseException {
+    registerScript();
+
+    assertEquals(readFile(expected), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  /**
+   * Replaces the data of an input alias by the given rows, then asserts the content of an alias.
+   *
+   * <p>The schema of the input alias is dumped from the registered script, the rows are copied
+   * to "pigunit-input-overriden.txt" through the cluster helper, and the input alias is
+   * overridden with a LOAD of that file using the dumped schema.
+   *
+   * @param aliasInput The alias whose LOAD statement is replaced.
+   * @param input The rows to use as input data.
+   * @param alias The alias to check.
+   * @param expected The expected rows, one String per tuple.
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the Pig script could not be executed correctly.
+   */
+  public void assertOutput(String aliasInput, String[] input, String alias, String[] expected)
+      throws IOException, ParseException {
+    // The script must be registered first so that the schema of aliasInput can be dumped.
+    registerScript();
+
+    StringBuilder sb = new StringBuilder();
+    Schema.stringifySchema(sb, pig.dumpSchema(aliasInput), DataType.TUPLE) ;
+
+    final String destination = "pigunit-input-overriden.txt";
+    // NOTE(review): the trailing 'true' presumably means overwrite -- confirm in Cluster.
+    cluster.copyFromLocalFile(input, destination, true);
+    override(aliasInput,
+        String.format("%s = LOAD '%s' AS %s;", aliasInput, destination, sb.toString()));
+
+    assertOutput(alias, expected);
+  }
+
+  /**
+   * Compares two Strings; protected so that subclasses can plug in another comparison.
+   *
+   * @param expected The expected value.
+   * @param current The actual value.
+   */
+  protected void assertEquals(String expected, String current) {
+    Assert.assertEquals(expected, current);
+  }
+
+  /**
+   * Reads a whole file into a String.
+   *
+   * @param path The path of the file to read.
+   * @return The content of the file.
+   * @throws IOException If the file could not be read.
+   */
+  private static String readFile(String path) throws IOException {
+    return readFile(new File(path));
+  }
+
+  /**
+   * Reads a whole file into a String, decoding it with the platform default charset.
+   *
+   * @param file The file to read.
+   * @return The content of the file.
+   * @throws IOException If the file could not be read.
+   */
+  private static String readFile(File file) throws IOException {
+    FileInputStream stream = new FileInputStream(file);
+    try {
+      FileChannel fc = stream.getChannel();
+      // The whole file is mapped read-only and decoded in one go.
+      MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
+      return Charset.defaultCharset().decode(bb).toString();
+    }
+    finally {
+      stream.close();
+    }
+  }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit.pig;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+/**
+ * Slightly modified GruntParser that accepts a list of aliases to override.
+ *
+ * <p>This is a way to replace a pig query by another query.
+ * + * <p>For example, if we have this map of overrides: Map<alias,query> + * <ul> + * <li><A, A = LOAD '/path'> --> replace the alias A by A = LOAD '/path'</li> + * <li><DUMP, > --> remove the DUMP queries</li> + * </ul> + * + * <p>It might be possible to do the same thing in a less hacky way. + * e.g. pig.registerQuery replace the query of a certain alias... + */ +public class GruntParser extends org.apache.pig.tools.grunt.GruntParser { + /** A mapping <alias,query> to apply to the pig script. */ + private final Map<String, String> aliasOverride; + + /** + * Initializes the Pig parser with its list of aliases to override. + * + * @param stream The Pig script stream. + * @param aliasOverride The list of aliases to override in the Pig script. + */ + public GruntParser(Reader stream, Map<String, String> aliasOverride) { + super(stream); + this.aliasOverride = aliasOverride; + } + + /** + * Pig relations that have been blanked are dropped. + */ + @Override + protected void processPig(String cmd) throws IOException { + String command = override(cmd); + + if (! command.equals("")) { + super.processPig(command); + } + } + + /** + * Overrides the relations of the pig script that we want to change. + * + * @param query The current pig query processed by the parser. + * @return The same query, or a modified query, or blank. + */ + public String override(String query) { + Map<String, String> metaData = new HashMap<String, String>(); + + for (Entry<String, String> alias : aliasOverride.entrySet()) { + saveLastStoreAlias(query, metaData); + + if (query.toLowerCase().startsWith(alias.getKey().toLowerCase() + " ")) { + System.out.println( + String.format("%s\n--> %s", query, alias.getValue() == "" ? "none" : alias.getValue())); + query = alias.getValue(); + } + } + + aliasOverride.putAll(metaData); + + return query; + } + + /** + * Saves the name of the alias of the last store. + * + * <p>Maybe better to replace it by PigServer.getPigContext().getLastAlias(). 
+ */ + void saveLastStoreAlias(String cmd, Map<String, String> metaData) { + if (cmd.toUpperCase().startsWith("STORE")) { + Pattern outputFile = Pattern.compile("STORE +([^']+) INTO.*", Pattern.CASE_INSENSITIVE); + Matcher matcher = outputFile.matcher(cmd); + if (matcher.matches()) { + metaData.put("LAST_STORE_ALIAS", matcher.group(1)); + } + } + } +} Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java Thu Aug 19 04:33:33 2010 @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.apache.pig.pigunit.pig; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.Map; +import java.util.Properties; + +import org.apache.pig.ExecType; +import org.apache.pig.backend.executionengine.ExecException; + +/** + * Slightly modified PigServer that accepts a list of Pig aliases to override. 
+ * + * <p>The list is given to the GruntParser. + */ +public class PigServer extends org.apache.pig.PigServer { + + public PigServer(ExecType execType, Properties properties) throws ExecException { + super(execType, properties); + } + + public PigServer(ExecType execType) throws ExecException { + super(execType); + } + + /** + * Parses and registers the pig script. + * + * @param fileName The Pig script file. + * @param aliasOverride The list of aliases to override in the Pig script. + * @throws IOException If the Pig script can't be parsed correctly. + */ + public void registerScript(String fileName, Map<String, String> aliasOverride) + throws IOException { + try { + GruntParser grunt = new GruntParser(new FileReader(new File(fileName)), aliasOverride); + grunt.setInteractive(false); + grunt.setParams(this); + grunt.parseStopOnError(true); + } catch (FileNotFoundException e) { + e.printStackTrace(); + throw new IOException(e.getCause()); + } catch (org.apache.pig.tools.pigscript.parser.ParseException e) { + e.printStackTrace(); + throw new IOException(e.getCause()); + } + } +} Added: hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java Thu Aug 19 04:33:33 2010 @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.test.pigunit;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.pigunit.Cluster;
+import org.apache.pig.pigunit.PigTest;
+import org.apache.pig.pigunit.pig.PigServer;
+import org.apache.pig.tools.parameters.ParseException;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * <p>Various examples about how to use PigUnit.
+ *
+ * <p>Requires in CLASSPATH:
+ * <ul>
+ * <li>pig.jar</li>
+ * <li>pigunit.jar</li>
+ * <li>hadoop_conf_dir to current/future cluster if not using LOCAL mode</li>
+ * </ul>
+ */
+public class TestPigTest {
+  private PigTest test;
+  private static Cluster cluster;
+  private static final String PIG_SCRIPT = "test/data/pigunit/top_queries.pig";
+
+  /** Gets the cluster from PigTest and pushes the shared input file to it once for all tests. */
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    cluster = PigTest.getCluster();
+
+    cluster.update(
+        new Path("test/data/pigunit/top_queries_input_data.txt"),
+        new Path("top_queries_input_data.txt"));
+  }
+
+  /** Runs the script file with inline parameters and checks an explicitly named alias. */
+  @Test
+  public void testNtoN() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput("queries_limit", output);
+  }
+
+  /** Same as testNtoN but without naming the alias: the last stored alias is checked. */
+  @Test
+  public void testImplicitNtoN() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput(output);
+  }
+
+  /** Replaces the content of the "data" alias by inline rows before checking the result. */
+  @Test
+  public void testTextInput() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] input = {
+        "yahoo\t10",
+        "twitter\t7",
+        "facebook\t10",
+        "yahoo\t15",
+        "facebook\t5",
+        "a\t1",
+        "b\t2",
+        "c\t3",
+        "d\t4",
+        "e\t5",
+    };
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput("data", input, "queries_limit", output);
+  }
+
+  /**
+   * NOTE(review): this body is identical to testTextInput; it does not exercise anything
+   * specific to a subset yet.
+   */
+  @Test
+  public void testSubset() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] input = {
+        "yahoo\t10",
+        "twitter\t7",
+        "facebook\t10",
+        "yahoo\t15",
+        "facebook\t5",
+        "a\t1",
+        "b\t2",
+        "c\t3",
+        "d\t4",
+        "e\t5",
+    };
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput("data", input, "queries_limit", output);
+  }
+
+  /** Overrides the "queries_limit" statement so that only two rows are kept. */
+  @Test
+  public void testOverride() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;");
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+    };
+
+    test.assertOutput(output);
+  }
+
+  /** Gives the Pig script as an array of statements instead of a script file. */
+  @Test
+  public void testInlinePigScript() throws ParseException, IOException {
+    String[] script = {
+        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+        "queries_group = GROUP data BY query PARALLEL 1;",
+        "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
+        "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
+        "queries_limit = LIMIT queries_ordered 3;",
+        "STORE queries_limit INTO 'top_3_queries';",
+    };
+
+    test = new PigTest(script);
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput(output);
+  }
+
+  /** Compares the last stored alias with the content of an expected-output file. */
+  @Test
+  public void testFileOutput() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt"));
+  }
+
+  /** Reads the script parameters from a parameter file instead of inline arguments. */
+  @Test
+  public void testArgFiles() throws ParseException, IOException {
+    String[] argsFile = {
+        "test/data/pigunit/top_queries_params.txt"
+    };
+
+    test = new PigTest(PIG_SCRIPT, null, argsFile);
+
+    test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt"));
+  }
+
+  /**
+   * Reads the content of an alias through getAlias.
+   *
+   * <p>NOTE(review): despite its name, this test names the alias explicitly; the no-argument
+   * getAlias() that resolves the last stored alias is not exercised here.
+   */
+  @Test
+  public void testGetLastAlias() throws ParseException, IOException {
+    String[] script = {
+        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+        "queries_group = GROUP data BY query PARALLEL 1;",
+        "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
+        "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
+        "queries_limit = LIMIT queries_ordered 3;",
+        "STORE queries_limit INTO 'top_3_queries';",
+    };
+
+    test = new PigTest(script);
+
+    String expected =
+        "(yahoo,25)\n" +
+        "(facebook,15)\n" +
+        "(twitter,7)";
+
+    TestCase.assertEquals(expected, StringUtils.join(test.getAlias("queries_limit"), "\n"));
+  }
+
+  /** Runs an inline script that defines and calls a UDF (TOKENIZE). */
+  @Test
+  public void testWithUdf() throws ParseException, IOException {
+    String[] script = {
+        // "REGISTER myIfNeeded.jar;",
+        "DEFINE TOKENIZE TOKENIZE();",
+        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+        "queries = FOREACH data GENERATE query, TOKENIZE(query) AS query_tokens;",
+        "queries_ordered = ORDER queries BY query DESC PARALLEL 1;",
+        "queries_limit = LIMIT queries_ordered 3;",
+        "STORE queries_limit INTO 'top_3_queries';",
+    };
+
+    test = new PigTest(script);
+
+    String[] output = {
+        "(yahoo,{(yahoo)})",
+        "(yahoo,{(yahoo)})",
+        "(twitter,{(twitter)})",
+    };
+
+    test.assertOutput(output);
+  }
+
+  /** Re-enables the STORE statements, runs the script and checks the output can be deleted. */
+  @Test
+  public void testStore() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    // By default PigUnit removes all the STORE and DUMP
+    test.unoverride("STORE");
+
+    test.runScript();
+
+    TestCase.assertTrue(cluster.delete(new Path("top_3_queries")));
+  }
+
+  /**
+   * Placeholder for a test with a mocked server and cluster; both are still null.
+   *
+   * <p>NOTE(review): the expected file path lacks the "test/data/pigunit/" prefix used by the
+   * other tests -- confirm before enabling this test.
+   */
+  @Ignore("Not ready yet")
+  @Test
+  public void testWithMock() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+
+    PigServer mockServer = null;
+    Cluster mockCluster = null;
+
+    test = new PigTest(PIG_SCRIPT, args, mockServer, mockCluster);
+
+    test.assertOutput(new File("data/top_queries_expected_top_3.txt"));
+  }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership.
The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.apache.pig.test.pigunit.pig; + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import junit.framework.Assert; + +import org.apache.pig.pigunit.pig.GruntParser; +import org.junit.Before; +import org.junit.Test; + + +public class TestGruntParser { + private GruntParser parser; + private Map<String, String> override; + + @SuppressWarnings("serial") + @Before + public void setUp() { + override = new HashMap<String, String>() {{ + put("STORE", ""); + put("DUMP", ""); + }}; + parser = new GruntParser(new StringReader(""), override); + } + + @Test + public void testRemoveStores() throws IOException { + Assert.assertEquals("", parser.override("STORE output INTO '/path';")); + + override.remove("STORE"); + Assert.assertEquals( + "STORE output INTO '/path';", parser.override("STORE output INTO '/path';")); + } + + @Test + public void testRemoveDumps() throws IOException { + Assert.assertEquals("", parser.override("DUMP output;")); + + override.remove("DUMP"); + Assert.assertEquals("DUMP output;", parser.override("DUMP output;")); + } + + @Test + public void testReplaceLoad() throws IOException { + override.put("A", "A = LOAD 'file';"); + Assert.assertEquals( + "A = LOAD 'file';", + parser.override("A = LOAD 'input.txt' AS (query:CHARARRAY);")); + } + + @Test + public void testGetStoreAlias() throws IOException { + 
override.remove("STORE"); + parser.override("STORE output INTO '/path'"); + Assert.assertEquals("output", override.get("LAST_STORE_ALIAS")); + } +} Added: hadoop/pig/trunk/test/pigunit-tests URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/pigunit-tests?rev=987014&view=auto ============================================================================== --- hadoop/pig/trunk/test/pigunit-tests (added) +++ hadoop/pig/trunk/test/pigunit-tests Thu Aug 19 04:33:33 2010 @@ -0,0 +1,2 @@ +**/TestPigTest.java +**/TestGruntParser.java