Author: hashutosh Date: Fri Jul 2 06:05:22 2010 New Revision: 959865 URL: http://svn.apache.org/viewvc?rev=959865&view=rev Log: PIG-1449: RegExLoader hangs on lines that don't match the regular expression
Modified: hadoop/pig/trunk/contrib/CHANGES.txt hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java Modified: hadoop/pig/trunk/contrib/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=959865&r1=959864&r2=959865&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/CHANGES.txt Fri Jul 2 06:05:22 2010 @@ -32,6 +32,8 @@ OPTIMIZATIONS BUG FIXES +PIG-1449 RegExLoader hangs on lines that don't match the regular expression +(Christian Hargraves via hashutosh) PIG 0.7.0 Modified: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java?rev=959865&r1=959864&r2=959865&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java (original) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java Fri Jul 2 06:05:22 2010 @@ -48,21 +48,13 @@ public abstract class RegExLoader extend @Override public Tuple getNext() throws IOException { - if (!in.nextKeyValue()) { - return null; - } - Pattern pattern = getPattern(); Matcher matcher = pattern.matcher(""); TupleFactory mTupleFactory = DefaultTupleFactory.getInstance(); String line; - boolean tryNext = true; - while (tryNext) { - Text val = in.getCurrentValue(); - if (val == null) { - break; - } + while (in.nextKeyValue()) { + Text val = in.getCurrentValue(); line = val.toString(); if (line.length() > 0 && line.charAt(line.length() - 1) == '\r') { line = line.substring(0, line.length() - 1); @@ -70,14 +62,12 @@ public abstract class RegExLoader extend matcher = matcher.reset(line); ArrayList<DataByteArray> list = new ArrayList<DataByteArray>(); if (matcher.find()) { - tryNext=false; for (int i = 1; i <= matcher.groupCount(); i++) { list.add(new DataByteArray(matcher.group(i))); } return mTupleFactory.newTuple(list); } } - return null; } Modified: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java?rev=959865&r1=959864&r2=959865&view=diff ============================================================================== --- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java (original) +++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java Fri Jul 2 06:05:22 2010 @@ -31,6 +31,8 @@ import org.junit.Test; public class TestRegExLoader extends TestCase { private static String patternString = "(\\w+),(\\w+);(\\w+)"; private final static Pattern pattern = Pattern.compile(patternString); + private static String patternString2 = "(3),(three);(iii)"; + private final static Pattern pattern2 = Pattern.compile(patternString); public static class DummyRegExLoader extends RegExLoader { public DummyRegExLoader() {} @@ -41,6 +43,15 @@ public class TestRegExLoader extends Tes } } + public static class DummyRegExLoader2 extends RegExLoader { + public DummyRegExLoader2() {} + + @Override + public Pattern getPattern() { + return Pattern.compile(patternString2); + } + } + public static ArrayList<String[]> data = new ArrayList<String[]>(); static { data.add(new String[] { "1,one;i" }); @@ -71,4 +82,30 @@ public class TestRegExLoader extends Tes assertEquals(data.size(), tupleCount); } + @Test + public void testOnlyLastMatch() throws Exception { + PigServer pigServer = new PigServer(LOCAL); + + String filename = TestHelper.createTempFile(data, ""); + + ArrayList<String[]> dataE = new ArrayList<String[]>(); + dataE.add(new String[] { "3,three;iii" }); + ArrayList<DataByteArray[]> expected = TestHelper.getExpected(dataE, pattern2); + + pigServer.registerQuery("A = LOAD 'file:" + Util.encodeEscape(filename) + + "' USING " + DummyRegExLoader2.class.getName() + "() AS (key, val);"); + Iterator<?> it = pigServer.openIterator("A"); + int tupleCount = 0; + while (it.hasNext()) { + Tuple tuple = (Tuple) it.next(); + if (tuple == null) + break; + else { + TestHelper.examineTuple(expected, tuple, tupleCount); + tupleCount++; + } + } + assertEquals(1, tupleCount); + } + }