Author: thiru
Date: Fri Jan 15 17:19:45 2010
New Revision: 899720
URL: http://svn.apache.org/viewvc?rev=899720&view=rev
Log:
AVRO-316. Optimizing inner loop functions of Avro io
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonDecoder.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingDecoder.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Parser.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/SkipParser.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java
Modified: hadoop/avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Fri Jan 15 17:19:45 2010
@@ -226,6 +226,8 @@
AVRO-315. Performance improvements to BinaryDecoder (thiru)
+ AVRO-316. Optimizing inner loop functions of Avro io (thiru)
+
BUG FIXES
AVRO-176. Safeguard against bad istreams before reading. (sbanacho)
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonDecoder.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonDecoder.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonDecoder.java
(original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonDecoder.java
Fri Jan 15 17:19:45 2010
@@ -389,7 +389,7 @@
String fn = in.getCurrentName();
if (fa.fname.equals(fn)) {
in.nextToken();
- return Symbol.CONTINUE;
+ return null;
} else {
throw new AvroTypeException("Expected field name " + fa.fname +
" got " + in.getCurrentName());
@@ -410,7 +410,7 @@
} else {
throw new AvroTypeException("Unknown action symbol " + top);
}
- return Symbol.CONTINUE;
+ return null;
}
private AvroTypeException error(String type) {
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
(original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
Fri Jan 15 17:19:45 2010
@@ -233,7 +233,7 @@
} else {
throw new AvroTypeException("Unknown action symbol " + top);
}
- return Symbol.CONTINUE;
+ return null;
}
}
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java
Fri Jan 15 17:19:45 2010
@@ -157,7 +157,7 @@
@Override
public Symbol doAction(Symbol input, Symbol top) throws IOException {
if (top instanceof Symbol.FieldAdjustAction) {
- return input == Symbol.FIELD_ACTION ? top : Symbol.CONTINUE;
+ return input == Symbol.FIELD_ACTION ? top : null;
} if (top instanceof Symbol.ResolvingAction) {
Symbol.ResolvingAction t = (Symbol.ResolvingAction) top;
if (t.reader != input) {
@@ -183,7 +183,7 @@
} else {
throw new AvroTypeException("Unknown action: " + top);
}
- return Symbol.CONTINUE;
+ return null;
}
@Override
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingDecoder.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingDecoder.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingDecoder.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingDecoder.java
Fri Jan 15 17:19:45 2010
@@ -226,7 +226,7 @@
}
public Symbol doAction(Symbol input, Symbol top) throws IOException {
- return Symbol.CONTINUE;
+ return null;
}
}
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
Fri Jan 15 17:19:45 2010
@@ -192,7 +192,7 @@
@Override
public Symbol doAction(Symbol input, Symbol top) throws IOException {
- return Symbol.CONTINUE;
+ return null;
}
/** Have we written at least one item into the current collection? */
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/JsonGrammarGenerator.java
Fri Jan 15 17:19:45 2010
@@ -64,13 +64,11 @@
return Symbol.seq(new Symbol.EnumLabelsAction(sc.getEnumSymbols()),
Symbol.ENUM);
case ARRAY:
- return Symbol.seq(Symbol.ARRAY_END,
- Symbol.repeat(Symbol.ARRAY_END,
+ return Symbol.seq(Symbol.repeat(Symbol.ARRAY_END,
Symbol.ITEM_END, generate(sc.getElementType(), seen)),
Symbol.ARRAY_START);
case MAP:
- return Symbol.seq(Symbol.MAP_END,
- Symbol.repeat(Symbol.MAP_END,
+ return Symbol.seq(Symbol.repeat(Symbol.MAP_END,
Symbol.ITEM_END, generate(sc.getValueType(), seen),
Symbol.MAP_KEY_MARKER, Symbol.STRING),
Symbol.MAP_START);
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Parser.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Parser.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Parser.java
(original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Parser.java
Fri Jan 15 17:19:45 2010
@@ -34,6 +34,16 @@
* provide this help.
*/
public interface ActionHandler {
+ /**
+ * Handle the action symbol <tt>top</tt> when the <tt>input</tt> is
+ * sought to be taken off the stack.
+ * @param input The input symbol from the caller of advance
+ * @param top The symbol at the top of the stack.
+ * @return <tt>null</tt> if advance() is to continue processing the
+ * stack. If not <tt>null</tt> the return value will be returned
+ * by advance().
+ * @throws IOException
+ */
Symbol doAction(Symbol input, Symbol top) throws IOException;
}
@@ -69,21 +79,25 @@
public final Symbol advance(Symbol input) throws IOException {
for (; ;) {
Symbol top = stack[--pos];
- if (top.kind == Symbol.Kind.TERMINAL) {
- if (top == input) {
- return top; // A common case
- } else {
- throw new AvroTypeException("Attempt to process a "
- + input + " when a "
- + top + " was expected.");
+ if (top == input) {
+ return top; // A common case
+ }
+
+ Symbol.Kind k = top.kind;
+ if (k == Symbol.Kind.IMPLICIT_ACTION) {
+ Symbol result = symbolHandler.doAction(input, top);
+ if (result != null) {
+ return result;
}
- } else if (top.kind == Symbol.Kind.IMPLICIT_ACTION) {
- Symbol result = symbolHandler.doAction(input, top);
- if (result != Symbol.CONTINUE) {
- return result;
- }
+ } else if (k == Symbol.Kind.TERMINAL) {
+ throw new AvroTypeException("Attempt to process a "
+ + input + " when a "
+ + top + " was expected.");
+ } else if (k == Symbol.Kind.REPEATER
+ && input == ((Symbol.Repeater) top).end) {
+ return input;
} else {
- pushProduction(input, top);
+ pushProduction(top);
}
}
}
@@ -102,7 +116,7 @@
symbolHandler.doAction(null, top);
} else if (top.kind != Symbol.Kind.TERMINAL) {
pos--;
- pushProduction(null, top);
+ pushProduction(top);
} else {
break;
}
@@ -129,19 +143,15 @@
* Pushes the production for the given symbol <tt>sym</tt>.
* If <tt>sym</tt> is a repeater and <tt>input</tt> is either
* {@link Symbol#ARRAY_END} or {@link Symbol#MAP_END} pushes nothing.
- * @param input
* @param sym
*/
- public final void pushProduction(Symbol input, Symbol sym) {
- if (sym.kind != Symbol.Kind.REPEATER ||
- input != ((Symbol.Repeater) sym).end) {
- Symbol[] p = sym.production;
- while (pos + p.length > stack.length) {
- expandStack();
- }
- System.arraycopy(p, 0, stack, pos, p.length);
- pos += p.length;
+ public final void pushProduction(Symbol sym) {
+ Symbol[] p = sym.production;
+ while (pos + p.length > stack.length) {
+ expandStack();
}
+ System.arraycopy(p, 0, stack, pos, p.length);
+ pos += p.length;
}
/**
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
Fri Jan 15 17:19:45 2010
@@ -98,15 +98,13 @@
break;
case ARRAY:
- return Symbol.seq(Symbol.ARRAY_END,
- Symbol.repeat(Symbol.ARRAY_END,
+ return Symbol.seq(Symbol.repeat(Symbol.ARRAY_END,
generate(writer.getElementType(),
reader.getElementType(), seen)),
Symbol.ARRAY_START);
case MAP:
- return Symbol.seq(Symbol.MAP_END,
- Symbol.repeat(Symbol.MAP_END,
+ return Symbol.seq(Symbol.repeat(Symbol.MAP_END,
generate(writer.getValueType(),
reader.getValueType(), seen), Symbol.STRING),
Symbol.MAP_START);
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/SkipParser.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/SkipParser.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/SkipParser.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/SkipParser.java
Fri Jan 15 17:19:45 2010
@@ -64,7 +64,7 @@
skipHandler.skipAction();
} else {
--pos;
- pushProduction(null, top);
+ pushProduction(top);
}
continue outer;
}
@@ -79,7 +79,7 @@
int target = pos;
Symbol repeater = stack[--pos];
assert repeater.kind == Symbol.Kind.REPEATER;
- pushProduction(null, repeater);
+ pushProduction(repeater);
skipTo(target);
}
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java
(original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java
Fri Jan 15 17:19:45 2010
@@ -379,7 +379,6 @@
public static final Symbol ITEM_END = new Symbol.Terminal("item-end");
/* a pseudo terminal used by parsers */
- public static final Symbol CONTINUE = new Symbol.Terminal("continue");
public static final Symbol FIELD_ACTION =
new Symbol.Terminal("field-action");
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
---
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
(original)
+++
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
Fri Jan 15 17:19:45 2010
@@ -71,12 +71,10 @@
return Symbol.seq(new Symbol.IntCheckAction(sc.getEnumSymbols().size()),
Symbol.ENUM);
case ARRAY:
- return Symbol.seq(Symbol.ARRAY_END,
- Symbol.repeat(Symbol.ARRAY_END, generate(sc.getElementType(), seen)),
+ return Symbol.seq(Symbol.repeat(Symbol.ARRAY_END,
generate(sc.getElementType(), seen)),
Symbol.ARRAY_START);
case MAP:
- return Symbol.seq(Symbol.MAP_END,
- Symbol.repeat(Symbol.MAP_END,
+ return Symbol.seq(Symbol.repeat(Symbol.MAP_END,
generate(sc.getValueType(), seen), Symbol.STRING),
Symbol.MAP_START);
case RECORD: {
Modified: hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java?rev=899720&r1=899719&r2=899720&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java
(original)
+++ hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java Fri
Jan 15 17:19:45 2010
@@ -20,6 +20,9 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import java.util.Random;
import org.apache.avro.Schema;
@@ -34,39 +37,45 @@
public static void main(String[] args) throws IOException {
- Test[] tests = null;
- if (args.length == 0) {
- tests = new Test[] { new ReadInt(),
- new ReadLong(), new ReadFloat(), new ReadDouble() };
- } else if (args.length == 1) {
- if (args[0].equals("-i")) {
- tests = new Test[] { new ReadInt() };
- } else if (args[0].equals("-f")) {
- tests = new Test[] { new ReadFloat() };
- } else if (args[0].equals("-d")) {
- tests = new Test[] { new ReadDouble() };
- } else if (args[0].equals("-l")) {
- tests = new Test[] { new ReadLong() };
- }
- } else {
- usage();
- System.exit(1);
+ List<Test> tests = new ArrayList<Test>();
+ for (String a : args) {
+ if (a.equals("-i")) {
+ tests.add(new ReadInt());
+ } else if (a.equals("-f")) {
+ tests.add(new ReadFloat());
+ } else if (a.equals("-d")) {
+ tests.add(new ReadDouble());
+ } else if (a.equals("-l")) {
+ tests.add(new ReadLong());
+ } else if (a.equals("-R")) {
+ tests.add(new RepeaterTest());
+ } else {
+ usage();
+ System.exit(1);
+ }
+ }
+ if (tests.isEmpty()) {
+ tests.addAll(Arrays.asList(new Test[] {
+ new ReadInt(), new ReadLong(),
+ new ReadFloat(), new ReadDouble(),
+ }));
}
for (Test t : tests) {
// warmup JVM
- for (int i = 0; i < CYCLES; i++) {
- t.read();
- }
- // test
- long s = 0;
- for (int i = 0; i < CYCLES; i++) {
- long l = t.read();
- // System.out.println("** " + l);
- s += l;
- }
- s /= 1000;
- System.out.println(t.name + "(" + t.schema + "): " + s/1000 + " ms, " +
(CYCLES * (double)COUNT)/s + " million numbers decoded /sec" );
+ for (int i = 0; i < CYCLES; i++) {
+ t.read();
+ }
+ // test
+ long s = 0;
+ for (int i = 0; i < CYCLES; i++) {
+ long l = t.read();
+ // System.out.println("** " + l);
+ s += l;
+ }
+ s /= 1000;
+ System.out.println(t.name + ": " + (s / 1000) + " ms, "
+ + ((CYCLES * (double) COUNT) / s) + " million entries/sec");
}
}
@@ -82,8 +91,9 @@
genData(e);
data = bao.toByteArray();
}
+
public final long read() throws IOException {
- Decoder d = new BinaryDecoder(new ByteArrayInputStream(data));
+ Decoder d = getDecoder();
long t = System.nanoTime();
for (long l = d.readArrayStart(); l > 0; l = d.arrayNext()) {
for (int j = 0; j < l; j++) {
@@ -92,6 +102,11 @@
}
return (System.nanoTime() - t);
}
+
+ protected Decoder getDecoder() throws IOException {
+ return new BinaryDecoder(new ByteArrayInputStream(data));
+ }
+
abstract void genData(Encoder e) throws IOException;
abstract void readInternal(Decoder d) throws IOException;
}
@@ -179,11 +194,50 @@
d.readDouble();
}
}
+
+ private static class RepeaterTest extends Test {
+ public RepeaterTest() throws IOException {
+ super("RepeaterTest", "{ \"type\": \"array\", \"items\":\n"
+ + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
+ + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f3\", \"type\": \"double\" }\n"
+ + "] } }");
+ }
+
+ @Override
+ protected void genData(Encoder e) throws IOException {
+ e.writeArrayStart();
+ e.setItemCount(COUNT);
+ Random r = new Random();
+ for (int i = 0; i < COUNT; i++) {
+ e.writeDouble(r.nextDouble());
+ e.writeDouble(r.nextDouble());
+ e.writeDouble(r.nextDouble());
+ }
+ e.writeArrayEnd();
+ }
+
+ @Override
+ protected void readInternal(Decoder d) throws IOException {
+ d.readDouble();
+ d.readDouble();
+ d.readDouble();
+ }
+
+ @Override
+ protected Decoder getDecoder() throws IOException {
+ return new ValidatingDecoder(schema, super.getDecoder());
+ }
+
+ }
+
private static void usage() {
System.out.println("Usage: Perf { -i | -l | -f | -d }");
- System.out.println(" -i measures readInt() performance");
- System.out.println(" -l measures readLong() performance");
- System.out.println(" -f measures readFloat() performance");
- System.out.println(" -d measures readDouble() performance");
+ System.out.println(" -i readInt() performance");
+ System.out.println(" -l readLong() performance");
+ System.out.println(" -f readFloat() performance");
+ System.out.println(" -d readDouble() performance");
+ System.out.println(" -R repeater performance in validating decoder");
}
}