Author: thiru
Date: Wed Feb 3 01:50:50 2010
New Revision: 905865
URL: http://svn.apache.org/viewvc?rev=905865&view=rev
Log:
AVRO-388. Using ResolvingDecoder in GenericDatumReader
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Protocol.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Schema.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/genavro/genavro.jj
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectData.java
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectDatumReader.java
hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/TestProtocolGeneric.java
hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java
Modified: hadoop/avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Wed Feb 3 01:50:50 2010
@@ -277,6 +277,8 @@
AVRO-384. Add schema projection to the C implementation (massie)
+ AVRO-388. Using ResolvingDecoder in GenericDatumReader (thiru)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Protocol.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Protocol.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Protocol.java (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Protocol.java Wed Feb 3 01:50:50 2010
@@ -347,8 +347,9 @@
JsonNode fieldTypeNode = field.get("type");
if (fieldTypeNode == null)
throw new SchemaParseException("No param type: "+field);
- fields.put(fieldNameNode.getTextValue(),
- new Field(Schema.parse(fieldTypeNode,types),
+ String name = fieldNameNode.getTextValue();
+ fields.put(name,
+ new Field(name, Schema.parse(fieldTypeNode,types),
null /* message fields don't have docs */,
field.get("default")));
}
Modified: hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Schema.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Schema.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Schema.java (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/Schema.java Wed Feb 3 01:50:50 2010
@@ -296,21 +296,26 @@
private Order() { this.name = this.name().toLowerCase(); }
};
+ private final String name; // name of the field.
private int position = -1;
private final Schema schema;
private final String doc;
private final JsonNode defaultValue;
private final Order order;
- public Field(Schema schema, String doc, JsonNode defaultValue) {
- this(schema, doc, defaultValue, Order.ASCENDING);
+ public Field(String name, Schema schema, String doc,
+ JsonNode defaultValue) {
+ this(name, schema, doc, defaultValue, Order.ASCENDING);
}
- public Field(Schema schema, String doc, JsonNode defaultValue, Order order) {
+ public Field(String name, Schema schema, String doc,
+ JsonNode defaultValue, Order order) {
+ this.name = name;
this.schema = schema;
this.doc = doc;
this.defaultValue = defaultValue;
this.order = order;
}
+ public String name() { return name; };
/** The position of this field within the record. */
public int pos() { return position; }
/** This field's {@link Schema}. */
@@ -848,8 +853,8 @@
JsonNode orderNode = field.get("order");
if (orderNode != null)
order =
Field.Order.valueOf(orderNode.getTextValue().toUpperCase());
- fields.put(fieldName,
- new Field(fieldSchema, fieldDoc, field.get("default"), order));
+ fields.put(fieldName, new Field(fieldName, fieldSchema,
+ fieldDoc, field.get("default"), order));
}
result.setFields(fields);
} else if (type.equals("enum")) { // enum
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/genavro/genavro.jj
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/genavro/genavro.jj?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/genavro/genavro.jj (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/genavro/genavro.jj Wed Feb 3 01:50:50 2010
@@ -1142,7 +1142,7 @@
{
name = Identifier()
{
- fields.put(name, new Field(type, null, null));
+ fields.put(name, new Field(name, type, null, null));
}
}
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java Wed Feb 3 01:50:50 2010
@@ -19,156 +19,92 @@
import java.io.IOException;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.Map;
-import java.util.Set;
import java.nio.ByteBuffer;
-import org.codehaus.jackson.JsonNode;
-
import org.apache.avro.AvroRuntimeException;
-import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
-import org.apache.avro.Schema.Type;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
+import org.apache.avro.io.ResolvingDecoder;
import org.apache.avro.util.Utf8;
/** {@link DatumReader} for generic Java objects. */
public class GenericDatumReader<D> implements DatumReader<D> {
private Schema actual;
private Schema expected;
+ private Object resolver;
public GenericDatumReader() {}
public GenericDatumReader(Schema actual) {
- setSchema(actual);
- setExpected(actual);
+ this.actual = actual;
+ this.expected = actual;
}
- public GenericDatumReader(Schema actual, Schema expected) {
- setSchema(actual);
- setExpected(expected);
+ public GenericDatumReader(Schema actual, Schema expected)
+ throws IOException {
+ this.actual = actual;
+ this.expected = expected;
}
- public void setSchema(Schema actual) { this.actual = actual; }
+ @Override
+ public void setSchema(Schema actual) {
+ this.actual = actual;
+ if (expected == null) {
+ expected = actual;
+ }
+ resolver = null;
+ }
- public void setExpected(Schema expected) { this.expected = expected; }
+ public void setExpected(Schema expected) throws IOException {
+ this.expected = expected;
+ }
@SuppressWarnings("unchecked")
public D read(D reuse, Decoder in) throws IOException {
- return (D) read(reuse, actual, expected != null ? expected : actual, in);
+ if (resolver == null) {
+ resolver = ResolvingDecoder.resolve(actual, expected);
+ }
+ return (D) read(reuse, expected, new ResolvingDecoder(resolver, in));
}
/** Called to read data.*/
- protected Object read(Object old, Schema actual,
- Schema expected, Decoder in) throws IOException {
- if (actual.getType() == Type.UNION) // resolve unions
- actual = actual.getTypes().get((int)in.readIndex());
- if (expected.getType() == Type.UNION)
- expected = resolveExpected(actual, expected);
- switch (actual.getType()) {
- case RECORD: return readRecord(old, actual, expected, in);
- case ENUM: return readEnum(actual, expected, in);
- case ARRAY: return readArray(old, actual, expected, in);
- case MAP: return readMap(old, actual, expected, in);
- case FIXED: return readFixed(old, actual, expected, in);
- case STRING: return readString(old, actual, expected, in);
+ protected Object read(Object old, Schema expected,
+ ResolvingDecoder in) throws IOException {
+ switch (expected.getType()) {
+ case RECORD: return readRecord(old, expected, in);
+ case ENUM: return readEnum(expected, in);
+ case ARRAY: return readArray(old, expected, in);
+ case MAP: return readMap(old, expected, in);
+ case UNION: return read(old, expected.getTypes().get(in.readIndex()), in);
+ case FIXED: return readFixed(old, expected, in);
+ case STRING: return readString(old, expected, in);
case BYTES: return readBytes(old, in);
- case INT: return readInt(old, actual, expected, in);
+ case INT: return readInt(old, expected, in);
case LONG: return in.readLong();
case FLOAT: return in.readFloat();
case DOUBLE: return in.readDouble();
case BOOLEAN: return in.readBoolean();
case NULL: in.readNull(); return null;
- default: throw new AvroRuntimeException("Unknown type: "+actual);
+ default: throw new AvroRuntimeException("Unknown type: " + expected);
}
}
- private Schema resolveExpected(Schema actual, Schema expected) {
- // first scan for exact match
- for (Schema branch : expected.getTypes())
- if (branch.getType() == actual.getType())
- switch (branch.getType()) {
- case RECORD:
- case ENUM:
- case FIXED:
- String name = branch.getName();
- if (name == null || name.equals(actual.getName()))
- return branch;
- break;
- default:
- return branch;
- }
- // then scan match via numeric promotion
- for (Schema branch : expected.getTypes())
- switch (actual.getType()) {
- case INT:
- switch (branch.getType()) {
- case LONG: case FLOAT: case DOUBLE:
- return branch;
- }
- break;
- case LONG:
- switch (branch.getType()) {
- case FLOAT: case DOUBLE:
- return branch;
- }
- break;
- case FLOAT:
- switch (branch.getType()) {
- case DOUBLE:
- return branch;
- }
- break;
- }
- throw new AvroTypeException("Expected "+expected+", found "+actual);
- }
-
/** Called to read a record instance. May be overridden for alternate record
* representations.*/
- protected Object readRecord(Object old, Schema actual, Schema expected,
- Decoder in) throws IOException {
- /* TODO: We may want to compute the expected and actual mapping and cache
- * the mapping (keyed by <actual, expected>). */
- String recordName = expected.getName();
- if (recordName != null && !recordName.equals(actual.getName()))
- throw new AvroTypeException("Expected "+expected+", found "+actual);
- Map<String, Field> expectedFields = expected.getFields();
- // all fields not in expected should be removed by newRecord.
+ protected Object readRecord(Object old, Schema expected,
+ ResolvingDecoder in) throws IOException {
Object record = newRecord(old, expected);
- int size = 0;
- for (Map.Entry<String, Field> entry : actual.getFields().entrySet()) {
- String fieldName = entry.getKey();
- Field actualField = entry.getValue();
- Field expectedField =
- expected == actual ? actualField : expectedFields.get(entry.getKey());
- if (expectedField == null) {
- skip(actualField.schema(), in);
- continue;
- }
- int fieldPosition = expectedField.pos();
- Object oldDatum =
- (old != null) ? getField(record, fieldName, fieldPosition) : null;
- setField(record, fieldName, fieldPosition,
- read(oldDatum,actualField.schema(),expectedField.schema(), in));
- size++;
- }
- if (expectedFields.size() > size) { // not all fields set
- Set<String> actualFields = actual.getFields().keySet();
- for (Map.Entry<String, Field> entry : expectedFields.entrySet()) {
- String fieldName = entry.getKey();
- if (!actualFields.contains(fieldName)) { // an unset field
- Field f = entry.getValue();
- JsonNode json = f.defaultValue();
- if (json == null) // no default
- throw new AvroTypeException("No default value for: "+fieldName);
- setField(record, fieldName, f.pos(), // set default
- defaultFieldValue(old, f.schema(), json));
- }
- }
+
+ for (Field f : in.readFieldOrder()) {
+ int pos = f.pos();
+ String name = f.name();
+ Object oldDatum = (old != null) ? getField(record, name, pos) : null;
+ setField(record, name, pos, read(oldDatum, f.schema(), in));
}
+
return record;
}
@@ -186,70 +122,17 @@
return ((IndexedRecord)record).get(position);
}
- /** Called by the default implementation of {@link #readRecord} to construct
- a default value for a field. */
- protected Object defaultFieldValue(Object old, Schema schema, JsonNode json)
- throws IOException {
- switch (schema.getType()) {
- case RECORD:
- Object record = newRecord(old, schema);
- for (Map.Entry<String, Field> entry : schema.getFields().entrySet()) {
- String name = entry.getKey();
- Field f = entry.getValue();
- JsonNode v = json.get(name);
- if (v == null) v = f.defaultValue();
- if (v == null)
- throw new AvroTypeException("No default value for: "+name);
- Object o = old != null ? getField(old, name, f.pos()) : null;
- setField(record, name, f.pos(), defaultFieldValue(o, f.schema(), v));
- }
- return record;
- case ENUM:
- return createEnum(json.getTextValue(), schema);
- case ARRAY:
- Object array = newArray(old, json.size(), schema);
- Schema element = schema.getElementType();
- int pos = 0;
- for (JsonNode node : json)
- addToArray(array, pos++,
- defaultFieldValue(peekArray(array), element, node));
- return array;
- case MAP:
- Object map = newMap(old, json.size());
- Schema value = schema.getValueType();
- for (Iterator<String> i = json.getFieldNames(); i.hasNext();) {
- String key = i.next();
- addToMap(map, new Utf8(key),
- defaultFieldValue(null, value, json.get(key)));
- }
- return map;
- case UNION: return defaultFieldValue(old, schema.getTypes().get(0), json);
- case FIXED: return createFixed(old,json.getTextValue().getBytes("ISO-8859-1"),schema);
- case STRING: return createString(json.getTextValue());
- case BYTES: return createBytes(json.getTextValue().getBytes("ISO-8859-1"));
- case INT: return json.getIntValue();
- case LONG: return json.getLongValue();
- case FLOAT: return (float)json.getDoubleValue();
- case DOUBLE: return json.getDoubleValue();
- case BOOLEAN: return json.getBooleanValue();
- case NULL: return null;
- default: throw new AvroRuntimeException("Unknown type: "+actual);
- }
+ /** Called by the default implementation of {@link #readRecord} to remove a
+ * record field value from a reused instance. The default implementation is
+ * for {@link GenericRecord}.*/
+ protected void removeField(Object record, String field, int position) {
+ ((GenericRecord)record).put(position, null);
}
-
+
/** Called to read an enum value. May be overridden for alternate enum
* representations. By default, returns the symbol as a String. */
- protected Object readEnum(Schema actual, Schema expected, Decoder in)
- throws IOException {
- String name = expected.getName();
- if (name != null && !name.equals(actual.getName()))
- throw new AvroTypeException("Expected "+expected+", found "+actual);
- String symbol = actual.getEnumSymbols().get(in.readEnum());
- if (expected.hasEnumSymbol(symbol)) {
- return createEnum(symbol, expected);
- } else {
- throw new AvroTypeException("Symbol " + symbol + " not in " + expected);
- }
+ protected Object readEnum(Schema expected, Decoder in) throws IOException {
+ return createEnum(expected.getEnumSymbols().get(in.readEnum()), expected);
}
/** Called to create an enum value. May be overridden for alternate enum
@@ -258,9 +141,8 @@
/** Called to read an array instance. May be overridden for alternate array
* representations.*/
- protected Object readArray(Object old, Schema actual, Schema expected,
- Decoder in) throws IOException {
- Schema actualType = actual.getElementType();
+ protected Object readArray(Object old, Schema expected,
+ ResolvingDecoder in) throws IOException {
Schema expectedType = expected.getElementType();
long l = in.readArrayStart();
long base = 0;
@@ -268,12 +150,10 @@
Object array = newArray(old, (int) l, expected);
do {
for (long i = 0; i < l; i++) {
- addToArray(array, base+i,
- read(peekArray(array), actualType, expectedType, in));
+ addToArray(array, base + i, read(peekArray(array), expectedType, in));
}
base += l;
} while ((l = in.arrayNext()) > 0);
-
return array;
} else {
return newArray(old, 0, expected);
@@ -297,18 +177,15 @@
/** Called to read a map instance. May be overridden for alternate map
* representations.*/
- protected Object readMap(Object old, Schema actual, Schema expected,
- Decoder in) throws IOException {
- Schema aValue = actual.getValueType();
+ protected Object readMap(Object old, Schema expected,
+ ResolvingDecoder in) throws IOException {
Schema eValue = expected.getValueType();
long l = in.readMapStart();
Object map = newMap(old, (int) l);
if (l > 0) {
do {
for (int i = 0; i < l; i++) {
- addToMap(map,
- readString(null, in),
- read(null, aValue, eValue, in));
+ addToMap(map, readString(null, in), read(null, eValue, in));
}
} while ((l = in.mapNext()) > 0);
}
@@ -324,13 +201,10 @@
/** Called to read a fixed value. May be overridden for alternate fixed
* representations. By default, returns {@link GenericFixed}. */
- protected Object readFixed(Object old, Schema actual, Schema expected,
- Decoder in)
+ protected Object readFixed(Object old, Schema expected, Decoder in)
throws IOException {
- if (!actual.equals(expected))
- throw new AvroTypeException("Expected "+expected+", found "+actual);
GenericFixed fixed = (GenericFixed)createFixed(old, expected);
- in.readFixed(fixed.bytes(), 0, actual.getFixedSize());
+ in.readFixed(fixed.bytes(), 0, expected.getFixedSize());
return fixed;
}
@@ -392,7 +266,7 @@
/** Called to read strings. Subclasses may override to use a different
* string representation. By default, this calls {@link
* #readString(Object,Decoder)}.*/
- protected Object readString(Object old, Schema actual, Schema expected,
+ protected Object readString(Object old, Schema expected,
Decoder in) throws IOException {
return readString(old, in);
}
@@ -418,8 +292,8 @@
/** Called to read integers. Subclasses may override to use a different
* integer representation. By default, this calls {@link
* Decoder#readInt()}.*/
- protected Object readInt(Object old, Schema actual, Schema expected,
- Decoder in) throws IOException {
+ protected Object readInt(Object old, Schema expected, Decoder in)
+ throws IOException {
return in.readInt();
}
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java Wed Feb 3 01:50:50 2010
@@ -44,7 +44,37 @@
public ResolvingDecoder(Schema writer, Schema reader, Decoder in)
throws IOException {
- super(new ResolvingGrammarGenerator().generate(writer, reader), in);
+ this(resolve(writer, reader), in);
+ }
+
+ /**
+ * Constructs a <tt>ResolvingDecoder</tt> using the given resolver.
+ * The resolver must have been returned by a previous call to
+ * {@link #resolve(Schema, Schema)}.
+ * @param resolver The resolver to use.
+ * @param in The underlying decoder.
+ * @throws IOException
+ */
+ public ResolvingDecoder(Object resolver, Decoder in)
+ throws IOException {
+ super((Symbol) resolver, in);
+ }
+
+ /**
+ * Produces an opaque resolver that can be used to construct a new
+ * {@link ResolvingDecoder#ResolvingDecoder(Object, Decoder)}. The
+ * returned Object is immutable and hence can be simultaneously used
+ * in many ResolvingDecoders. This method is reasonably expensive, the
+ * users are encouraged to cache the result.
+ *
+ * @param writer The writer's schema.
+ * @param reader The reader's schema.
+ * @return The opaque resolver.
+ * @throws IOException
+ */
+ public static Object resolve(Schema writer, Schema reader)
+ throws IOException {
+ return new ResolvingGrammarGenerator().generate(writer, reader);
}
/** Returns the actual order in which the reader's fields will be
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectData.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectData.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectData.java (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectData.java Wed Feb 3 01:50:50 2010
@@ -280,8 +280,8 @@
for (Field field : getFields(c))
if ((field.getModifiers()&(Modifier.TRANSIENT|Modifier.STATIC))==0){
Schema fieldSchema = createFieldSchema(field, names);
- fields.put(field.getName(), new Schema.Field(fieldSchema,
- null /* doc */, null));
+ fields.put(field.getName(), new Schema.Field(field.getName(),
+ fieldSchema, null /* doc */, null));
}
schema.setFields(fields);
}
@@ -376,7 +376,8 @@
String paramName = paramNames.length == paramTypes.length
? paramNames[i]
: paramSchema.getName()+i;
- fields.put(paramName, new Schema.Field(paramSchema, null /* doc */, null));
+ fields.put(paramName, new Schema.Field(paramName, paramSchema,
+ null /* doc */, null));
}
Schema request = Schema.createRecord(fields);
Modified:
hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectDatumReader.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectDatumReader.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectDatumReader.java (original)
+++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/reflect/ReflectDatumReader.java Wed Feb 3 01:50:50 2010
@@ -99,7 +99,7 @@
@Override
@SuppressWarnings(value="unchecked")
- protected Object readString(Object old, Schema actual, Schema s,
+ protected Object readString(Object old, Schema s,
Decoder in) throws IOException {
String value = (String)readString(null, in);
Class c = ReflectData.getClassProp(s, ReflectData.CLASS_PROP);
@@ -135,7 +135,7 @@
}
@Override
- protected Object readInt(Object old, Schema actual,
+ protected Object readInt(Object old,
Schema expected, Decoder in) throws IOException {
Object value = in.readInt();
if (Short.class.getName().equals(expected.getProp(ReflectData.CLASS_PROP)))
Modified:
hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/TestProtocolGeneric.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/TestProtocolGeneric.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/TestProtocolGeneric.java (original)
+++ hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/TestProtocolGeneric.java Wed Feb 3 01:50:50 2010
@@ -164,9 +164,11 @@
Protocol protocol = new Protocol("Simple", "org.apache.avro.test");
LinkedHashMap<String,Field> fields = new LinkedHashMap<String,Field>();
fields.put("extra",
- new Schema.Field(Schema.create(Schema.Type.BOOLEAN), null, null));
+ new Schema.Field("extra", Schema.create(Schema.Type.BOOLEAN),
+ null, null));
fields.put("greeting",
- new Schema.Field(Schema.create(Schema.Type.STRING), null, null));
+ new Schema.Field("greeting", Schema.create(Schema.Type.STRING),
+ null, null));
Protocol.Message message =
protocol.createMessage("hello",
null /* doc */,
Modified: hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java?rev=905865&r1=905864&r2=905865&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java (original)
+++ hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java Wed Feb 3 01:50:50 2010
@@ -19,6 +19,7 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -27,6 +28,7 @@
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericDatumReader;
/**
* Performance tests for various low level operations of
@@ -55,6 +57,14 @@
tests.add(new ResolverTest());
} else if (a.equals("-M")) {
tests.add(new MigrationTest());
+ } else if (a.equals("-G")) {
+ tests.add(new GenericReaderTest());
+ } else if (a.equals("-Gd")) {
+ tests.add(new GenericReaderWithDefaultTest());
+ } else if (a.equals("-Go")) {
+ tests.add(new GenericReaderWithOutOfOrderTest());
+ } else if (a.equals("-Gp")) {
+ tests.add(new GenericReaderWithPromotionTest());
} else {
usage();
System.exit(1);
@@ -65,13 +75,17 @@
new ReadInt(), new ReadLong(),
new ReadFloat(), new ReadDouble(),
new RepeaterTest(), new NestedRecordTest(),
+ new ResolverTest(), new MigrationTest(),
+ new GenericReaderTest(), new GenericReaderWithDefaultTest(),
+ new GenericReaderWithOutOfOrderTest(),
+ new GenericReaderWithPromotionTest(),
}));
}
for (Test t : tests) {
// warmup JVM
- for (int i = 0; i < CYCLES; i++) {
- t.read();
+ for (int i = 0; i < CYCLES; i++) {
+ t.read();
}
// test
long s = 0;
@@ -86,19 +100,38 @@
}
}
- private static abstract class Test {
+ private abstract static class Test {
+
+ /**
+ * Name of the test.
+ */
public final String name;
+
+ /**
+ * Reads the contents and returns the time taken in nanoseconds.
+ * @return The time taken to complete the operation.
+ * @throws IOException
+ */
+ abstract long read() throws IOException;
+
+ public Test(String name) {
+ this.name = name;
+ }
+ }
+
+ private static abstract class DecoderTest extends Test {
public final Schema schema;
protected byte[] data;
- public Test(String name, String json) throws IOException {
- this.name = name;
+ public DecoderTest(String name, String json) throws IOException {
+ super(name);
this.schema = Schema.parse(json);
ByteArrayOutputStream bao = new ByteArrayOutputStream();
Encoder e = new BinaryEncoder(bao);
genData(e);
data = bao.toByteArray();
}
-
+
+ @Override
public final long read() throws IOException {
Decoder d = getDecoder();
long t = System.nanoTime();
@@ -118,21 +151,21 @@
return new BinaryDecoder(new ByteArrayInputStream(data));
}
- /**
- * Use a fixed value seed for random number generation
- * to allow for better cross-run comparisons.
- */
- private static final long SEED = 19781210;
-
- protected static Random newRandom() {
- return new Random(SEED);
- }
-
abstract void genData(Encoder e) throws IOException;
abstract void readInternal(Decoder d) throws IOException;
}
- private static class ReadInt extends Test {
+ /**
+ * Use a fixed value seed for random number generation
+ * to allow for better cross-run comparisons.
+ */
+ private static final long SEED = 19781210;
+
+ protected static Random newRandom() {
+ return new Random(SEED);
+ }
+
+ private static class ReadInt extends DecoderTest {
public ReadInt() throws IOException {
this("ReadInt", "{ \"type\": \"array\", \"items\": \"int\"} ");
}
@@ -160,11 +193,13 @@
}
}
- private static class ReadLong extends Test {
+ private static class ReadLong extends DecoderTest {
public ReadLong() throws IOException {
super("ReadLong", "{ \"type\": \"array\", \"items\": \"long\"} ");
}
- @Override void genData(Encoder e) throws IOException {
+
+ @Override
+ void genData(Encoder e) throws IOException {
e.writeArrayStart();
e.setItemCount((COUNT / 4) *4);
Random r = newRandom();
@@ -183,12 +218,14 @@
}
}
- private static class ReadFloat extends Test {
+ private static class ReadFloat extends DecoderTest {
public ReadFloat() throws IOException {
super("ReadFloat", "{ \"type\": \"array\", \"items\": \"float\"} ");
}
- @Override void genData(Encoder e) throws IOException {
- e.writeArrayStart();
+
+ @Override
+ void genData(Encoder e) throws IOException {
+ e.writeArrayStart();
e.setItemCount(COUNT);
Random r = newRandom();
for (int i = 0; i < COUNT; i++) {
@@ -196,18 +233,21 @@
}
e.writeArrayEnd();
}
+
@Override
void readInternal(Decoder d) throws IOException {
d.readFloat();
}
}
- private static class ReadDouble extends Test {
+ private static class ReadDouble extends DecoderTest {
public ReadDouble() throws IOException {
super("ReadDouble", "{ \"type\": \"array\", \"items\": \"double\"} ");
}
- @Override void genData(Encoder e) throws IOException {
- e.writeArrayStart();
+
+ @Override
+ void genData(Encoder e) throws IOException {
+ e.writeArrayStart();
e.setItemCount(COUNT);
Random r = newRandom();
for (int i = 0; i < COUNT; i++) {
@@ -221,37 +261,43 @@
}
}
- private static class RepeaterTest extends Test {
+ private static final String REPEATER_SCHEMA =
+ "{ \"type\": \"array\", \"items\":\n"
+ + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
+ + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f3\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f4\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f5\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f6\", \"type\": \"int\" }\n"
+ + "] } }";
+
+ private static void generateRepeaterData(Encoder e) throws IOException {
+ e.writeArrayStart();
+ e.setItemCount(COUNT);
+ Random r = newRandom();
+ for (int i = 0; i < COUNT; i++) {
+ e.writeDouble(r.nextDouble());
+ e.writeDouble(r.nextDouble());
+ e.writeDouble(r.nextDouble());
+ e.writeInt(r.nextInt());
+ e.writeInt(r.nextInt());
+ e.writeInt(r.nextInt());
+ }
+ e.writeArrayEnd();
+ }
+ private static class RepeaterTest extends DecoderTest {
public RepeaterTest() throws IOException {
this("RepeaterTest");
}
public RepeaterTest(String name) throws IOException {
- super(name, "{ \"type\": \"array\", \"items\":\n"
- + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
- + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
- + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
- + "{ \"name\": \"f3\", \"type\": \"double\" },\n"
- + "{ \"name\": \"f4\", \"type\": \"int\" },\n"
- + "{ \"name\": \"f5\", \"type\": \"int\" },\n"
- + "{ \"name\": \"f6\", \"type\": \"int\" }\n"
- + "] } }");
+ super(name, REPEATER_SCHEMA);
}
@Override
protected void genData(Encoder e) throws IOException {
- e.writeArrayStart();
- e.setItemCount(COUNT);
- Random r = newRandom();
- for (int i = 0; i < COUNT; i++) {
- e.writeDouble(r.nextDouble());
- e.writeDouble(r.nextDouble());
- e.writeDouble(r.nextDouble());
- e.writeInt(r.nextInt());
- e.writeInt(r.nextInt());
- e.writeInt(r.nextInt());
- }
- e.writeArrayEnd();
+ generateRepeaterData(e);
}
@Override
@@ -284,6 +330,44 @@
}
+ private static final String MIGRATION_SCHEMA_WITH_DEFAULT =
+ "{ \"type\": \"array\", \"items\":\n"
+ + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
+ + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f3\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f4\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f5\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f6\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f7\", \"type\": \"string\", "
+ + "\"default\": \"undefined\" },\n"
+ + "{ \"name\": \"f8\", \"type\": \"string\","
+ + "\"default\": \"undefined\" }\n"
+ + "] } }";
+
+ private static final String MIGRATION_SCHEMA_WITH_OUT_OF_ORDER =
+ "{ \"type\": \"array\", \"items\":\n"
+ + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
+ + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f3\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f5\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f4\", \"type\": \"int\" },\n"
+ + "{ \"name\": \"f6\", \"type\": \"int\" }\n"
+ + "] } }";
+
+ private static final String MIGRATION_SCHEMA_WITH_PROMOTION =
+ "{ \"type\": \"array\", \"items\":\n"
+ + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
+ + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f3\", \"type\": \"double\" },\n"
+ + "{ \"name\": \"f4\", \"type\": \"long\" },\n"
+ + "{ \"name\": \"f5\", \"type\": \"long\" },\n"
+ + "{ \"name\": \"f6\", \"type\": \"long\" }\n"
+ + "] } }";
+
+
/**
* Tests the performance of introducing default values.
*/
@@ -291,19 +375,7 @@
private final Schema readerSchema;
public MigrationTest() throws IOException {
super("MigrationTest");
- readerSchema = Schema.parse( "{ \"type\": \"array\", \"items\":\n"
- + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n"
- + "{ \"name\": \"f1\", \"type\": \"double\" },\n"
- + "{ \"name\": \"f2\", \"type\": \"double\" },\n"
- + "{ \"name\": \"f3\", \"type\": \"double\" },\n"
- + "{ \"name\": \"f3_1\", \"type\": \"string\", "
- + "\"default\": \"undefined\" },\n"
- + "{ \"name\": \"f3_2\", \"type\": \"string\","
- + "\"default\": \"undefined\" },\n"
- + "{ \"name\": \"f4\", \"type\": \"int\" },\n"
- + "{ \"name\": \"f5\", \"type\": \"int\" },\n"
- + "{ \"name\": \"f6\", \"type\": \"int\" }\n"
- + "] } }");
+ readerSchema = Schema.parse(MIGRATION_SCHEMA_WITH_DEFAULT);
}
@Override
@@ -318,7 +390,7 @@
for (Field f : ff) {
if (f.pos() < 3) {
r.readDouble();
- } else if (f.pos() >= 5) {
+ } else if (f.pos() < 6) {
r.readInt();
} else {
r.readString(null);
@@ -327,6 +399,81 @@
}
}
+ private static class GenericReaderTest extends Test {
+ public final Schema writerSchema;
+ protected byte[] data;
+
+ public GenericReaderTest() throws IOException {
+ this("GenericReaderTest");
+ }
+
+ public GenericReaderTest(String name) throws IOException {
+ super(name);
+ this.writerSchema = Schema.parse(REPEATER_SCHEMA);
+ ByteArrayOutputStream bao = new ByteArrayOutputStream();
+ Encoder e = new BinaryEncoder(bao);
+ generateRepeaterData(e);
+ data = bao.toByteArray();
+ }
+
+ @Override
+ public final long read() throws IOException {
+ GenericDatumReader<Object> r = getReader();
+ long t = System.nanoTime();
+ Decoder d =
+ new BinaryDecoder(new ByteArrayInputStream(data));
+ for (; ;) {
+ try {
+ r.read(null, d);
+ } catch (EOFException e) {
+ break;
+ }
+ }
+
+ return (System.nanoTime() - t);
+ }
+
+ protected GenericDatumReader<Object> getReader() throws IOException {
+ return new GenericDatumReader<Object>(writerSchema);
+ }
+ }
+
+ private static class GenericReaderWithMigrationTest extends GenericReaderTest {
+ private final Schema readerSchema;
+ protected GenericReaderWithMigrationTest(String name, String readerSchema)
+ throws IOException {
+ super(name);
+ this.readerSchema = Schema.parse(readerSchema);
+ }
+
+ protected GenericDatumReader<Object> getReader() throws IOException {
+ return new GenericDatumReader<Object>(writerSchema, readerSchema);
+ }
+ }
+
+ private static class GenericReaderWithDefaultTest extends
+ GenericReaderWithMigrationTest {
+ public GenericReaderWithDefaultTest() throws IOException {
+ super("GenericReaderTestWithDefaultTest", MIGRATION_SCHEMA_WITH_DEFAULT);
+ }
+ }
+
+ private static class GenericReaderWithOutOfOrderTest extends
+ GenericReaderWithMigrationTest {
+ public GenericReaderWithOutOfOrderTest() throws IOException {
+ super("GenericReaderTestWithOutOfOrderTest",
+ MIGRATION_SCHEMA_WITH_OUT_OF_ORDER);
+ }
+ }
+
+ private static class GenericReaderWithPromotionTest extends
+ GenericReaderWithMigrationTest {
+ public GenericReaderWithPromotionTest() throws IOException {
+ super("GenericReaderTestWithPromotionTest",
+ MIGRATION_SCHEMA_WITH_PROMOTION);
+ }
+ }
+
private static class NestedRecordTest extends ReadInt {
public NestedRecordTest() throws IOException {
super("NestedRecordTest",
@@ -344,12 +491,17 @@
private static void usage() {
System.out.println("Usage: Perf { -i | -l | -f | -d }");
- System.out.println(" -i readInt() performance");
- System.out.println(" -l readLong() performance");
- System.out.println(" -f readFloat() performance");
- System.out.println(" -d readDouble() performance");
- System.out.println(" -R repeater performance in validating decoder");
- System.out.println(" -N nested record performance in validating decoder");
- System.out.println(" -S resolving decoder performance");
+ System.out.println(" -i readInt()");
+ System.out.println(" -l readLong()");
+ System.out.println(" -f readFloat()");
+ System.out.println(" -d readDouble()");
+ System.out.println(" -R repeater in validating decoder");
+ System.out.println(" -N nested record in validating decoder");
+ System.out.println(" -S resolving decoder");
+ System.out.println(" -M resolving decoder (with default fields)");
+ System.out.println(" -G GenericDatumReader");
+ System.out.println(" -Gd GenericDatumReader (with default fields)");
+ System.out.println(" -Go GenericDatumReader (with out-of-order fields)");
+ System.out.println(" -Gp GenericDatumReader (with promotion fields)");
}
}