Author: cutting
Date: Fri Nov 4 20:58:36 2011
New Revision: 1197768
URL: http://svn.apache.org/viewvc?rev=1197768&view=rev
Log:
AVRO-946. Java: Optimize union resolution when writing.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
Modified: avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Nov 4 20:58:36 2011
@@ -15,6 +15,8 @@ Avro 1.6.1 (unreleased)
OPTIMIZATIONS
+ AVRO-946. Java: Optimize union resolution when writing. (cutting)
+
IMPROVEMENTS
BUG FIXES
Modified: avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
--- avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java
(original)
+++ avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java Fri Nov
4 20:58:36 2011
@@ -91,6 +91,7 @@ public abstract class Schema {
INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL;
private String name;
private Type() { this.name = this.name().toLowerCase(); }
+ public String getName() { return name; }
};
private final Type type;
@@ -310,6 +311,11 @@ public abstract class Schema {
throw new AvroRuntimeException("Not a union: "+this);
}
+ /** If this is a union, return the index of the branch with the provided full name, or null if no branch has that name. */
+ public Integer getIndexNamed(String name) {
+ throw new AvroRuntimeException("Not a union: "+this);
+ }
+
/** If this is fixed, returns its size. */
public int getFixedSize() {
throw new AvroRuntimeException("Not fixed: "+this);
@@ -790,37 +796,24 @@ public abstract class Schema {
private static class UnionSchema extends Schema {
private final List<Schema> types;
+ private final Map<String,Integer> indexByName
+ = new HashMap<String,Integer>();
public UnionSchema(LockableArrayList<Schema> types) {
super(Type.UNION);
this.types = types.lock();
- int seen = 0;
- Set<String> seenNames = new HashSet<String>();
- for (Schema type : types) { // check legality of union
- switch (type.getType()) {
- case UNION:
+ int index = 0;
+ for (Schema type : types) {
+ if (type.getType() == Type.UNION)
throw new AvroRuntimeException("Nested union: "+this);
- case RECORD:
- case FIXED:
- case ENUM:
- String fullname = type.getFullName();
- if (fullname != null) {
- if (seenNames.add(fullname)) {
- continue;
- } else {
- throw new AvroRuntimeException("Duplicate name in union:" + fullname);
- }
- } else {
- throw new AvroRuntimeException("Nameless Record, Fixed, or Enum in union:"+this);
- }
- default:
- int mask = 1 << type.getType().ordinal();
- if ((seen & mask) != 0)
- throw new AvroRuntimeException("Ambiguous union: "+this);
- seen |= mask;
- }
+ String name = type.getFullName();
+ if (name == null)
+ throw new AvroRuntimeException("Nameless in union:"+this);
+ if (indexByName.put(name, index++) != null)
+ throw new AvroRuntimeException("Duplicate in union:" + name);
}
}
public List<Schema> getTypes() { return types; }
+ public Integer getIndexNamed(String name) { return indexByName.get(name); }
public boolean equals(Object o) {
if (o == this) return true;
if (!(o instanceof UnionSchema)) return false;
Modified:
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
---
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
(original)
+++
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
Fri Nov 4 20:58:36 2011
@@ -541,17 +541,46 @@ public class GenericData {
}
/** Return the index for a datum within a union. Implemented with {@link
- * #instanceOf(Schema,Object)}.*/
+ * Schema#getIndexNamed(String)} and {@link #getSchemaName(Object)}.*/
public int resolveUnion(Schema union, Object datum) {
- int i = 0;
- for (Schema type : union.getTypes()) {
- if (instanceOf(type, datum))
- return i;
- i++;
- }
+ Integer i = union.getIndexNamed(getSchemaName(datum));
+ if (i != null)
+ return i;
throw new UnresolvedUnionException(union, datum);
}
+ /** Return the schema full name for a datum. Called by {@link
+ * #resolveUnion(Schema,Object)}. */
+ protected String getSchemaName(Object datum) {
+ if (datum == null)
+ return Type.NULL.getName();
+ if (isRecord(datum))
+ return getRecordSchema(datum).getFullName();
+ if (isEnum(datum))
+ return getEnumSchema(datum).getFullName();
+ if (isArray(datum))
+ return Type.ARRAY.getName();
+ if (isMap(datum))
+ return Type.MAP.getName();
+ if (isFixed(datum))
+ return getFixedSchema(datum).getFullName();
+ if (isString(datum))
+ return Type.STRING.getName();
+ if (isBytes(datum))
+ return Type.BYTES.getName();
+ if (datum instanceof Integer)
+ return Type.INT.getName();
+ if (datum instanceof Long)
+ return Type.LONG.getName();
+ if (datum instanceof Float)
+ return Type.FLOAT.getName();
+ if (datum instanceof Double)
+ return Type.DOUBLE.getName();
+ if (datum instanceof Boolean)
+ return Type.BOOLEAN.getName();
+ throw new AvroRuntimeException("Unknown datum type: "+datum);
+ }
+
/** Called by {@link #resolveUnion(Schema,Object)}. May be overridden for
alternate data representations.*/
protected boolean instanceOf(Schema schema, Object datum) {
Modified:
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
---
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
(original)
+++
avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
Fri Nov 4 20:58:36 2011
@@ -67,7 +67,7 @@ public class GenericDatumWriter<D> imple
case ARRAY: writeArray(schema, datum, out); break;
case MAP: writeMap(schema, datum, out); break;
case UNION:
- int index = data.resolveUnion(schema, datum);
+ int index = resolveUnion(schema, datum);
out.writeIndex(index);
write(schema.getTypes().get(index), datum, out);
break;
@@ -131,6 +131,12 @@ public class GenericDatumWriter<D> imple
out.writeArrayEnd();
}
+ /** Called to find the index for a datum within a union. By default calls
+ * {@link GenericData#resolveUnion(Schema,Object)}.*/
+ protected int resolveUnion(Schema union, Object datum) {
+ return data.resolveUnion(union, datum);
+ }
+
/** Called by the default implementation of {@link #writeArray} to get the
* size of an array. The default implementation is for {@link Collection}.*/
@SuppressWarnings("unchecked")