[ https://issues.apache.org/jira/browse/AVRO-2090?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16653005#comment-16653005 ]
ASF GitHub Bot commented on AVRO-2090: -------------------------------------- rstata closed pull request #256: AVRO-2090: Improve encode/decode time for SpecificRecord using code generation URL: https://github.com/apache/avro/pull/256 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java index be6bde8d2..bb918e41d 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java @@ -122,6 +122,10 @@ public DatumWriter createDatumWriter(Schema schema) { /** Return the singleton instance. */ public static SpecificData get() { return INSTANCE; } + private static final boolean USE_CUSTOM_CODERS + = Boolean.parseBoolean(System.getProperty("org.apache.avro.specific.use_custom_coders","false")); + public boolean useCustomCoders() { return USE_CUSTOM_CODERS; } + @Override protected boolean isEnum(Object datum) { return datum instanceof Enum || super.isEnum(datum); diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java index 774ca0944..0a9c97014 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java @@ -101,6 +101,23 @@ private Class getPropAsClass(Schema schema, String prop) { } } + @Override + protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) + throws IOException { + SpecificData data = getSpecificData(); + Object r = data.newRecord(old, expected); + 
if (SpecificData.get().useCustomCoders() + && r instanceof SpecificRecordBase) // TODO: Is this needed? + { + SpecificRecordBase d = (SpecificRecordBase) r; + if (d.hasCustomCoders()) { + d.decode(in); + return d; + } + } + return super.readRecord(old, expected, in); + } + @Override protected void readField(Object r, Schema.Field f, Object oldDatum, ResolvingDecoder in, Object state) diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java index 7bee02a65..ee1d850a7 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java @@ -71,6 +71,21 @@ protected void writeString(Schema schema, Object datum, Encoder out) writeString(datum, out); } + @Override + protected void writeRecord(Schema schema, Object datum, Encoder out) + throws IOException { + if (SpecificData.get().useCustomCoders() + && datum instanceof SpecificRecordBase) // TODO: Is this needed? + { + SpecificRecordBase d = (SpecificRecordBase) datum; + if (d.hasCustomCoders()) { + d.encode(out); + return; + } + } + super.writeRecord(schema, datum, out); + } + @Override protected void writeField(Object datum, Schema.Field f, Encoder out, Object state) throws IOException { diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java index 20d3dc331..2c26d0282 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java @@ -25,6 +25,8 @@ import org.apache.avro.Conversion; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.Encoder; /** Base class for generated record classes. 
*/ public abstract class SpecificRecordBase @@ -90,4 +92,19 @@ public void readExternal(ObjectInput in) new SpecificDatumReader(getSchema()) .read(this, SpecificData.getDecoder(in)); } + + /** Returns true iff an instance supports the {@link #encode} and + * {@link #decode} operations. Should only be used by + * <code>SpecificDatumReader/Writer</code> to selectively use + * {@link #encode} and {@link #decode} to optimize the (de)serialization of + * values. */ + public boolean hasCustomCoders() { return false; } + + public void encode(Encoder out) throws IOException { + throw new UnsupportedOperationException(); + } + + public void decode(Decoder in) throws IOException { + throw new UnsupportedOperationException(); + } } diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java index 7a6f5f1cc..8d8106538 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java @@ -572,9 +572,24 @@ private Schema addStringType(Schema s, Map<Schema,Schema> seen) { return result; } - private String getStringType(JsonNode overrideClassProperty) { - if (overrideClassProperty != null) - return overrideClassProperty.getTextValue(); + /** Utility for template use (and also internal use). Returns + * a string giving the FQN of the Java type to be used for a string + * schema or for the key of a map schema. (It's an error to call + * this on a schema other than a string or map.) 
*/ + public String getStringType(Schema s) { + String prop; + switch (s.getType()) { + case MAP: + prop = SpecificData.KEY_CLASS_PROP; + break; + case STRING: + prop = SpecificData.CLASS_PROP; + break; + default: + throw new IllegalArgumentException("Can't check string-type of non-string/map type: " + s); + } + JsonNode override = s.getJsonProp(prop); + if (override != null) return override.getTextValue(); switch (stringType) { case String: return "java.lang.String"; case Utf8: return "org.apache.avro.util.Utf8"; @@ -583,6 +598,17 @@ private String getStringType(JsonNode overrideClassProperty) { } } + /** Utility for template use. Returns true iff a STRING-schema or + * the key of a MAP-schema is what SpecificData defines as + * "stringable" (which means we need to call toString on it before + * before writing it). */ + public boolean isStringable(Schema schema) { + String t = getStringType(schema); + return ! (t.equals("java.lang.String") + || t.equals("java.lang.CharSequence") + || t.equals("org.apache.avro.util.Utf8")); + } + private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); /** Utility for template use. Returns the java type for a Schema. */ @@ -607,15 +633,14 @@ private String javaType(Schema schema, boolean checkConvertedLogicalType) { return "java.util.List<" + javaType(schema.getElementType()) + ">"; case MAP: return "java.util.Map<" - + getStringType(schema.getJsonProp(SpecificData.KEY_CLASS_PROP))+"," - + javaType(schema.getValueType()) + ">"; + + getStringType(schema)+ "," + javaType(schema.getValueType()) + ">"; case UNION: List<Schema> types = schema.getTypes(); // elide unions with null if ((types.size() == 2) && types.contains(NULL_SCHEMA)) return javaType(types.get(types.get(0).equals(NULL_SCHEMA) ? 
1 : 0)); return "java.lang.Object"; case STRING: - return getStringType(schema.getJsonProp(SpecificData.CLASS_PROP)); + return getStringType(schema); case BYTES: return "java.nio.ByteBuffer"; case INT: return "java.lang.Integer"; case LONG: return "java.lang.Long"; @@ -656,6 +681,58 @@ public String javaUnbox(Schema schema) { } } + + /** Utility for template use. Return a string with a given number + * of spaces to be used for indentation purposes. */ + public String indent(int n) { + return new String(new char[n]).replace('\0', ' '); + } + + /** Utility for template use. For a two-branch union type with + * one null branch, returns the index of the null branch. It's an + * error to use on anything other than a two-branch union with on + * null branch. */ + public int getNonNullIndex(Schema s) { + if (s.getType() != Schema.Type.UNION + || s.getTypes().size() != 2 + || ! s.getTypes().contains(NULL_SCHEMA)) + throw new IllegalArgumentException("Can only be used on 2-branch union with a null branch: " + s); + return (s.getTypes().get(0).equals(NULL_SCHEMA) ? 1 : 0); + } + + /** Utility for template use. Returns true if the encode/decode + * logic in record.vm can handle the schema being presented. */ + public boolean isCustomCodable(Schema schema) { + if (schema.isError()) return false; + return isCustomCodable(schema, new HashSet<Schema>()); + } + + private boolean isCustomCodable(Schema schema, Set<Schema> seen) { + if (! seen.add(schema)) return true; + if (schema.getLogicalType() != null) return false; + boolean result = true; + switch (schema.getType()) { + case RECORD: + for (Schema.Field f : schema.getFields()) + result &= isCustomCodable(f.schema(), seen); + break; + case MAP: + result = isCustomCodable(schema.getValueType(), seen); + break; + case ARRAY: + result = isCustomCodable(schema.getElementType(), seen); + break; + case UNION: + List<Schema> types = schema.getTypes(); + // Only know how to handle "nulling" unions for now + if (types.size() != 2 || ! 
types.contains(NULL_SCHEMA)) return false; + for (Schema s : types) result &= isCustomCodable(s, seen); + break; + default: + } + return result; + } + public boolean hasLogicalTypeField(Schema schema) { for (Schema.Field field : schema.getFields()) { if (field.schema().getLogicalType() != null) { diff --git a/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm b/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm index ccec4b60c..61259591f 100644 --- a/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm +++ b/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm @@ -19,7 +19,9 @@ package $schema.getNamespace(); #end +import org.apache.avro.generic.GenericArray; import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.Utf8; #if (!$schema.isError()) import org.apache.avro.message.BinaryMessageEncoder; import org.apache.avro.message.BinaryMessageDecoder; @@ -473,4 +475,282 @@ public class ${this.mangle($schema.getName())}#if ($schema.isError()) extends or READER$.read(this, SpecificData.getDecoder(in)); } +#if ($this.isCustomCodable($schema)) + @Override public boolean hasCustomCoders() { return true; } + + @Override public void encode(org.apache.avro.io.Encoder out) + throws java.io.IOException + { +#set ($nv = 0)## Counter to ensure unique var-names +#set ($maxnv = 0)## Holds high-water mark during recursion +#foreach ($field in $schema.getFields()) +#set ($n = $this.mangle($field.name(), $schema.isError())) +#set ($s = $field.schema()) +#encodeVar(0 "this.${n}" $s) + +#set ($nv = $maxnv) +#end + } + + @Override public void decode(org.apache.avro.io.Decoder in) + throws java.io.IOException + { +#set ($nv = 0)## Counter to ensure unique var-names +#set ($maxnv = 0)## Holds high-water mark during recursion +#foreach ($field in $schema.getFields()) 
+#set ($n = $this.mangle($field.name(), $schema.isError())) +#set ($s = $field.schema()) +#set ($rs = "SCHEMA$.getField(""${n}"").schema()") +#decodeVar(0 "this.${n}" $s $rs) + +#set ($nv = $maxnv) +#end + } +#end } + +#macro( encodeVar $indent $var $s ) +#set ($I = $this.indent($indent)) +##### Compound types (array, map, and union) require calls +##### that will recurse back into this encodeVar macro: +#if ($s.Type.Name.equals("array")) +#encodeArray($indent $var $s) +#elseif ($s.Type.Name.equals("map")) +#encodeMap($indent $var $s) +#elseif ($s.Type.Name.equals("union")) +#encodeUnion($indent $var $s) +##### Use the generated "encode" method as fast way to write +##### (specific) record types: +#elseif ($s.Type.Name.equals("record")) +$I ${var}.encode(out); +##### For rest of cases, generate calls out.writeXYZ: +#elseif ($s.Type.Name.equals("null")) +$I out.writeNull(); +#elseif ($s.Type.Name.equals("boolean")) +$I out.writeBoolean(${var}); +#elseif ($s.Type.Name.equals("int")) +$I out.writeInt(${var}); +#elseif ($s.Type.Name.equals("long")) +$I out.writeLong(${var}); +#elseif ($s.Type.Name.equals("float")) +$I out.writeFloat(${var}); +#elseif ($s.Type.Name.equals("double")) +$I out.writeDouble(${var}); +#elseif ($s.Type.Name.equals("string")) +#if ($this.isStringable($s)) +$I out.writeString(${var}.toString()); +#else +$I out.writeString(${var}); +#end +#elseif ($s.Type.Name.equals("bytes")) +$I out.writeBytes(${var}); +#elseif ($s.Type.Name.equals("fixed")) +$I out.writeFixed(${var}.bytes(), 0, ${s.FixedSize}); +#elseif ($s.Type.Name.equals("enum")) +$I out.writeEnum(${var}.ordinal()); +#else +## TODO -- singal a code-gen-time error +#end +#end + +#macro( encodeArray $indent $var $s ) +#set ($I = $this.indent($indent)) +#set ($et = $this.javaType($s.ElementType)) +$I long size${nv} = ${var}.size(); +$I out.writeArrayStart(); +$I out.setItemCount(size${nv}); +$I long actualSize${nv} = 0; +$I for ($et e${nv}: ${var}) { +$I actualSize${nv}++; +$I out.startItem(); 
+#set ($var = "e${nv}") +#set ($nv = $nv + 1) +#set ($maxnv = $nv) +#set ($indent = $indent + 2) +#encodeVar($indent $var $s.ElementType) +#set ($nv = $nv - 1) +#set ($indent = $indent - 2) +#set ($I = $this.indent($indent)) +$I } +$I out.writeArrayEnd(); +$I if (actualSize${nv} != size${nv}) +$I throw new java.util.ConcurrentModificationException("Array-size written was " + size${nv} + ", but element count was " + actualSize${nv} + "."); +#end + +#macro( encodeMap $indent $var $s ) +#set ($I = $this.indent($indent)) +#set ($kt = $this.getStringType($s)) +#set ($vt = $this.javaType($s.ValueType)) +$I long size${nv} = ${var}.size(); +$I out.writeMapStart(); +$I out.setItemCount(size${nv}); +$I long actualSize${nv} = 0; +$I for (java.util.Map.Entry<$kt, $vt> e${nv}: ${var}.entrySet()) { +$I actualSize${nv}++; +$I out.startItem(); +#if ($this.isStringable($s)) +$I out.writeString(e${nv}.getKey().toString()); +#else +$I out.writeString(e${nv}.getKey()); +#end +$I $vt v${nv} = e${nv}.getValue(); +#set ($var = "v${nv}") +#set ($nv = $nv + 1) +#set ($maxnv = $nv) +#set ($indent = $indent + 2) +#encodeVar($indent $var $s.ValueType) +#set ($nv = $nv - 1) +#set ($indent = $indent - 2) +#set ($I = $this.indent($indent)) +$I } +$I out.writeMapEnd(); +$I if (actualSize${nv} != size${nv}) + throw new java.util.ConcurrentModificationException("Map-size written was " + size${nv} + ", but element count was " + actualSize${nv} + "."); +#end + +#macro( encodeUnion $indent $var $s ) +#set ($I = $this.indent($indent)) +#set ($et = $this.javaType($s.Types.get($this.getNonNullIndex($s)))) +$I if (${var} == null) { +$I out.writeIndex(#if($this.getNonNullIndex($s)==0)1#{else}0#end); +$I out.writeNull(); +$I } else { +$I out.writeIndex(${this.getNonNullIndex($s)}); +#set ($indent = $indent + 2) +#encodeVar($indent $var $s.Types.get($this.getNonNullIndex($s))) +#set ($indent = $indent - 2) +#set ($I = $this.indent($indent)) +$I } +#end + + +#macro( decodeVar $indent $var $s $rs ) +#set ($I = 
$this.indent($indent)) +##### Compound types (array, map, and union) require calls +##### that will recurse back into this decodeVar macro: +#if ($s.Type.Name.equals("array")) +#decodeArray($indent $var $s $rs) +#elseif ($s.Type.Name.equals("map")) +#decodeMap($indent $var $s $rs) +#elseif ($s.Type.Name.equals("union")) +#decodeUnion($indent $var $s $rs) +##### Use the generated "decode" method as fast way to write +##### (specific) record types: +#elseif ($s.Type.Name.equals("record")) +$I if (${var} == null) { +$I ${var} = new ${this.javaType($s)}(); +$I } +$I ${var}.decode(in); +##### For rest of cases, generate calls in.readXYZ: +#elseif ($s.Type.Name.equals("null")) +$I in.readNull(); +#elseif ($s.Type.Name.equals("boolean")) +$I $var = in.readBoolean(); +#elseif ($s.Type.Name.equals("int")) +$I $var = in.readInt(); +#elseif ($s.Type.Name.equals("long")) +$I $var = in.readLong(); +#elseif ($s.Type.Name.equals("float")) +$I $var = in.readFloat(); +#elseif ($s.Type.Name.equals("double")) +$I $var = in.readDouble(); +#elseif ($s.Type.Name.equals("string")) +#decodeString( "$I" $var $s ) +#elseif ($s.Type.Name.equals("bytes")) +$I $var = in.readBytes(${var}); +#elseif ($s.Type.Name.equals("fixed")) +$I if (${var} == null) { +$I ${var} = new ${this.javaType($s)}(); +$I } +$I in.readFixed(${var}.bytes(), 0, ${s.FixedSize}); +#elseif ($s.Type.Name.equals("enum")) +$I $var = ${this.javaType($s)}.values()[in.readEnum()]; +#else +## TODO -- singal a code-gen-time error +#end +#end + +#macro( decodeString $II $var $s ) +#set ($st = ${this.getStringType($s)}) +#if ($this.isStringable($s)) +$II ${var} = new ${st}(in.readString()); +#elseif ($st.equals("java.lang.String")) +$II $var = in.readString(); +#elseif ($st.equals("org.apache.avro.util.Utf8")) +$II $var = in.readString(${var}); +#else +$II $var = in.readString(${var} instanceof Utf8 ? 
(Utf8)${var} : null); +#end +#end + +#macro( decodeArray $indent $var $s $rs ) +#set ($I = $this.indent($indent)) +#set ($t = $this.javaType($s)) +#set ($et = $this.javaType($s.ElementType)) +#set ($gat = "SpecificData.Array<${et}>") +$I long size${nv} = in.readArrayStart(); +$I $t a${nv} = ${var}; // Need fresh name due to limitation of macro system +$I if (a${nv} == null) { +$I a${nv} = new ${gat}((int)size${nv}, ${rs}); +$I $var = a${nv}; +$I } else a${nv}.clear(); +$I $gat ga${nv} = (a${nv} instanceof SpecificData.Array ? (${gat})a${nv} : null); +$I for ( ; 0 < size${nv}; size${nv} = in.arrayNext()) { +$I for ( ; size${nv} != 0; size${nv}--) { +$I $et e${nv} = (ga${nv} != null ? ga${nv}.peek() : null); +#set ($var = "e${nv}") +#set ($nv = $nv + 1) +#set ($maxnv = $nv) +#set ($indent = $indent + 4) +#decodeVar($indent $var $s.ElementType "${rs}.getElementType()") +#set ($nv = $nv - 1) +#set ($indent = $indent - 4) +#set ($I = $this.indent($indent)) +$I a${nv}.add(e${nv}); +$I } +$I } +#end + +#macro( decodeMap $indent $var $s $rs ) +#set ($I = $this.indent($indent)) +#set ($t = $this.javaType($s)) +#set ($kt = $this.getStringType($s)) +#set ($vt = $this.javaType($s.ValueType)) +$I long size${nv} = in.readMapStart(); +$I $t m${nv} = ${var}; // Need fresh name due to limitation of macro system +$I if (m${nv} == null) { +$I m${nv} = new java.util.HashMap<${kt},${vt}>((int)size${nv}); +$I $var = m${nv}; +$I } else m${nv}.clear(); +$I for ( ; 0 < size${nv}; size${nv} = in.mapNext()) { +$I for ( ; size${nv} != 0; size${nv}--) { +$I $kt k${nv} = null; +#decodeString( "$I " "k${nv}" $s ) +$I $vt v${nv} = null; +#set ($var = "v${nv}") +#set ($nv = $nv + 1) +#set ($maxnv = $nv) +#set ($indent = $indent + 4) +#decodeVar($indent $var $s.ValueType "${rs}.getValueType()") +#set ($nv = $nv - 1) +#set ($indent = $indent - 4) +#set ($I = $this.indent($indent)) +$I m${nv}.put(k${nv}, v${nv}); +$I } +$I } +#end + +#macro( decodeUnion $indent $var $s $rs ) +#set ($I = 
$this.indent($indent)) +#set ($et = $this.javaType($s.Types.get($this.getNonNullIndex($s)))) +#set ($si = $this.getNonNullIndex($s)) +$I if (in.readIndex() != ${si}) { +$I in.readNull(); +$I ${var} = null; +$I } else { +#set ($indent = $indent + 2) +#decodeVar($indent $var $s.Types.get($si) "${rs}.getTypes().get(${si})") +#set ($indent = $indent - 2) +#set ($I = $this.indent($indent)) +$I } +#end diff --git a/lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Player.java b/lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Player.java index 4dff5ef50..264e8e222 100644 --- a/lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Player.java +++ b/lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Player.java @@ -5,7 +5,9 @@ */ package avro.examples.baseball; +import org.apache.avro.generic.GenericArray; import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.Utf8; import org.apache.avro.message.BinaryMessageEncoder; import org.apache.avro.message.BinaryMessageDecoder; import org.apache.avro.message.SchemaStore; @@ -460,4 +462,65 @@ public Player build() { READER$.read(this, SpecificData.getDecoder(in)); } + @Override public boolean hasCustomCoders() { return true; } + + @Override public void encode(org.apache.avro.io.Encoder out) + throws java.io.IOException + { + out.writeInt(this.number); + + out.writeString(this.first_name); + + out.writeString(this.last_name); + + long size0 = this.position.size(); + out.writeArrayStart(); + out.setItemCount(size0); + long actualSize0 = 0; + for (avro.examples.baseball.Position e0: this.position) { + actualSize0++; + out.startItem(); + out.writeEnum(e0.ordinal()); + } + out.writeArrayEnd(); + if (actualSize0 != size0) + throw new java.util.ConcurrentModificationException("Array-size written was " + size0 + ", but element count was " + actualSize0 + "."); + + } + + @Override public void decode(org.apache.avro.io.Decoder in) + throws 
java.io.IOException + { + this.number = in.readInt(); + + this.first_name = in.readString(); + + this.last_name = in.readString(); + + long size0 = in.readArrayStart(); + java.util.List<avro.examples.baseball.Position> a0 = this.position; // Need fresh name due to limitation of macro system + if (a0 == null) { + a0 = new SpecificData.Array<avro.examples.baseball.Position>((int)size0, SCHEMA$.getField("position").schema()); + this.position = a0; + } else a0.clear(); + SpecificData.Array<avro.examples.baseball.Position> ga0 = (a0 instanceof SpecificData.Array ? (SpecificData.Array<avro.examples.baseball.Position>)a0 : null); + for ( ; 0 < size0; size0 = in.arrayNext()) { + for ( ; size0 != 0; size0--) { + avro.examples.baseball.Position e0 = (ga0 != null ? ga0.peek() : null); + e0 = avro.examples.baseball.Position.values()[in.readEnum()]; + a0.add(e0); + } + } + + } } + + + + + + + + + + diff --git a/lang/java/tools/src/test/compiler/output/Player.java b/lang/java/tools/src/test/compiler/output/Player.java index 26fcbc0d5..c9bf1687a 100644 --- a/lang/java/tools/src/test/compiler/output/Player.java +++ b/lang/java/tools/src/test/compiler/output/Player.java @@ -5,7 +5,9 @@ */ package avro.examples.baseball; +import org.apache.avro.generic.GenericArray; import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.Utf8; import org.apache.avro.message.BinaryMessageEncoder; import org.apache.avro.message.BinaryMessageDecoder; import org.apache.avro.message.SchemaStore; @@ -460,4 +462,65 @@ public Player build() { READER$.read(this, SpecificData.getDecoder(in)); } + @Override public boolean hasCustomCoders() { return true; } + + @Override public void encode(org.apache.avro.io.Encoder out) + throws java.io.IOException + { + out.writeInt(this.number); + + out.writeString(this.first_name); + + out.writeString(this.last_name); + + long size0 = this.position.size(); + out.writeArrayStart(); + out.setItemCount(size0); + long actualSize0 = 0; + for 
(avro.examples.baseball.Position e0: this.position) { + actualSize0++; + out.startItem(); + out.writeEnum(e0.ordinal()); + } + out.writeArrayEnd(); + if (actualSize0 != size0) + throw new java.util.ConcurrentModificationException("Array-size written was " + size0 + ", but element count was " + actualSize0 + "."); + + } + + @Override public void decode(org.apache.avro.io.Decoder in) + throws java.io.IOException + { + this.number = in.readInt(); + + this.first_name = in.readString(this.first_name instanceof Utf8 ? (Utf8)this.first_name : null); + + this.last_name = in.readString(this.last_name instanceof Utf8 ? (Utf8)this.last_name : null); + + long size0 = in.readArrayStart(); + java.util.List<avro.examples.baseball.Position> a0 = this.position; // Need fresh name due to limitation of macro system + if (a0 == null) { + a0 = new SpecificData.Array<avro.examples.baseball.Position>((int)size0, SCHEMA$.getField("position").schema()); + this.position = a0; + } else a0.clear(); + SpecificData.Array<avro.examples.baseball.Position> ga0 = (a0 instanceof SpecificData.Array ? (SpecificData.Array<avro.examples.baseball.Position>)a0 : null); + for ( ; 0 < size0; size0 = in.arrayNext()) { + for ( ; size0 != 0; size0--) { + avro.examples.baseball.Position e0 = (ga0 != null ? ga0.peek() : null); + e0 = avro.examples.baseball.Position.values()[in.readEnum()]; + a0.add(e0); + } + } + + } } + + + + + + + + + + ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Improve encode/decode time for SpecificRecord using code generation > ------------------------------------------------------------------- > > Key: AVRO-2090 > URL: https://issues.apache.org/jira/browse/AVRO-2090 > Project: Avro > Issue Type: Improvement > Components: java > Reporter: Raymie Stata > Assignee: Raymie Stata > Priority: Major > Attachments: customcoders.md > > > Compared to GenericRecords, SpecificRecords offer type-safety plus the > performance of traditional getters/setters/instance variables. But these are > only beneficial to Java code accessing those records. SpecificRecords > inherit serialization and deserialization code from GenericRecords, which is > dynamic and thus slow (in fact, benchmarks show that serialization and > deserialization are _slower_ for SpecificRecord than for GenericRecord). > This patch extends record.vm to generate custom, higher-performance encoder > and decoder functions for SpecificRecords. We've run a public benchmark > showing that the new code reduces serialization time by 2/3 and > deserialization time by close to 50%. -- This message was sent by Atlassian JIRA (v7.6.3#76005)