Author: massie
Date: Tue Feb 9 20:42:02 2010
New Revision: 908209
URL: http://svn.apache.org/viewvc?rev=908209&view=rev
Log:
AVRO-412. Allow schema validation to be optional
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/lang/c/docs/index.txt
hadoop/avro/trunk/lang/c/examples/Makefile.am
hadoop/avro/trunk/lang/c/src/avro.h
hadoop/avro/trunk/lang/c/src/datafile.c
hadoop/avro/trunk/lang/c/src/datum.c
hadoop/avro/trunk/lang/c/src/datum.h
hadoop/avro/trunk/lang/c/src/datum_equal.c
hadoop/avro/trunk/lang/c/src/datum_read.c
hadoop/avro/trunk/lang/c/src/datum_validate.c
hadoop/avro/trunk/lang/c/src/datum_write.c
hadoop/avro/trunk/lang/c/tests/generate_interop_data.c
hadoop/avro/trunk/lang/c/tests/test_avro_data.c
hadoop/avro/trunk/lang/c/version.sh
Modified: hadoop/avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb 9 20:42:02 2010
@@ -320,6 +320,8 @@
AVRO-261. Allow Schemas to be immutable (thiru)
+ AVRO-412. Allow schema validation to be optional (massie)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/lang/c/docs/index.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/docs/index.txt?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/docs/index.txt (original)
+++ hadoop/avro/trunk/lang/c/docs/index.txt Tue Feb 9 20:42:02 2010
@@ -34,9 +34,27 @@
A C program is like a fast dance on a newly waxed dance floor by people
carrying razors.
____
-The C implementation is still not quite ready for production use.
-The current code is being tested on +MacOS X+ and +Linux+. We're
-always looking for contributions so, if you're a C hacker, please
+The C implementation has been tested on +MacOSX+ and +Linux+ but, over
+time, the number of supported OSes should grow. Please let us know if
+you're using +Avro C+ on other systems. There are no dependencies on
+external libraries. We embedded http://www.digip.org/jansson/[Jansson] into
++Avro C+ for parsing JSON into schema structures.
+
+The C implementation supports:
+
+* binary encoding/decoding of all primitive and complex data types
+* storage to an Avro Object Container File
+* schema resolution, promotion and projection
+* validating and non-validating mode for writing Avro data
+
+The C implementation is lacking:
+
+* RPC
+
+To learn about the API, take a look at the examples and reference files
+later in this document.
+
+We're always looking for contributions so, if you're a C hacker, please
feel free to http://hadoop.apache.org/avro/[submit patches to the
project].
@@ -107,7 +125,7 @@
[WARNING]
===============================
-Don't "give" +Avro C+ a string that you haven't allocated from the heap with +malloc+.
+Don't "give" +Avro C+ a string that you haven't allocated from the heap with e.g. +malloc+ or +strdup+.
For example, *don't* do this:
----
@@ -115,8 +133,31 @@
----
===============================
+== Schema Validation
+
+If you want to write a datum, you would use the following function:
+
+[source,c]
+----
+int avro_write_data(avro_writer_t writer,
+ avro_schema_t writers_schema, avro_datum_t datum);
+----
+
+If you pass in a +writers_schema+, then your +datum+ will be validated *before*
+it is sent to the +writer+. This check ensures that your data has the
+correct format. If you are certain your datum is correct, you can pass
+a +NULL+ value for +writers_schema+ and +Avro C+ will not validate before
+writing.
+
+NOTE: Data written to an Avro Object Container File is always validated.
+
== Examples
+[quote,Dante Hicks]
+____
+I'm not even supposed to be here today!
+____
+
Imagine you're a free-lance hacker in Leonardo, New Jersey and you've
been approached by the owner of the local *Quick Stop Convenience* store.
He wants you to create a contact database case he needs to call employees
Modified: hadoop/avro/trunk/lang/c/examples/Makefile.am
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/examples/Makefile.am?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/examples/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/examples/Makefile.am Tue Feb 9 20:42:02 2010
@@ -10,3 +10,5 @@
quickstop_SOURCES=quickstop.c
quickstop_LDADD=$(examples_LDADD)
+
+CLEANFILES=quickstop.db
Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Tue Feb 9 20:42:02 2010
@@ -86,7 +86,7 @@
||is_avro_fixed(obj))
#define is_avro_map(obj) (obj && avro_typeof(obj) == AVRO_MAP)
#define is_avro_array(obj) (obj && avro_typeof(obj) == AVRO_ARRAY)
-#define is_avro_union(obj) (obj && avro_classof(obj) == AVRO_SCHEMA &&
avro_typeof(obj) == AVRO_UNION)
+#define is_avro_union(obj) (obj && avro_typeof(obj) == AVRO_UNION)
#define is_avro_complex_type(obj) (!(is_avro_primitive(obj))
#define is_avro_link(obj) (obj && avro_typeof(obj) == AVRO_LINK)
@@ -181,7 +181,7 @@
avro_datum_t avro_boolean(int8_t i);
avro_datum_t avro_null(void);
avro_datum_t avro_record(const char *name, const char *space);
-avro_datum_t avro_enum(const char *name, const char *symbol);
+avro_datum_t avro_enum(const char *name, int i);
avro_datum_t avro_fixed(const char *name, const char *bytes,
const int64_t size);
avro_datum_t avro_wrapfixed(const char *name, const char *bytes,
@@ -190,7 +190,7 @@
const int64_t size);
avro_datum_t avro_map(void);
avro_datum_t avro_array(void);
-avro_datum_t avro_union(const avro_schema_t schema, const avro_datum_t datum);
+avro_datum_t avro_union(int64_t discriminant, const avro_datum_t datum);
/* getters */
int avro_string_get(avro_datum_t datum, char **p);
Modified: hadoop/avro/trunk/lang/c/src/datafile.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datafile.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datafile.c (original)
+++ hadoop/avro/trunk/lang/c/src/datafile.c Tue Feb 9 20:42:02 2010
@@ -283,7 +283,7 @@
/* Write the sync marker */
check(rval, write_sync(w));
/* Reset the datum writer */
- avro_writer_reset(w->writer);
+ avro_writer_reset(w->datum_writer);
w->block_count = 0;
}
return 0;
Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Tue Feb 9 20:42:02 2010
@@ -380,6 +380,20 @@
return &obj;
}
+avro_datum_t avro_union(int64_t discriminant, avro_datum_t value)
+{
+ struct avro_union_datum_t *datum =
+ malloc(sizeof(struct avro_union_datum_t));
+ if (!datum) {
+ return NULL;
+ }
+ datum->discriminant = discriminant;
+ datum->value = avro_datum_incref(value);
+
+ avro_datum_init(&datum->obj, AVRO_UNION);
+ return &datum->obj;
+}
+
avro_datum_t avro_record(const char *name, const char *space)
{
struct avro_record_datum_t *datum =
@@ -398,11 +412,23 @@
free((void *)datum);
return NULL;
}
- datum->fields = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
- if (!datum->fields) {
- free((void *)datum->space);
- free((void *)datum->name);
- free((void *)datum);
+ datum->field_order = st_init_numtable_with_size(DEFAULT_TABLE_SIZE);
+ if (!datum->field_order) {
+ if (space) {
+ free((void *)datum->space);
+ }
+ free((char *)datum->name);
+ free(datum);
+ return NULL;
+ }
+ datum->fields_byname = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
+ if (!datum->fields_byname) {
+ st_free_table(datum->field_order);
+ if (space) {
+ free((void *)datum->space);
+ }
+ free((char *)datum->name);
+ free(datum);
return NULL;
}
@@ -420,7 +446,7 @@
} val;
if (is_avro_datum(datum) && is_avro_record(datum) && field_name) {
if (st_lookup
- (avro_datum_to_record(datum)->fields,
+ (avro_datum_to_record(datum)->fields_byname,
(st_data_t) field_name, &(val.data))) {
*field = val.field;
return 0;
@@ -442,20 +468,25 @@
avro_datum_decref(old_field);
} else {
/* Inserting new value */
+ struct avro_record_datum_t *record =
+ avro_datum_to_record(datum);
key = strdup(field_name);
if (!key) {
return ENOMEM;
}
+ st_insert(record->field_order,
+ record->field_order->num_entries,
+ (st_data_t) key);
}
avro_datum_incref(field_value);
- st_insert(avro_datum_to_record(datum)->fields, (st_data_t) key,
- (st_data_t) field_value);
+ st_insert(avro_datum_to_record(datum)->fields_byname,
+ (st_data_t) key, (st_data_t) field_value);
return 0;
}
return EINVAL;
}
-avro_datum_t avro_enum(const char *name, const char *symbol)
+avro_datum_t avro_enum(const char *name, int i)
{
struct avro_enum_datum_t *datum =
malloc(sizeof(struct avro_enum_datum_t));
@@ -463,7 +494,7 @@
return NULL;
}
datum->name = strdup(name);
- datum->symbol = strdup(symbol);
+ datum->value = i;
avro_datum_init(&datum->obj, AVRO_ENUM);
return &datum->obj;
@@ -740,9 +771,10 @@
if (record->space) {
free((void *)record->space);
}
- st_foreach(record->fields,
+ st_foreach(record->fields_byname,
char_datum_free_foreach, 0);
- st_free_table(record->fields);
+ st_free_table(record->field_order);
+ st_free_table(record->fields_byname);
free(record);
}
break;
@@ -750,7 +782,6 @@
struct avro_enum_datum_t *enump;
enump = avro_datum_to_enum(datum);
free((void *)enump->name);
- free((void *)enump->symbol);
free(enump);
}
break;
@@ -781,7 +812,12 @@
free(array);
}
break;
- case AVRO_UNION:
+ case AVRO_UNION:{
+ struct avro_union_datum_t *unionp;
+ unionp = avro_datum_to_union(datum);
+ avro_datum_decref(unionp->value);
+ free(unionp);
+ }
break;
case AVRO_LINK:{
/* TODO */
Modified: hadoop/avro/trunk/lang/c/src/datum.h
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.h?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.h (original)
+++ hadoop/avro/trunk/lang/c/src/datum.h Tue Feb 9 20:42:02 2010
@@ -76,13 +76,14 @@
struct avro_obj_t obj;
const char *name;
const char *space;
- st_table *fields;
+ st_table *field_order;
+ st_table *fields_byname;
};
struct avro_enum_datum_t {
struct avro_obj_t obj;
const char *name;
- const char *symbol;
+ int value;
};
struct avro_array_datum_t {
@@ -90,6 +91,12 @@
st_table *els;
};
+struct avro_union_datum_t {
+ struct avro_obj_t obj;
+ int64_t discriminant;
+ avro_datum_t value;
+};
+
#define avro_datum_to_string(datum_) (container_of(datum_, struct
avro_string_datum_t, obj))
#define avro_datum_to_bytes(datum_) (container_of(datum_, struct
avro_bytes_datum_t, obj))
#define avro_datum_to_int32(datum_) (container_of(datum_, struct
avro_int32_datum_t, obj))
@@ -102,5 +109,6 @@
#define avro_datum_to_record(datum_) (container_of(datum_, struct
avro_record_datum_t, obj))
#define avro_datum_to_enum(datum_) (container_of(datum_, struct
avro_enum_datum_t, obj))
#define avro_datum_to_array(datum_) (container_of(datum_, struct
avro_array_datum_t, obj))
+#define avro_datum_to_union(datum_) (container_of(datum_, struct
avro_union_datum_t, obj))
#endif
Modified: hadoop/avro/trunk/lang/c/src/datum_equal.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_equal.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_equal.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_equal.c Tue Feb 9 20:42:02 2010
@@ -76,7 +76,7 @@
static int record_equal(struct avro_record_datum_t *a,
struct avro_record_datum_t *b)
{
- struct st_equal_args args = { 1, b->fields };
+ struct st_equal_args args = { 1, b->fields_byname };
if (strcmp(a->name, b->name)) {
/* This have different names */
return 0;
@@ -90,18 +90,16 @@
/* One has a namespace, one doesn't */
return 0;
}
-
- if (a->fields->num_entries != b->fields->num_entries) {
+ if (a->fields_byname->num_entries != b->fields_byname->num_entries) {
return 0;
}
- st_foreach(a->fields, st_equal_foreach, (st_data_t) & args);
+ st_foreach(a->fields_byname, st_equal_foreach, (st_data_t) & args);
return args.rval;
}
static int enum_equal(struct avro_enum_datum_t *a, struct avro_enum_datum_t *b)
{
- return strcmp(a->name, b->name) == 0
- && strcmp(a->symbol, b->symbol) == 0;
+ return strcmp(a->name, b->name) == 0 && a->value == b->value;
}
static int fixed_equal(struct avro_fixed_datum_t *a,
@@ -110,7 +108,14 @@
return a->size == b->size && memcmp(a->bytes, b->bytes, a->size) == 0;
}
-int avro_datum_equal(avro_datum_t a, avro_datum_t b)
+static int union_equal(struct avro_union_datum_t *a,
+ struct avro_union_datum_t *b)
+{
+ /* XXX: not sure. a->discriminant == b->discriminant important? */
+ return avro_datum_equal(a->value, b->value);
+}
+
+int avro_datum_equal(const avro_datum_t a, const avro_datum_t b)
{
if (!(is_avro_datum(a) && is_avro_datum(b))) {
return 0;
@@ -161,7 +166,9 @@
avro_datum_to_fixed(b));
case AVRO_UNION:
- break;
+ return union_equal(avro_datum_to_union(a),
+ avro_datum_to_union(b));
+
case AVRO_LINK:
/*
* TODO
Modified: hadoop/avro/trunk/lang/c/src/datum_read.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_read.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_read.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_read.c Tue Feb 9 20:42:02 2010
@@ -20,6 +20,7 @@
#include "encoding.h"
#include "schema.h"
#include "datum.h"
+#include "avro_private.h"
int
avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
@@ -99,20 +100,9 @@
{
int rval;
int64_t index;
- union {
- st_data_t data;
- char *sym;
- } val;
- rval = enc->read_long(reader, &index);
- if (rval) {
- return rval;
- }
-
- if (!st_lookup(writers_schema->symbols, index, &val.data)) {
- return EINVAL;
- }
- *datum = avro_enum(writers_schema->name, val.sym);
+ check(rval, enc->read_long(reader, &index));
+ *datum = avro_enum(writers_schema->name, index);
return 0;
}
@@ -230,21 +220,20 @@
struct avro_union_schema_t *readers_schema, avro_datum_t * datum)
{
int rval;
- int64_t index;
+ int64_t discriminant;
+ avro_datum_t value;
union {
st_data_t data;
avro_schema_t schema;
} val;
-
- rval = enc->read_long(reader, &index);
- if (rval) {
- return rval;
- }
-
- if (!st_lookup(writers_schema->branches, index, &val.data)) {
+ check(rval, enc->read_long(reader, &discriminant));
+ if (!st_lookup(writers_schema->branches, discriminant, &val.data)) {
return EILSEQ;
}
- return avro_read_data(reader, val.schema, NULL, datum);
+ check(rval, avro_read_data(reader, val.schema, NULL, &value));
+ *datum = avro_union(discriminant, value);
+ avro_datum_decref(value);
+ return 0;
}
/* TODO: handle default values in fields */
@@ -310,27 +299,6 @@
return EINVAL;
}
- /*
- * schema resolution
- */
- if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
- struct avro_union_schema_t *union_schema =
- avro_schema_to_union(readers_schema);
-
- for (i = 0; i < union_schema->branches->num_entries; i++) {
- union {
- st_data_t data;
- avro_schema_t schema;
- } val;
- st_lookup(union_schema->branches, i, &val.data);
- if (avro_schema_match(writers_schema, val.schema)) {
- return avro_read_data(reader, writers_schema,
- val.schema, datum);
- }
- }
- return EINVAL;
- }
-
switch (avro_typeof(writers_schema)) {
case AVRO_NULL:
rval = enc->read_null(reader);
Modified: hadoop/avro/trunk/lang/c/src/datum_validate.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_validate.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_validate.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_validate.c Tue Feb 9 20:42:02 2010
@@ -84,15 +84,11 @@
case AVRO_ENUM:
if (is_avro_enum(datum)) {
- struct avro_enum_schema_t *enump =
- avro_schema_to_enum(expected_schema);
- struct avro_enum_datum_t *d = avro_datum_to_enum(datum);
- union {
- st_data_t data;
- long idx;
- } val;
- return st_lookup(enump->symbols_byname,
- (st_data_t) d->symbol, &val.data);
+ long value = avro_datum_to_enum(datum)->value;
+ long max_value =
+ avro_schema_to_enum(expected_schema)->symbols->
+ num_entries;
+ return 0 <= value && value <= max_value;
}
return 0;
@@ -130,24 +126,25 @@
break;
case AVRO_UNION:
- {
+ if (is_avro_union(datum)) {
struct avro_union_schema_t *union_schema =
avro_schema_to_union(expected_schema);
+ struct avro_union_datum_t *union_datum =
+ avro_datum_to_union(datum);
+ union {
+ st_data_t data;
+ avro_schema_t schema;
+ } val;
- for (i = 0; i < union_schema->branches->num_entries;
- i++) {
- union {
- st_data_t data;
- avro_schema_t schema;
- } val;
- st_lookup(union_schema->branches, i, &val.data);
- if (avro_schema_datum_validate
- (val.schema, datum)) {
- return 1;
- }
+ if (!st_lookup
+ (union_schema->branches, union_datum->discriminant,
+ &val.data)) {
+ return 0;
}
+ return avro_schema_datum_validate(val.schema,
+ union_datum->value);
}
- return 0;
+ break;
case AVRO_RECORD:
if (is_avro_record(datum)) {
Modified: hadoop/avro/trunk/lang/c/src/datum_write.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_write.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_write.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_write.c Tue Feb 9 20:42:02 2010
@@ -21,27 +21,45 @@
#include "datum.h"
#include "encoding.h"
+static int write_datum(avro_writer_t writer, const avro_encoding_t * enc,
+ avro_schema_t writers_schema, avro_datum_t datum);
+
static int
write_record(avro_writer_t writer, const avro_encoding_t * enc,
- struct avro_record_schema_t *record, avro_datum_t datum)
+ struct avro_record_schema_t *schema, avro_datum_t datum)
{
int rval;
long i;
+ avro_datum_t field_datum;
- for (i = 0; i < record->fields->num_entries; i++) {
- avro_datum_t field_datum;
- union {
- st_data_t data;
- struct avro_record_field_t *field;
- } val;
- st_lookup(record->fields, i, &val.data);
- rval = avro_record_get(datum, val.field->name, &field_datum);
- if (rval) {
- return rval;
+ if (schema) {
+ for (i = 0; i < schema->fields->num_entries; i++) {
+ union {
+ st_data_t data;
+ struct avro_record_field_t *field;
+ } val;
+ st_lookup(schema->fields, i, &val.data);
+ check(rval,
+ avro_record_get(datum, val.field->name,
+ &field_datum));
+ check(rval,
+ write_datum(writer, enc, val.field->type,
+ field_datum));
}
- rval = avro_write_data(writer, val.field->type, field_datum);
- if (rval) {
- return rval;
+ } else {
+ /* No schema. Just write the record datum */
+ struct avro_record_datum_t *record =
+ avro_datum_to_record(datum);
+ for (i = 0; i < record->field_order->num_entries; i++) {
+ union {
+ st_data_t data;
+ char *name;
+ } val;
+ st_lookup(record->field_order, i, &val.data);
+ check(rval,
+ avro_record_get(datum, val.name, &field_datum));
+ check(rval,
+ write_datum(writer, enc, NULL, field_datum));
}
}
return 0;
@@ -51,15 +69,7 @@
write_enum(avro_writer_t writer, const avro_encoding_t * enc,
struct avro_enum_schema_t *enump, struct avro_enum_datum_t *datum)
{
- union {
- st_data_t data;
- long idx;
- } val;
- if (!st_lookup
- (enump->symbols_byname, (st_data_t) datum->symbol, &val.data)) {
- return EINVAL;
- }
- return enc->write_long(writer, val.idx);
+ return enc->write_long(writer, datum->value);
}
struct write_map_args {
@@ -77,7 +87,7 @@
args->rval = rval;
return ST_STOP;
}
- rval = avro_write_data(args->writer, args->values_schema, datum);
+ rval = write_datum(args->writer, args->enc, args->values_schema, datum);
if (rval) {
args->rval = rval;
return ST_STOP;
@@ -87,11 +97,12 @@
static int
write_map(avro_writer_t writer, const avro_encoding_t * enc,
- struct avro_map_schema_t *writer_schema,
+ struct avro_map_schema_t *writers_schema,
struct avro_map_datum_t *datum)
{
int rval;
- struct write_map_args args = { 0, writer, enc, writer_schema->values };
+ struct write_map_args args =
+ { 0, writer, enc, writers_schema ? writers_schema->values : NULL };
if (datum->map->num_entries) {
rval = enc->write_long(writer, datum->map->num_entries);
@@ -129,11 +140,10 @@
avro_datum_t datum;
} val;
st_lookup(array->els, i, &val.data);
- rval =
- avro_write_data(writer, schema->items, val.datum);
- if (rval) {
- return rval;
- }
+ check(rval,
+ write_datum(writer, enc,
+ schema ? schema->items : NULL,
+ val.datum));
}
}
return enc->write_long(writer, 0);
@@ -141,153 +151,138 @@
static int
write_union(avro_writer_t writer, const avro_encoding_t * enc,
- struct avro_union_schema_t *schema, avro_datum_t datum)
+ struct avro_union_schema_t *schema,
+ struct avro_union_datum_t *unionp)
{
int rval;
- long i;
+ avro_schema_t write_schema = NULL;
- for (i = 0; i < schema->branches->num_entries; i++) {
+ check(rval, enc->write_long(writer, unionp->discriminant));
+ if (schema) {
union {
st_data_t data;
avro_schema_t schema;
} val;
- st_lookup(schema->branches, i, &val.data);
- if (avro_schema_datum_validate(val.schema, datum)) {
- rval = enc->write_long(writer, i);
- if (rval) {
- return rval;
- }
- return avro_write_data(writer, val.schema, datum);
+ if (!st_lookup
+ (schema->branches, unionp->discriminant, &val.data)) {
+ return EINVAL;
}
+ write_schema = val.schema;
}
- return EINVAL;
+ return write_datum(writer, enc, write_schema, unionp->value);
}
-int
-avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
- avro_datum_t datum)
+static int write_datum(avro_writer_t writer, const avro_encoding_t * enc,
+ avro_schema_t writers_schema, avro_datum_t datum)
{
- const avro_encoding_t *enc = &avro_binary_encoding;
- int rval = -1;
+ int rval;
- if (!writer || !(is_avro_schema(writer_schema) &&
is_avro_datum(datum))) {
- return EINVAL;
+ if (is_avro_schema(writers_schema) && is_avro_link(writers_schema)) {
+ return write_datum(writer, enc,
+ (avro_schema_to_link(writers_schema))->to,
+ datum);
}
- if (!avro_schema_datum_validate(writer_schema, datum)) {
- return EINVAL;
- }
- switch (avro_typeof(writer_schema)) {
+
+ switch (avro_typeof(datum)) {
case AVRO_NULL:
- rval = enc->write_null(writer);
- break;
+ return enc->write_null(writer);
+
case AVRO_BOOLEAN:
- rval =
- enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
- break;
+ return enc->write_boolean(writer,
+ avro_datum_to_boolean(datum)->i);
+
case AVRO_STRING:
- rval =
- enc->write_string(writer, avro_datum_to_string(datum)->s);
- break;
+ return enc->write_string(writer,
+ avro_datum_to_string(datum)->s);
+
case AVRO_BYTES:
- rval =
- enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
- avro_datum_to_bytes(datum)->size);
- break;
+ return enc->write_bytes(writer,
+ avro_datum_to_bytes(datum)->bytes,
+ avro_datum_to_bytes(datum)->size);
+
case AVRO_INT32:
- {
- int32_t i;
- if (is_avro_int32(datum)) {
- i = avro_datum_to_int32(datum)->i32;
- } else if (is_avro_int64(datum)) {
- i = (int32_t) avro_datum_to_int64(datum)->i64;
- } else {
- assert(0
- &&
- "Serious bug in schema validation code");
+ case AVRO_INT64:{
+ int64_t val = avro_typeof(datum) == AVRO_INT32 ?
+ avro_datum_to_int32(datum)->i32 :
+ avro_datum_to_int64(datum)->i64;
+ if (is_avro_schema(writers_schema)) {
+ /* handle promotion */
+ if (is_avro_float(writers_schema)) {
+ return enc->write_float(writer,
+ (float)val);
+ } else if (is_avro_double(writers_schema)) {
+ return enc->write_double(writer,
+ (double)val);
+ }
}
- rval = enc->write_int(writer, i);
+ return enc->write_long(writer, val);
}
- break;
- case AVRO_INT64:
- rval = enc->write_long(writer, avro_datum_to_int64(datum)->i64);
- break;
- case AVRO_FLOAT:
- {
- float f;
- if (is_avro_int32(datum)) {
- f = (float)(avro_datum_to_int32(datum)->i32);
- } else if (is_avro_int64(datum)) {
- f = (float)(avro_datum_to_int64(datum)->i64);
- } else if (is_avro_float(datum)) {
- f = avro_datum_to_float(datum)->f;
- } else if (is_avro_double(datum)) {
- f = (float)(avro_datum_to_double(datum)->d);
- } else {
- assert(0
- &&
- "Serious bug in schema validation code");
+
+ case AVRO_FLOAT:{
+ float val = avro_datum_to_float(datum)->f;
+ if (is_avro_schema(writers_schema)
+ && is_avro_double(writers_schema)) {
+ /* handle promotion */
+ return enc->write_double(writer, (double)val);
}
- rval = enc->write_float(writer, f);
+ return enc->write_float(writer, val);
}
- break;
+
case AVRO_DOUBLE:
- {
- double d;
- if (is_avro_int32(datum)) {
- d = (double)(avro_datum_to_int32(datum)->i32);
- } else if (is_avro_int64(datum)) {
- d = (double)(avro_datum_to_int64(datum)->i64);
- } else if (is_avro_float(datum)) {
- d = (double)(avro_datum_to_float(datum)->f);
- } else if (is_avro_double(datum)) {
- d = avro_datum_to_double(datum)->d;
- } else {
- assert(0 && "Bug in schema validation code");
- }
- rval = enc->write_double(writer, d);
- }
- break;
+ return enc->write_double(writer,
+ avro_datum_to_double(datum)->d);
case AVRO_RECORD:
- rval =
- write_record(writer, enc,
- avro_schema_to_record(writer_schema), datum);
- break;
+ return write_record(writer, enc,
+ avro_schema_to_record(writers_schema),
+ datum);
case AVRO_ENUM:
- rval =
- write_enum(writer, enc, avro_schema_to_enum(writer_schema),
- avro_datum_to_enum(datum));
- break;
+ return write_enum(writer, enc,
+ avro_schema_to_enum(writers_schema),
+ avro_datum_to_enum(datum));
case AVRO_FIXED:
- return avro_write(writer, avro_datum_to_fixed(datum)->bytes,
+ return avro_write(writer,
+ avro_datum_to_fixed(datum)->bytes,
avro_datum_to_fixed(datum)->size);
case AVRO_MAP:
- rval =
- write_map(writer, enc, avro_schema_to_map(writer_schema),
- avro_datum_to_map(datum));
- break;
+ return write_map(writer, enc,
+ avro_schema_to_map(writers_schema),
+ avro_datum_to_map(datum));
+
case AVRO_ARRAY:
- rval =
- write_array(writer, enc,
- avro_schema_to_array(writer_schema),
- avro_datum_to_array(datum));
- break;
+ return write_array(writer, enc,
+ avro_schema_to_array(writers_schema),
+ avro_datum_to_array(datum));
case AVRO_UNION:
- rval =
- write_union(writer, enc,
- avro_schema_to_union(writer_schema), datum);
- break;
+ return write_union(writer, enc,
+ avro_schema_to_union(writers_schema),
+ avro_datum_to_union(datum));
case AVRO_LINK:
- rval =
- avro_write_data(writer,
- (avro_schema_to_link(writer_schema))->to,
- datum);
break;
}
- return rval;
+
+ return 0;
+}
+
+int avro_write_data(avro_writer_t writer, avro_schema_t writers_schema,
+ avro_datum_t datum)
+{
+ const avro_encoding_t *enc = &avro_binary_encoding;
+ int rval = -1;
+
+ if (!writer || !is_avro_datum(datum)) {
+ return EINVAL;
+ }
+ /* Only validate datum if a writer's schema is provided */
+ if (is_avro_schema(writers_schema)
+ && !avro_schema_datum_validate(writers_schema, datum)) {
+ return EINVAL;
+ }
+ return write_datum(writer, &avro_binary_encoding,
+ writers_schema, datum);
}
Modified: hadoop/avro/trunk/lang/c/tests/generate_interop_data.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/generate_interop_data.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/generate_interop_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/generate_interop_data.c Tue Feb 9 20:42:02 2010
@@ -17,13 +17,19 @@
avro_datum_t interop;
avro_datum_t array_datum;
avro_datum_t node_datum;
+ avro_datum_t union_datum;
avro_datum_t out_datum;
+ enum Kind {
+ KIND_A,
+ KIND_B,
+ KIND_C
+ };
if (argc != 3) {
exit(EXIT_FAILURE);
}
snprintf(outpath, sizeof(outpath), "%s/c.avro", argv[2]);
- fprintf(stderr, "Writing %s\n", outpath);
+ fprintf(stderr, "Writing to %s\n", outpath);
fp = fopen(argv[1], "r");
rval = fread(jsontext, 1, sizeof(jsontext) - 1, fp);
@@ -34,7 +40,7 @@
check(rval, avro_file_writer_create(outpath, schema, &file_writer));
/* TODO: create a method for generating random data from schema */
- interop = avro_record("interop", "org.apache.avro");
+ interop = avro_record("Interop", "org.apache.avro");
avro_record_set(interop, "intField", avro_int32(42));
avro_record_set(interop, "longField", avro_int64(4242));
avro_record_set(interop, "stringField",
@@ -52,8 +58,9 @@
avro_record_set(interop, "arrayField", array_datum);
avro_record_set(interop, "mapField", avro_map());
- avro_record_set(interop, "unionField", avro_double(1.61803399));
- avro_record_set(interop, "enumField", avro_enum("Kind", "B"));
+ union_datum = avro_union(1, avro_double(1.61803399));
+ avro_record_set(interop, "unionField", union_datum);
+ avro_record_set(interop, "enumField", avro_enum("Kind", KIND_A));
avro_record_set(interop, "fixedField",
avro_fixed("MD5", "1234567890123456", 16));
@@ -66,14 +73,24 @@
rval = avro_file_writer_append(file_writer, interop);
if (rval) {
fprintf(stderr, "Unable to append data to interop file!\n");
+ exit(EXIT_FAILURE);
+ } else {
+ fprintf(stderr, "Successfully appended datum to file\n");
}
+
check(rval, avro_file_writer_close(file_writer));
+ fprintf(stderr, "Closed writer.\n");
check(rval, avro_file_reader(outpath, &file_reader));
+ fprintf(stderr, "Re-reading datum to verify\n");
check(rval, avro_file_reader_read(file_reader, NULL, &out_datum));
+ fprintf(stderr, "Verifying datum...");
if (!avro_datum_equal(interop, out_datum)) {
+ fprintf(stderr, "fail!\n");
exit(EXIT_FAILURE);
}
+ fprintf(stderr, "ok\n");
check(rval, avro_file_reader_close(file_reader));
+ fprintf(stderr, "Closed reader.\n");
return 0;
}
Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Tue Feb 9 20:42:02 2010
@@ -53,25 +53,38 @@
avro_schema_t readers_schema, avro_datum_t datum, char *type)
{
avro_datum_t datum_out;
- reader = avro_reader_memory(buf, sizeof(buf));
- writer = avro_writer_memory(buf, sizeof(buf));
+ int validate;
- if (avro_write_data(writer, writers_schema, datum)) {
- fprintf(stderr, "Unable to write %s\n", type);
- exit(EXIT_FAILURE);
- }
- if (avro_read_data(reader, writers_schema, readers_schema, &datum_out))
{
- fprintf(stderr, "Unable to read %s\n", type);
- exit(EXIT_FAILURE);
- }
- if (!avro_datum_equal(datum, datum_out)) {
- fprintf(stderr, "Unable to encode/decode %s\n", type);
- exit(EXIT_FAILURE);
+ for (validate = 0; validate <= 1; validate++) {
+
+ reader = avro_reader_memory(buf, sizeof(buf));
+ writer = avro_writer_memory(buf, sizeof(buf));
+
+ /* Validating read/write */
+ if (avro_write_data
+ (writer, validate ? writers_schema : NULL, datum)) {
+ fprintf(stderr, "Unable to write %s validate=%d\n",
+ type, validate);
+ exit(EXIT_FAILURE);
+ }
+ if (avro_read_data
+ (reader, writers_schema, readers_schema, &datum_out)) {
+ fprintf(stderr, "Unable to read %s validate=%d\n", type,
+ validate);
+ exit(EXIT_FAILURE);
+ }
+ if (!avro_datum_equal(datum, datum_out)) {
+ fprintf(stderr,
+ "Unable to encode/decode %s validate=%d\n",
+ type, validate);
+ exit(EXIT_FAILURE);
+ }
+
+ avro_reader_dump(reader, stderr);
+ avro_datum_decref(datum_out);
+ avro_reader_free(reader);
+ avro_writer_free(writer);
}
- avro_reader_dump(reader, stderr);
- avro_datum_decref(datum_out);
- avro_reader_free(reader);
- avro_writer_free(writer);
}
static int test_string(void)
@@ -204,8 +217,15 @@
static int test_enum(void)
{
+ enum avro_languages {
+ AVRO_C,
+ AVRO_CPP,
+ AVRO_PYTHON,
+ AVRO_RUBY,
+ AVRO_JAVA
+ };
avro_schema_t schema = avro_schema_enum("language");
- avro_datum_t datum = avro_enum("language", "C");
+ avro_datum_t datum = avro_enum("language", AVRO_C);
avro_schema_enum_symbol_append(schema, "C");
avro_schema_enum_symbol_append(schema, "C++");
@@ -262,6 +282,7 @@
static int test_union(void)
{
avro_schema_t schema = avro_schema_union();
+ avro_datum_t union_datum;
avro_datum_t datum;
avro_schema_union_append(schema, avro_schema_string());
@@ -269,8 +290,10 @@
avro_schema_union_append(schema, avro_schema_null());
datum = avro_wrapstring("Follow your bliss.");
+ union_datum = avro_union(0, datum);
- write_read_check(schema, NULL, datum, "union");
+ write_read_check(schema, NULL, union_datum, "union");
+ avro_datum_decref(union_datum);
avro_datum_decref(datum);
avro_schema_decref(schema);
return 0;
Modified: hadoop/avro/trunk/lang/c/version.sh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Tue Feb 9 20:42:02 2010
@@ -18,7 +18,7 @@
# libavro_binary_age = 0
# libavro_interface_age = 0
#
-libavro_micro_version=17
+libavro_micro_version=18
libavro_interface_age=0
libavro_binary_age=0