Author: massie
Date: Tue Feb  9 20:42:02 2010
New Revision: 908209

URL: http://svn.apache.org/viewvc?rev=908209&view=rev
Log:
AVRO-412. Allow schema validation to be optional

Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/docs/index.txt
    hadoop/avro/trunk/lang/c/examples/Makefile.am
    hadoop/avro/trunk/lang/c/src/avro.h
    hadoop/avro/trunk/lang/c/src/datafile.c
    hadoop/avro/trunk/lang/c/src/datum.c
    hadoop/avro/trunk/lang/c/src/datum.h
    hadoop/avro/trunk/lang/c/src/datum_equal.c
    hadoop/avro/trunk/lang/c/src/datum_read.c
    hadoop/avro/trunk/lang/c/src/datum_validate.c
    hadoop/avro/trunk/lang/c/src/datum_write.c
    hadoop/avro/trunk/lang/c/tests/generate_interop_data.c
    hadoop/avro/trunk/lang/c/tests/test_avro_data.c
    hadoop/avro/trunk/lang/c/version.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb  9 20:42:02 2010
@@ -320,6 +320,8 @@
 
     AVRO-261. Allow Schemas to be immutable (thiru)
 
+    AVRO-412. Allow schema validation to be optional (massie)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/lang/c/docs/index.txt
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/docs/index.txt?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/docs/index.txt (original)
+++ hadoop/avro/trunk/lang/c/docs/index.txt Tue Feb  9 20:42:02 2010
@@ -34,9 +34,27 @@
 A C program is like a fast dance on a newly waxed dance floor by people carrying razors.
 ____
 
-The C implementation is still not quite ready for production use.
-The current code is being tested on +MacOS X+ and +Linux+.  We're
-always looking for contributions so, if you're a C hacker, please
+The C implementation has been tested on +MacOSX+ and +Linux+ but, over
+time, the number of supported OSes should grow.  Please let us know if
+you're using +Avro C+ on other systems. There are no dependencies on 
+external libraries.  We embedded http://www.digip.org/jansson/[Jansson] into
++Avro C+ for parsing JSON into schema structures.  
+
+The C implementation supports:
+
+* binary encoding/decoding of all primitive and complex data types
+* storage to an Avro Object Container File
+* schema resolution, promotion and projection
+* validating and non-validating mode for writing Avro data
+
+The C implementation is lacking:
+
+* RPC
+
+To learn about the API, take a look at the examples and reference files
+later in this document.
+
+We're always looking for contributions so, if you're a C hacker, please
 feel free to http://hadoop.apache.org/avro/[submit patches to the
 project].
 
@@ -107,7 +125,7 @@
 
 [WARNING] 
 ===============================
-Don't "give" +Avro C+ a string that you haven't allocated from the heap with +malloc+.
+Don't "give" +Avro C+ a string that you haven't allocated from the heap with e.g. +malloc+ or +strdup+.
 
 For example, *don't* do this:
 ----
@@ -115,8 +133,31 @@
 ----
 ===============================
 
+== Schema Validation
+
+If you want to write a datum, you would use the following function:
+
+[source,c]
+----
+int avro_write_data(avro_writer_t writer,
+                    avro_schema_t writers_schema, avro_datum_t datum);
+----
+
+If you pass in a +writers_schema+, then your +datum+ will be validated *before*
+it is sent to the +writer+.  This check ensures that your data has the 
+correct format.  If you are certain your datum is correct, you can pass
+a +NULL+ value for +writers_schema+ and +Avro C+ will not validate before
+writing.
+
+NOTE: Data written to an Avro Object Container File is always validated.
+
 == Examples
 
+[quote,Dante Hicks]
+____
+I'm not even supposed to be here today!
+____
+
 Imagine you're a free-lance hacker in Leonardo, New Jersey and you've 
 been approached by the owner of the local *Quick Stop Convenience* store.
 He wants you to create a contact database case he needs to call employees

Modified: hadoop/avro/trunk/lang/c/examples/Makefile.am
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/examples/Makefile.am?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/examples/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/examples/Makefile.am Tue Feb  9 20:42:02 2010
@@ -10,3 +10,5 @@
 
 quickstop_SOURCES=quickstop.c
 quickstop_LDADD=$(examples_LDADD)
+
+CLEANFILES=quickstop.db

Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Tue Feb  9 20:42:02 2010
@@ -86,7 +86,7 @@
                               ||is_avro_fixed(obj))
 #define is_avro_map(obj)      (obj && avro_typeof(obj) == AVRO_MAP)
 #define is_avro_array(obj)    (obj && avro_typeof(obj) == AVRO_ARRAY)
-#define is_avro_union(obj)    (obj && avro_classof(obj) == AVRO_SCHEMA && 
avro_typeof(obj) == AVRO_UNION)
+#define is_avro_union(obj)    (obj && avro_typeof(obj) == AVRO_UNION)
 #define is_avro_complex_type(obj) (!(is_avro_primitive(obj))
 #define is_avro_link(obj)     (obj && avro_typeof(obj) == AVRO_LINK)
 
@@ -181,7 +181,7 @@
 avro_datum_t avro_boolean(int8_t i);
 avro_datum_t avro_null(void);
 avro_datum_t avro_record(const char *name, const char *space);
-avro_datum_t avro_enum(const char *name, const char *symbol);
+avro_datum_t avro_enum(const char *name, int i);
 avro_datum_t avro_fixed(const char *name, const char *bytes,
                        const int64_t size);
 avro_datum_t avro_wrapfixed(const char *name, const char *bytes,
@@ -190,7 +190,7 @@
                            const int64_t size);
 avro_datum_t avro_map(void);
 avro_datum_t avro_array(void);
-avro_datum_t avro_union(const avro_schema_t schema, const avro_datum_t datum);
+avro_datum_t avro_union(int64_t discriminant, const avro_datum_t datum);
 
 /* getters */
 int avro_string_get(avro_datum_t datum, char **p);

Modified: hadoop/avro/trunk/lang/c/src/datafile.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datafile.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datafile.c (original)
+++ hadoop/avro/trunk/lang/c/src/datafile.c Tue Feb  9 20:42:02 2010
@@ -283,7 +283,7 @@
                /* Write the sync marker */
                check(rval, write_sync(w));
                /* Reset the datum writer */
-               avro_writer_reset(w->writer);
+               avro_writer_reset(w->datum_writer);
                w->block_count = 0;
        }
        return 0;

Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Tue Feb  9 20:42:02 2010
@@ -380,6 +380,20 @@
        return &obj;
 }
 
+avro_datum_t avro_union(int64_t discriminant, avro_datum_t value)
+{
+       struct avro_union_datum_t *datum =
+           malloc(sizeof(struct avro_union_datum_t));
+       if (!datum) {
+               return NULL;
+       }
+       datum->discriminant = discriminant;
+       datum->value = avro_datum_incref(value);
+
+       avro_datum_init(&datum->obj, AVRO_UNION);
+       return &datum->obj;
+}
+
 avro_datum_t avro_record(const char *name, const char *space)
 {
        struct avro_record_datum_t *datum =
@@ -398,11 +412,23 @@
                free((void *)datum);
                return NULL;
        }
-       datum->fields = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
-       if (!datum->fields) {
-               free((void *)datum->space);
-               free((void *)datum->name);
-               free((void *)datum);
+       datum->field_order = st_init_numtable_with_size(DEFAULT_TABLE_SIZE);
+       if (!datum->field_order) {
+               if (space) {
+                       free((void *)datum->space);
+               }
+               free((char *)datum->name);
+               free(datum);
+               return NULL;
+       }
+       datum->fields_byname = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
+       if (!datum->fields_byname) {
+               st_free_table(datum->field_order);
+               if (space) {
+                       free((void *)datum->space);
+               }
+               free((char *)datum->name);
+               free(datum);
                return NULL;
        }
 
@@ -420,7 +446,7 @@
        } val;
        if (is_avro_datum(datum) && is_avro_record(datum) && field_name) {
                if (st_lookup
-                   (avro_datum_to_record(datum)->fields,
+                   (avro_datum_to_record(datum)->fields_byname,
                     (st_data_t) field_name, &(val.data))) {
                        *field = val.field;
                        return 0;
@@ -442,20 +468,25 @@
                        avro_datum_decref(old_field);
                } else {
                        /* Inserting new value */
+                       struct avro_record_datum_t *record =
+                           avro_datum_to_record(datum);
                        key = strdup(field_name);
                        if (!key) {
                                return ENOMEM;
                        }
+                       st_insert(record->field_order,
+                                 record->field_order->num_entries,
+                                 (st_data_t) key);
                }
                avro_datum_incref(field_value);
-               st_insert(avro_datum_to_record(datum)->fields, (st_data_t) key,
-                         (st_data_t) field_value);
+               st_insert(avro_datum_to_record(datum)->fields_byname,
+                         (st_data_t) key, (st_data_t) field_value);
                return 0;
        }
        return EINVAL;
 }
 
-avro_datum_t avro_enum(const char *name, const char *symbol)
+avro_datum_t avro_enum(const char *name, int i)
 {
        struct avro_enum_datum_t *datum =
            malloc(sizeof(struct avro_enum_datum_t));
@@ -463,7 +494,7 @@
                return NULL;
        }
        datum->name = strdup(name);
-       datum->symbol = strdup(symbol);
+       datum->value = i;
 
        avro_datum_init(&datum->obj, AVRO_ENUM);
        return &datum->obj;
@@ -740,9 +771,10 @@
                                if (record->space) {
                                        free((void *)record->space);
                                }
-                               st_foreach(record->fields,
+                               st_foreach(record->fields_byname,
                                           char_datum_free_foreach, 0);
-                               st_free_table(record->fields);
+                               st_free_table(record->field_order);
+                               st_free_table(record->fields_byname);
                                free(record);
                        }
                        break;
@@ -750,7 +782,6 @@
                                struct avro_enum_datum_t *enump;
                                enump = avro_datum_to_enum(datum);
                                free((void *)enump->name);
-                               free((void *)enump->symbol);
                                free(enump);
                        }
                        break;
@@ -781,7 +812,12 @@
                                free(array);
                        }
                        break;
-               case AVRO_UNION:
+               case AVRO_UNION:{
+                               struct avro_union_datum_t *unionp;
+                               unionp = avro_datum_to_union(datum);
+                               avro_datum_decref(unionp->value);
+                               free(unionp);
+                       }
                        break;
                case AVRO_LINK:{
                                /* TODO */

Modified: hadoop/avro/trunk/lang/c/src/datum.h
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.h?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.h (original)
+++ hadoop/avro/trunk/lang/c/src/datum.h Tue Feb  9 20:42:02 2010
@@ -76,13 +76,14 @@
        struct avro_obj_t obj;
        const char *name;
        const char *space;
-       st_table *fields;
+       st_table *field_order;
+       st_table *fields_byname;
 };
 
 struct avro_enum_datum_t {
        struct avro_obj_t obj;
        const char *name;
-       const char *symbol;
+       int value;
 };
 
 struct avro_array_datum_t {
@@ -90,6 +91,12 @@
        st_table *els;
 };
 
+struct avro_union_datum_t {
+       struct avro_obj_t obj;
+       int64_t discriminant;
+       avro_datum_t value;
+};
+
 #define avro_datum_to_string(datum_)    (container_of(datum_, struct 
avro_string_datum_t, obj))
 #define avro_datum_to_bytes(datum_)     (container_of(datum_, struct 
avro_bytes_datum_t, obj))
 #define avro_datum_to_int32(datum_)     (container_of(datum_, struct 
avro_int32_datum_t, obj))
@@ -102,5 +109,6 @@
 #define avro_datum_to_record(datum_)    (container_of(datum_, struct 
avro_record_datum_t, obj))
 #define avro_datum_to_enum(datum_)      (container_of(datum_, struct 
avro_enum_datum_t, obj))
 #define avro_datum_to_array(datum_)     (container_of(datum_, struct 
avro_array_datum_t, obj))
+#define avro_datum_to_union(datum_)    (container_of(datum_, struct 
avro_union_datum_t, obj))
 
 #endif

Modified: hadoop/avro/trunk/lang/c/src/datum_equal.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_equal.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_equal.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_equal.c Tue Feb  9 20:42:02 2010
@@ -76,7 +76,7 @@
 static int record_equal(struct avro_record_datum_t *a,
                        struct avro_record_datum_t *b)
 {
-       struct st_equal_args args = { 1, b->fields };
+       struct st_equal_args args = { 1, b->fields_byname };
        if (strcmp(a->name, b->name)) {
                /* This have different names */
                return 0;
@@ -90,18 +90,16 @@
                /* One has a namespace, one doesn't */
                return 0;
        }
-
-       if (a->fields->num_entries != b->fields->num_entries) {
+       if (a->fields_byname->num_entries != b->fields_byname->num_entries) {
                return 0;
        }
-       st_foreach(a->fields, st_equal_foreach, (st_data_t) & args);
+       st_foreach(a->fields_byname, st_equal_foreach, (st_data_t) & args);
        return args.rval;
 }
 
 static int enum_equal(struct avro_enum_datum_t *a, struct avro_enum_datum_t *b)
 {
-       return strcmp(a->name, b->name) == 0
-           && strcmp(a->symbol, b->symbol) == 0;
+       return strcmp(a->name, b->name) == 0 && a->value == b->value;
 }
 
 static int fixed_equal(struct avro_fixed_datum_t *a,
@@ -110,7 +108,14 @@
        return a->size == b->size && memcmp(a->bytes, b->bytes, a->size) == 0;
 }
 
-int avro_datum_equal(avro_datum_t a, avro_datum_t b)
+static int union_equal(struct avro_union_datum_t *a,
+                      struct avro_union_datum_t *b)
+{
+       /* XXX: not sure. a->discriminant == b->discriminant important? */
+       return avro_datum_equal(a->value, b->value);
+}
+
+int avro_datum_equal(const avro_datum_t a, const avro_datum_t b)
 {
        if (!(is_avro_datum(a) && is_avro_datum(b))) {
                return 0;
@@ -161,7 +166,9 @@
                                   avro_datum_to_fixed(b));
 
        case AVRO_UNION:
-               break;
+               return union_equal(avro_datum_to_union(a),
+                                  avro_datum_to_union(b));
+
        case AVRO_LINK:
                /*
                 * TODO 

Modified: hadoop/avro/trunk/lang/c/src/datum_read.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_read.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_read.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_read.c Tue Feb  9 20:42:02 2010
@@ -20,6 +20,7 @@
 #include "encoding.h"
 #include "schema.h"
 #include "datum.h"
+#include "avro_private.h"
 
 int
 avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
@@ -99,20 +100,9 @@
 {
        int rval;
        int64_t index;
-       union {
-               st_data_t data;
-               char *sym;
-       } val;
 
-       rval = enc->read_long(reader, &index);
-       if (rval) {
-               return rval;
-       }
-
-       if (!st_lookup(writers_schema->symbols, index, &val.data)) {
-               return EINVAL;
-       }
-       *datum = avro_enum(writers_schema->name, val.sym);
+       check(rval, enc->read_long(reader, &index));
+       *datum = avro_enum(writers_schema->name, index);
        return 0;
 }
 
@@ -230,21 +220,20 @@
           struct avro_union_schema_t *readers_schema, avro_datum_t * datum)
 {
        int rval;
-       int64_t index;
+       int64_t discriminant;
+       avro_datum_t value;
        union {
                st_data_t data;
                avro_schema_t schema;
        } val;
-
-       rval = enc->read_long(reader, &index);
-       if (rval) {
-               return rval;
-       }
-
-       if (!st_lookup(writers_schema->branches, index, &val.data)) {
+       check(rval, enc->read_long(reader, &discriminant));
+       if (!st_lookup(writers_schema->branches, discriminant, &val.data)) {
                return EILSEQ;
        }
-       return avro_read_data(reader, val.schema, NULL, datum);
+       check(rval, avro_read_data(reader, val.schema, NULL, &value));
+       *datum = avro_union(discriminant, value);
+       avro_datum_decref(value);
+       return 0;
 }
 
 /* TODO: handle default values in fields */
@@ -310,27 +299,6 @@
                return EINVAL;
        }
 
-       /*
-        * schema resolution 
-        */
-       if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
-               struct avro_union_schema_t *union_schema =
-                   avro_schema_to_union(readers_schema);
-
-               for (i = 0; i < union_schema->branches->num_entries; i++) {
-                       union {
-                               st_data_t data;
-                               avro_schema_t schema;
-                       } val;
-                       st_lookup(union_schema->branches, i, &val.data);
-                       if (avro_schema_match(writers_schema, val.schema)) {
-                               return avro_read_data(reader, writers_schema,
-                                                     val.schema, datum);
-                       }
-               }
-               return EINVAL;
-       }
-
        switch (avro_typeof(writers_schema)) {
        case AVRO_NULL:
                rval = enc->read_null(reader);

Modified: hadoop/avro/trunk/lang/c/src/datum_validate.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_validate.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_validate.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_validate.c Tue Feb  9 20:42:02 2010
@@ -84,15 +84,11 @@
 
        case AVRO_ENUM:
                if (is_avro_enum(datum)) {
-                       struct avro_enum_schema_t *enump =
-                           avro_schema_to_enum(expected_schema);
-                       struct avro_enum_datum_t *d = avro_datum_to_enum(datum);
-                       union {
-                               st_data_t data;
-                               long idx;
-                       } val;
-                       return st_lookup(enump->symbols_byname,
-                                        (st_data_t) d->symbol, &val.data);
+                       long value = avro_datum_to_enum(datum)->value;
+                       long max_value =
+                           avro_schema_to_enum(expected_schema)->symbols->
+                           num_entries;
+                       return 0 <= value && value <= max_value;
                }
                return 0;
 
@@ -130,24 +126,25 @@
                break;
 
        case AVRO_UNION:
-               {
+               if (is_avro_union(datum)) {
                        struct avro_union_schema_t *union_schema =
                            avro_schema_to_union(expected_schema);
+                       struct avro_union_datum_t *union_datum =
+                           avro_datum_to_union(datum);
+                       union {
+                               st_data_t data;
+                               avro_schema_t schema;
+                       } val;
 
-                       for (i = 0; i < union_schema->branches->num_entries;
-                            i++) {
-                               union {
-                                       st_data_t data;
-                                       avro_schema_t schema;
-                               } val;
-                               st_lookup(union_schema->branches, i, &val.data);
-                               if (avro_schema_datum_validate
-                                   (val.schema, datum)) {
-                                       return 1;
-                               }
+                       if (!st_lookup
+                           (union_schema->branches, union_datum->discriminant,
+                            &val.data)) {
+                               return 0;
                        }
+                       return avro_schema_datum_validate(val.schema,
+                                                         union_datum->value);
                }
-               return 0;
+               break;
 
        case AVRO_RECORD:
                if (is_avro_record(datum)) {

Modified: hadoop/avro/trunk/lang/c/src/datum_write.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_write.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_write.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_write.c Tue Feb  9 20:42:02 2010
@@ -21,27 +21,45 @@
 #include "datum.h"
 #include "encoding.h"
 
+static int write_datum(avro_writer_t writer, const avro_encoding_t * enc,
+                      avro_schema_t writers_schema, avro_datum_t datum);
+
 static int
 write_record(avro_writer_t writer, const avro_encoding_t * enc,
-            struct avro_record_schema_t *record, avro_datum_t datum)
+            struct avro_record_schema_t *schema, avro_datum_t datum)
 {
        int rval;
        long i;
+       avro_datum_t field_datum;
 
-       for (i = 0; i < record->fields->num_entries; i++) {
-               avro_datum_t field_datum;
-               union {
-                       st_data_t data;
-                       struct avro_record_field_t *field;
-               } val;
-               st_lookup(record->fields, i, &val.data);
-               rval = avro_record_get(datum, val.field->name, &field_datum);
-               if (rval) {
-                       return rval;
+       if (schema) {
+               for (i = 0; i < schema->fields->num_entries; i++) {
+                       union {
+                               st_data_t data;
+                               struct avro_record_field_t *field;
+                       } val;
+                       st_lookup(schema->fields, i, &val.data);
+                       check(rval,
+                             avro_record_get(datum, val.field->name,
+                                             &field_datum));
+                       check(rval,
+                             write_datum(writer, enc, val.field->type,
+                                         field_datum));
                }
-               rval = avro_write_data(writer, val.field->type, field_datum);
-               if (rval) {
-                       return rval;
+       } else {
+               /* No schema.  Just write the record datum */
+               struct avro_record_datum_t *record =
+                   avro_datum_to_record(datum);
+               for (i = 0; i < record->field_order->num_entries; i++) {
+                       union {
+                               st_data_t data;
+                               char *name;
+                       } val;
+                       st_lookup(record->field_order, i, &val.data);
+                       check(rval,
+                             avro_record_get(datum, val.name, &field_datum));
+                       check(rval,
+                             write_datum(writer, enc, NULL, field_datum));
                }
        }
        return 0;
@@ -51,15 +69,7 @@
 write_enum(avro_writer_t writer, const avro_encoding_t * enc,
           struct avro_enum_schema_t *enump, struct avro_enum_datum_t *datum)
 {
-       union {
-               st_data_t data;
-               long idx;
-       } val;
-       if (!st_lookup
-           (enump->symbols_byname, (st_data_t) datum->symbol, &val.data)) {
-               return EINVAL;
-       }
-       return enc->write_long(writer, val.idx);
+       return enc->write_long(writer, datum->value);
 }
 
 struct write_map_args {
@@ -77,7 +87,7 @@
                args->rval = rval;
                return ST_STOP;
        }
-       rval = avro_write_data(args->writer, args->values_schema, datum);
+       rval = write_datum(args->writer, args->enc, args->values_schema, datum);
        if (rval) {
                args->rval = rval;
                return ST_STOP;
@@ -87,11 +97,12 @@
 
 static int
 write_map(avro_writer_t writer, const avro_encoding_t * enc,
-         struct avro_map_schema_t *writer_schema,
+         struct avro_map_schema_t *writers_schema,
          struct avro_map_datum_t *datum)
 {
        int rval;
-       struct write_map_args args = { 0, writer, enc, writer_schema->values };
+       struct write_map_args args =
+           { 0, writer, enc, writers_schema ? writers_schema->values : NULL };
 
        if (datum->map->num_entries) {
                rval = enc->write_long(writer, datum->map->num_entries);
@@ -129,11 +140,10 @@
                                avro_datum_t datum;
                        } val;
                        st_lookup(array->els, i, &val.data);
-                       rval =
-                           avro_write_data(writer, schema->items, val.datum);
-                       if (rval) {
-                               return rval;
-                       }
+                       check(rval,
+                             write_datum(writer, enc,
+                                         schema ? schema->items : NULL,
+                                         val.datum));
                }
        }
        return enc->write_long(writer, 0);
@@ -141,153 +151,138 @@
 
 static int
 write_union(avro_writer_t writer, const avro_encoding_t * enc,
-           struct avro_union_schema_t *schema, avro_datum_t datum)
+           struct avro_union_schema_t *schema,
+           struct avro_union_datum_t *unionp)
 {
        int rval;
-       long i;
+       avro_schema_t write_schema = NULL;
 
-       for (i = 0; i < schema->branches->num_entries; i++) {
+       check(rval, enc->write_long(writer, unionp->discriminant));
+       if (schema) {
                union {
                        st_data_t data;
                        avro_schema_t schema;
                } val;
-               st_lookup(schema->branches, i, &val.data);
-               if (avro_schema_datum_validate(val.schema, datum)) {
-                       rval = enc->write_long(writer, i);
-                       if (rval) {
-                               return rval;
-                       }
-                       return avro_write_data(writer, val.schema, datum);
+               if (!st_lookup
+                   (schema->branches, unionp->discriminant, &val.data)) {
+                       return EINVAL;
                }
+               write_schema = val.schema;
        }
-       return EINVAL;
+       return write_datum(writer, enc, write_schema, unionp->value);
 }
 
-int
-avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
-               avro_datum_t datum)
+static int write_datum(avro_writer_t writer, const avro_encoding_t * enc,
+                      avro_schema_t writers_schema, avro_datum_t datum)
 {
-       const avro_encoding_t *enc = &avro_binary_encoding;
-       int rval = -1;
+       int rval;
 
-       if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
-               return EINVAL;
+       if (is_avro_schema(writers_schema) && is_avro_link(writers_schema)) {
+               return write_datum(writer, enc,
+                                  (avro_schema_to_link(writers_schema))->to,
+                                  datum);
        }
-       if (!avro_schema_datum_validate(writer_schema, datum)) {
-               return EINVAL;
-       }
-       switch (avro_typeof(writer_schema)) {
+
+       switch (avro_typeof(datum)) {
        case AVRO_NULL:
-               rval = enc->write_null(writer);
-               break;
+               return enc->write_null(writer);
+
        case AVRO_BOOLEAN:
-               rval =
-                   enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
-               break;
+               return enc->write_boolean(writer,
+                                         avro_datum_to_boolean(datum)->i);
+
        case AVRO_STRING:
-               rval =
-                   enc->write_string(writer, avro_datum_to_string(datum)->s);
-               break;
+               return enc->write_string(writer,
+                                        avro_datum_to_string(datum)->s);
+
        case AVRO_BYTES:
-               rval =
-                   enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
-                                    avro_datum_to_bytes(datum)->size);
-               break;
+               return enc->write_bytes(writer,
+                                       avro_datum_to_bytes(datum)->bytes,
+                                       avro_datum_to_bytes(datum)->size);
+
        case AVRO_INT32:
-               {
-                       int32_t i;
-                       if (is_avro_int32(datum)) {
-                               i = avro_datum_to_int32(datum)->i32;
-                       } else if (is_avro_int64(datum)) {
-                               i = (int32_t) avro_datum_to_int64(datum)->i64;
-                       } else {
-                               assert(0
-                                      &&
-                                      "Serious bug in schema validation code");
+       case AVRO_INT64:{
+                       int64_t val = avro_typeof(datum) == AVRO_INT32 ?
+                           avro_datum_to_int32(datum)->i32 :
+                           avro_datum_to_int64(datum)->i64;
+                       if (is_avro_schema(writers_schema)) {
+                               /* handle promotion */
+                               if (is_avro_float(writers_schema)) {
+                                       return enc->write_float(writer,
+                                                               (float)val);
+                               } else if (is_avro_double(writers_schema)) {
+                                       return enc->write_double(writer,
+                                                                (double)val);
+                               }
                        }
-                       rval = enc->write_int(writer, i);
+                       return enc->write_long(writer, val);
                }
-               break;
-       case AVRO_INT64:
-               rval = enc->write_long(writer, avro_datum_to_int64(datum)->i64);
-               break;
-       case AVRO_FLOAT:
-               {
-                       float f;
-                       if (is_avro_int32(datum)) {
-                               f = (float)(avro_datum_to_int32(datum)->i32);
-                       } else if (is_avro_int64(datum)) {
-                               f = (float)(avro_datum_to_int64(datum)->i64);
-                       } else if (is_avro_float(datum)) {
-                               f = avro_datum_to_float(datum)->f;
-                       } else if (is_avro_double(datum)) {
-                               f = (float)(avro_datum_to_double(datum)->d);
-                       } else {
-                               assert(0
-                                      &&
-                                      "Serious bug in schema validation code");
+
+       case AVRO_FLOAT:{
+                       float val = avro_datum_to_float(datum)->f;
+                       if (is_avro_schema(writers_schema)
+                           && is_avro_double(writers_schema)) {
+                               /* handle promotion */
+                               return enc->write_double(writer, (double)val);
                        }
-                       rval = enc->write_float(writer, f);
+                       return enc->write_float(writer, val);
                }
-               break;
+
        case AVRO_DOUBLE:
-               {
-                       double d;
-                       if (is_avro_int32(datum)) {
-                               d = (double)(avro_datum_to_int32(datum)->i32);
-                       } else if (is_avro_int64(datum)) {
-                               d = (double)(avro_datum_to_int64(datum)->i64);
-                       } else if (is_avro_float(datum)) {
-                               d = (double)(avro_datum_to_float(datum)->f);
-                       } else if (is_avro_double(datum)) {
-                               d = avro_datum_to_double(datum)->d;
-                       } else {
-                               assert(0 && "Bug in schema validation code");
-                       }
-                       rval = enc->write_double(writer, d);
-               }
-               break;
+               return enc->write_double(writer,
+                                        avro_datum_to_double(datum)->d);
 
        case AVRO_RECORD:
-               rval =
-                   write_record(writer, enc,
-                                avro_schema_to_record(writer_schema), datum);
-               break;
+               return write_record(writer, enc,
+                                   avro_schema_to_record(writers_schema),
+                                   datum);
 
        case AVRO_ENUM:
-               rval =
-                   write_enum(writer, enc, avro_schema_to_enum(writer_schema),
-                              avro_datum_to_enum(datum));
-               break;
+               return write_enum(writer, enc,
+                                 avro_schema_to_enum(writers_schema),
+                                 avro_datum_to_enum(datum));
 
        case AVRO_FIXED:
-               return avro_write(writer, avro_datum_to_fixed(datum)->bytes,
+               return avro_write(writer,
+                                 avro_datum_to_fixed(datum)->bytes,
                                  avro_datum_to_fixed(datum)->size);
 
        case AVRO_MAP:
-               rval =
-                   write_map(writer, enc, avro_schema_to_map(writer_schema),
-                             avro_datum_to_map(datum));
-               break;
+               return write_map(writer, enc,
+                                avro_schema_to_map(writers_schema),
+                                avro_datum_to_map(datum));
+
        case AVRO_ARRAY:
-               rval =
-                   write_array(writer, enc,
-                               avro_schema_to_array(writer_schema),
-                               avro_datum_to_array(datum));
-               break;
+               return write_array(writer, enc,
+                                  avro_schema_to_array(writers_schema),
+                                  avro_datum_to_array(datum));
 
        case AVRO_UNION:
-               rval =
-                   write_union(writer, enc,
-                               avro_schema_to_union(writer_schema), datum);
-               break;
+               return write_union(writer, enc,
+                                  avro_schema_to_union(writers_schema),
+                                  avro_datum_to_union(datum));
 
        case AVRO_LINK:
-               rval =
-                   avro_write_data(writer,
-                                   (avro_schema_to_link(writer_schema))->to,
-                                   datum);
                break;
        }
-       return rval;
+
+       return 0;
+}
+
+int avro_write_data(avro_writer_t writer, avro_schema_t writers_schema,
+                   avro_datum_t datum)
+{
+       const avro_encoding_t *enc = &avro_binary_encoding;
+       int rval = -1;
+
+       if (!writer || !is_avro_datum(datum)) {
+               return EINVAL;
+       }
+       /* Only validate datum if a writer's schema is provided */
+       if (is_avro_schema(writers_schema)
+           && !avro_schema_datum_validate(writers_schema, datum)) {
+               return EINVAL;
+       }
+       return write_datum(writer, &avro_binary_encoding,
+                          writers_schema, datum);
 }

Modified: hadoop/avro/trunk/lang/c/tests/generate_interop_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/generate_interop_data.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/generate_interop_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/generate_interop_data.c Tue Feb  9 20:42:02 2010
@@ -17,13 +17,19 @@
        avro_datum_t interop;
        avro_datum_t array_datum;
        avro_datum_t node_datum;
+       avro_datum_t union_datum;
        avro_datum_t out_datum;
+       enum Kind {
+               KIND_A,
+               KIND_B,
+               KIND_C
+       };
 
        if (argc != 3) {
                exit(EXIT_FAILURE);
        }
        snprintf(outpath, sizeof(outpath), "%s/c.avro", argv[2]);
-       fprintf(stderr, "Writing %s\n", outpath);
+       fprintf(stderr, "Writing to %s\n", outpath);
 
        fp = fopen(argv[1], "r");
        rval = fread(jsontext, 1, sizeof(jsontext) - 1, fp);
@@ -34,7 +40,7 @@
        check(rval, avro_file_writer_create(outpath, schema, &file_writer));
 
        /* TODO: create a method for generating random data from schema */
-       interop = avro_record("interop", "org.apache.avro");
+       interop = avro_record("Interop", "org.apache.avro");
        avro_record_set(interop, "intField", avro_int32(42));
        avro_record_set(interop, "longField", avro_int64(4242));
        avro_record_set(interop, "stringField",
@@ -52,8 +58,9 @@
        avro_record_set(interop, "arrayField", array_datum);
 
        avro_record_set(interop, "mapField", avro_map());
-       avro_record_set(interop, "unionField", avro_double(1.61803399));
-       avro_record_set(interop, "enumField", avro_enum("Kind", "B"));
+       union_datum = avro_union(1, avro_double(1.61803399));
+       avro_record_set(interop, "unionField", union_datum);
+       avro_record_set(interop, "enumField", avro_enum("Kind", KIND_A));
        avro_record_set(interop, "fixedField",
                        avro_fixed("MD5", "1234567890123456", 16));
 
@@ -66,14 +73,24 @@
        rval = avro_file_writer_append(file_writer, interop);
        if (rval) {
                fprintf(stderr, "Unable to append data to interop file!\n");
+               exit(EXIT_FAILURE);
+       } else {
+               fprintf(stderr, "Successfully appended datum to file\n");
        }
+
        check(rval, avro_file_writer_close(file_writer));
+       fprintf(stderr, "Closed writer.\n");
 
        check(rval, avro_file_reader(outpath, &file_reader));
+       fprintf(stderr, "Re-reading datum to verify\n");
        check(rval, avro_file_reader_read(file_reader, NULL, &out_datum));
+       fprintf(stderr, "Verifying datum...");
        if (!avro_datum_equal(interop, out_datum)) {
+               fprintf(stderr, "fail!\n");
                exit(EXIT_FAILURE);
        }
+       fprintf(stderr, "ok\n");
        check(rval, avro_file_reader_close(file_reader));
+       fprintf(stderr, "Closed reader.\n");
        return 0;
 }

Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Tue Feb  9 20:42:02 2010
@@ -53,25 +53,38 @@
                 avro_schema_t readers_schema, avro_datum_t datum, char *type)
 {
        avro_datum_t datum_out;
-       reader = avro_reader_memory(buf, sizeof(buf));
-       writer = avro_writer_memory(buf, sizeof(buf));
+       int validate;
 
-       if (avro_write_data(writer, writers_schema, datum)) {
-               fprintf(stderr, "Unable to write %s\n", type);
-               exit(EXIT_FAILURE);
-       }
-       if (avro_read_data(reader, writers_schema, readers_schema, &datum_out)) {
-               fprintf(stderr, "Unable to read %s\n", type);
-               exit(EXIT_FAILURE);
-       }
-       if (!avro_datum_equal(datum, datum_out)) {
-               fprintf(stderr, "Unable to encode/decode %s\n", type);
-               exit(EXIT_FAILURE);
+       for (validate = 0; validate <= 1; validate++) {
+
+               reader = avro_reader_memory(buf, sizeof(buf));
+               writer = avro_writer_memory(buf, sizeof(buf));
+
+               /* Validating read/write */
+               if (avro_write_data
+                   (writer, validate ? writers_schema : NULL, datum)) {
+                       fprintf(stderr, "Unable to write %s validate=%d\n",
+                               type, validate);
+                       exit(EXIT_FAILURE);
+               }
+               if (avro_read_data
+                   (reader, writers_schema, readers_schema, &datum_out)) {
+                       fprintf(stderr, "Unable to read %s validate=%d\n", type,
+                               validate);
+                       exit(EXIT_FAILURE);
+               }
+               if (!avro_datum_equal(datum, datum_out)) {
+                       fprintf(stderr,
+                               "Unable to encode/decode %s validate=%d\n",
+                               type, validate);
+                       exit(EXIT_FAILURE);
+               }
+
+               avro_reader_dump(reader, stderr);
+               avro_datum_decref(datum_out);
+               avro_reader_free(reader);
+               avro_writer_free(writer);
        }
-       avro_reader_dump(reader, stderr);
-       avro_datum_decref(datum_out);
-       avro_reader_free(reader);
-       avro_writer_free(writer);
 }
 
 static int test_string(void)
@@ -204,8 +217,15 @@
 
 static int test_enum(void)
 {
+       enum avro_languages {
+               AVRO_C,
+               AVRO_CPP,
+               AVRO_PYTHON,
+               AVRO_RUBY,
+               AVRO_JAVA
+       };
        avro_schema_t schema = avro_schema_enum("language");
-       avro_datum_t datum = avro_enum("language", "C");
+       avro_datum_t datum = avro_enum("language", AVRO_C);
 
        avro_schema_enum_symbol_append(schema, "C");
        avro_schema_enum_symbol_append(schema, "C++");
@@ -262,6 +282,7 @@
 static int test_union(void)
 {
        avro_schema_t schema = avro_schema_union();
+       avro_datum_t union_datum;
        avro_datum_t datum;
 
        avro_schema_union_append(schema, avro_schema_string());
@@ -269,8 +290,10 @@
        avro_schema_union_append(schema, avro_schema_null());
 
        datum = avro_wrapstring("Follow your bliss.");
+       union_datum = avro_union(0, datum);
 
-       write_read_check(schema, NULL, datum, "union");
+       write_read_check(schema, NULL, union_datum, "union");
+       avro_datum_decref(union_datum);
        avro_datum_decref(datum);
        avro_schema_decref(schema);
        return 0;

Modified: hadoop/avro/trunk/lang/c/version.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Tue Feb  9 20:42:02 2010
@@ -18,7 +18,7 @@
 #         libavro_binary_age = 0
 #         libavro_interface_age = 0
 #
-libavro_micro_version=17
+libavro_micro_version=18
 libavro_interface_age=0
 libavro_binary_age=0
 


Reply via email to