Author: massie
Date: Fri Jan 22 00:38:01 2010
New Revision: 901950

URL: http://svn.apache.org/viewvc?rev=901950&view=rev
Log:
AVRO-364.  Add support for encoding/decoding records

Added:
    hadoop/avro/trunk/lang/c/src/datum_equal.c
    hadoop/avro/trunk/lang/c/src/datum_read.c
    hadoop/avro/trunk/lang/c/src/datum_validate.c
    hadoop/avro/trunk/lang/c/src/datum_write.c
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/src/Makefile.am
    hadoop/avro/trunk/lang/c/src/avro.h
    hadoop/avro/trunk/lang/c/src/datum.c
    hadoop/avro/trunk/lang/c/src/io.c
    hadoop/avro/trunk/lang/c/src/schema.c
    hadoop/avro/trunk/lang/c/src/schema.h
    hadoop/avro/trunk/lang/c/tests/test_avro_data.c
    hadoop/avro/trunk/lang/c/version.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Fri Jan 22 00:38:01 2010
@@ -238,6 +238,8 @@
     AVRO-362. Add test to ensure Python implementation handles Union schema
     with two fixed types of different names (hammer)
 
+    AVRO-364. Add support for encoding/decoding records (massie)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/lang/c/src/Makefile.am
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/Makefile.am?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/src/Makefile.am Fri Jan 22 00:38:01 2010
@@ -6,8 +6,10 @@
 include_HEADERS = avro.h
 
 lib_LTLIBRARIES = libavro.la
-libavro_la_SOURCES = st.c st.h schema.c schema.h schema_printf.c schema_equal.c datum.c datum.h \
-io.c dump.c dump.h encoding_binary.c container_of.h queue.h encoding.h
+libavro_la_SOURCES = st.c st.h schema.c schema.h schema_printf.c schema_equal.c \
+datum.c datum_equal.c datum_validate.c datum_read.c datum_write.c datum.h \
+io.c dump.c dump.h encoding_binary.c \
+container_of.h queue.h encoding.h
 libavro_la_LIBADD = $(top_builddir)/jansson/src/.libs/libjansson.a
 libavro_la_LDFLAGS = \
         -version-info $(LIBAVRO_VERSION) \

Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Fri Jan 22 00:38:01 2010
@@ -154,6 +154,9 @@
 int avro_write(avro_writer_t writer, void *buf, int64_t len);
 int avro_flush(avro_writer_t writer);
 
+void avro_writer_dump(avro_writer_t writer, FILE * fp);
+void avro_reader_dump(avro_reader_t reader, FILE * fp);
+
 void avro_reader_free(avro_reader_t reader);
 void avro_writer_free(avro_writer_t writer);
 

Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Fri Jan 22 00:38:01 2010
@@ -31,109 +31,6 @@
        datum->refcount = 1;
 }
 
-static int
-array_equal(struct avro_array_datum_t *a, struct avro_array_datum_t *b)
-{
-       struct avro_array_element_t *a_el, *b_el;
-       if (a->num_elements != b->num_elements) {
-               return 0;
-       }
-       for (a_el = STAILQ_FIRST(&a->els),
-            b_el = STAILQ_FIRST(&b->els);
-            !(a_el == NULL && a_el == NULL);
-            a_el = STAILQ_NEXT(a_el, els), b_el = STAILQ_NEXT(b_el, els)) {
-               if (a_el == NULL || b_el == NULL) {
-                       return 0;       /* different number of elements */
-               }
-               if (!avro_datum_equal(a_el->datum, b_el->datum)) {
-                       return 0;
-               }
-       }
-       return 1;
-}
-
-struct map_equal_args {
-       int rval;
-       st_table *st;
-};
-
-static int
-map_equal_foreach(char *key, avro_datum_t datum, struct map_equal_args *args)
-{
-       avro_datum_t datum_other = NULL;
-
-       st_lookup(args->st, (st_data_t) key, (st_data_t *) & datum_other);
-       if (!datum_other) {
-               args->rval = 0;
-               return ST_STOP;
-       }
-       if (!avro_datum_equal(datum, datum_other)) {
-               args->rval = 0;
-               return ST_STOP;
-       }
-       return ST_CONTINUE;
-}
-
-static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b)
-{
-       struct map_equal_args args = { 1, avro_datum_to_map(b)->map };
-       if (a->map->num_entries != b->map->num_entries) {
-               return 0;
-       }
-       st_foreach(avro_datum_to_map(a)->map,
-                  map_equal_foreach, (st_data_t) & args);
-       return args.rval;
-}
-
-int avro_datum_equal(avro_datum_t a, avro_datum_t b)
-{
-       if (!(is_avro_datum(a) && is_avro_datum(b))) {
-               return 0;
-       }
-       if (avro_typeof(a) != avro_typeof(b)) {
-               return 0;
-       }
-       switch (avro_typeof(a)) {
-       case AVRO_STRING:
-               return strcmp(avro_datum_to_string(a)->s,
-                             avro_datum_to_string(b)->s) == 0;
-       case AVRO_BYTES:
-               return (avro_datum_to_bytes(a)->size ==
-                       avro_datum_to_bytes(b)->size)
-                   && memcmp(avro_datum_to_bytes(a)->bytes,
-                             avro_datum_to_bytes(b)->bytes,
-                             avro_datum_to_bytes(a)->size) == 0;
-       case AVRO_INT:
-               return avro_datum_to_int(a)->i == avro_datum_to_int(b)->i;
-       case AVRO_LONG:
-               return avro_datum_to_long(a)->l == avro_datum_to_long(b)->l;
-       case AVRO_FLOAT:
-               return avro_datum_to_float(a)->f == avro_datum_to_float(b)->f;
-       case AVRO_DOUBLE:
-               return avro_datum_to_double(a)->d == avro_datum_to_double(b)->d;
-       case AVRO_BOOLEAN:
-               return avro_datum_to_boolean(a)->i ==
-                   avro_datum_to_boolean(b)->i;
-       case AVRO_NULL:
-               return 1;
-       case AVRO_ARRAY:
-               return array_equal(avro_datum_to_array(a),
-                                  avro_datum_to_array(b));
-       case AVRO_MAP:
-               return map_equal(avro_datum_to_map(a), avro_datum_to_map(b));
-       case AVRO_RECORD:
-       case AVRO_ENUM:
-       case AVRO_FIXED:
-       case AVRO_UNION:
-       case AVRO_LINK:
-               /*
-                * TODO 
-                */
-               return 0;
-       }
-       return 0;
-}
-
 avro_datum_t avro_string(const char *str)
 {
        struct avro_string_datum_t *datum =
@@ -257,14 +154,19 @@
 avro_datum_t
 avro_record_field_get(const avro_datum_t datum, const char *field_name)
 {
-       struct avro_record_datum_t *field = NULL;
+       union {
+               avro_datum_t field;
+               st_data_t data;
+       } val;
        if (is_avro_datum(datum) && is_avro_record(datum)) {
                struct avro_record_datum_t *record =
                    avro_datum_to_record(datum);
-               st_lookup(record->fields, (st_data_t) field_name,
-                         (st_data_t *) & field);
+               if (st_lookup
+                   (record->fields, (st_data_t) field_name, &(val.data))) {
+                       return val.field;
+               }
        }
-       return &field->obj;
+       return NULL;
 }
 
 int
@@ -392,744 +294,3 @@
 {
 
 }
-
-int
-avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
-{
-       if (!is_avro_schema(writers_schema) || !is_avro_schema(readers_schema)) {
-               return 0;
-       }
-
-       switch (avro_typeof(writers_schema)) {
-       case AVRO_UNION:
-               return 1;
-
-       case AVRO_INT:
-               return is_avro_int(readers_schema)
-                   || is_avro_long(readers_schema)
-                   || is_avro_float(readers_schema)
-                   || is_avro_double(readers_schema);
-
-       case AVRO_LONG:
-               return is_avro_long(readers_schema)
-                   || is_avro_float(readers_schema)
-                   || is_avro_double(readers_schema);
-
-       case AVRO_FLOAT:
-               return is_avro_float(readers_schema)
-                   || is_avro_double(readers_schema);
-
-       case AVRO_STRING:
-       case AVRO_BYTES:
-       case AVRO_DOUBLE:
-       case AVRO_BOOLEAN:
-       case AVRO_NULL:
-               return avro_typeof(writers_schema) ==
-                   avro_typeof(readers_schema);
-
-       case AVRO_RECORD:
-               return is_avro_record(readers_schema)
-                   && strcmp(avro_schema_name(writers_schema),
-                             avro_schema_name(readers_schema)) == 0;
-
-       case AVRO_FIXED:
-               return is_avro_fixed(readers_schema)
-                   && strcmp(avro_schema_name(writers_schema),
-                             avro_schema_name(readers_schema)) == 0
-                   && (avro_schema_to_fixed(writers_schema))->size ==
-                   (avro_schema_to_fixed(readers_schema))->size;
-
-       case AVRO_ENUM:
-               return is_avro_enum(readers_schema)
-                   && strcmp(avro_schema_to_enum(writers_schema)->name,
-                             avro_schema_to_enum(readers_schema)->name) == 0;
-
-       case AVRO_MAP:
-               return is_avro_map(readers_schema)
-                   && avro_typeof(avro_schema_to_map(writers_schema)->values)
-                   == avro_typeof(avro_schema_to_map(readers_schema)->values);
-
-       case AVRO_ARRAY:
-               return is_avro_array(readers_schema)
-                   && avro_typeof(avro_schema_to_array(writers_schema)->items)
-                   == avro_typeof(avro_schema_to_array(readers_schema)->items);
-
-       case AVRO_LINK:
-               /*
-                * TODO 
-                */
-               break;
-       }
-
-       return 0;
-}
-
-static int
-read_fixed(avro_reader_t reader, const avro_encoding_t * enc,
-          avro_schema_t writers_schema, avro_schema_t readers_schema,
-          avro_datum_t * datum)
-{
-       return 1;
-}
-
-static int
-read_enum(avro_reader_t reader, const avro_encoding_t * enc,
-         avro_schema_t writers_schema, avro_schema_t readers_schema,
-         avro_datum_t * datum)
-{
-       return 1;
-}
-
-static int
-read_array(avro_reader_t reader, const avro_encoding_t * enc,
-          struct avro_array_schema_t *writers_schema,
-          struct avro_array_schema_t *readers_schema, avro_datum_t * datum)
-{
-       int rval;
-       int64_t i;
-       int64_t block_count;
-       int64_t block_size;
-       avro_datum_t array_datum;
-
-       rval = enc->read_long(reader, &block_count);
-       if (rval) {
-               return rval;
-       }
-
-       array_datum = avro_array();
-       while (block_count != 0) {
-               if (block_count < 0) {
-                       block_count = block_count * -1;
-                       rval = enc->read_long(reader, &block_size);
-                       if (rval) {
-                               return rval;
-                       }
-               }
-
-               for (i = 0; i < block_count; i++) {
-                       avro_datum_t datum;
-
-                       rval =
-                           avro_read_data(reader, writers_schema->items,
-                                          readers_schema->items, &datum);
-                       if (rval) {
-                               return rval;
-                       }
-                       rval = avro_array_append_datum(array_datum, datum);
-                       if (rval) {
-                               avro_datum_decref(array_datum);
-                               return rval;
-                       }
-               }
-
-               rval = enc->read_long(reader, &block_count);
-               if (rval) {
-                       return rval;
-               }
-       }
-       *datum = array_datum;
-       return 0;
-}
-
-static int
-read_map(avro_reader_t reader, const avro_encoding_t * enc,
-        struct avro_map_schema_t *writers_schema,
-        struct avro_map_schema_t *readers_schema, avro_datum_t * datum)
-{
-       int rval;
-       int64_t i, block_count;
-       avro_datum_t map = avro_map();
-
-       rval = enc->read_long(reader, &block_count);
-       if (rval) {
-               return rval;
-       }
-       while (block_count != 0) {
-               int64_t block_size;
-               if (block_count < 0) {
-                       block_count = block_count * -1;
-                       rval = enc->read_long(reader, &block_size);
-                       if (rval) {
-                               return rval;
-                       }
-               }
-               for (i = 0; i < block_count; i++) {
-                       char *key;
-                       avro_datum_t value;
-                       rval = enc->read_string(reader, &key);
-                       if (rval) {
-                               return rval;
-                       }
-                       rval =
-                           avro_read_data(reader,
-                                          avro_schema_to_map(writers_schema)->
-                                          values,
-                                          avro_schema_to_map(readers_schema)->
-                                          values, &value);
-                       if (rval) {
-                               return rval;
-                       }
-                       rval = avro_map_set(map, key, value);
-                       if (rval) {
-                               return rval;
-                       }
-               }
-               rval = enc->read_long(reader, &block_count);
-               if (rval) {
-                       return rval;
-               }
-       }
-       *datum = map;
-       return 0;
-}
-
-static int
-read_union(avro_reader_t reader, const avro_encoding_t * enc,
-          avro_schema_t writers_schema, avro_schema_t readers_schema,
-          avro_datum_t * datum)
-{
-       return 1;
-}
-
-static int
-read_record(avro_reader_t reader, const avro_encoding_t * enc,
-           avro_schema_t writers_schema, avro_schema_t readers_schema,
-           avro_datum_t * datum)
-{
-       return 1;
-}
-
-int
-avro_read_data(avro_reader_t reader, avro_schema_t writers_schema,
-              avro_schema_t readers_schema, avro_datum_t * datum)
-{
-       int rval = EINVAL;
-       const avro_encoding_t *enc = &avro_binary_encoding;
-
-       if (!reader || !is_avro_schema(writers_schema) || !datum) {
-               return EINVAL;
-       }
-
-       if (readers_schema == NULL) {
-               readers_schema = writers_schema;
-       } else if (!avro_schema_match(writers_schema, readers_schema)) {
-               return EINVAL;
-       }
-
-       /*
-        * schema resolution 
-        */
-       if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
-               struct avro_union_branch_t *branch;
-               struct avro_union_schema_t *union_schema =
-                   avro_schema_to_union(readers_schema);
-
-               for (branch = STAILQ_FIRST(&union_schema->branches);
-                    branch != NULL; branch = STAILQ_NEXT(branch, branches)) {
-                       if (avro_schema_match(writers_schema, branch->schema)) {
-                               return avro_read_data(reader, writers_schema,
-                                                     branch->schema, datum);
-                       }
-               }
-               return EINVAL;
-       }
-
-       switch (avro_typeof(writers_schema)) {
-       case AVRO_NULL:
-               rval = enc->read_null(reader);
-               *datum = avro_null();
-               break;
-
-       case AVRO_BOOLEAN:
-               {
-                       int8_t b;
-                       rval = enc->read_boolean(reader, &b);
-                       *datum = avro_boolean(b);
-               }
-               break;
-
-       case AVRO_STRING:
-               {
-                       char *s;
-                       rval = enc->read_string(reader, &s);
-                       *datum = avro_string(s);
-               }
-               break;
-
-       case AVRO_INT:
-               {
-                       int32_t i;
-                       rval = enc->read_int(reader, &i);
-                       *datum = avro_int(i);
-               }
-               break;
-
-       case AVRO_LONG:
-               {
-                       int64_t l;
-                       rval = enc->read_long(reader, &l);
-                       *datum = avro_long(l);
-               }
-               break;
-
-       case AVRO_FLOAT:
-               {
-                       float f;
-                       rval = enc->read_float(reader, &f);
-                       *datum = avro_float(f);
-               }
-               break;
-
-       case AVRO_DOUBLE:
-               {
-                       double d;
-                       rval = enc->read_double(reader, &d);
-                       *datum = avro_double(d);
-               }
-               break;
-
-       case AVRO_BYTES:
-               {
-                       char *bytes;
-                       int64_t len;
-                       rval = enc->read_bytes(reader, &bytes, &len);
-                       *datum = avro_bytes(bytes, len);
-               }
-               break;
-
-       case AVRO_FIXED:
-               rval =
-                   read_fixed(reader, enc, writers_schema, readers_schema,
-                              datum);
-               break;
-
-       case AVRO_ENUM:
-               rval =
-                   read_enum(reader, enc, writers_schema, readers_schema,
-                             datum);
-               break;
-
-       case AVRO_ARRAY:
-               rval =
-                   read_array(reader, enc,
-                              avro_schema_to_array(writers_schema),
-                              avro_schema_to_array(readers_schema), datum);
-               break;
-
-       case AVRO_MAP:
-               rval =
-                   read_map(reader, enc, avro_schema_to_map(writers_schema),
-                            avro_schema_to_map(readers_schema), datum);
-               break;
-
-       case AVRO_UNION:
-               rval =
-                   read_union(reader, enc, writers_schema, readers_schema,
-                              datum);
-               break;
-
-       case AVRO_RECORD:
-               rval =
-                   read_record(reader, enc, writers_schema, readers_schema,
-                               datum);
-               break;
-
-       case AVRO_LINK:
-               rval =
-                   avro_read_data(reader,
-                                  (avro_schema_to_link(writers_schema))->to,
-                                  readers_schema, datum);
-               break;
-       }
-
-       return rval;
-}
-
-struct validate_st {
-       avro_schema_t expected_schema;
-       int rval;
-};
-
-static int
-schema_map_validate_foreach(char *key, avro_datum_t datum,
-                           struct validate_st *vst)
-{
-       if (!avro_schema_datum_validate(vst->expected_schema, datum)) {
-               vst->rval = 0;
-               return ST_STOP;
-       }
-       return ST_CONTINUE;
-}
-
-int
-avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum)
-{
-       if (!is_avro_schema(expected_schema) || !is_avro_datum(datum)) {
-               return EINVAL;
-       }
-
-       switch (avro_typeof(expected_schema)) {
-       case AVRO_NULL:
-               return is_avro_null(datum);
-
-       case AVRO_BOOLEAN:
-               return is_avro_boolean(datum);
-
-       case AVRO_STRING:
-               return is_avro_string(datum);
-
-       case AVRO_BYTES:
-               return is_avro_bytes(datum);
-
-       case AVRO_INT:
-               return is_avro_int(datum)
-                   || (is_avro_long(datum)
-                       && (INT_MIN <= avro_datum_to_long(datum)->l
-                           && avro_datum_to_long(datum)->l <= INT_MAX));
-
-       case AVRO_LONG:
-               return is_avro_int(datum) || is_avro_long(datum);
-
-       case AVRO_FLOAT:
-               return is_avro_int(datum) || is_avro_long(datum)
-                   || is_avro_float(datum);
-
-       case AVRO_DOUBLE:
-               return is_avro_int(datum) || is_avro_long(datum)
-                   || is_avro_float(datum) || is_avro_double(datum);
-
-       case AVRO_FIXED:
-               return (is_avro_fixed(datum)
-                       && (avro_schema_to_fixed(expected_schema)->size ==
-                           avro_datum_to_fixed(datum)->size));
-
-       case AVRO_ENUM:
-               {
-                       struct avro_enum_schema_t *enump =
-                           avro_schema_to_enum(expected_schema);
-                       struct avro_enum_symbol_t *symbol =
-                           STAILQ_FIRST(&enump->symbols);
-                       while (symbol) {
-                               if (!strcmp
-                                   (symbol->symbol,
-                                    avro_datum_to_enum(datum)->symbol)) {
-                                       return 1;
-                               }
-                               symbol = STAILQ_NEXT(symbol, symbols);
-                       }
-                       return 0;
-               }
-               break;
-
-       case AVRO_ARRAY:
-               {
-                       if (is_avro_array(datum)) {
-                               struct avro_array_datum_t *array =
-                                   avro_datum_to_array(datum);
-                               struct avro_array_element_t *el =
-                                   STAILQ_FIRST(&array->els);
-                               while (el) {
-                                       if (!avro_schema_datum_validate
-                                           ((avro_schema_to_array
-                                             (expected_schema))->items,
-                                            el->datum)) {
-                                               return 0;
-                                       }
-                                       el = STAILQ_NEXT(el, els);
-                               }
-                               return 1;
-                       }
-                       return 0;
-               }
-               break;
-
-       case AVRO_MAP:
-               if (is_avro_map(datum)) {
-                       struct validate_st vst =
-                           { avro_schema_to_map(expected_schema)->values, 1 };
-                       st_foreach(avro_datum_to_map(datum)->map,
-                                  schema_map_validate_foreach,
-                                  (st_data_t) & vst);
-                       return vst.rval;
-               }
-               break;
-
-       case AVRO_UNION:
-               {
-                       struct avro_union_schema_t *union_schema =
-                           avro_schema_to_union(expected_schema);
-                       struct avro_union_branch_t *branch;
-
-                       for (branch = STAILQ_FIRST(&union_schema->branches);
-                            branch != NULL;
-                            branch = STAILQ_NEXT(branch, branches)) {
-                               if (avro_schema_datum_validate
-                                   (branch->schema, datum)) {
-                                       return 1;
-                               }
-                       }
-                       return 0;
-               }
-               break;
-
-       case AVRO_RECORD:
-               if (is_avro_record(datum)) {
-                       struct avro_record_schema_t *record_schema =
-                           avro_schema_to_record(expected_schema);
-                       struct avro_record_field_t *field;
-                       for (field = STAILQ_FIRST(&record_schema->fields);
-                            field != NULL;
-                            field = STAILQ_NEXT(field, fields)) {
-                               avro_datum_t field_datum =
-                                   avro_record_field_get(datum, field->name);
-                               if (!field_datum) {
-                                       /*
-                                        * TODO: check for default values 
-                                        */
-                                       return 0;
-                               }
-                               if (!avro_schema_datum_validate
-                                   (field->type, field_datum)) {
-                                       return 0;
-                               }
-                       }
-                       return 1;
-               }
-               break;
-
-       case AVRO_LINK:
-               {
-                       return
-                           avro_schema_datum_validate((avro_schema_to_link
-                                                       (expected_schema))->to,
-                                                      datum);
-               }
-               break;
-       }
-       return 0;
-}
-
-static int
-write_record(avro_writer_t writer, const avro_encoding_t * enc,
-            avro_schema_t writer_schema, avro_datum_t datum)
-{
-       /*
-        * TODO 
-        */
-       return EINVAL;
-}
-
-static int
-write_enum(avro_writer_t writer, const avro_encoding_t * enc,
-          avro_schema_t writer_schema, avro_datum_t datum)
-{
-       /*
-        * TODO 
-        */
-       return EINVAL;
-}
-
-static int
-write_fixed(avro_writer_t writer, const avro_encoding_t * enc,
-           avro_schema_t writer_schema, avro_datum_t datum)
-{
-       /*
-        * TODO 
-        */
-       return EINVAL;
-}
-
-struct write_map_args {
-       int rval;
-       avro_writer_t writer;
-       const avro_encoding_t *enc;
-       avro_schema_t values_schema;
-};
-
-static int
-write_map_foreach(char *key, avro_datum_t datum, struct write_map_args *args)
-{
-       int rval = args->enc->write_string(args->writer, key);
-       if (rval) {
-               args->rval = rval;
-               return ST_STOP;
-       }
-       rval = avro_write_data(args->writer, args->values_schema, datum);
-       if (rval) {
-               args->rval = rval;
-               return ST_STOP;
-       }
-       return ST_CONTINUE;
-}
-
-static int
-write_map(avro_writer_t writer, const avro_encoding_t * enc,
-         struct avro_map_schema_t *writer_schema,
-         struct avro_map_datum_t *datum)
-{
-       int rval;
-       struct write_map_args args = { 0, writer, enc, writer_schema->values };
-
-       if (datum->map->num_entries) {
-               rval = enc->write_long(writer, datum->map->num_entries);
-               if (rval) {
-                       return rval;
-               }
-               st_foreach(datum->map, write_map_foreach, (st_data_t) & args);
-       }
-       if (!args.rval) {
-               rval = enc->write_long(writer, 0);
-               if (rval) {
-                       return rval;
-               }
-               return 0;
-       }
-       return args.rval;
-}
-
-static int
-write_array(avro_writer_t writer, const avro_encoding_t * enc,
-           struct avro_array_schema_t *schema,
-           struct avro_array_datum_t *array)
-{
-       int rval;
-       struct avro_array_element_t *el;
-
-       if (array->num_elements) {
-               rval = enc->write_long(writer, array->num_elements);
-               if (rval) {
-                       return rval;
-               }
-               for (el = STAILQ_FIRST(&array->els);
-                    el != NULL; el = STAILQ_NEXT(el, els)) {
-                       rval =
-                           avro_write_data(writer, schema->items, el->datum);
-                       if (rval) {
-                               return rval;
-                       }
-               }
-       }
-       return enc->write_long(writer, 0);
-}
-
-int
-avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
-               avro_datum_t datum)
-{
-       const avro_encoding_t *enc = &avro_binary_encoding;
-       int rval = -1;
-
-       if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
-               return EINVAL;
-       }
-       if (!avro_schema_datum_validate(writer_schema, datum)) {
-               return EINVAL;
-       }
-       switch (avro_typeof(writer_schema)) {
-       case AVRO_NULL:
-               rval = enc->write_null(writer);
-               break;
-       case AVRO_BOOLEAN:
-               rval =
-                   enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
-               break;
-       case AVRO_STRING:
-               rval =
-                   enc->write_string(writer, avro_datum_to_string(datum)->s);
-               break;
-       case AVRO_BYTES:
-               rval =
-                   enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
-                                    avro_datum_to_bytes(datum)->size);
-               break;
-       case AVRO_INT:
-               {
-                       int32_t i;
-                       if (is_avro_int(datum)) {
-                               i = avro_datum_to_int(datum)->i;
-                       } else if (is_avro_long(datum)) {
-                               i = (int32_t) avro_datum_to_long(datum)->l;
-                       } else {
-                               assert(0
-                                      &&
-                                      "Serious bug in schema validation code");
-                       }
-                       rval = enc->write_int(writer, i);
-               }
-               break;
-       case AVRO_LONG:
-               rval = enc->write_long(writer, avro_datum_to_long(datum)->l);
-               break;
-       case AVRO_FLOAT:
-               {
-                       float f;
-                       if (is_avro_int(datum)) {
-                               f = (float)(avro_datum_to_int(datum)->i);
-                       } else if (is_avro_long(datum)) {
-                               f = (float)(avro_datum_to_long(datum)->l);
-                       } else if (is_avro_float(datum)) {
-                               f = avro_datum_to_float(datum)->f;
-                       } else if (is_avro_double(datum)) {
-                               f = (float)(avro_datum_to_double(datum)->d);
-                       } else {
-                               assert(0
-                                      &&
-                                      "Serious bug in schema validation code");
-                       }
-                       rval = enc->write_float(writer, f);
-               }
-               break;
-       case AVRO_DOUBLE:
-               {
-                       double d;
-                       if (is_avro_int(datum)) {
-                               d = (double)(avro_datum_to_int(datum)->i);
-                       } else if (is_avro_long(datum)) {
-                               d = (double)(avro_datum_to_long(datum)->l);
-                       } else if (is_avro_float(datum)) {
-                               d = (double)(avro_datum_to_float(datum)->f);
-                       } else if (is_avro_double(datum)) {
-                               d = avro_datum_to_double(datum)->d;
-                       } else {
-                               assert(0 && "Bug in schema validation code");
-                       }
-                       rval = enc->write_double(writer, d);
-               }
-               break;
-
-       case AVRO_RECORD:
-               rval = write_record(writer, enc, writer_schema, datum);
-               break;
-       case AVRO_ENUM:
-               rval = write_enum(writer, enc, writer_schema, datum);
-               break;
-       case AVRO_FIXED:
-               rval = write_fixed(writer, enc, writer_schema, datum);
-               break;
-       case AVRO_MAP:
-               rval =
-                   write_map(writer, enc, avro_schema_to_map(writer_schema),
-                             avro_datum_to_map(datum));
-               break;
-       case AVRO_ARRAY:
-               rval =
-                   write_array(writer, enc,
-                               avro_schema_to_array(writer_schema),
-                               avro_datum_to_array(datum));
-               break;
-
-       case AVRO_UNION:
-               {
-                       assert(0 && "Bug in schema validation code");
-               }
-               break;
-
-       case AVRO_LINK:
-               rval =
-                   avro_write_data(writer,
-                                   (avro_schema_to_link(writer_schema))->to,
-                                   datum);
-               break;
-       }
-       return rval;
-}

Added: hadoop/avro/trunk/lang/c/src/datum_equal.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_equal.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_equal.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_equal.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+
+#include <string.h>
+#include "datum.h"
+
+/*
+ * Compare two array datums element by element.
+ *
+ * Returns 1 when both arrays report the same num_elements and every pair
+ * of elements at the same position is equal according to
+ * avro_datum_equal(); returns 0 otherwise.
+ */
+static int
+array_equal(struct avro_array_datum_t *a, struct avro_array_datum_t *b)
+{
+       struct avro_array_element_t *a_el, *b_el;
+       if (a->num_elements != b->num_elements) {
+               return 0;
+       }
+       /*
+        * Walk both lists in lock step.  The loop only stops once BOTH
+        * cursors are exhausted; if exactly one runs out first the body
+        * reports a mismatch before either cursor is advanced again.
+        */
+       for (a_el = STAILQ_FIRST(&a->els),
+            b_el = STAILQ_FIRST(&b->els);
+            !(a_el == NULL && b_el == NULL);
+            a_el = STAILQ_NEXT(a_el, els), b_el = STAILQ_NEXT(b_el, els)) {
+               if (a_el == NULL || b_el == NULL) {
+                       return 0;       /* different number of elements */
+               }
+               if (!avro_datum_equal(a_el->datum, b_el->datum)) {
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/*
+ * State shared with st_equal_foreach() while one st_table is walked and
+ * each of its entries is compared against a second table.
+ */
+struct st_equal_args {
+       int rval;               /* set to 0 by st_equal_foreach() on the first mismatch */
+       st_table *st;           /* table in which every visited key is looked up */
+};
+
+/*
+ * st_foreach() callback: check that `key` is present in args->st and that
+ * the datum stored there equals `datum`.  On the first miss or mismatch
+ * args->rval is cleared and the walk is stopped.
+ */
+static int
+st_equal_foreach(char *key, avro_datum_t datum, struct st_equal_args *args)
+{
+       /* st_lookup() stores an st_data_t; read it back as a datum. */
+       union {
+               avro_datum_t datum_other;
+               st_data_t data;
+       } found;
+
+       if (st_lookup(args->st, (st_data_t) key, &found.data)
+           && avro_datum_equal(datum, found.datum_other)) {
+               return ST_CONTINUE;
+       }
+
+       args->rval = 0;
+       return ST_STOP;
+}
+
+/*
+ * Two map datums are equal when they hold the same number of entries and
+ * every key of `a` maps to an equal datum in `b`.  Returns 1 or 0.
+ */
+static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b)
+{
+       struct st_equal_args cmp;
+
+       if (a->map->num_entries != b->map->num_entries) {
+               return 0;
+       }
+
+       cmp.rval = 1;
+       cmp.st = b->map;
+       st_foreach(a->map, st_equal_foreach, (st_data_t) & cmp);
+       return cmp.rval;
+}
+
+/*
+ * Two record datums are equal when they hold the same number of fields
+ * and every field name of `a` maps to an equal datum in `b`.
+ *
+ * Returns 1 when equal, 0 otherwise.  The comparison is silent: it writes
+ * nothing to stderr, so callers can use it freely as a predicate.
+ */
+static int record_equal(struct avro_record_datum_t *a,
+                       struct avro_record_datum_t *b)
+{
+       struct st_equal_args args = { 1, b->fields };
+       if (a->fields->num_entries != b->fields->num_entries) {
+               return 0;
+       }
+       st_foreach(a->fields, st_equal_foreach, (st_data_t) & args);
+       return args.rval;
+}
+
+/*
+ * Deep equality test for two datums.
+ *
+ * Returns 1 when both arguments are datums of the same type holding equal
+ * values and 0 otherwise.  Enum, fixed, union and link datums are not
+ * compared yet and always yield 0.
+ */
+int avro_datum_equal(avro_datum_t a, avro_datum_t b)
+{
+       int same = 0;
+
+       if (!is_avro_datum(a) || !is_avro_datum(b)) {
+               return 0;
+       }
+       if (avro_typeof(a) != avro_typeof(b)) {
+               return 0;
+       }
+
+       switch (avro_typeof(a)) {
+       case AVRO_NULL:
+               /* every null equals every other null */
+               same = 1;
+               break;
+
+       case AVRO_BOOLEAN:
+               same = (avro_datum_to_boolean(a)->i ==
+                       avro_datum_to_boolean(b)->i);
+               break;
+
+       case AVRO_INT:
+               same = (avro_datum_to_int(a)->i == avro_datum_to_int(b)->i);
+               break;
+
+       case AVRO_LONG:
+               same = (avro_datum_to_long(a)->l == avro_datum_to_long(b)->l);
+               break;
+
+       case AVRO_FLOAT:
+               same = (avro_datum_to_float(a)->f == avro_datum_to_float(b)->f);
+               break;
+
+       case AVRO_DOUBLE:
+               same = (avro_datum_to_double(a)->d ==
+                       avro_datum_to_double(b)->d);
+               break;
+
+       case AVRO_STRING:
+               same = !strcmp(avro_datum_to_string(a)->s,
+                              avro_datum_to_string(b)->s);
+               break;
+
+       case AVRO_BYTES:
+               /* lengths must agree before the contents are compared */
+               same = (avro_datum_to_bytes(a)->size ==
+                       avro_datum_to_bytes(b)->size)
+                   && !memcmp(avro_datum_to_bytes(a)->bytes,
+                              avro_datum_to_bytes(b)->bytes,
+                              avro_datum_to_bytes(a)->size);
+               break;
+
+       case AVRO_ARRAY:
+               same = array_equal(avro_datum_to_array(a),
+                                  avro_datum_to_array(b));
+               break;
+
+       case AVRO_MAP:
+               same = map_equal(avro_datum_to_map(a), avro_datum_to_map(b));
+               break;
+
+       case AVRO_RECORD:
+               same = record_equal(avro_datum_to_record(a),
+                                   avro_datum_to_record(b));
+               break;
+
+       case AVRO_ENUM:
+       case AVRO_FIXED:
+       case AVRO_UNION:
+       case AVRO_LINK:
+               /*
+                * TODO 
+                */
+               break;
+       }
+       return same;
+}

Added: hadoop/avro/trunk/lang/c/src/datum_read.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_read.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_read.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_read.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,409 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include "encoding.h"
+#include "schema.h"
+#include "datum.h"
+
+/*
+ * Decide whether data written with writers_schema may be read using
+ * readers_schema.
+ *
+ * Returns 1 when the two schemas are considered compatible and 0 when
+ * they are not, or when either argument is not a schema.  A writer union
+ * matches any reader; a writer link never matches (not implemented yet).
+ * Maps and arrays are compared only by the type of their value/item
+ * schema, records and enums only by name.
+ */
+int
+avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
+{
+       int matched = 0;
+
+       if (!is_avro_schema(writers_schema) || !is_avro_schema(readers_schema)) {
+               return 0;
+       }
+
+       switch (avro_typeof(writers_schema)) {
+       case AVRO_UNION:
+               matched = 1;
+               break;
+
+       /* numeric writers may be promoted to any wider reader type */
+       case AVRO_INT:
+               matched = is_avro_int(readers_schema)
+                   || is_avro_long(readers_schema)
+                   || is_avro_float(readers_schema)
+                   || is_avro_double(readers_schema);
+               break;
+
+       case AVRO_LONG:
+               matched = is_avro_long(readers_schema)
+                   || is_avro_float(readers_schema)
+                   || is_avro_double(readers_schema);
+               break;
+
+       case AVRO_FLOAT:
+               matched = is_avro_float(readers_schema)
+                   || is_avro_double(readers_schema);
+               break;
+
+       /* remaining primitives require the very same type */
+       case AVRO_STRING:
+       case AVRO_BYTES:
+       case AVRO_DOUBLE:
+       case AVRO_BOOLEAN:
+       case AVRO_NULL:
+               matched = (avro_typeof(writers_schema) ==
+                          avro_typeof(readers_schema));
+               break;
+
+       case AVRO_RECORD:
+               if (is_avro_record(readers_schema)) {
+                       matched = !strcmp(avro_schema_name(writers_schema),
+                                         avro_schema_name(readers_schema));
+               }
+               break;
+
+       case AVRO_FIXED:
+               if (is_avro_fixed(readers_schema)
+                   && !strcmp(avro_schema_name(writers_schema),
+                              avro_schema_name(readers_schema))) {
+                       matched =
+                           (avro_schema_to_fixed(writers_schema)->size ==
+                            avro_schema_to_fixed(readers_schema)->size);
+               }
+               break;
+
+       case AVRO_ENUM:
+               if (is_avro_enum(readers_schema)) {
+                       matched =
+                           !strcmp(avro_schema_to_enum(writers_schema)->name,
+                                   avro_schema_to_enum(readers_schema)->name);
+               }
+               break;
+
+       case AVRO_MAP:
+               if (is_avro_map(readers_schema)) {
+                       matched =
+                           (avro_typeof
+                            (avro_schema_to_map(writers_schema)->values) ==
+                            avro_typeof
+                            (avro_schema_to_map(readers_schema)->values));
+               }
+               break;
+
+       case AVRO_ARRAY:
+               if (is_avro_array(readers_schema)) {
+                       matched =
+                           (avro_typeof
+                            (avro_schema_to_array(writers_schema)->items) ==
+                            avro_typeof
+                            (avro_schema_to_array(readers_schema)->items));
+               }
+               break;
+
+       case AVRO_LINK:
+               /*
+                * TODO 
+                */
+               break;
+       }
+
+       return matched;
+}
+
+/*
+ * Placeholder for decoding a fixed value: not implemented yet.
+ * Always returns 1 (non-zero) without reading from `reader` and without
+ * storing anything in *datum.
+ */
+static int
+read_fixed(avro_reader_t reader, const avro_encoding_t * enc,
+          avro_schema_t writers_schema, avro_schema_t readers_schema,
+          avro_datum_t * datum)
+{
+       return 1;
+}
+
+/*
+ * Placeholder for decoding an enum value: not implemented yet.
+ * Always returns 1 (non-zero) without reading from `reader` and without
+ * storing anything in *datum.
+ */
+static int
+read_enum(avro_reader_t reader, const avro_encoding_t * enc,
+         avro_schema_t writers_schema, avro_schema_t readers_schema,
+         avro_datum_t * datum)
+{
+       return 1;
+}
+
+/*
+ * Decode an array and store the resulting array datum in *datum.
+ *
+ * The input is consumed as a sequence of blocks: a long element count,
+ * that many items, and so on until a count of zero is read.  A negative
+ * count is followed by one extra long, which is read and discarded, and
+ * the absolute value of the count is then used.  Each item is decoded
+ * with avro_read_data() using the `items` schemas of the writer and the
+ * reader.
+ *
+ * Returns 0 on success.  On failure a non-zero error code is returned,
+ * the partially built array is released and *datum is left untouched.
+ *
+ * NOTE(review): who owns an element after avro_array_append_datum()
+ * (succeeding or failing) is not visible here; confirm whether the
+ * element needs an extra decref.
+ */
+static int
+read_array(avro_reader_t reader, const avro_encoding_t * enc,
+          struct avro_array_schema_t *writers_schema,
+          struct avro_array_schema_t *readers_schema, avro_datum_t * datum)
+{
+       int rval;
+       int64_t i;
+       int64_t block_count;
+       int64_t block_size;
+       avro_datum_t array_datum;
+
+       rval = enc->read_long(reader, &block_count);
+       if (rval) {
+               return rval;
+       }
+
+       array_datum = avro_array();
+       if (!array_datum) {
+               return ENOMEM;
+       }
+
+       while (block_count != 0) {
+               if (block_count < 0) {
+                       /* -INT64_MIN is not representable: reject it */
+                       if (block_count == INT64_MIN) {
+                               rval = EINVAL;
+                               goto error;
+                       }
+                       block_count = -block_count;
+                       /* the extra long is consumed but not used */
+                       rval = enc->read_long(reader, &block_size);
+                       if (rval) {
+                               goto error;
+                       }
+               }
+
+               for (i = 0; i < block_count; i++) {
+                       avro_datum_t element;
+
+                       rval =
+                           avro_read_data(reader, writers_schema->items,
+                                          readers_schema->items, &element);
+                       if (rval) {
+                               goto error;
+                       }
+                       rval = avro_array_append_datum(array_datum, element);
+                       if (rval) {
+                               goto error;
+                       }
+               }
+
+               rval = enc->read_long(reader, &block_count);
+               if (rval) {
+                       goto error;
+               }
+       }
+       *datum = array_datum;
+       return 0;
+
+error:
+       /* single exit for failures so the array is never leaked */
+       avro_datum_decref(array_datum);
+       return rval;
+}
+
+/*
+ * Decode a map and store the resulting map datum in *datum.
+ *
+ * The input is consumed as a sequence of blocks: a long entry count,
+ * that many (string key, value) pairs, and so on until a count of zero
+ * is read.  A negative count is followed by one extra long, which is
+ * read and discarded, and the absolute value of the count is then used.
+ * Each value is decoded with avro_read_data() using the `values` schemas
+ * of the writer and the reader.
+ *
+ * Returns 0 on success.  On failure a non-zero error code is returned,
+ * the partially built map is released and *datum is left untouched.
+ *
+ * NOTE(review): ownership of `key` (filled in by enc->read_string) and
+ * of `value` around avro_map_set() is not visible here; confirm whether
+ * either must be released by this function.
+ */
+static int
+read_map(avro_reader_t reader, const avro_encoding_t * enc,
+        struct avro_map_schema_t *writers_schema,
+        struct avro_map_schema_t *readers_schema, avro_datum_t * datum)
+{
+       int rval;
+       int64_t i, block_count;
+       avro_datum_t map;
+
+       rval = enc->read_long(reader, &block_count);
+       if (rval) {
+               return rval;
+       }
+
+       map = avro_map();
+       if (!map) {
+               return ENOMEM;
+       }
+
+       while (block_count != 0) {
+               int64_t block_size;
+               if (block_count < 0) {
+                       /* -INT64_MIN is not representable: reject it */
+                       if (block_count == INT64_MIN) {
+                               rval = EINVAL;
+                               goto error;
+                       }
+                       block_count = -block_count;
+                       /* the extra long is consumed but not used */
+                       rval = enc->read_long(reader, &block_size);
+                       if (rval) {
+                               goto error;
+                       }
+               }
+               for (i = 0; i < block_count; i++) {
+                       char *key;
+                       avro_datum_t value;
+                       rval = enc->read_string(reader, &key);
+                       if (rval) {
+                               goto error;
+                       }
+                       /*
+                        * The schema arguments are already map schema
+                        * structs, so `values` is read from them directly.
+                        */
+                       rval =
+                           avro_read_data(reader, writers_schema->values,
+                                          readers_schema->values, &value);
+                       if (rval) {
+                               goto error;
+                       }
+                       rval = avro_map_set(map, key, value);
+                       if (rval) {
+                               goto error;
+                       }
+               }
+               rval = enc->read_long(reader, &block_count);
+               if (rval) {
+                       goto error;
+               }
+       }
+       *datum = map;
+       return 0;
+
+error:
+       /* single exit for failures so the map is never leaked */
+       avro_datum_decref(map);
+       return rval;
+}
+
+/*
+ * Placeholder for decoding a union value: not implemented yet.
+ * Always returns 1 (non-zero) without reading from `reader` and without
+ * storing anything in *datum.
+ */
+static int
+read_union(avro_reader_t reader, const avro_encoding_t * enc,
+          avro_schema_t writers_schema, avro_schema_t readers_schema,
+          avro_datum_t * datum)
+{
+       return 1;
+}
+
+/*
+ * Decode a record and store the resulting record datum in *datum.
+ *
+ * Fields are read in the order of the writer's schema.  For each writer
+ * field the reader field with the same name is looked up; the value is
+ * decoded with avro_read_data() using both field types and stored in the
+ * record under the writer's field name.
+ *
+ * Returns 0 on success.  Returns -1 when a writer field has no
+ * counterpart in the reader's schema (skipping is not implemented),
+ * ENOMEM when the record cannot be created, or the error code of the
+ * failing call.  On any failure the partially built record is released
+ * and *datum is left untouched, matching read_array() and read_map().
+ *
+ * TODO: handle default values in fields
+ *
+ * NOTE(review): ownership of field_datum around avro_record_field_set()
+ * is not visible here; confirm whether it needs a decref.
+ */
+static int
+read_record(avro_reader_t reader, const avro_encoding_t * enc,
+           struct avro_record_schema_t *writers_schema,
+           struct avro_record_schema_t *readers_schema, avro_datum_t * datum)
+{
+       int rval;
+       struct avro_record_field_t *reader_field;
+       struct avro_record_field_t *field;
+       avro_datum_t record;
+       avro_datum_t field_datum;
+
+       record = avro_record(writers_schema->name);
+       if (!record) {
+               return ENOMEM;
+       }
+
+       for (field = STAILQ_FIRST(&writers_schema->fields);
+            field != NULL; field = STAILQ_NEXT(field, fields)) {
+               /* find the reader's field carrying the same name */
+               for (reader_field = STAILQ_FIRST(&readers_schema->fields);
+                    reader_field != NULL;
+                    reader_field = STAILQ_NEXT(reader_field, fields)) {
+                       if (strcmp(field->name, reader_field->name) == 0) {
+                               break;
+                       }
+               }
+               if (!reader_field) {
+                       /* TODO: skip_record */
+                       rval = -1;
+                       goto error;
+               }
+               rval =
+                   avro_read_data(reader, field->type,
+                                  reader_field->type, &field_datum);
+               if (rval) {
+                       goto error;
+               }
+               rval = avro_record_field_set(record, field->name, field_datum);
+               if (rval) {
+                       goto error;
+               }
+       }
+
+       /* publish the record only once it is complete */
+       *datum = record;
+       return 0;
+
+error:
+       avro_datum_decref(record);
+       return rval;
+}
+
+/*
+ * Decode one value from `reader` using the binary encoding and store the
+ * resulting datum in *datum.
+ *
+ * writers_schema describes how the data was written.  readers_schema is
+ * the schema the caller wants to read with; when it is NULL the writer's
+ * schema is used for both.  When the writer is not a union but the
+ * reader is, the first reader branch accepted by avro_schema_match() is
+ * used.
+ *
+ * Returns 0 on success.  Returns EINVAL when `reader`, `writers_schema`
+ * or `datum` is invalid, when the schemas do not match, or when no reader
+ * union branch matches; otherwise the non-zero code of the failing
+ * decoder is returned.  For primitive types *datum is written only when
+ * the underlying read succeeded, so no datum is ever built from
+ * uninitialised data.
+ *
+ * NOTE(review): for AVRO_LINK the reader's schema is passed through
+ * unchanged while avro_schema_match() rejects link writers, so link
+ * resolution looks incomplete -- confirm against the callers.
+ */
+int
+avro_read_data(avro_reader_t reader, avro_schema_t writers_schema,
+              avro_schema_t readers_schema, avro_datum_t * datum)
+{
+       int rval = EINVAL;
+       const avro_encoding_t *enc = &avro_binary_encoding;
+
+       if (!reader || !is_avro_schema(writers_schema) || !datum) {
+               return EINVAL;
+       }
+
+       if (readers_schema == NULL) {
+               readers_schema = writers_schema;
+       } else if (!avro_schema_match(writers_schema, readers_schema)) {
+               return EINVAL;
+       }
+
+       /*
+        * schema resolution 
+        */
+       if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
+               struct avro_union_branch_t *branch;
+               struct avro_union_schema_t *union_schema =
+                   avro_schema_to_union(readers_schema);
+
+               for (branch = STAILQ_FIRST(&union_schema->branches);
+                    branch != NULL; branch = STAILQ_NEXT(branch, branches)) {
+                       if (avro_schema_match(writers_schema, branch->schema)) {
+                               return avro_read_data(reader, writers_schema,
+                                                     branch->schema, datum);
+                       }
+               }
+               return EINVAL;
+       }
+
+       switch (avro_typeof(writers_schema)) {
+       case AVRO_NULL:
+               rval = enc->read_null(reader);
+               if (!rval) {
+                       *datum = avro_null();
+               }
+               break;
+
+       case AVRO_BOOLEAN:
+               {
+                       int8_t b;
+                       rval = enc->read_boolean(reader, &b);
+                       if (!rval) {
+                               *datum = avro_boolean(b);
+                       }
+               }
+               break;
+
+       case AVRO_STRING:
+               {
+                       char *s;
+                       rval = enc->read_string(reader, &s);
+                       if (!rval) {
+                               *datum = avro_string(s);
+                       }
+               }
+               break;
+
+       case AVRO_INT:
+               {
+                       int32_t i;
+                       rval = enc->read_int(reader, &i);
+                       if (!rval) {
+                               *datum = avro_int(i);
+                       }
+               }
+               break;
+
+       case AVRO_LONG:
+               {
+                       int64_t l;
+                       rval = enc->read_long(reader, &l);
+                       if (!rval) {
+                               *datum = avro_long(l);
+                       }
+               }
+               break;
+
+       case AVRO_FLOAT:
+               {
+                       float f;
+                       rval = enc->read_float(reader, &f);
+                       if (!rval) {
+                               *datum = avro_float(f);
+                       }
+               }
+               break;
+
+       case AVRO_DOUBLE:
+               {
+                       double d;
+                       rval = enc->read_double(reader, &d);
+                       if (!rval) {
+                               *datum = avro_double(d);
+                       }
+               }
+               break;
+
+       case AVRO_BYTES:
+               {
+                       char *bytes;
+                       int64_t len;
+                       rval = enc->read_bytes(reader, &bytes, &len);
+                       if (!rval) {
+                               *datum = avro_bytes(bytes, len);
+                       }
+               }
+               break;
+
+       case AVRO_FIXED:
+               rval =
+                   read_fixed(reader, enc, writers_schema, readers_schema,
+                              datum);
+               break;
+
+       case AVRO_ENUM:
+               rval =
+                   read_enum(reader, enc, writers_schema, readers_schema,
+                             datum);
+               break;
+
+       case AVRO_ARRAY:
+               rval =
+                   read_array(reader, enc,
+                              avro_schema_to_array(writers_schema),
+                              avro_schema_to_array(readers_schema), datum);
+               break;
+
+       case AVRO_MAP:
+               rval =
+                   read_map(reader, enc, avro_schema_to_map(writers_schema),
+                            avro_schema_to_map(readers_schema), datum);
+               break;
+
+       case AVRO_UNION:
+               rval =
+                   read_union(reader, enc, writers_schema, readers_schema,
+                              datum);
+               break;
+
+       case AVRO_RECORD:
+               rval =
+                   read_record(reader, enc,
+                               avro_schema_to_record(writers_schema),
+                               avro_schema_to_record(readers_schema), datum);
+               break;
+
+       case AVRO_LINK:
+               /* follow the link on the writer side and decode its target */
+               rval =
+                   avro_read_data(reader,
+                                  (avro_schema_to_link(writers_schema))->to,
+                                  readers_schema, datum);
+               break;
+       }
+
+       return rval;
+}

Added: hadoop/avro/trunk/lang/c/src/datum_validate.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_validate.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_validate.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_validate.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <limits.h>
+#include <errno.h>
+#include <string.h>
+#include "schema.h"
+#include "datum.h"
+#include "st.h"
+
+/*
+ * State shared with schema_map_validate_foreach() while the values of a
+ * map datum are validated.
+ */
+struct validate_st {
+       avro_schema_t expected_schema;  /* schema every visited value is checked against */
+       int rval;                       /* set to 0 by the callback on the first invalid value */
+};
+
+/*
+ * st_foreach() callback: validate one map value against
+ * vst->expected_schema.  The first invalid value clears vst->rval and
+ * stops the walk; the key itself is not inspected.
+ */
+static int
+schema_map_validate_foreach(char *key, avro_datum_t datum,
+                           struct validate_st *vst)
+{
+       if (avro_schema_datum_validate(vst->expected_schema, datum)) {
+               return ST_CONTINUE;
+       }
+
+       vst->rval = 0;
+       return ST_STOP;
+}
+
+/*
+ * Check whether `datum` is an acceptable instance of `expected_schema`.
+ *
+ * Returns non-zero when the datum is valid for the schema and 0 when it
+ * is not.  Invalid arguments (not a schema / not a datum) also yield 0,
+ * so the result can always be used as a plain boolean.
+ *
+ * Numeric schemas accept narrower datums: a long is valid for an int
+ * schema when its value fits in [INT_MIN, INT_MAX], int/long are valid
+ * for float, and int/long/float are valid for double.  Containers are
+ * validated recursively, a union is valid when any branch validates, and
+ * a link is validated against the schema it points to.
+ */
+int
+avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum)
+{
+       if (!is_avro_schema(expected_schema) || !is_avro_datum(datum)) {
+               /* callers test this result for truth: report "not valid" */
+               return 0;
+       }
+
+       switch (avro_typeof(expected_schema)) {
+       case AVRO_NULL:
+               return is_avro_null(datum);
+
+       case AVRO_BOOLEAN:
+               return is_avro_boolean(datum);
+
+       case AVRO_STRING:
+               return is_avro_string(datum);
+
+       case AVRO_BYTES:
+               return is_avro_bytes(datum);
+
+       case AVRO_INT:
+               return is_avro_int(datum)
+                   || (is_avro_long(datum)
+                       && (INT_MIN <= avro_datum_to_long(datum)->l
+                           && avro_datum_to_long(datum)->l <= INT_MAX));
+
+       case AVRO_LONG:
+               return is_avro_int(datum) || is_avro_long(datum);
+
+       case AVRO_FLOAT:
+               return is_avro_int(datum) || is_avro_long(datum)
+                   || is_avro_float(datum);
+
+       case AVRO_DOUBLE:
+               return is_avro_int(datum) || is_avro_long(datum)
+                   || is_avro_float(datum) || is_avro_double(datum);
+
+       case AVRO_FIXED:
+               return (is_avro_fixed(datum)
+                       && (avro_schema_to_fixed(expected_schema)->size ==
+                           avro_datum_to_fixed(datum)->size));
+
+       case AVRO_ENUM:
+               {
+                       struct avro_enum_schema_t *enump;
+                       struct avro_enum_symbol_t *symbol;
+
+                       /*
+                        * The datum may be of any type here (a union
+                        * tries every branch), so make sure it really is
+                        * an enum before reading its symbol.
+                        */
+                       if (!is_avro_enum(datum)) {
+                               return 0;
+                       }
+                       enump = avro_schema_to_enum(expected_schema);
+                       symbol = STAILQ_FIRST(&enump->symbols);
+                       while (symbol) {
+                               if (!strcmp
+                                   (symbol->symbol,
+                                    avro_datum_to_enum(datum)->symbol)) {
+                                       return 1;
+                               }
+                               symbol = STAILQ_NEXT(symbol, symbols);
+                       }
+                       return 0;
+               }
+               break;
+
+       case AVRO_ARRAY:
+               {
+                       if (is_avro_array(datum)) {
+                               struct avro_array_datum_t *array =
+                                   avro_datum_to_array(datum);
+                               struct avro_array_element_t *el =
+                                   STAILQ_FIRST(&array->els);
+                               /* every element must match the items schema */
+                               while (el) {
+                                       if (!avro_schema_datum_validate
+                                           ((avro_schema_to_array
+                                             (expected_schema))->items,
+                                            el->datum)) {
+                                               return 0;
+                                       }
+                                       el = STAILQ_NEXT(el, els);
+                               }
+                               return 1;
+                       }
+                       return 0;
+               }
+               break;
+
+       case AVRO_MAP:
+               if (is_avro_map(datum)) {
+                       /* every value must match the values schema */
+                       struct validate_st vst =
+                           { avro_schema_to_map(expected_schema)->values, 1 };
+                       st_foreach(avro_datum_to_map(datum)->map,
+                                  schema_map_validate_foreach,
+                                  (st_data_t) & vst);
+                       return vst.rval;
+               }
+               break;
+
+       case AVRO_UNION:
+               {
+                       struct avro_union_schema_t *union_schema =
+                           avro_schema_to_union(expected_schema);
+                       struct avro_union_branch_t *branch;
+
+                       /* valid as soon as any branch accepts the datum */
+                       for (branch = STAILQ_FIRST(&union_schema->branches);
+                            branch != NULL;
+                            branch = STAILQ_NEXT(branch, branches)) {
+                               if (avro_schema_datum_validate
+                                   (branch->schema, datum)) {
+                                       return 1;
+                               }
+                       }
+                       return 0;
+               }
+               break;
+
+       case AVRO_RECORD:
+               if (is_avro_record(datum)) {
+                       struct avro_record_schema_t *record_schema =
+                           avro_schema_to_record(expected_schema);
+                       struct avro_record_field_t *field;
+                       /* every schema field must be present and valid */
+                       for (field = STAILQ_FIRST(&record_schema->fields);
+                            field != NULL;
+                            field = STAILQ_NEXT(field, fields)) {
+                               avro_datum_t field_datum =
+                                   avro_record_field_get(datum, field->name);
+                               if (!field_datum) {
+                                       /*
+                                        * TODO: check for default values 
+                                        */
+                                       return 0;
+                               }
+                               if (!avro_schema_datum_validate
+                                   (field->type, field_datum)) {
+                                       return 0;
+                               }
+                       }
+                       return 1;
+               }
+               break;
+
+       case AVRO_LINK:
+               {
+                       return
+                           avro_schema_datum_validate((avro_schema_to_link
+                                                       (expected_schema))->to,
+                                                      datum);
+               }
+               break;
+       }
+       /* wrong datum kind for a map/record schema, or unknown schema type */
+       return 0;
+}

Added: hadoop/avro/trunk/lang/c/src/datum_write.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_write.c?rev=901950&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_write.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_write.c Fri Jan 22 00:38:01 2010
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <errno.h>
+#include <assert.h>
+#include "schema.h"
+#include "datum.h"
+#include "encoding.h"
+
+/*
+ * Encode a record by writing each field of the record schema, in schema
+ * order, with avro_write_data().  The field value is looked up in `datum`
+ * by field name.  Returns 0 on success or the first non-zero error code.
+ */
+static int
+write_record(avro_writer_t writer, const avro_encoding_t * enc,
+            struct avro_record_schema_t *record, avro_datum_t datum)
+{
+       struct avro_record_field_t *cur = STAILQ_FIRST(&record->fields);
+
+       while (cur != NULL) {
+               int status = avro_write_data(writer, cur->type,
+                                            avro_record_field_get(datum,
+                                                                  cur->name));
+               if (status) {
+                       return status;
+               }
+               cur = STAILQ_NEXT(cur, fields);
+       }
+       return 0;
+}
+
+/*
+ * Placeholder for encoding an enum value: not implemented yet.
+ * Always returns EINVAL and writes nothing.
+ */
+static int
+write_enum(avro_writer_t writer, const avro_encoding_t * enc,
+          avro_schema_t writer_schema, avro_datum_t datum)
+{
+       /*
+        * TODO 
+        */
+       return EINVAL;
+}
+
+/*
+ * Placeholder for encoding a fixed value: not implemented yet.
+ * Always returns EINVAL and writes nothing.
+ */
+static int
+write_fixed(avro_writer_t writer, const avro_encoding_t * enc,
+           avro_schema_t writer_schema, avro_datum_t datum)
+{
+       /*
+        * TODO 
+        */
+       return EINVAL;
+}
+
+/*
+ * State shared with write_map_foreach() while the entries of a map datum
+ * are written out.
+ */
+struct write_map_args {
+       int rval;                       /* error code of the first failed write; write_map() starts it at 0 */
+       avro_writer_t writer;           /* destination of the encoded entries */
+       const avro_encoding_t *enc;     /* encoding used to write each key */
+       avro_schema_t values_schema;    /* schema every map value is written with */
+};
+
+/*
+ * st_foreach() callback: write one map entry as its key string followed
+ * by its value.  The first failing write is recorded in args->rval and
+ * stops the walk.
+ */
+static int
+write_map_foreach(char *key, avro_datum_t datum, struct write_map_args *args)
+{
+       int status = args->enc->write_string(args->writer, key);
+
+       /* the value is only written once the key went out cleanly */
+       if (!status) {
+               status =
+                   avro_write_data(args->writer, args->values_schema, datum);
+       }
+       if (status) {
+               args->rval = status;
+               return ST_STOP;
+       }
+       return ST_CONTINUE;
+}
+
+/*
+ * Encode a map datum.  A non-empty map is written as a single block: the
+ * entry count, then every key/value pair.  A count of zero is always
+ * written last as the terminator, unless an entry failed to write, in
+ * which case that entry's error code is returned instead.
+ * Returns 0 on success or a non-zero error code.
+ */
+static int
+write_map(avro_writer_t writer, const avro_encoding_t * enc,
+         struct avro_map_schema_t *writer_schema,
+         struct avro_map_datum_t *datum)
+{
+       struct write_map_args ctx = { 0, writer, enc, writer_schema->values };
+
+       if (datum->map->num_entries) {
+               int status = enc->write_long(writer, datum->map->num_entries);
+               if (status) {
+                       return status;
+               }
+               st_foreach(datum->map, write_map_foreach, (st_data_t) & ctx);
+               if (ctx.rval) {
+                       return ctx.rval;
+               }
+       }
+
+       /* end-of-map marker */
+       return enc->write_long(writer, 0);
+}
+
+/*
+ * Encode an array datum.  A non-empty array is written as a single
+ * block: the element count, then every element encoded with the schema's
+ * `items` type.  A count of zero is written last as the terminator.
+ * Returns 0 on success or the first non-zero error code.
+ */
+static int
+write_array(avro_writer_t writer, const avro_encoding_t * enc,
+           struct avro_array_schema_t *schema,
+           struct avro_array_datum_t *array)
+{
+       struct avro_array_element_t *cur;
+       int status;
+
+       if (array->num_elements) {
+               status = enc->write_long(writer, array->num_elements);
+               if (status) {
+                       return status;
+               }
+
+               cur = STAILQ_FIRST(&array->els);
+               while (cur != NULL) {
+                       status =
+                           avro_write_data(writer, schema->items, cur->datum);
+                       if (status) {
+                               return status;
+                       }
+                       cur = STAILQ_NEXT(cur, els);
+               }
+       }
+
+       /* end-of-array marker */
+       return enc->write_long(writer, 0);
+}
+
+/*
+ * Serialize `datum` to `writer` according to `writer_schema`, using the
+ * binary encoding.
+ *
+ * Returns EINVAL when `writer` is NULL, when the schema or the datum
+ * fails its is_avro_schema()/is_avro_datum() check, or when the datum
+ * does not validate against the schema.  Otherwise the status of the
+ * encoder call (or of the nested write_* helper) is returned.
+ *
+ * The numeric cases convert the datum's stored value to the type named
+ * by the schema.  A datum type that no branch handles means validation
+ * let something through that it should not have: debug builds assert,
+ * and builds with asserts compiled out return EINVAL instead of
+ * encoding an uninitialized value.
+ */
+int
+avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
+               avro_datum_t datum)
+{
+       const avro_encoding_t *enc = &avro_binary_encoding;
+       int rval = -1;
+
+       if (!writer
+           || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
+               return EINVAL;
+       }
+       if (!avro_schema_datum_validate(writer_schema, datum)) {
+               return EINVAL;
+       }
+       switch (avro_typeof(writer_schema)) {
+       case AVRO_NULL:
+               rval = enc->write_null(writer);
+               break;
+       case AVRO_BOOLEAN:
+               rval =
+                   enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
+               break;
+       case AVRO_STRING:
+               rval =
+                   enc->write_string(writer, avro_datum_to_string(datum)->s);
+               break;
+       case AVRO_BYTES:
+               rval =
+                   enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
+                                    avro_datum_to_bytes(datum)->size);
+               break;
+       case AVRO_INT:
+               {
+                       int32_t i;
+                       if (is_avro_int(datum)) {
+                               i = avro_datum_to_int(datum)->i;
+                       } else if (is_avro_long(datum)) {
+                               /* Narrowing cast, as in the original. */
+                               i = (int32_t) avro_datum_to_long(datum)->l;
+                       } else {
+                               assert(0
+                                      &&
+                                      "Serious bug in schema validation code");
+                               /* Reached only when asserts are disabled. */
+                               return EINVAL;
+                       }
+                       rval = enc->write_int(writer, i);
+               }
+               break;
+       case AVRO_LONG:
+               {
+                       /*
+                        * Mirror the sibling numeric cases: read an int
+                        * datum through the int accessor rather than the
+                        * long one.  NOTE(review): whether validation lets
+                        * an int datum reach a long schema is not visible
+                        * here -- confirm against the validation code.
+                        */
+                       int64_t l;
+                       if (is_avro_int(datum)) {
+                               l = avro_datum_to_int(datum)->i;
+                       } else {
+                               l = avro_datum_to_long(datum)->l;
+                       }
+                       rval = enc->write_long(writer, l);
+               }
+               break;
+       case AVRO_FLOAT:
+               {
+                       float f;
+                       if (is_avro_int(datum)) {
+                               f = (float)(avro_datum_to_int(datum)->i);
+                       } else if (is_avro_long(datum)) {
+                               f = (float)(avro_datum_to_long(datum)->l);
+                       } else if (is_avro_float(datum)) {
+                               f = avro_datum_to_float(datum)->f;
+                       } else if (is_avro_double(datum)) {
+                               f = (float)(avro_datum_to_double(datum)->d);
+                       } else {
+                               assert(0
+                                      &&
+                                      "Serious bug in schema validation code");
+                               /* Reached only when asserts are disabled. */
+                               return EINVAL;
+                       }
+                       rval = enc->write_float(writer, f);
+               }
+               break;
+       case AVRO_DOUBLE:
+               {
+                       double d;
+                       if (is_avro_int(datum)) {
+                               d = (double)(avro_datum_to_int(datum)->i);
+                       } else if (is_avro_long(datum)) {
+                               d = (double)(avro_datum_to_long(datum)->l);
+                       } else if (is_avro_float(datum)) {
+                               d = (double)(avro_datum_to_float(datum)->f);
+                       } else if (is_avro_double(datum)) {
+                               d = avro_datum_to_double(datum)->d;
+                       } else {
+                               assert(0 && "Bug in schema validation code");
+                               /* Reached only when asserts are disabled. */
+                               return EINVAL;
+                       }
+                       rval = enc->write_double(writer, d);
+               }
+               break;
+
+       case AVRO_RECORD:
+               rval =
+                   write_record(writer, enc,
+                                avro_schema_to_record(writer_schema), datum);
+               break;
+       case AVRO_ENUM:
+               rval = write_enum(writer, enc, writer_schema, datum);
+               break;
+       case AVRO_FIXED:
+               rval = write_fixed(writer, enc, writer_schema, datum);
+               break;
+       case AVRO_MAP:
+               rval =
+                   write_map(writer, enc, avro_schema_to_map(writer_schema),
+                             avro_datum_to_map(datum));
+               break;
+       case AVRO_ARRAY:
+               rval =
+                   write_array(writer, enc,
+                               avro_schema_to_array(writer_schema),
+                               avro_datum_to_array(datum));
+               break;
+
+       case AVRO_UNION:
+               {
+                       /*
+                        * Nothing is encoded for a union schema here.  Per
+                        * the assert message, validation is expected to
+                        * stop unions earlier; with asserts disabled the
+                        * initial rval of -1 is returned.
+                        */
+                       assert(0 && "Bug in schema validation code");
+               }
+               break;
+
+       case AVRO_LINK:
+               /* Follow the link and encode against the target schema. */
+               rval =
+                   avro_write_data(writer,
+                                   (avro_schema_to_link(writer_schema))->to,
+                                   datum);
+               break;
+       }
+       return rval;
+}

Modified: hadoop/avro/trunk/lang/c/src/io.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/io.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/io.c (original)
+++ hadoop/avro/trunk/lang/c/src/io.c Fri Jan 22 00:38:01 2010
@@ -21,6 +21,7 @@
 #include <string.h>
 #include "avro.h"
 #include "container_of.h"
+#include "dump.h"
 
 enum avro_io_type_t {
        AVRO_FILE_IO,
@@ -215,6 +216,22 @@
        return -1;
 }
 
+/*
+ * Debug helper: hand the writer's buffer and its `written` count to
+ * dump(), which prints to `fp`.  Writers for which is_memory_io() is
+ * false are silently ignored.
+ */
+void avro_writer_dump(avro_writer_t writer, FILE * fp)
+{
+       if (!is_memory_io(writer)) {
+               return;
+       }
+       dump(fp, (char *)avro_writer_to_memory(writer)->buf,
+            avro_writer_to_memory(writer)->written);
+}
+
+/*
+ * Debug helper: hand the reader's buffer and its `read` count to
+ * dump(), which prints to `fp`.  Readers for which is_memory_io() is
+ * false are silently ignored.
+ */
+void avro_reader_dump(avro_reader_t reader, FILE * fp)
+{
+       if (!is_memory_io(reader)) {
+               return;
+       }
+       dump(fp, (char *)avro_reader_to_memory(reader)->buf,
+            avro_reader_to_memory(reader)->read);
+}
+
 void avro_reader_free(avro_reader_t reader)
 {
        if (is_memory_io(reader)) {

Modified: hadoop/avro/trunk/lang/c/src/schema.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.c (original)
+++ hadoop/avro/trunk/lang/c/src/schema.c Fri Jan 22 00:38:01 2010
@@ -307,9 +307,12 @@
 find_named_schemas(const char *name, avro_schema_error_t * error)
 {
        st_table *st = (*error)->named_schemas;
-       avro_schema_t schema;
-       if (st_lookup(st, (st_data_t) name, (st_data_t *) & schema)) {
-               return schema;
+       union {
+               avro_schema_t schema;
+               st_data_t data;
+       } val;
+       if (st_lookup(st, (st_data_t) name, &(val.data))) {
+               return val.schema;
        }
        return NULL;
 };

Modified: hadoop/avro/trunk/lang/c/src/schema.h
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/schema.h?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/schema.h (original)
+++ hadoop/avro/trunk/lang/c/src/schema.h Fri Jan 22 00:38:01 2010
@@ -33,6 +33,7 @@
 struct avro_record_schema_t {
        struct avro_obj_t obj;
        char *name;
+       /* TODO: st_table of names for faster lookup on record_read() */
         STAILQ_HEAD(fields, avro_record_field_t) fields;
 };
 

Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Fri Jan 22 00:38:01 2010
@@ -46,7 +46,7 @@
                fprintf(stderr, "Unable to encode/decode %s\n", type);
                exit(EXIT_FAILURE);
        }
-
+       avro_reader_dump(reader, stderr);
        avro_datum_decref(datum_out);
        avro_reader_free(reader);
        avro_writer_free(writer);
@@ -104,7 +104,7 @@
        return 0;
 }
 
-static test_double(void)
+static int test_double(void)
 {
        int i;
        for (i = 0; i < 100; i++) {
@@ -131,9 +131,9 @@
 static int test_boolean(void)
 {
        int i;
-       for (i = 0; i < 100; i++) {
+       for (i = 0; i <= 1; i++) {
                avro_schema_t schema = avro_schema_boolean();
-               avro_datum_t datum = avro_boolean(rand() % 2);
+               avro_datum_t datum = avro_boolean(i);
                write_read_check(schema, NULL, datum, "boolean");
                avro_datum_decref(datum);
        }
@@ -149,15 +149,22 @@
        return 0;
 }
 
-int test_record(void)
+static int test_record(void)
 {
-       /*
-        * TODO 
-        */
+       avro_schema_t schema = avro_schema_record("person");
+       avro_datum_t datum = avro_record("person");
+
+       avro_schema_record_field_append(schema, "name", avro_schema_string());
+       avro_schema_record_field_append(schema, "age", avro_schema_int());
+
+       avro_record_field_set(datum, "name", avro_string("Joseph Campbell"));
+       avro_record_field_set(datum, "age", avro_int(83));
+
+       write_read_check(schema, NULL, datum, "record");
        return 0;
 }
 
-int test_enum(void)
+static int test_enum(void)
 {
        /*
         * TODO 
@@ -165,7 +172,7 @@
        return 0;
 }
 
-int test_array(void)
+static int test_array(void)
 {
        int i, rval;
        avro_schema_t schema = avro_schema_array(avro_schema_int());
@@ -182,7 +189,7 @@
        return 0;
 }
 
-int test_map(void)
+static int test_map(void)
 {
        avro_schema_t schema = avro_schema_map(avro_schema_long());
        avro_datum_t datum = avro_map();
@@ -198,7 +205,7 @@
        return 0;
 }
 
-int test_union(void)
+static int test_union(void)
 {
        /*
         * TODO 
@@ -206,7 +213,7 @@
        return 0;
 }
 
-int test_fixed(void)
+static int test_fixed(void)
 {
        /*
         * TODO 
@@ -240,12 +247,10 @@
        srandom(time(NULL));
        for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
                struct avro_tests *test = tests + i;
-               fprintf(stderr, "Running %s tests...\n", test->name);
+               fprintf(stderr, "**** Running %s tests ****\n", test->name);
                if (test->func() != 0) {
-                       fprintf(stderr, "failed!\n");
                        return EXIT_FAILURE;
                }
-               fprintf(stderr, "\t... %s tests passed!\n", test->name);
        }
        return EXIT_SUCCESS;
 }

Modified: hadoop/avro/trunk/lang/c/version.sh
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=901950&r1=901949&r2=901950&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Fri Jan 22 00:38:01 2010
@@ -18,9 +18,9 @@
 #         libavro_binary_age = 0
 #         libavro_interface_age = 0
 #
-libavro_micro_version=6
+libavro_micro_version=7
 libavro_interface_age=0
-libavro_binary_age=1
+libavro_binary_age=2
 
 # IGNORE EVERYTHING ELSE FROM HERE DOWN.........
 if test $# != 1; then


Reply via email to