Added: hadoop/avro/trunk/src/test/py/test_datafile.py URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_datafile.py?rev=895732&view=auto ============================================================================== --- hadoop/avro/trunk/src/test/py/test_datafile.py (added) +++ hadoop/avro/trunk/src/test/py/test_datafile.py Mon Jan 4 18:09:42 2010 @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import unittest
from avro import schema
from avro import io
from avro import datafile

# NOTE: this module targets Python 2 (it relies on the ``unicode`` builtin).
# Every ``print`` below uses the single-argument parenthesised form, which
# produces the same output under the Python 2 print statement.

# (schema JSON string, example datum) pairs written and read back by the tests.
SCHEMAS_TO_VALIDATE = (
    ('"null"', None),
    ('"boolean"', True),
    ('"string"', unicode('adsfasdf09809dsf-=adsf')),
    ('"bytes"', '12345abcd'),
    ('"int"', 1234),
    ('"long"', 1234),
    ('"float"', 1234.0),
    ('"double"', 1234.0),
    ('{"type": "fixed", "name": "Test", "size": 1}', 'B'),
    ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
    ('{"type": "array", "items": "long"}', [1, 3, 2]),
    ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
    ('["string", "null", "long"]', None),
    ("""\
    {"type": "record",
     "name": "Test",
     "fields": [{"name": "f", "type": "long"}]}
    """, {'f': 5}),
    ("""\
    {"type": "record",
     "name": "Lisp",
     "fields": [{"name": "value",
                 "type": ["null", "string",
                          {"type": "record",
                           "name": "Cons",
                           "fields": [{"name": "car", "type": "Lisp"},
                                      {"name": "cdr", "type": "Lisp"}]}]}]}
    """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),
)

# Scratch file (relative to the working directory) shared by all tests.
FILENAME = 'test_datafile.out'


def _print_example_header(index, example_schema, datum):
    """Print the per-example banner; *index* is the zero-based position."""
    print('')
    print('SCHEMA NUMBER %d' % (index + 1))
    print('================')
    print('')
    print('Schema: %s' % example_schema)
    print('Datum: %s' % datum)


def _read_all_data():
    """Return every datum stored in FILENAME as a list.

    The file object is closed even when decoding raises, so the caller can
    delete the file afterwards without leaving an open handle behind.
    """
    reader = open(FILENAME, 'rb')
    try:
        return list(datafile.DataFileReader(reader, io.DatumReader()))
    finally:
        reader.close()


# TODO(hammer): clean up written files with ant, not os.remove
class TestDataFile(unittest.TestCase):
    """Write example data to an Avro data file and check it reads back."""

    def tearDown(self):
        """Remove the scratch file if a test aborted before deleting it."""
        if os.path.exists(FILENAME):
            os.remove(FILENAME)

    def test_round_trip(self):
        """Write each datum ten times in one session and read all ten back."""
        print('')
        print('TEST ROUND TRIP')
        print('===============')
        print('')
        correct = 0
        for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
            _print_example_header(i, example_schema, datum)

            # write data in binary to file 10 times
            writer = open(FILENAME, 'wb')
            datum_writer = io.DatumWriter()
            schema_object = schema.parse(example_schema)
            dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
            for _ in range(10):
                dfw.append(datum)
            dfw.close()

            # read data in binary from file; the result is kept in its own
            # name so that ``datum`` still refers to the value that was
            # written when the comparison below is made
            round_trip_data = _read_all_data()

            print('Round Trip Data: %s' % round_trip_data)
            print('Round Trip Data Length: %d' % len(round_trip_data))
            is_correct = [datum] * 10 == round_trip_data
            if is_correct:
                correct += 1
            print('Correct Round Trip: %s' % is_correct)
            print('')
            os.remove(FILENAME)
        self.assertEqual(correct, len(SCHEMAS_TO_VALIDATE))

    def test_append(self):
        """Write each datum once, append it nine times, read all ten back."""
        print('')
        print('TEST APPEND')
        print('===========')
        print('')
        correct = 0
        for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
            _print_example_header(i, example_schema, datum)

            # write data in binary to file once
            writer = open(FILENAME, 'wb')
            datum_writer = io.DatumWriter()
            schema_object = schema.parse(example_schema)
            dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
            dfw.append(datum)
            dfw.close()

            # open file, write, and close nine times; no schema is passed
            # to the writer when re-opening the existing file
            for _ in range(9):
                writer = open(FILENAME, 'ab+')
                dfw = datafile.DataFileWriter(writer, io.DatumWriter())
                dfw.append(datum)
                dfw.close()

            # read data in binary from file without rebinding ``datum``
            appended_data = _read_all_data()

            print('Appended Data: %s' % appended_data)
            print('Appended Data Length: %d' % len(appended_data))
            is_correct = [datum] * 10 == appended_data
            if is_correct:
                correct += 1
            print('Correct Appended: %s' % is_correct)
            print('')
            os.remove(FILENAME)
        self.assertEqual(correct, len(SCHEMAS_TO_VALIDATE))


if __name__ == '__main__':
    unittest.main()
Added: hadoop/avro/trunk/src/test/py/test_io.py URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_io.py?rev=895732&view=auto ============================================================================== --- hadoop/avro/trunk/src/test/py/test_io.py (added) +++ hadoop/avro/trunk/src/test/py/test_io.py Mon Jan 4 18:09:42 2010 @@ -0,0 +1,154 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import unittest
import cStringIO
from avro import schema
from avro import io

# NOTE: this module targets Python 2 (it relies on ``unicode`` and
# ``cStringIO``). Every ``print`` below uses the single-argument
# parenthesised form, which produces the same output under the Python 2
# print statement.

# (schema JSON string, example datum) pairs used by the validation and
# round-trip tests.
SCHEMAS_TO_VALIDATE = (
    ('"null"', None),
    ('"boolean"', True),
    ('"string"', unicode('adsfasdf09809dsf-=adsf')),
    ('"bytes"', '12345abcd'),
    ('"int"', 1234),
    ('"long"', 1234),
    ('"float"', 1234.0),
    ('"double"', 1234.0),
    ('{"type": "fixed", "name": "Test", "size": 1}', 'B'),
    ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
    ('{"type": "array", "items": "long"}', [1, 3, 2]),
    ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
    ('["string", "null", "long"]', None),
    ("""\
    {"type": "record",
     "name": "Test",
     "fields": [{"name": "f", "type": "long"}]}
    """, {'f': 5}),
    ("""\
    {"type": "record",
     "name": "Lisp",
     "fields": [{"name": "value",
                 "type": ["null", "string",
                          {"type": "record",
                           "name": "Cons",
                           "fields": [{"name": "car", "type": "Lisp"},
                                      {"name": "cdr", "type": "Lisp"}]}]}]}
    """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),
)


class TestIO(unittest.TestCase):
    """Exercise avro.io validation, binary encoding and datum round trips."""

    def test_validate(self):
        """Every example datum must validate against its schema."""
        print('')
        print('Test Validate')
        print('=============')
        print('')
        passed = 0
        for expected_schema, datum in SCHEMAS_TO_VALIDATE:
            # same text as the two-item print statement: both values
            # str()-formatted and separated by one space
            print('%s %s' % (expected_schema, datum))
            validated = io.validate(schema.parse(expected_schema), datum)
            print(validated)
            if validated:
                passed += 1
        self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))

    # TODO(hammer): print bytes in python
    def test_encode(self):
        """Print the binary encoding of one value per primitive writer.

        Nothing is asserted; the test only fails if an encoder call raises.
        """
        print('')
        print('Test Encode')
        print('=============')
        print('')

        # (label to print, BinaryEncoder method name, value to encode)
        encode_cases = (
            ('Boolean', 'write_boolean', True),
            ('String', 'write_utf8', unicode('adsfasdf09809dsf-=adsf')),
            ('Int', 'write_int', 1),
            ('Long', 'write_long', 1),
            ('Float', 'write_float', 1.0),
            ('Double', 'write_double', 1.0),
            ('Bytes', 'write_bytes', '12345abcd'),
        )
        for label, method_name, value in encode_cases:
            # a fresh buffer per case so each line shows one encoding only
            writer = cStringIO.StringIO()
            string_encoder = io.BinaryEncoder(writer)
            getattr(string_encoder, method_name)(value)
            print(label + ': ' + repr(writer.getvalue()))

    def test_decode(self):
        """Placeholder: no decoding checks are implemented yet."""
        pass

    def test_datum_reader(self):
        """Placeholder: no DatumReader checks are implemented yet."""
        pass

    def test_datum_writer(self):
        """Placeholder: no DatumWriter checks are implemented yet."""
        pass

    def test_round_trip(self):
        """Encode each datum to a string buffer and decode it back."""
        print('')
        print('TEST ROUND TRIP')
        print('===============')
        print('')
        correct = 0
        for example_schema, datum in SCHEMAS_TO_VALIDATE:
            # parse once and reuse for validation, writing and reading
            parsed_schema = schema.parse(example_schema)
            print('Schema: %s' % example_schema)
            print('Datum: %s' % datum)
            print('Valid: %s' % io.validate(parsed_schema, datum))

            # write datum in binary to string buffer
            writer = cStringIO.StringIO()
            encoder = io.BinaryEncoder(writer)
            datum_writer = io.DatumWriter(parsed_schema)
            datum_writer.write(datum, encoder)

            # read data from string buffer
            reader = cStringIO.StringIO(writer.getvalue())
            decoder = io.BinaryDecoder(reader)
            datum_reader = io.DatumReader(parsed_schema)
            round_trip_datum = datum_reader.read(decoder)

            print('Round Trip Datum: %s' % round_trip_datum)
            is_correct = datum == round_trip_datum
            if is_correct:
                correct += 1
            print('Correct Round Trip: %s' % is_correct)
            print('')
        self.assertEqual(correct, len(SCHEMAS_TO_VALIDATE))


if __name__ == '__main__':
    unittest.main()

# Commit-mail header text that followed this module on the same line:
# Added: hadoop/avro/trunk/src/test/py/test_schema.py URL:
# Commit-mail header text that preceded this module on the same line:
# http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_schema.py?rev=895732&view=auto
# ==============================================================================
# --- hadoop/avro/trunk/src/test/py/test_schema.py (added)
# +++ hadoop/avro/trunk/src/test/py/test_schema.py Mon Jan 4 18:09:42 2010
# @@ -0,0 +1,338 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test the schema parsing logic.
"""
import unittest
from avro import schema

# NOTE: every ``print`` below uses the single-argument parenthesised form,
# which produces the same output under the Python 2 print statement.


class ExampleSchema(object):
    """A schema JSON string paired with the expected parse outcome.

    schema_string -- the text handed to schema.parse
    valid         -- True when parsing is expected to succeed
    name          -- label used in test output; falls back to schema_string
    comment       -- free-form note, stored as a plain attribute
    """

    def __init__(self, schema_string, valid, name='', comment=''):
        self._schema_string = schema_string
        self._valid = valid
        self._name = name or schema_string  # default to schema_string for name
        self.comment = comment

    @property
    def schema_string(self):
        return self._schema_string

    @property
    def valid(self):
        return self._valid

    @property
    def name(self):
        return self._name


#
# Example Schemas
#

def make_primitive_examples():
    """Return two valid examples per entry of schema.PRIMITIVE_TYPES.

    Each type name is emitted once as a bare JSON string and once wrapped
    in a {"type": ...} object.
    """
    examples = []
    for primitive_type in schema.PRIMITIVE_TYPES:
        examples.append(ExampleSchema('"%s"' % primitive_type, True))
        examples.append(ExampleSchema('{"type": "%s"}' % primitive_type, True))
    return examples


PRIMITIVE_EXAMPLES = [
    ExampleSchema('"True"', False),
    ExampleSchema('True', False),
    ExampleSchema('{"no_type": "test"}', False),
    ExampleSchema('{"type": "panther"}', False),
] + make_primitive_examples()

FIXED_EXAMPLES = [
    ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True),
    ExampleSchema("""\
    {"type": "fixed",
     "name": "MyFixed",
     "namespace": "org.apache.hadoop.avro",
     "size": 1}
    """, True),
    ExampleSchema("""\
    {"type": "fixed",
     "name": "Missing size"}
    """, False),
    ExampleSchema("""\
    {"type": "fixed",
     "size": 314}
    """, False),
]

ENUM_EXAMPLES = [
    ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True),
    ExampleSchema("""\
    {"type": "enum",
     "name": "Status",
     "symbols": "Normal Caution Critical"}
    """, False),
    ExampleSchema("""\
    {"type": "enum",
     "name": [ 0, 1, 1, 2, 3, 5, 8 ],
     "symbols": ["Golden", "Mean"]}
    """, False),
    ExampleSchema("""\
    {"type": "enum",
     "symbols" : ["I", "will", "fail", "no", "name"]}
    """, False),
]

ARRAY_EXAMPLES = [
    ExampleSchema('{"type": "array", "items": "long"}', True),
    ExampleSchema("""\
    {"type": "array",
     "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
    """, True),
]

MAP_EXAMPLES = [
    ExampleSchema('{"type": "map", "values": "long"}', True),
    ExampleSchema("""\
    {"type": "map",
     "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
    """, True),
]

UNION_EXAMPLES = [
    ExampleSchema('["string", "null", "long"]', True),
    ExampleSchema('["null", "null"]', False),
    # NOTE(review): the two array schemas below are not separated by a
    # comma, so this text is also malformed JSON -- confirm which failure
    # this case is meant to cover.
    ExampleSchema("""\
    [{"type": "array", "items": "long"}
     {"type": "array", "items": "string"}]
    """, False),
]

RECORD_EXAMPLES = [
    ExampleSchema("""\
    {"type": "record",
     "name": "Test",
     "fields": [{"name": "f",
                 "type": "long"}]}
    """, True),
    ExampleSchema("""\
    {"type": "error",
     "name": "Test",
     "fields": [{"name": "f",
                 "type": "long"}]}
    """, True),
    ExampleSchema("""\
    {"type": "record",
     "name": "Node",
     "fields": [{"name": "label", "type": "string"},
                {"name": "children",
                 "type": {"type": "array", "items": "Node"}}]}
    """, True),
    ExampleSchema("""\
    {"type": "record",
     "name": "Lisp",
     "fields": [{"name": "value",
                 "type": ["null", "string",
                          {"type": "record",
                           "name": "Cons",
                           "fields": [{"name": "car", "type": "Lisp"},
                                      {"name": "cdr", "type": "Lisp"}]}]}]}
    """, True),
    ExampleSchema("""\
    {"type": "record",
     "name": "HandshakeRequest",
     "namespace": "org.apache.avro.ipc",
     "fields": [{"name": "clientHash",
                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
                {"name": "clientProtocol", "type": ["null", "string"]},
                {"name": "serverHash", "type": "MD5"},
                {"name": "meta",
                 "type": ["null", {"type": "map", "values": "bytes"}]}]}
    """, True),
    ExampleSchema("""\
    {"type": "record",
     "name": "HandshakeResponse",
     "namespace": "org.apache.avro.ipc",
     "fields": [{"name": "match",
                 "type": {"type": "enum",
                          "name": "HandshakeMatch",
                          "symbols": ["BOTH", "CLIENT", "NONE"]}},
                {"name": "serverProtocol", "type": ["null", "string"]},
                {"name": "serverHash",
                 "type": ["null",
                          {"name": "MD5", "size": 16, "type": "fixed"}]},
                {"name": "meta",
                 "type": ["null", {"type": "map", "values": "bytes"}]}]}
    """, True),
    ExampleSchema("""\
    {"type": "record",
     "name": "Interop",
     "namespace": "org.apache.avro",
     "fields": [{"name": "intField", "type": "int"},
                {"name": "longField", "type": "long"},
                {"name": "stringField", "type": "string"},
                {"name": "boolField", "type": "boolean"},
                {"name": "floatField", "type": "float"},
                {"name": "doubleField", "type": "double"},
                {"name": "bytesField", "type": "bytes"},
                {"name": "nullField", "type": "null"},
                {"name": "arrayField",
                 "type": {"type": "array", "items": "double"}},
                {"name": "mapField",
                 "type": {"type": "map",
                          "values": {"name": "Foo",
                                     "type": "record",
                                     "fields": [{"name": "label",
                                                 "type": "string"}]}}},
                {"name": "unionField",
                 "type": ["boolean",
                          "double",
                          {"type": "array", "items": "bytes"}]},
                {"name": "enumField",
                 "type": {"type": "enum",
                          "name": "Kind",
                          "symbols": ["A", "B", "C"]}},
                {"name": "fixedField",
                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
                {"name": "recordField",
                 "type": {"type": "record",
                          "name": "Node",
                          "fields": [{"name": "label", "type": "string"},
                                     {"name": "children",
                                      "type": {"type": "array",
                                               "items": "Node"}}]}}]}
    """, True),
    ExampleSchema("""\
    {"type": "record",
     "name": "Address",
     "fields": [{"type": "string"},
                {"type": "string", "name": "City"}]}
    """, False),
    ExampleSchema("""\
    {"type": "record",
     "name": "Event",
     "fields": [{"name": "Sponsor"},
                {"name": "City", "type": "string"}]}
    """, False),
    ExampleSchema("""\
    {"type": "record",
     "fields": "His vision, from the constantly passing bars,"
     "name", "Rainer"}
    """, False),
    ExampleSchema("""\
    {"name": ["Tom", "Jerry"],
     "type": "record",
     "fields": [{"name": "name", "type": "string"}]}
    """, False),
]

# Built with ``+`` so that a new list is created; starting from
# ``EXAMPLES = PRIMITIVE_EXAMPLES`` and extending with ``+=`` would grow
# PRIMITIVE_EXAMPLES itself, because both names would refer to one list.
EXAMPLES = (
    PRIMITIVE_EXAMPLES
    + FIXED_EXAMPLES
    + ENUM_EXAMPLES
    + ARRAY_EXAMPLES
    + MAP_EXAMPLES
    + UNION_EXAMPLES
    + RECORD_EXAMPLES
)

VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]


# TODO(hammer): refactor into harness for examples
# TODO(hammer): pretty-print detailed output
# TODO(hammer): make verbose flag
# TODO(hammer): show stack trace to user
# TODO(hammer): use logging module?
class TestSchema(unittest.TestCase):
    """Check schema.parse against the example schemas defined above."""

    def test_parse(self):
        """Parsing must succeed exactly for the examples marked valid."""
        debug_msg = "\nTEST PARSE\n"
        print(debug_msg)

        num_correct = 0
        for example in EXAMPLES:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit are not counted as a
            # parse failure
            try:
                schema.parse(example.schema_string)
            except Exception:
                if not example.valid:
                    num_correct += 1
                debug_msg = "%s: PARSE FAILURE" % example.name
            else:
                if example.valid:
                    num_correct += 1
                debug_msg = "%s: PARSE SUCCESS" % example.name
            print(debug_msg)

        fail_msg = "Parse behavior correct on %d out of %d schemas." % \
            (num_correct, len(EXAMPLES))
        self.assertEqual(num_correct, len(EXAMPLES), fail_msg)

    def test_valid_cast_to_string_after_parse(self):
        """
        Test that the string generated by an Avro Schema object
        is, in fact, a valid Avro schema.
        """
        debug_msg = "\nTEST CAST TO STRING\n"
        print(debug_msg)

        num_correct = 0
        for example in VALID_EXAMPLES:
            # the first parse is outside the try block: a failure here is
            # an error of the test run, not a string-cast failure
            schema_data = schema.parse(example.schema_string)
            try:
                schema.parse(str(schema_data))
            except Exception:
                debug_msg = "%s: STRING CAST FAILURE" % example.name
            else:
                debug_msg = "%s: STRING CAST SUCCESS" % example.name
                num_correct += 1
            print(debug_msg)

        fail_msg = "Cast to string success on %d out of %d schemas" % \
            (num_correct, len(VALID_EXAMPLES))
        self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)

    def test_equivalence_after_round_trip(self):
        """
        1. Given a string, parse it to get Avro schema "original".
        2. Serialize "original" to a string and parse that string
           to generate Avro schema "round trip".
        3. Ensure "original" and "round trip" schemas are equivalent.
        """
        debug_msg = "\nTEST ROUND TRIP\n"
        print(debug_msg)

        num_correct = 0
        for example in VALID_EXAMPLES:
            # any exception while parsing or comparing counts as a failure
            try:
                original_schema = schema.parse(example.schema_string)
                round_trip_schema = schema.parse(str(original_schema))
                schemas_match = original_schema == round_trip_schema
            except Exception:
                schemas_match = False

            if schemas_match:
                num_correct += 1
                debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
            else:
                debug_msg = "%s: ROUND TRIP FAILURE" % example.name
            print(debug_msg)

        fail_msg = "Round trip success on %d out of %d schemas" % \
            (num_correct, len(VALID_EXAMPLES))
        self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)

    # TODO(hammer): more tests
    def test_fullname(self):
        """Test process for making full names from name, namespace pairs."""
        debug_msg = '\nTEST FULL NAME\n'
        print(debug_msg)

        fullname = schema.Name.make_fullname('a', 'o.a.h')
        self.assertEqual(fullname, 'o.a.h.a')


if __name__ == '__main__':
    unittest.main()
