Author: cutting
Date: Sun Jan 10 17:25:30 2010
New Revision: 897663

URL: http://svn.apache.org/viewvc?rev=897663&view=rev
Log:
AVRO-207.  Fix Python to detect duplicate enum symbols.  Contributed by Jeff 
Hammerbacher.

Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/src/py/avro/schema.py
    hadoop/avro/trunk/src/test/py/test_schema.py

Modified: hadoop/avro/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=897663&r1=897662&r2=897663&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Sun Jan 10 17:25:30 2010
@@ -274,6 +274,9 @@
 
     AVRO-299. Fix Python numeric promotion.  (Jeff Hammerbacher via cutting)
 
+    AVRO-207. Fix Python to detect duplicate enum symbols and add
+    tests for duplicates in unions.  (Jeff Hammerbacher via cutting)
+
 Avro 1.2.0 (14 October 2009)
 
   INCOMPATIBLE CHANGES

Modified: hadoop/avro/trunk/src/py/avro/schema.py
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/schema.py?rev=897663&r1=897662&r2=897663&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/schema.py (original)
+++ hadoop/avro/trunk/src/py/avro/schema.py Sun Jan 10 17:25:30 2010
@@ -122,7 +122,6 @@
   def set_prop(self, key, value):
     self.props[key] = value
 
-
 class Name(object):
   """Container class for static methods on Avro names."""
   @staticmethod
@@ -304,6 +303,9 @@
     elif False in [isinstance(s, basestring) for s in symbols]:
       fail_msg = 'Enum Schems requires All symbols to be JSON strings.'
       raise AvroException(fail_msg)
+    elif len(set(symbols)) < len(symbols):
+      fail_msg = 'Duplicate symbol: %s' % symbols
+      raise AvroException(fail_msg)
 
     # Call parent ctor
     NamedSchema.__init__(self, 'enum', name, namespace, names)

Modified: hadoop/avro/trunk/src/test/py/test_schema.py
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_schema.py?rev=897663&r1=897662&r2=897663&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/py/test_schema.py (original)
+++ hadoop/avro/trunk/src/test/py/test_schema.py Sun Jan 10 17:25:30 2010
@@ -19,6 +19,12 @@
 import unittest
 from avro import schema
 
+def print_test_name(test_name):
+  print ''
+  print test_name
+  print '=' * len(test_name)
+  print ''
+
 class ExampleSchema(object):
   def __init__(self, schema_string, valid, name='', comment=''):
     self._schema_string = schema_string
@@ -90,6 +96,11 @@
     {"type": "enum",
      "symbols" : ["I", "will", "fail", "no", "name"]}
     """, False),
+  ExampleSchema("""\
+    {"type": "enum",
+     "name": "Test"
+     "symbols" : ["AA", "AA"]}
+    """, False),
 ]
 
 ARRAY_EXAMPLES = [
@@ -111,6 +122,7 @@
 UNION_EXAMPLES = [
   ExampleSchema('["string", "null", "long"]', True),
   ExampleSchema('["null", "null"]', False),
+  ExampleSchema('["long", "long"]', False),
   ExampleSchema("""\
     [{"type": "array", "items": "long"}
      {"type": "array", "items": "string"}]
@@ -252,48 +264,44 @@
 # TODO(hammer): use logging module?
 class TestSchema(unittest.TestCase):
   def test_parse(self):
-    debug_msg = "\nTEST PARSE\n"
-    print debug_msg
-
-    num_correct = 0
+    print_test_name('TEST PARSE')
+    correct = 0
     for example in EXAMPLES:
       try:
         schema.parse(example.schema_string)
-        if example.valid: num_correct += 1
+        if example.valid: correct += 1
         debug_msg = "%s: PARSE SUCCESS" % example.name
       except:
-        if not example.valid: num_correct += 1
+        if not example.valid: correct += 1
         debug_msg = "%s: PARSE FAILURE" % example.name
       finally:
         print debug_msg
 
     fail_msg = "Parse behavior correct on %d out of %d schemas." % \
-      (num_correct, len(EXAMPLES))
-    self.assertEqual(num_correct, len(EXAMPLES), fail_msg)
+      (correct, len(EXAMPLES))
+    self.assertEqual(correct, len(EXAMPLES), fail_msg)
 
   def test_valid_cast_to_string_after_parse(self):
     """
     Test that the string generated by an Avro Schema object
     is, in fact, a valid Avro schema.
     """
-    debug_msg = "\nTEST CAST TO STRING\n"
-    print debug_msg
-
-    num_correct = 0
+    print_test_name('TEST CAST TO STRING AFTER PARSE')
+    correct = 0
     for example in VALID_EXAMPLES:
       schema_data = schema.parse(example.schema_string)
       try:
         schema.parse(str(schema_data))
         debug_msg = "%s: STRING CAST SUCCESS" % example.name
-        num_correct += 1
+        correct += 1
       except:
         debug_msg = "%s: STRING CAST FAILURE" % example.name
       finally:
         print debug_msg
 
     fail_msg = "Cast to string success on %d out of %d schemas" % \
-      (num_correct, len(VALID_EXAMPLES))
-    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
 
   def test_equivalence_after_round_trip(self):
     """
@@ -302,17 +310,14 @@
          to generate Avro schema "round trip".
     3. Ensure "original" and "round trip" schemas are equivalent.
     """
-    debug_msg = "\nTEST ROUND TRIP\n"
-    print debug_msg
-
-    num_correct = 0
+    print_test_name('TEST ROUND TRIP')
+    correct = 0
     for example in VALID_EXAMPLES:
       try:
         original_schema = schema.parse(example.schema_string)
         round_trip_schema = schema.parse(str(original_schema))
-
         if original_schema == round_trip_schema:
-          num_correct += 1
+          correct += 1
           debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
         else:       
           debug_msg = "%s: ROUND TRIP FAILURE" % example.name
@@ -322,15 +327,39 @@
         print debug_msg
 
     fail_msg = "Round trip success on %d out of %d schemas" % \
-      (num_correct, len(VALID_EXAMPLES))
-    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
 
   # TODO(hammer): more tests
   def test_fullname(self):
-    """Test process for making full names from name, namespace pairs."""
-    debug_msg = '\nTEST FULL NAME\n'
-    print debug_msg
-
+    """
+    The fullname is determined in one of the following ways:
+     * A name and namespace are both specified.  For example,
+       one might use "name": "X", "namespace": "org.foo"
+       to indicate the fullname "org.foo.X".
+     * A fullname is specified.  If the name specified contains
+       a dot, then it is assumed to be a fullname, and any
+       namespace also specified is ignored.  For example,
+       use "name": "org.foo.X" to indicate the
+       fullname "org.foo.X".
+     * A name only is specified, i.e., a name that contains no
+       dots.  In this case the namespace is taken from the most
+       tightly enclosing schema or protocol.  For example,
+       if "name": "X" is specified, and this occurs
+       within a field of the record definition
+       of "org.foo.Y", then the fullname is "org.foo.X".
+
+    References to previously defined names are as in the latter
+    two cases above: if they contain a dot they are a fullname, if
+    they do not contain a dot, the namespace is the namespace of
+    the enclosing definition.
+
+    Primitive type names have no namespace and their names may
+    not be defined in any namespace.  A schema may only contain
+    multiple definitions of a fullname if the definitions are
+    equivalent.
+    """
+    print_test_name('TEST FULLNAME')
     fullname = schema.Name.make_fullname('a', 'o.a.h')
     self.assertEqual(fullname, 'o.a.h.a')
 


Reply via email to