Author: cutting
Date: Mon Sep 19 18:48:54 2011
New Revision: 1172742
URL: http://svn.apache.org/viewvc?rev=1172742&view=rev
Log:
AVRO-858. Python: Add --fields option to 'avro cat' command. Contributed by
Miki Tebeka.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/py/scripts/avro
avro/trunk/lang/py/test/test_script.py
Modified: avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1172742&r1=1172741&r2=1172742&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Mon Sep 19 18:48:54 2011
@@ -54,6 +54,9 @@ Avro 1.6.0 (unreleased)
top-level pom.xml as parent, permitting use of Maven versions
plugin. (cutting)
+ AVRO-858. Python: Add --fields option to 'avro cat' command.
+ (Miki Tebeka via cutting)
+
BUG FIXES
AVRO-824. Java: Fix usage message of BinaryFragmentToJsonTool.
Modified: avro/trunk/lang/py/scripts/avro
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/py/scripts/avro?rev=1172742&r1=1172741&r2=1172742&view=diff
==============================================================================
--- avro/trunk/lang/py/scripts/avro (original)
+++ avro/trunk/lang/py/scripts/avro Mon Sep 19 18:48:54 2011
@@ -62,6 +62,19 @@ def select_printer(format):
def record_match(expr, record):
return eval(expr, None, {"r" : record})
+def parse_fields(fields):
+    fields = fields or ''
+    if not fields.strip():
+        return None
+
+    return [field.strip() for field in fields.split(',') if field.strip()]
+
+def field_selector(fields):
+    fields = set(fields)
+    def keys_filter(obj):
+        return dict((k, obj[k]) for k in (set(obj) & fields))
+    return keys_filter
+
def print_avro(avro, opts):
if opts.header and (opts.format != "csv"):
raise AvroError("--header applies only to CSV format")
@@ -76,6 +89,10 @@ def print_avro(avro, opts):
except StopIteration:
return
+    fields = parse_fields(opts.fields)
+    if fields:
+        avro = imap(field_selector(fields), avro)
+
printer = select_printer(opts.format)
for i, record in enumerate(avro):
if i == 0 and opts.header:
@@ -210,6 +227,8 @@ def main(argv=None):
default=None)
cat_options.add_option("--print-schema", help="print schema",
action="store_true", default=False)
+ cat_options.add_option('--fields', default=None,
+ help='fields to show, comma separated (show all by default)')
parser.add_option_group(cat_options)
# write options
Modified: avro/trunk/lang/py/test/test_script.py
URL:
http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_script.py?rev=1172742&r1=1172741&r2=1172742&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_script.py (original)
+++ avro/trunk/lang/py/test/test_script.py Mon Sep 19 18:48:54 2011
@@ -151,6 +151,24 @@ class TestCat(unittest.TestCase):
out = self._run(self.avro_file)
assert len(out) == 2 * NUM_RECORDS
+    def test_fields(self):
+        # One field selection (no comma)
+        out = self._run('--fields', 'last')
+        assert json.loads(out[0]) == {'last': 'duck'}
+
+        # Field selection (with comma and space)
+        out = self._run('--fields', 'first, last')
+        assert json.loads(out[0]) == {'first': 'daffy', 'last': 'duck'}
+
+        # Empty fields should get all
+        out = self._run('--fields', '')
+        assert json.loads(out[0]) == \
+            {'first': 'daffy', 'last': 'duck', 'type': 'duck'}
+
+        # Non existing fields are ignored
+        out = self._run('--fields', 'first,last,age')
+        assert json.loads(out[0]) == {'first': 'daffy', 'last': 'duck'}
+
class TestWrite(unittest.TestCase):
def setUp(self):
self.json_file = tempfile() + ".json"