Pig: correctly compose the key from the marshaller. Patch by brandonwilliams; reviewed by xedin for CASSANDRA-3962.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/1ed6065b Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/1ed6065b Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/1ed6065b Branch: refs/heads/cassandra-1.1 Commit: 1ed6065b5b9d87ef6223683e1d45bebbf0463283 Parents: 5cc12f3 Author: Brandon Williams <brandonwilli...@apache.org> Authored: Mon Feb 27 10:52:20 2012 -0600 Committer: Brandon Williams <brandonwilli...@apache.org> Committed: Mon Feb 27 10:52:20 2012 -0600 ---------------------------------------------------------------------- .../cassandra/hadoop/pig/CassandraStorage.java | 5 ++- contrib/pig/test/populate-cli.txt | 15 +++++++++++++ contrib/pig/test/test_storage.pig | 17 +++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/1ed6065b/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java ---------------------------------------------------------------------- diff --git a/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java b/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java index 8863975..a2213f7 100644 --- a/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java +++ b/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java @@ -120,10 +120,11 @@ public class CassandraStorage extends LoadFunc implements StoreFuncInterface, Lo assert key != null && cf != null; // output tuple, will hold the key, each indexed column in a tuple, then a bag of the rest - Tuple tuple = TupleFactory.getInstance().newTuple(); + // NOTE: we're setting the tuple size here only for the key so we can use setTupleValue on it + Tuple tuple = TupleFactory.getInstance().newTuple(1); DefaultDataBag bag = new DefaultDataBag(); // set the key - tuple.append(new 
DataByteArray(ByteBufferUtil.getArray(key))); + setTupleValue(tuple, 0, getDefaultMarshallers(cfDef).get(2).compose(key)); // we must add all the indexed columns first to match the schema Map<ByteBuffer, Boolean> added = new HashMap<ByteBuffer, Boolean>(); // take care to iterate these in the same order as the schema does http://git-wip-us.apache.org/repos/asf/cassandra/blob/1ed6065b/contrib/pig/test/populate-cli.txt ---------------------------------------------------------------------- diff --git a/contrib/pig/test/populate-cli.txt b/contrib/pig/test/populate-cli.txt index 665fba4..f266ce9 100644 --- a/contrib/pig/test/populate-cli.txt +++ b/contrib/pig/test/populate-cli.txt @@ -1,6 +1,8 @@ create keyspace PigTest; use PigTest; create column family SomeApp with +placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy' and +strategy_options = {replication_factor:1} and key_validation_class = UTF8Type and default_validation_class = LexicalUUIDType and comparator = UTF8Type and @@ -15,6 +17,8 @@ column_metadata = ]; create column family CopyOfSomeApp with +placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy' and +strategy_options = {replication_factor:1} and key_validation_class = UTF8Type and default_validation_class = LexicalUUIDType and comparator = UTF8Type and @@ -65,3 +69,14 @@ set SomeApp['qux']['extra4'] = lexicaluuid(); set SomeApp['qux']['extra5'] = lexicaluuid(); set SomeApp['qux']['extra6'] = lexicaluuid(); set SomeApp['qux']['extra7'] = lexicaluuid(); + +create column family U8 with + key_validation_class = UTF8Type and + comparator = UTF8Type; + +create column family Bytes with + key_validation_class = BytesType and + comparator = UTF8Type; + +set U8['foo']['x'] = ascii('Z'); +set Bytes[ascii('foo')]['x'] = ascii('Z') http://git-wip-us.apache.org/repos/asf/cassandra/blob/1ed6065b/contrib/pig/test/test_storage.pig ---------------------------------------------------------------------- diff --git a/contrib/pig/test/test_storage.pig 
b/contrib/pig/test/test_storage.pig index 22143dc..7422db4 100644 --- a/contrib/pig/test/test_storage.pig +++ b/contrib/pig/test/test_storage.pig @@ -20,3 +20,20 @@ STORE dislikes_extras INTO 'cassandra://PigTest/CopyOfSomeApp' USING CassandraSt -- filter to fully visible rows (no uuid columns) and dump visible = FILTER rows BY COUNT(columns) == 0; dump visible; + + + +-- test key types with a join +U8 = load 'cassandra://PigTest/U8' using CassandraStorage(); +Bytes = load 'cassandra://PigTest/Bytes' using CassandraStorage(); + +-- cast key to chararray +b = foreach Bytes generate (chararray)key, columns; + +-- key in Bytes is a bytearray, U8 chararray +a = join Bytes by key, U8 by key; +dump a + +-- key should now be cast into a chararray +c = join b by (chararray)key, U8 by (chararray)key; +dump c