Pass pig vars through url params in addition to env vars. Patch by Jeremiah Johnson and brandonwilliams, reviewed by xedin for CASSANDRA-4499
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/686fe1e1
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/686fe1e1
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/686fe1e1

Branch: refs/heads/cassandra-1.1
Commit: 686fe1e14565259cfdc533fae352f34bfd35b9f0
Parents: ef8620f
Author: Brandon Williams <brandonwilli...@apache.org>
Authored: Fri Sep 21 14:58:36 2012 -0500
Committer: Brandon Williams <brandonwilli...@apache.org>
Committed: Fri Sep 21 14:58:36 2012 -0500

----------------------------------------------------------------------
 examples/pig/README.txt                            |  7 +++++--
 .../cassandra/hadoop/pig/CassandraStorage.java     | 14 +++++++++++---
 2 files changed, 16 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/686fe1e1/examples/pig/README.txt
----------------------------------------------------------------------
diff --git a/examples/pig/README.txt b/examples/pig/README.txt
index 7c61c05..b05d1ef 100644
--- a/examples/pig/README.txt
+++ b/examples/pig/README.txt
@@ -78,10 +78,13 @@ The following environment variables default to false but can be set to true to e
 
 PIG_WIDEROW_INPUT: this enables loading of rows with many columns without
                    incurring memory pressure. All columns will be in a bag and indexes are not
-                   supported.
+                   supported. This can also be set in the LOAD url by adding
+                   the 'widerows=true' parameter.
 
 PIG_USE_SECONDARY: this allows easy use of secondary indexes within your
                    script, by appending every index to the schema as 'index_$name', allowing
                    filtering of loaded rows with a statement like "FILTER rows BY index_color eq
-                   'blue'" if you have an index called 'color' defined.
+                   'blue'" if you have an index called 'color' defined. This
+                   can also be set in the LOAD url by adding the
+                   'use_secondary=true' parameter.
 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/686fe1e1/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java b/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
index f2fad67..434ca7f 100644
--- a/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
+++ b/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
@@ -475,6 +475,12 @@ public class CassandraStorage extends LoadFunc implements StoreFuncInterface, Lo
                     slice_reverse = Boolean.parseBoolean(urlQuery.get("reversed"));
                 if (urlQuery.containsKey("limit"))
                     limit = Integer.parseInt(urlQuery.get("limit"));
+                if (urlQuery.containsKey("allow_deletes"))
+                    allow_deletes = Boolean.parseBoolean(urlQuery.get("allow_deletes"));
+                if (urlQuery.containsKey("widerows"))
+                    widerows = Boolean.parseBoolean(urlQuery.get("widerows"));
+                if (urlQuery.containsKey("use_secondary"))
+                    usePartitionFilter = Boolean.parseBoolean(urlQuery.get("use_secondary"));
             }
             String[] parts = urlParts[0].split("/+");
             keyspace = parts[1];
@@ -482,7 +488,7 @@ public class CassandraStorage extends LoadFunc implements StoreFuncInterface, Lo
         }
         catch (Exception e)
         {
-            throw new IOException("Expected 'cassandra://<keyspace>/<columnfamily>[?slice_start=<start>&slice_end=<end>[&reversed=true][&limit=1]]': " + e.getMessage());
+            throw new IOException("Expected 'cassandra://<keyspace>/<columnfamily>[?slice_start=<start>&slice_end=<end>[&reversed=true][&limit=1][&allow_deletes=true][widerows=true][use_secondary=true]]': " + e.getMessage());
         }
     }
 
@@ -928,7 +934,8 @@ public class CassandraStorage extends LoadFunc implements StoreFuncInterface, Lo
                         mutation.deletion.setTimestamp(FBUtilities.timestampMicros());
                     }
                     else
-                        throw new IOException("null found but deletes are disabled, set " + PIG_ALLOW_DELETES + "=true to enable");
+                        throw new IOException("null found but deletes are disabled, set " + PIG_ALLOW_DELETES +
+                                              "=true in environment or allow_deletes=true in URL to enable");
                 }
                 else
                 {
@@ -970,7 +977,8 @@ public class CassandraStorage extends LoadFunc implements StoreFuncInterface, Lo
                         mutation.deletion.setTimestamp(FBUtilities.timestampMicros());
                     }
                     else
-                        throw new IOException("SuperColumn deletion attempted with empty bag, but deletes are disabled, set " + PIG_ALLOW_DELETES + "=true to enable");
+                        throw new IOException("SuperColumn deletion attempted with empty bag, but deletes are disabled, set " +
+                                              PIG_ALLOW_DELETES + "=true in environment or allow_deletes=true in URL to enable");
                 }
                 else
                 {