mikewalch closed pull request #26: Refactored shard example
URL: https://github.com/apache/accumulo-examples/pull/26
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this pull request originates from a fork, the diff is reproduced
below (GitHub does not otherwise display it after the merge):

diff --git a/conf/accumulo-client.properties b/conf/accumulo-client.properties
index eb8757d..48094ec 100644
--- a/conf/accumulo-client.properties
+++ b/conf/accumulo-client.properties
@@ -28,29 +28,20 @@ instance.name=
 ## Zookeeper connection information for Accumulo instance
 instance.zookeepers=localhost:2181
 
-## Zookeeper session timeout (in seconds)
-#instance.zookeepers.timeout.sec=30
+## Zookeeper session timeout
+#instance.zookeepers.timeout=30s
 
 
 ## Authentication properties
 ## --------------
-## Authentication method (i.e password, kerberos, provider). Set more 
properties for chosen method below.
-auth.method=password
+## Authentication method (i.e password, kerberos, PasswordToken, 
KerberosToken, etc)
+auth.type=password
 
-## Accumulo username/principal for chosen authentication method
-auth.username=
+## Accumulo principal/username for chosen authentication method
+auth.principal=root
 
-## Path to Kerberos keytab
-#auth.kerberos.keytab.path=
-
-## Accumulo user password
-auth.password=
-
-## Alias used to extract Accumulo user password from CredentialProvider
-#auth.provider.name=
-
-## Comma separated list of URLs defining CredentialProvider(s)
-#auth.provider.urls=
+## Authentication token (ex. mypassword, /path/to/keytab)
+auth.token=secret
 
 
 ## Batch Writer properties
diff --git a/docs/shard.md b/docs/shard.md
index f63f712..dbae395 100644
--- a/docs/shard.md
+++ b/docs/shard.md
@@ -31,32 +31,25 @@ To run these example programs, create two tables like below.
 
 After creating the tables, index some files. The following command indexes all 
of the java files in the Accumulo source code.
 
-    $ cd /local/username/workspace/accumulo/
-    $ find core/src server/src -name "*.java" | xargs ./bin/runex shard.Index 
-c ./examples.conf -t shard --partitions 30
+    $ find /path/to/accumulo/core -name "*.java" | xargs ./bin/runex 
shard.Index -t shard --partitions 30
 
 The following command queries the index to find all files containing 'foo' and 
'bar'.
 
-    $ ./bin/runex shard.Query -c ./examples.conf -t shard foo bar
+    $ ./bin/runex shard.Query -t shard foo bar
     
/local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/security/ColumnVisibilityTest.java
     
/local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/client/mock/MockConnectorTest.java
     
/local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/security/VisibilityEvaluatorTest.java
-    
/local/username/workspace/accumulo/src/server/src/main/java/accumulo/test/functional/RowDeleteTest.java
-    
/local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/logger/TestLogWriter.java
-    
/local/username/workspace/accumulo/src/server/src/main/java/accumulo/test/functional/DeleteEverythingTest.java
     
/local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/data/KeyExtentTest.java
-    
/local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/constraints/MetadataConstraintsTest.java
     
/local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/iterators/WholeRowIteratorTest.java
-    
/local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/util/DefaultMapTest.java
-    
/local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/tabletserver/InMemoryMapTest.java
 
 In order to run ContinuousQuery, we need to run Reverse.java to populate 
doc2term.
 
-    $ ./bin/runex shard.Reverse -c ./examples.conf --shardTable shard 
--doc2Term doc2term
+    $ ./bin/runex shard.Reverse --shardTable shard --doc2Term doc2term
 
 Below ContinuousQuery is run using 5 terms. So it selects 5 random terms from 
each document, then it continually
 randomly selects one set of 5 terms and queries. It prints the number of 
matching documents and the time in seconds.
 
-    $ ./bin/runex shard.ContinuousQuery -c ./examples.conf --shardTable shard 
--doc2Term doc2term --terms 5
+    $ ./bin/runex shard.ContinuousQuery --shardTable shard --doc2Term doc2term 
--terms 5
     [public, core, class, binarycomparable, b] 2  0.081
     [wordtodelete, unindexdocument, doctablename, putdelete, insert] 1  0.041
     [import, columnvisibilityinterpreterfactory, illegalstateexception, cv, 
columnvisibility] 1  0.049
diff --git 
a/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
 
b/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
index f87a768..a4ffe10 100644
--- 
a/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
+++ 
b/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
@@ -22,7 +22,7 @@
 import java.util.Base64;
 import java.util.Collection;
 
-import org.apache.accumulo.core.client.ConnectionInfo;
+import org.apache.accumulo.core.client.ClientInfo;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
 import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
@@ -120,9 +120,9 @@ public int run(String[] args) {
       job.setReducerClass(ReduceClass.class);
       job.setOutputFormatClass(AccumuloFileOutputFormat.class);
 
-      Connector connector = 
Connector.builder().usingProperties("conf/accumulo-client.properties").build();
-      ConnectionInfo connectionInfo = 
Connector.builder().usingProperties("conf/accumulo-client.properties").info();
-      AccumuloInputFormat.setConnectionInfo(job, connectionInfo);
+      ClientInfo info = 
Connector.builder().usingProperties("conf/accumulo-client.properties").info();
+      Connector connector = Connector.builder().usingClientInfo(info).build();
+      AccumuloInputFormat.setClientInfo(job, info);
       AccumuloInputFormat.setInputTableName(job, SetupTable.tableName);
       AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
       AccumuloOutputFormat.setCreateTables(job, true);
diff --git 
a/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java 
b/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
index cabce7a..1371964 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
@@ -21,7 +21,6 @@
 import java.util.Collections;
 import java.util.Map.Entry;
 import java.util.Random;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.Connector;
@@ -31,8 +30,8 @@
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.user.IntersectingIterator;
-import org.apache.accumulo.examples.cli.BatchScannerOpts;
-import org.apache.accumulo.examples.cli.ClientOpts;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.hadoop.io.Text;
 
 import com.beust.jcommander.Parameter;
@@ -44,51 +43,50 @@
  */
 public class ContinuousQuery {
 
-  static class Opts extends ClientOpts {
+  static class Opts extends Help {
+
     @Parameter(names = "--shardTable", required = true, description = "name of 
the shard table")
     String tableName = null;
+
     @Parameter(names = "--doc2Term", required = true, description = "name of 
the doc2Term table")
     String doc2Term;
+
     @Parameter(names = "--terms", required = true, description = "the number 
of terms in the query")
     int numTerms;
+
     @Parameter(names = "--count", description = "the number of queries to run")
     long iterations = Long.MAX_VALUE;
   }
 
   public static void main(String[] args) throws Exception {
     Opts opts = new Opts();
-    BatchScannerOpts bsOpts = new BatchScannerOpts();
-    opts.parseArgs(ContinuousQuery.class.getName(), args, bsOpts);
+    opts.parseArgs(ContinuousQuery.class.getName(), args);
 
-    Connector conn = opts.getConnector();
+    Connector conn = 
Connector.builder().usingProperties("conf/accumulo-client.properties").build();
 
-    ArrayList<Text[]> randTerms = 
findRandomTerms(conn.createScanner(opts.doc2Term, opts.auths), opts.numTerms);
+    ArrayList<Text[]> randTerms = 
findRandomTerms(conn.createScanner(opts.doc2Term, Authorizations.EMPTY), 
opts.numTerms);
 
     Random rand = new Random();
 
-    BatchScanner bs = conn.createBatchScanner(opts.tableName, opts.auths, 
bsOpts.scanThreads);
-    bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
+    try (BatchScanner bs = conn.createBatchScanner(opts.tableName, 
Authorizations.EMPTY, 5)) {
+      for (long i = 0; i < opts.iterations; i += 1) {
+        Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
 
-    for (long i = 0; i < opts.iterations; i += 1) {
-      Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
+        bs.clearScanIterators();
+        bs.clearColumns();
 
-      bs.clearScanIterators();
-      bs.clearColumns();
+        IteratorSetting ii = new IteratorSetting(20, "ii", 
IntersectingIterator.class);
+        IntersectingIterator.setColumnFamilies(ii, columns);
+        bs.addScanIterator(ii);
+        bs.setRanges(Collections.singleton(new Range()));
 
-      IteratorSetting ii = new IteratorSetting(20, "ii", 
IntersectingIterator.class);
-      IntersectingIterator.setColumnFamilies(ii, columns);
-      bs.addScanIterator(ii);
-      bs.setRanges(Collections.singleton(new Range()));
+        long t1 = System.currentTimeMillis();
+        int count = Iterators.size(bs.iterator());
+        long t2 = System.currentTimeMillis();
 
-      long t1 = System.currentTimeMillis();
-      int count = Iterators.size(bs.iterator());
-      long t2 = System.currentTimeMillis();
-
-      System.out.printf("  %s %,d %6.3f%n", Arrays.asList(columns), count, (t2 
- t1) / 1000.0);
+        System.out.printf("  %s %,d %6.3f%n", Arrays.asList(columns), count, 
(t2 - t1) / 1000.0);
+      }
     }
-
-    bs.close();
-
   }
 
   private static ArrayList<Text[]> findRandomTerms(Scanner scanner, int 
numTerms) {
diff --git a/src/main/java/org/apache/accumulo/examples/shard/Index.java 
b/src/main/java/org/apache/accumulo/examples/shard/Index.java
index 92c8a12..5b2d67f 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/Index.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/Index.java
@@ -23,10 +23,12 @@
 import java.util.List;
 
 import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.examples.cli.BatchWriterOpts;
 import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.hadoop.io.Text;
 
 import com.beust.jcommander.Parameter;
@@ -91,25 +93,31 @@ public static void index(int numPartitions, File src, 
String splitRegex, BatchWr
 
   }
 
-  static class Opts extends ClientOnRequiredTable {
+  static class IndexOpts extends Help {
+
+    @Parameter(names = {"-t", "--table"}, required = true, description = 
"table to use")
+    private String tableName;
+
     @Parameter(names = "--partitions", required = true, description = "the 
number of shards to create")
     int partitions;
+
     @Parameter(required = true, description = "<file> { <file> ... }")
     List<String> files = new ArrayList<>();
   }
 
   public static void main(String[] args) throws Exception {
-    Opts opts = new Opts();
-    BatchWriterOpts bwOpts = new BatchWriterOpts();
-    opts.parseArgs(Index.class.getName(), args, bwOpts);
+    IndexOpts opts = new IndexOpts();
+    opts.parseArgs(Index.class.getName(), args);
 
     String splitRegex = "\\W+";
 
-    BatchWriter bw = 
opts.getConnector().createBatchWriter(opts.getTableName(), 
bwOpts.getBatchWriterConfig());
-    for (String filename : opts.files) {
-      index(opts.partitions, new File(filename), splitRegex, bw);
+    Connector connector = 
Connector.builder().usingProperties("conf/accumulo-client.properties").build();
+
+    try (BatchWriter bw = connector.createBatchWriter(opts.tableName)) {
+      for (String filename : opts.files) {
+        index(opts.partitions, new File(filename), splitRegex, bw);
+      }
     }
-    bw.close();
   }
 
 }
diff --git a/src/main/java/org/apache/accumulo/examples/shard/Query.java 
b/src/main/java/org/apache/accumulo/examples/shard/Query.java
index e7fe30a..16f1530 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/Query.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/Query.java
@@ -20,7 +20,6 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.Connector;
@@ -30,8 +29,8 @@
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.user.IntersectingIterator;
-import org.apache.accumulo.examples.cli.BatchScannerOpts;
-import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.hadoop.io.Text;
 
 import com.beust.jcommander.Parameter;
@@ -41,10 +40,14 @@
  */
 public class Query {
 
-  static class Opts extends ClientOnRequiredTable {
+  static class QueryOpts extends Help {
+
     @Parameter(description = " term { <term> ... }")
     List<String> terms = new ArrayList<>();
 
+    @Parameter(names = {"-t", "--table"}, required = true, description = 
"table to use")
+    private String tableName;
+
     @Parameter(names = {"--sample"}, description = "Do queries against sample, 
useful when sample is built using column qualifier")
     private boolean useSample = false;
 
@@ -81,21 +84,21 @@
   }
 
   public static void main(String[] args) throws Exception {
-    Opts opts = new Opts();
-    BatchScannerOpts bsOpts = new BatchScannerOpts();
-    opts.parseArgs(Query.class.getName(), args, bsOpts);
-    Connector conn = opts.getConnector();
-    BatchScanner bs = conn.createBatchScanner(opts.getTableName(), opts.auths, 
bsOpts.scanThreads);
-    bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
-    if (opts.useSample) {
-      SamplerConfiguration samplerConfig = 
conn.tableOperations().getSamplerConfiguration(opts.getTableName());
-      
CutoffIntersectingIterator.validateSamplerConfig(conn.tableOperations().getSamplerConfiguration(opts.getTableName()));
-      bs.setSamplerConfiguration(samplerConfig);
-    }
-    for (String entry : query(bs, opts.terms, opts.sampleCutoff))
-      System.out.println("  " + entry);
+    QueryOpts opts = new QueryOpts();
+    opts.parseArgs(Query.class.getName(), args);
 
-    bs.close();
-  }
+    Connector conn = 
Connector.builder().usingProperties("conf/accumulo-client.properties")
+        .build();
 
+    try (BatchScanner bs = conn.createBatchScanner(opts.tableName, 
Authorizations.EMPTY, 10)) {
+      if (opts.useSample) {
+        SamplerConfiguration samplerConfig = 
conn.tableOperations().getSamplerConfiguration(opts.tableName);
+        
CutoffIntersectingIterator.validateSamplerConfig(conn.tableOperations().getSamplerConfiguration(opts.tableName));
+        bs.setSamplerConfiguration(samplerConfig);
+      }
+      for (String entry : query(bs, opts.terms, opts.sampleCutoff)) {
+        System.out.println("  " + entry);
+      }
+    }
+  }
 }
diff --git a/src/main/java/org/apache/accumulo/examples/shard/Reverse.java 
b/src/main/java/org/apache/accumulo/examples/shard/Reverse.java
index 26e7f38..19ef6a4 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/Reverse.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/Reverse.java
@@ -24,8 +24,10 @@
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.examples.cli.BatchWriterOpts;
 import org.apache.accumulo.examples.cli.ClientOpts;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.accumulo.examples.cli.ScannerOpts;
 import org.apache.hadoop.io.Text;
 
@@ -37,33 +39,29 @@
  */
 public class Reverse {
 
-  static class Opts extends ClientOpts {
+  static class Opts extends Help {
+
     @Parameter(names = "--shardTable")
     String shardTable = "shard";
+
     @Parameter(names = "--doc2Term")
     String doc2TermTable = "doc2Term";
   }
 
   public static void main(String[] args) throws Exception {
     Opts opts = new Opts();
-    ScannerOpts scanOpts = new ScannerOpts();
-    BatchWriterOpts bwOpts = new BatchWriterOpts();
-    opts.parseArgs(Reverse.class.getName(), args, scanOpts, bwOpts);
-
-    Connector conn = opts.getConnector();
+    opts.parseArgs(Reverse.class.getName(), args);
 
-    Scanner scanner = conn.createScanner(opts.shardTable, opts.auths);
-    scanner.setBatchSize(scanOpts.scanBatchSize);
-    BatchWriter bw = conn.createBatchWriter(opts.doc2TermTable, 
bwOpts.getBatchWriterConfig());
+    Connector conn = 
Connector.builder().usingProperties("conf/accumulo-client.properties").build();
 
-    for (Entry<Key,Value> entry : scanner) {
-      Key key = entry.getKey();
-      Mutation m = new Mutation(key.getColumnQualifier());
-      m.put(key.getColumnFamily(), new Text(), new Value(new byte[0]));
-      bw.addMutation(m);
+    try (Scanner scanner = conn.createScanner(opts.shardTable, 
Authorizations.EMPTY);
+         BatchWriter bw = conn.createBatchWriter(opts.doc2TermTable)) {
+      for (Entry<Key, Value> entry : scanner) {
+        Key key = entry.getKey();
+        Mutation m = new Mutation(key.getColumnQualifier());
+        m.put(key.getColumnFamily(), new Text(), new Value(new byte[0]));
+        bw.addMutation(m);
+      }
     }
-
-    bw.close();
-
   }
 }
diff --git a/src/test/java/org/apache/accumulo/examples/ExamplesIT.java 
b/src/test/java/org/apache/accumulo/examples/ExamplesIT.java
index 0087eb4..022ed80 100644
--- a/src/test/java/org/apache/accumulo/examples/ExamplesIT.java
+++ b/src/test/java/org/apache/accumulo/examples/ExamplesIT.java
@@ -129,9 +129,9 @@ public void configureMiniCluster(MiniAccumuloConfigImpl 
cfg, Configuration hadoo
   @Before
   public void getClusterInfo() throws Exception {
     c = getConnector();
-    String user = getAdminPrincipal();
-    String instance = c.getInstance().getInstanceName();
-    String keepers = c.getInstance().getZooKeepers();
+    String user = c.info().getPrincipal();
+    String instance = c.info().getInstanceName();
+    String keepers = c.info().getZooKeepers();
     AuthenticationToken token = getAdminToken();
     if (token instanceof PasswordToken) {
       String passwd = new String(((PasswordToken) 
getAdminToken()).getPassword(), UTF_8);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to