keith-turner closed pull request #737: Added Authorization Summarizer
URL: https://github.com/apache/accumulo/pull/737
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizer.java
 
b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizer.java
new file mode 100644
index 0000000000..efee030210
--- /dev/null
+++ 
b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizer.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.summary.summarizers;
+
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Consumer;
+
+import org.apache.accumulo.core.client.admin.TableOperations;
+import org.apache.accumulo.core.client.summary.CountingSummarizer;
+import org.apache.accumulo.core.data.ArrayByteSequence;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.core.security.ColumnVisibility.Node;
+
+/**
+ * Counts unique authorizations in column visibility labels. Leverages the 
superclass to defend against
+ * too many unique authorizations. This class is useful for discovering what authorizations are 
present when the expected
+ * number of authorizations is small.
+ *
+ * <p>
+ * As an example, assume a data set of three keys with the column visibilities 
:
+ * {@code (A&C)|(A&D)}, {@code A&B}, and {@code C|E}. For this input, this 
summarizer would output :
+ * {@code c:A=2}, {@code c:B=1}, {@code c:C=2}, {@code c:D=1}, {@code c:E=1}. 
Notice that even though
+ * {@code A} occurred 3 times in total, it is only counted once per column 
visibility.
+ *
+ * <p>
+ * See the superclass documentation for more information about usage and 
configuration.
+ *
+ * @since 2.0.0
+ *
+ * @see VisibilitySummarizer
+ * @see TableOperations#addSummarizers(String,
+ *      org.apache.accumulo.core.client.summary.SummarizerConfiguration...)
+ * @see TableOperations#summaries(String)
+ */
+public class AuthorizationSummarizer extends CountingSummarizer<ByteSequence> {
+
+  @Override
+  protected Converter<ByteSequence> converter() {
+    return new AuthsConverter();
+  }
+
+  private static class AuthsConverter implements Converter<ByteSequence> {
+
+    final int MAX_ENTRIES = 1000;
+    private Map<ByteSequence,Set<ByteSequence>> cache = new 
LinkedHashMap<ByteSequence,Set<ByteSequence>>(
+        MAX_ENTRIES + 1, .75F, true) {
+      private static final long serialVersionUID = 1L;
+
+      // This method is called just after a new entry has been added
+      @Override
+      public boolean 
removeEldestEntry(Map.Entry<ByteSequence,Set<ByteSequence>> eldest) {
+        return size() > MAX_ENTRIES;
+      }
+    };
+
+    @Override
+    public void convert(Key k, Value v, Consumer<ByteSequence> consumer) {
+      ByteSequence vis = k.getColumnVisibilityData();
+
+      if (vis.length() > 0) {
+        Set<ByteSequence> auths = cache.get(vis);
+        if (auths == null) {
+          auths = findAuths(vis);
+          cache.put(new ArrayByteSequence(vis), auths);
+        }
+
+        for (ByteSequence auth : auths) {
+          consumer.accept(auth);
+        }
+      }
+    }
+
+    private Set<ByteSequence> findAuths(ByteSequence vis) {
+      HashSet<ByteSequence> auths = new HashSet<>();
+      byte[] expression = vis.toArray();
+      Node root = new ColumnVisibility(expression).getParseTree();
+
+      findAuths(root, expression, auths);
+
+      return auths;
+    }
+
+    private void findAuths(Node node, byte[] expression, HashSet<ByteSequence> 
auths) {
+      switch (node.getType()) {
+        case AND:
+        case OR:
+          for (Node child : node.getChildren()) {
+            findAuths(child, expression, auths);
+          }
+          break;
+        case TERM:
+          auths.add(node.getTerm(expression));
+          break;
+        case EMPTY:
+          break;
+        default:
+          throw new IllegalArgumentException("Unknown node type " + 
node.getType());
+      }
+    }
+  }
+}
diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java
 
b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java
index 630d42b357..927c9c82f3 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/VisibilitySummarizer.java
@@ -31,6 +31,7 @@
  *
  * @since 2.0.0
  *
+ * @see AuthorizationSummarizer
  * @see TableOperations#addSummarizers(String,
  *      org.apache.accumulo.core.client.summary.SummarizerConfiguration...)
  * @see TableOperations#summaries(String)
diff --git 
a/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizerTest.java
 
b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizerTest.java
new file mode 100644
index 0000000000..5dee90aa23
--- /dev/null
+++ 
b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/AuthorizationSummarizerTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.summary.summarizers;
+
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.COUNTER_STAT_PREFIX;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.DELETES_IGNORED_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.EMITTED_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.SEEN_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_LONG_STAT;
+import static 
org.apache.accumulo.core.client.summary.CountingSummarizer.TOO_MANY_STAT;
+import static org.junit.Assert.assertEquals;
+
+import java.util.HashMap;
+
+import org.apache.accumulo.core.client.summary.Summarizer.Collector;
+import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.junit.Test;
+
+public class AuthorizationSummarizerTest {
+
+  private static final Value EV = new Value();
+
+  @Test
+  public void testBasic() {
+    SummarizerConfiguration sc = 
SummarizerConfiguration.builder(AuthorizationSummarizer.class)
+        .build();
+    AuthorizationSummarizer authSummarizer = new AuthorizationSummarizer();
+
+    Collector collector = authSummarizer.collector(sc);
+
+    collector.accept(new Key("r", "f", "q", ""), EV);
+    collector.accept(new Key("r", "f", "q", "A"), EV);
+    collector.accept(new Key("r", "f", "q", "B"), EV);
+    collector.accept(new Key("r", "f", "q", "A&B"), EV);
+    collector.accept(new Key("r", "f", "q", "(C|D)&(A|B)"), EV);
+    collector.accept(new Key("r", "f", "q", "(C|D)&(A|B)"), EV);
+    collector.accept(new Key("r", "f", "q", "(D&E)|(D&C&F)"), EV);
+
+    HashMap<String,Long> actual = new HashMap<>();
+    collector.summarize(actual::put);
+
+    String p = COUNTER_STAT_PREFIX;
+
+    HashMap<String,Long> expected = new HashMap<>();
+    expected.put(p + "A", 4L);
+    expected.put(p + "B", 4L);
+    expected.put(p + "C", 3L);
+    expected.put(p + "D", 3L);
+    expected.put(p + "E", 1L);
+    expected.put(p + "F", 1L);
+    expected.put(TOO_LONG_STAT, 0L);
+    expected.put(TOO_MANY_STAT, 0L);
+    expected.put(SEEN_STAT, 7L);
+    expected.put(EMITTED_STAT, 16L);
+    expected.put(DELETES_IGNORED_STAT, 0L);
+
+    assertEquals(expected, actual);
+  }
+}


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to