Repository: sentry Updated Branches: refs/heads/master a41a1447b -> 201808de0
SENTRY-1892: Reduce memory consumption of HMSPath$Entry and TPathEntry (Misha Dmitriev, reviewed by Alex Kolbasov and Vadim Spector) Project: http://git-wip-us.apache.org/repos/asf/sentry/repo Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/201808de Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/201808de Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/201808de Branch: refs/heads/master Commit: 201808de059b9cda38a34c3aefb4253b402b891d Parents: a41a144 Author: Alexander Kolbasov <ako...@gmail.com> Authored: Wed Aug 23 17:02:58 2017 -0700 Committer: Alexander Kolbasov <ako...@gmail.com> Committed: Wed Aug 23 17:02:58 2017 -0700 ---------------------------------------------------------------------- .../sentry/hdfs/service/thrift/TPathEntry.java | 76 ++++++------- .../java/org/apache/sentry/hdfs/HMSPaths.java | 106 ++++++++++++++----- .../org/apache/sentry/hdfs/HMSPathsDumper.java | 28 +++-- .../main/resources/sentry_hdfs_service.thrift | 4 +- .../org/apache/sentry/hdfs/TestHMSPaths.java | 13 +++ 5 files changed, 157 insertions(+), 70 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java b/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java index 2a89368..210d219 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java +++ b/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java @@ -34,14 +34,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; @SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)", date = "2017-04-26") +@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)", date = "2017-08-21") public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntry._Fields>, java.io.Serializable, Cloneable, Comparable<TPathEntry> { private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPathEntry"); private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.BYTE, (short)1); private static final org.apache.thrift.protocol.TField PATH_ELEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("pathElement", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField CHILDREN_FIELD_DESC = new org.apache.thrift.protocol.TField("children", org.apache.thrift.protocol.TType.SET, (short)4); - private static final org.apache.thrift.protocol.TField AUTHZ_OBJS_FIELD_DESC = new org.apache.thrift.protocol.TField("authzObjs", org.apache.thrift.protocol.TType.SET, (short)5); + private static final org.apache.thrift.protocol.TField CHILDREN_FIELD_DESC = new org.apache.thrift.protocol.TField("children", org.apache.thrift.protocol.TType.LIST, (short)4); + private static final org.apache.thrift.protocol.TField AUTHZ_OBJS_FIELD_DESC = new org.apache.thrift.protocol.TField("authzObjs", org.apache.thrift.protocol.TType.LIST, (short)5); private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>(); static { @@ -51,8 +51,8 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr private byte type; // required private String pathElement; // required - private Set<Integer> children; // required - private Set<String> authzObjs; // optional + private List<Integer> children; // required + private List<String> authzObjs; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -133,10 +133,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr tmpMap.put(_Fields.PATH_ELEMENT, new org.apache.thrift.meta_data.FieldMetaData("pathElement", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.CHILDREN, new org.apache.thrift.meta_data.FieldMetaData("children", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.SetMetaData(org.apache.thrift.protocol.TType.SET, + new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32)))); tmpMap.put(_Fields.AUTHZ_OBJS, new org.apache.thrift.meta_data.FieldMetaData("authzObjs", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.SetMetaData(org.apache.thrift.protocol.TType.SET, + new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPathEntry.class, metaDataMap); @@ -148,7 +148,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr public TPathEntry( byte type, String pathElement, - Set<Integer> children) + List<Integer> children) { this(); this.type = type; @@ -167,11 +167,11 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr this.pathElement = other.pathElement; } if (other.isSetChildren()) { - Set<Integer> __this__children = new HashSet<Integer>(other.children); + List<Integer> __this__children = new ArrayList<Integer>(other.children); this.children = __this__children; } if (other.isSetAuthzObjs()) { - Set<String> __this__authzObjs = new HashSet<String>(other.authzObjs); + List<String> __this__authzObjs = new ArrayList<String>(other.authzObjs); this.authzObjs = __this__authzObjs; } } @@ -244,16 +244,16 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr public void addToChildren(int elem) { if (this.children == null) { - this.children = new HashSet<Integer>(); + this.children = new ArrayList<Integer>(); } this.children.add(elem); } - public Set<Integer> getChildren() { + public List<Integer> getChildren() { return this.children; } - public void setChildren(Set<Integer> children) { + public void setChildren(List<Integer> children) { this.children = children; } @@ -282,16 +282,16 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr public void addToAuthzObjs(String elem) { if (this.authzObjs == null) { - this.authzObjs = new HashSet<String>(); + this.authzObjs = new ArrayList<String>(); } this.authzObjs.add(elem); } - public Set<String> getAuthzObjs() { + public List<String> getAuthzObjs() { return this.authzObjs; } - public void setAuthzObjs(Set<String> authzObjs) { + public void setAuthzObjs(List<String> authzObjs) { this.authzObjs = authzObjs; } @@ -332,7 +332,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr if (value == null) { unsetChildren(); } else { - setChildren((Set<Integer>)value); + setChildren((List<Integer>)value); } break; @@ -340,7 +340,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr if (value == null) { unsetAuthzObjs(); } else { - setAuthzObjs((Set<String>)value); + setAuthzObjs((List<String>)value); } break; @@ -634,17 +634,17 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr } break; case 4: // CHILDREN - if (schemeField.type == org.apache.thrift.protocol.TType.SET) { + if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { { - org.apache.thrift.protocol.TSet _set32 = iprot.readSetBegin(); - struct.children = new HashSet<Integer>(2*_set32.size); + org.apache.thrift.protocol.TList _list32 = iprot.readListBegin(); + struct.children = new ArrayList<Integer>(_list32.size); int _elem33; - for (int _i34 = 0; _i34 < _set32.size; ++_i34) + for (int _i34 = 0; _i34 < _list32.size; ++_i34) { _elem33 = iprot.readI32(); struct.children.add(_elem33); } - iprot.readSetEnd(); + iprot.readListEnd(); } struct.setChildrenIsSet(true); } else { @@ -652,17 +652,17 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr } break; case 5: // AUTHZ_OBJS - if (schemeField.type == org.apache.thrift.protocol.TType.SET) { + if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { { - org.apache.thrift.protocol.TSet _set35 = iprot.readSetBegin(); - struct.authzObjs = new HashSet<String>(2*_set35.size); + org.apache.thrift.protocol.TList _list35 = iprot.readListBegin(); + struct.authzObjs = new ArrayList<String>(_list35.size); String _elem36; - for (int _i37 = 0; _i37 < _set35.size; ++_i37) + for (int _i37 = 0; _i37 < _list35.size; ++_i37) { _elem36 = iprot.readString(); struct.authzObjs.add(_elem36); } - iprot.readSetEnd(); + iprot.readListEnd(); } struct.setAuthzObjsIsSet(true); } else { @@ -693,12 +693,12 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr if (struct.children != null) { oprot.writeFieldBegin(CHILDREN_FIELD_DESC); { - oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.I32, struct.children.size())); + oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, struct.children.size())); for (int _iter38 : struct.children) { oprot.writeI32(_iter38); } - oprot.writeSetEnd(); + oprot.writeListEnd(); } oprot.writeFieldEnd(); } @@ -706,12 +706,12 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr if (struct.isSetAuthzObjs()) { oprot.writeFieldBegin(AUTHZ_OBJS_FIELD_DESC); { - oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRING, struct.authzObjs.size())); + oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.authzObjs.size())); for (String _iter39 : struct.authzObjs) { oprot.writeString(_iter39); } - oprot.writeSetEnd(); + oprot.writeListEnd(); } oprot.writeFieldEnd(); } @@ -766,10 +766,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr struct.pathElement = iprot.readString(); struct.setPathElementIsSet(true); { - org.apache.thrift.protocol.TSet _set42 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.children = new HashSet<Integer>(2*_set42.size); + org.apache.thrift.protocol.TList _list42 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, iprot.readI32()); + struct.children = new ArrayList<Integer>(_list42.size); int _elem43; - for (int _i44 = 0; _i44 < _set42.size; ++_i44) + for (int _i44 = 0; _i44 < _list42.size; ++_i44) { _elem43 = iprot.readI32(); struct.children.add(_elem43); @@ -779,10 +779,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntr BitSet incoming = iprot.readBitSet(1); if (incoming.get(0)) { { - org.apache.thrift.protocol.TSet _set45 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.authzObjs = new HashSet<String>(2*_set45.size); + org.apache.thrift.protocol.TList _list45 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); + struct.authzObjs = new ArrayList<String>(_list45.size); String _elem46; - for (int _i47 = 0; _i47 < _set45.size; ++_i47) + for (int _i47 = 0; _i47 < _list45.size; ++_i47) { _elem46 = iprot.readString(); struct.authzObjs.add(_elem46); http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java index 6e71561..5a98263 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java +++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java @@ -132,10 +132,10 @@ public class HMSPaths implements AuthzPaths { private EntryType type; private String pathElement; - // A set of authorizable objects associated with this entry. Authorizable - // object should be case insensitive. The set is allocated lazily to avoid - // wasting memory due to empty sets. - private Set<String> authzObjs; + // A set (or single object when set size is 1) of authorizable objects associated + // with this entry. Authorizable object should be case insensitive. The set is + // allocated lazily to avoid wasting memory due to empty sets. + private Object authzObjs; // Path of child element to the path entry mapping, e.g. 'b' -> '/a/b' // This is allocated lazily to avoid wasting memory due to empty maps. @@ -161,9 +161,9 @@ public class HMSPaths implements AuthzPaths { * @param parent the parent node. If not specified, this entry is a root entry. * @param pathElement the path element of this entry on the tree. * @param type entry type. - * @param authzObjs a set of authz objects. + * @param authzObjs a collection of authz objects. */ - Entry(Entry parent, String pathElement, EntryType type, Set<String> authzObjs) { + Entry(Entry parent, String pathElement, EntryType type, Collection<String> authzObjs) { this.parent = parent; this.type = type; this.pathElement = pathElement.intern(); @@ -204,26 +204,42 @@ public class HMSPaths implements AuthzPaths { void removeAuthzObj(String authzObj) { if (authzObjs != null) { - authzObjs.remove(authzObj); + if (authzObjs instanceof Set) { + Set<String> authzObjsSet = (Set<String>) authzObjs; + authzObjsSet.remove(authzObj); + if (authzObjsSet.size() == 1) { + authzObjs = authzObjsSet.iterator().next(); + } + } else if (authzObjs.equals(authzObj)){ + authzObjs = null; + } } } void addAuthzObj(String authzObj) { if (authzObj != null) { if (authzObjs == null) { - authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + authzObjs = authzObj; + } else { + Set<String> authzObjsSet; + if (authzObjs instanceof String) { + if (authzObjs.equals(authzObj)) { + return; + } else { + authzObjs = authzObjsSet = newTreeSetWithElement((String) authzObjs); + } + } else { + authzObjsSet = (Set) authzObjs; + } + authzObjsSet.add(authzObj.intern()); } - authzObjs.add(authzObj.intern()); } } - void addAuthzObjs(Set<String> authzObjs) { + void addAuthzObjs(Collection<String> authzObjs) { if (authzObjs != null) { - if (this.authzObjs == null) { - this.authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - } for (String authzObj : authzObjs) { - this.authzObjs.add(authzObj.intern()); + addAuthzObj(authzObj.intern()); } } } @@ -238,7 +254,17 @@ public class HMSPaths implements AuthzPaths { public String toString() { return String.format("Entry[fullPath: %s, type: %s, authObject: %s]", - getFullPath(), type, authzObjs != null ? Joiner.on(",").join(authzObjs) : ""); + getFullPath(), type, authzObjsToString()); + } + + private String authzObjsToString() { + if (authzObjs == null) { + return ""; + } else if (authzObjs instanceof String) { + return (String) authzObjs; + } else { + return Joiner.on(",").join((Set) authzObjs); + } } @Override @@ -420,9 +446,9 @@ public class HMSPaths implements AuthzPaths { // entry no longer maps to any authzObj, removes the // entry recursively. if (authzObjs != null) { - authzObjs.remove(authzObj); + removeAuthzObj(authzObj); } - if (authzObjs == null || authzObjs.isEmpty()) { + if (authzObjs == null) { deleteFromParent(); } } else { @@ -433,7 +459,7 @@ public class HMSPaths implements AuthzPaths { if (getType() == EntryType.AUTHZ_OBJECT) { setType(EntryType.DIR); if (authzObjs != null) { - authzObjs.remove(authzObj); + removeAuthzObj(authzObj); } } } @@ -497,10 +523,42 @@ public class HMSPaths implements AuthzPaths { /** * @return the set of auth objects. The returned set should be used only - * for querying, not for any modifications. + * for querying, not for any modifications. If you just want to find out + * the set size or whether it's empty, use the specialized getAuthzObjsSize() + * and isAuthzObjsEmpty() methods that performs better. */ - public Set<String> getAuthzObjs() { - return authzObjs != null ? authzObjs : Collections.<String>emptySet(); + Set<String> getAuthzObjs() { + if (authzObjs != null) { + if (authzObjs instanceof Set) { + return (Set<String>) authzObjs; + } else { + return newTreeSetWithElement((String) authzObjs); + } + } else { + return Collections.<String>emptySet(); + } + } + + int getAuthzObjsSize() { + if (authzObjs != null) { + if (authzObjs instanceof Set) { + return ((Set<String>) authzObjs).size(); + } else { + return 1; + } + } else { + return 0; + } + } + + boolean isAuthzObjsEmpty() { + return authzObjs == null; + } + + private Set<String> newTreeSetWithElement(String el) { + Set<String> result = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + result.add(el); + return result; } public Entry findPrefixEntry(List<String> pathElements) { @@ -538,17 +596,17 @@ public class HMSPaths implements AuthzPaths { boolean isPartialMatchOk, Entry lastAuthObj) { Entry found = null; if (index == pathElements.length) { - if (isPartialMatchOk && getAuthzObjs().size() != 0) { + if (isPartialMatchOk && !isAuthzObjsEmpty()) { found = this; } } else { Entry child = getChild(pathElements[index]); if (child != null) { if (index == pathElements.length - 1) { - found = (child.getAuthzObjs().size() != 0) ? child : lastAuthObj; + found = (!child.isAuthzObjsEmpty()) ? child : lastAuthObj; } else { found = child.find(pathElements, index + 1, isPartialMatchOk, - (child.getAuthzObjs().size() != 0) ? child : lastAuthObj); + (!child.isAuthzObjsEmpty()) ? child : lastAuthObj); } } else { if (isPartialMatchOk) { http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java index bf6c3de..1267093 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java +++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java @@ -17,6 +17,7 @@ */ package org.apache.sentry.hdfs; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -30,11 +31,15 @@ import org.apache.sentry.hdfs.HMSPaths.Entry; import org.apache.sentry.hdfs.HMSPaths.EntryType; import org.apache.sentry.hdfs.service.thrift.TPathEntry; import org.apache.sentry.hdfs.service.thrift.TPathsDump; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { private final HMSPaths hmsPaths; + private static final Logger LOG = LoggerFactory.getLogger(HMSPathsDumper.class); + static class Tuple { private final TPathEntry entry; private final int id; @@ -65,9 +70,16 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { hmsPaths.getRootEntry(), tRootTuple.entry, counter, idMap, dups); TPathsDump dump = new TPathsDump(tRootTuple.id, idMap); + String stringDupMsg = ""; if (minimizeSize) { - dump.setDupStringValues(Arrays.asList(dups.getDupStringValues())); + String[] dupStringValues = dups.getDupStringValues(); + dump.setDupStringValues(Arrays.asList(dupStringValues)); + stringDupMsg = String.format(" %d total path strings, %d duplicate strings found, " + + "compacted to %d unique strings.", dups.nTotalStrings, dups.nDupStrings, + dupStringValues.length); } + LOG.info("Paths Dump created." + stringDupMsg); + return dump; } @@ -83,14 +95,14 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { private Tuple createTPathEntry(Entry entry, AtomicInteger idCounter, Map<Integer, TPathEntry> idMap, DupDetector dups) { int myId = idCounter.incrementAndGet(); - Set<Integer> children = entry.hasChildren() ? - new HashSet<Integer>(entry.numChildren()) : Collections.<Integer>emptySet(); + List<Integer> children = entry.hasChildren() ? + new ArrayList<Integer>(entry.numChildren()) : Collections.<Integer>emptyList(); String pathElement = entry.getPathElement(); String sameOrReplacementId = dups != null ? dups.getReplacementString(pathElement) : pathElement; TPathEntry tEntry = new TPathEntry(entry.getType().getByte(), sameOrReplacementId, children); - if (!entry.getAuthzObjs().isEmpty()) { - tEntry.setAuthzObjs(entry.getAuthzObjs()); + if (!entry.isAuthzObjsEmpty()) { + tEntry.setAuthzObjs(new ArrayList<>(entry.getAuthzObjs())); } idMap.put(myId, tEntry); return new Tuple(tEntry, myId); @@ -142,7 +154,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { child = new Entry(parent, tChildPathElement, EntryType.fromByte(tChild.getType()), tChild.getAuthzObjs()); } - if (child.getAuthzObjs().size() != 0) { + if (!child.isAuthzObjsEmpty()) { for (String authzObj: child.getAuthzObjs()) { Set<Entry> paths = authzObjToPath.get(authzObj); if (paths == null) { @@ -200,6 +212,8 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { // Size of the auxiliary string array - essentially the number of duplicate // strings that we detected and encoded. private int auxArraySize; + // For statistics/debugging + int nTotalStrings, nDupStrings; /** * Finds duplicate strings in the tree of Entry objects with the given root, @@ -215,6 +229,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { if (keys[i] != null) { if (values[i] >= MIN_NUM_DUPLICATES) { values[i] = auxArraySize++; + nDupStrings += values[i]; } else { // No duplication for this string keys[i] = null; values[i] = -1; // Just to mark invalid slots @@ -258,6 +273,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> { } private void inspectEntry(Entry entry) { + nTotalStrings++; String pathElement = entry.getPathElement(); if (pathElement.length() > AVG_ID_LENGTH) { // In the serialized data, it doesn't make sense to replace string origS http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift b/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift index 22dea02..465b421 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift +++ b/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift @@ -49,10 +49,10 @@ struct TPathEntry { 2: required string pathElement; # The child tuple id of the Path Entry. -4: required set<i32> children; +4: required list<i32> children; # A set of authzObjs associated with the Path Entry. -5: optional set<string> authzObjs; +5: optional list<string> authzObjs; } struct TPathsDump { http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java ---------------------------------------------------------------------- diff --git a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java index a0d7bdc..20ed97c 100644 --- a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java +++ b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java @@ -63,6 +63,9 @@ public class TestHMSPaths { root.toString(); Assert.assertNull(root.getParent()); Assert.assertEquals(HMSPaths.EntryType.DIR, root.getType()); + Assert.assertTrue(root.getAuthzObjsSize() == 0); + Assert.assertTrue(root.isAuthzObjsEmpty()); + // getAuthzObjs().size() is not recommended, but let's check that it works correctly Assert.assertTrue(root.getAuthzObjs().size() == 0); Assert.assertEquals(Path.SEPARATOR, root.getFullPath()); Assert.assertFalse(root.hasChildren()); @@ -127,6 +130,9 @@ public class TestHMSPaths { Assert.assertEquals(root, entry.getParent()); Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType()); Assert.assertEquals("a", entry.getPathElement()); + Assert.assertEquals(0, entry.getAuthzObjsSize()); + Assert.assertTrue(entry.isAuthzObjsEmpty()); + // getAuthzObjs().size() is not recommended, but let's check that it works correctly Assert.assertEquals(0, entry.getAuthzObjs().size()); Assert.assertEquals(Path.SEPARATOR + "a", entry.getFullPath()); Assert.assertFalse(entry.hasChildren()); @@ -171,6 +177,10 @@ public class TestHMSPaths { entry.getParent().getType()); Assert.assertEquals("b", entry.getPathElement()); Assert.assertEquals("a", entry.getParent().getPathElement()); + Assert.assertTrue(entry.getAuthzObjsSize() == 0); + Assert.assertTrue(entry.isAuthzObjsEmpty()); + Assert.assertTrue(entry.getParent().getAuthzObjsSize() == 0); + // getAuthzObjs().size() is not recommended, but let's check that it works correctly Assert.assertTrue(entry.getAuthzObjs().size() == 0); Assert.assertTrue(entry.getParent().getAuthzObjs().size() == 0); Assert.assertEquals(Path.SEPARATOR + "a" + Path.SEPARATOR + "b", @@ -296,6 +306,9 @@ public class TestHMSPaths { Assert.assertNull(root.find(new String[]{"a", "b", "t", "p1"}, true)); Assert.assertEquals(HMSPaths.EntryType.DIR, entry.getType()); + Assert.assertEquals(entry.getAuthzObjsSize(), 0); + Assert.assertTrue(entry.isAuthzObjsEmpty()); + // getAuthzObjs().size() is not recommended, but let's check that it works correctly Assert.assertEquals(entry.getAuthzObjs().size(), 0); Assert.assertNull(root.find(new String[]{"a", "b", "t", "p1"}, false));