This is an automated email from the ASF dual-hosted git repository.

ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new dc9d1ad0965 HIVE-27595: replaced cartesian-product table filtering by 
sort + binary search (#4601)
dc9d1ad0965 is described below

commit dc9d1ad0965ece6a09abdb6481ca113dafce7c75
Author: Henrib <hbies...@gmail.com>
AuthorDate: Thu Aug 24 15:33:16 2023 +0200

    HIVE-27595: replaced cartesian-product table filtering by sort + binary 
search (#4601)
    
    * HIVE-27595: replaced cartesian-product filtering by sort + binary search 
lookup reducing complexity from m*n to (m+n)*logn; (Henri Biestro reviewed by 
John Sherman, Saihemanth)
---
 .../plugin/AuthorizationMetaStoreFilterHook.java   |  23 +-
 .../plugin/HivePrivilegeObjectUtils.java           | 125 ++++++++-
 .../plugin/metastore/HiveMetaStoreAuthorizer.java  |  49 +---
 .../metastore/TestHiveMetaStoreAuthorizer.java     |   2 +-
 .../metastore/TestTablePermissionFilterAlgos.java  | 282 +++++++++++++++++++++
 5 files changed, 425 insertions(+), 56 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
index ad218af9a05..5723022f62b 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
@@ -19,10 +19,6 @@ package 
org.apache.hadoop.hive.ql.security.authorization.plugin;
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private;
 import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
@@ -31,6 +27,7 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.TableMeta;
 import 
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;
+import static 
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils.TablePrivilegeLookup;
 import org.apache.hadoop.hive.ql.session.SessionState;
 
 import org.slf4j.Logger;
@@ -162,20 +159,18 @@ public class AuthorizationMetaStoreFilterHook extends 
DefaultMetaStoreFilterHook
     return objs;
   }
 
-  private ImmutablePair<String, String> tableMetaKey(String dbName, String 
tableName) {
-    return new ImmutablePair(dbName, tableName);
-  }
-
   @Override
   public List<TableMeta> filterTableMetas(List<TableMeta> tableMetas) throws 
MetaException {
     List<HivePrivilegeObject> listObjs = tableMetasToPrivilegeObjs(tableMetas);
     List<HivePrivilegeObject> filteredList = getFilteredObjects(listObjs);
-    Set<ImmutablePair<String, String>> filteredNames = filteredList.stream()
-        .map(e -> tableMetaKey(e.getDbname(), e.getObjectName()))
-        .collect(Collectors.toSet());
-    return tableMetas.stream()
-        .filter(e -> filteredNames.contains(tableMetaKey(e.getDbName(), 
e.getTableName())))
-        .collect(Collectors.toList());
+    final List<TableMeta> ret = new ArrayList<>();
+    final TablePrivilegeLookup index = new TablePrivilegeLookup(filteredList);
+    for(TableMeta table : tableMetas) {
+      if (index.lookup(table.getDbName(), table.getTableName()) != null) {
+        ret.add(table);
+      }
+    }
+    return ret;
   }
 
   @Override
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
index 4ab7870e7b3..479dce09ab7 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
@@ -19,8 +19,6 @@ package 
org.apache.hadoop.hive.ql.security.authorization.plugin;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
@@ -59,7 +57,130 @@ public class HivePrivilegeObjectUtils {
       objs.add(new HivePrivilegeObject(HivePrivilegeObjectType.DATACONNECTOR, 
null, dcname));
     }
     return objs;
+  }
+
+  /**
+   * A helper enabling efficient lookup of a table in a list by database name, 
table name.
+   * <p>When filtering a source list of tables by checking their presence in 
another permission list
+   * using the database name, table name, we need to avoid performing a 
cartesian product. This
+   * product stems from checking each table from the source (n tables) by 
comparing it to each
+   * table in the permission list (m permissions), thus an n * m 
complexity.</p>
+   * <p>This class reduces the complexity of the lookup in the permission by 
sorting them and
+   * using a binary search; the sort cost is m*log(m) and each lookup is 
log(m), the overall
+   * complexity of a check for all tables is in the order of m*log(m) + 
n*log(m) = (m + n)*log(n),
+   * close to m * log(n).</p>
+   *
+   */
+  public abstract static class TableLookup<T> {
+    /** The container. */
+    private final T[] index;
+
+    /**
+     * @param table the table
+     * @return the database name of the table
+     */
+    protected abstract String getDbName(T table);
+
+    /**
+     * @param table the table
+     * @return the table name of the table
+     */
+    protected abstract String getTableName(T table);
+
+    /**
+     * Compares a table to another by names.
+     * @param table the table
+     * @param arg the argument table
+     * @return &lt; 0, 0, &gt; 0
+     */
+    private int compareNames(final T table, final T arg) {
+      int cmp = getDbName(table).compareTo(getDbName(arg));
+      if (cmp == 0) {
+        String argTableName = getTableName(arg);
+        if (argTableName != null) {
+          cmp = getTableName(table).compareTo(argTableName);
+        }
+      }
+      return cmp;
+    }
+
+    /**
+     * Compares a table to names.
+     * @param table the table
+     * @param dbName the argument database name
+     * @param dbName the argument table name
+     * @return &lt; 0, 0, &gt; 0
+     */
+    private int compareNames(final T table, final String dbName, final String 
tableName) {
+      int cmp = getDbName(table).compareTo(dbName);
+      if (cmp == 0 && tableName != null) {
+        cmp = getTableName(table).compareTo(tableName);
+      }
+      return cmp;
+    }
+
+    /**
+     * Creates a ByName
+     * @param tables the tables to consider as a clique
+     */
+    protected TableLookup(List<T> tables) {
+      if (tables != null) {
+        index = tables.toArray((T[]) new Object[0]);
+        // sort them by names
+        Arrays.sort(index, this::compareNames);
+      } else {
+        index = (T[]) new Object[0];
+      }
+    }
 
+    /**
+     * Lookup using dichotomy using order described by database name, table 
name.
+     * @param dbName the database name
+     * @param tableName the table name
+     * @return the table if found in the index, null otherwise
+     */
+    public final T lookup(final String dbName, final String tableName) {
+      int low = 0;
+      int high = index.length - 1;
+      while (low <= high) {
+        int mid = (low + high) >>> 1;
+        T item = index[mid];
+        int cmp = compareNames(item, dbName, tableName);
+        if (cmp < 0) {
+          low = mid + 1;
+        } else if (cmp > 0) {
+          high = mid - 1;
+        } else {
+          return item; // key found
+        }
+      }
+      return null;  // key not found.
+    }
+
+    /**
+     * Checks whether a given table is present in this set.
+     * @param tt the table to check
+     * @return true if the set contains an item having the same database and 
table name
+     */
+    public final boolean contains(T tt) {
+      return lookup(getDbName(tt), getTableName(tt)) != null;
+    }
   }
 
+  /**
+   * Specialized lookup for table privilege (HivePrivilegeObject)..
+   */
+  public static class TablePrivilegeLookup extends 
TableLookup<HivePrivilegeObject> {
+    public TablePrivilegeLookup(List<HivePrivilegeObject> tables) {
+      super(tables);
+    }
+
+    @Override protected String getDbName(HivePrivilegeObject o) {
+      return o.getDbname();
+    }
+
+    @Override protected String getTableName(HivePrivilegeObject o) {
+      return o.getObjectName();
+    }
+  }
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
index 55a980bc092..7f4cda8d922 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.metastore.api.TableMeta;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.security.HiveMetastoreAuthenticationProvider;
+import static 
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils.TablePrivilegeLookup;
 import 
org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.events.*;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer;
 import 
org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory;
@@ -351,30 +352,16 @@ public class HiveMetaStoreAuthorizer extends 
MetaStorePreEventListener implement
   }
 
   private List<Table> getFilteredTableList(List<HivePrivilegeObject> 
hivePrivilegeObjects, List<Table> tableList) {
-    List<Table> ret = new ArrayList<>();
-    for (HivePrivilegeObject hivePrivilegeObject : hivePrivilegeObjects) {
-      String dbName = hivePrivilegeObject.getDbname();
-      String tblName = hivePrivilegeObject.getObjectName();
-      Table table = getFilteredTable(dbName, tblName, tableList);
-      if (table != null) {
+    final List<Table> ret = new ArrayList<>();
+    final TablePrivilegeLookup index = new 
TablePrivilegeLookup(hivePrivilegeObjects);
+    for(Table table : tableList) {
+      if (index.lookup(table.getDbName(), table.getTableName()) != null) {
         ret.add(table);
       }
     }
     return ret;
   }
 
-  private Table getFilteredTable(String dbName, String tblName, List<Table> 
tableList) {
-    Table ret = null;
-    for (Table table: tableList) {
-      String databaseName = table.getDbName();
-      String tableName = table.getTableName();
-      if (dbName.equals(databaseName) && tblName.equals(tableName)) {
-        ret = table;
-        break;
-      }
-    }
-    return ret;
-  }
 
   private List<String> filterTableNames(HiveMetaStoreAuthzInfo 
hiveMetaStoreAuthzInfo, String dbName,
       List<String> tableNames) throws MetaException {
@@ -396,32 +383,16 @@ public class HiveMetaStoreAuthorizer extends 
MetaStorePreEventListener implement
     }
     return ret;
   }
-
-  private List<String> getFilteredTableNames(List<HivePrivilegeObject> 
hivePrivilegeObjects, String databaseName,
-      List<String> tableNames) {
+  private List<String> getFilteredTableNames(List<HivePrivilegeObject> 
hivePrivilegeObjects, String databaseName, List<String> tableNames) {
     List<String> ret = new ArrayList<>();
-    for (HivePrivilegeObject hivePrivilegeObject : hivePrivilegeObjects) {
-      String dbName = hivePrivilegeObject.getDbname();
-      String tblName = hivePrivilegeObject.getObjectName();
-      String table = getFilteredTableNames(dbName, tblName, databaseName, 
tableNames);
-      if (table != null) {
-        ret.add(table);
-      }
-    }
-    return ret;
-  }
-
-  private String getFilteredTableNames(String dbName, String tblName, String 
databaseName, List<String> tableNames) {
-    String ret = null;
-    for (String tableName : tableNames) {
-      if (dbName.equals(databaseName) && tblName.equals(tableName)) {
-        ret = tableName;
-        break;
+    final TablePrivilegeLookup index = new 
TablePrivilegeLookup(hivePrivilegeObjects);
+    for(String tableName : tableNames) {
+      if (index.lookup(databaseName, tableName) != null) {
+        ret.add(tableName);
       }
     }
     return ret;
   }
-
   private String getDBName(String str) {
    return (str != null) ? str.substring(str.indexOf("#")+1) : null;
   }
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
index bb191ba8605..ec9a93c0837 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
@@ -36,8 +36,8 @@ import org.junit.FixMethodOrder;
 import org.junit.runners.MethodSorters;
 import org.junit.Before;
 import org.junit.Test;
-import java.util.Map;
 
+import java.util.Map;
 import java.io.File;
 
 import static org.junit.Assert.assertEquals;
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestTablePermissionFilterAlgos.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestTablePermissionFilterAlgos.java
new file mode 100644
index 00000000000..0f0a2ff996e
--- /dev/null
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestTablePermissionFilterAlgos.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.security.authorization.plugin.metastore;
+
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import 
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * This test tries different methods of lookup to try and determine the most 
efficient in
+ * the particular usage case related to checking tables against permissions.
+ * Note that a meaningful performance test requires at least 20 dbs, 10000 
tables per db and 40 loops
+ * on a 2022 laptop.
+ */
+public class TestTablePermissionFilterAlgos {
+  private static final Logger LOGGER = 
LogManager.getLogger(TestHiveMetaStoreAuthorizer.class);
+  // reduce constants to speed up tests when true, set to false for 
benchmarking
+  private static final boolean TEST = true;
+  // Whether we shuffle table names or not: this has an important effect on 
sort speed - mostly sorted
+  // arrays sort faster than completely unsorted ones.
+  // A hashset of concatenated strings seem to be faster than a sort in that 
case.
+  // I suspect something fishy in the cost of string creation (intern()?) 
since a hashset of pairs is much slower.
+  // We'll make the semi-educated assumption that most of the time, tablenames 
come back mostly ordered.
+  private static final boolean SHUFFLE_TABLENAMES = !TEST;
+  private static final int NUM_DB = TEST? 2 : 20;
+  private static final int NUM_TBL = TEST? 100 : 10000;
+  private static final int NLOOPS = TEST? 1 : 40;
+
+  private final List<TestTable> tables = createTables(NUM_DB, NUM_TBL);
+  // lookup some
+  private final List<TestTable> filtered = tables.stream()
+      .filter(t -> t.getTableName().endsWith("0"))
+      .collect(Collectors.toList());
+
+  /**
+   * Fake tables to test.
+   */
+  private static class TestTable {
+    private int hashCode = -1;
+    private final String dbName;
+    private final String tableName;
+
+    private TestTable(String dbName, String tableName) {
+      this.dbName = dbName;
+      this.tableName = tableName;
+    }
+
+    @Override
+    public String toString() {
+      return tableFullName(this);
+    }
+
+    public String getDbName() {
+      return dbName;
+    }
+
+    public String getTableName() {
+      return tableName;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (o instanceof TestTable) {
+        TestTable tt = (TestTable) o;
+        return dbName.equals(tt.dbName) && tableName.equals(tt.tableName);
+      }
+      return false;
+    }
+
+    @Override
+    public int hashCode() {
+       int h = hashCode;
+       if (h < 0) {
+         hashCode = h = Math.abs((31 * dbName.hashCode()) + 
tableName.hashCode());
+       }
+       return h;
+    }
+  }
+
+  /**
+   * Creates a list of tables.
+   * @param dbs number of databases
+   * @param tbls number of tables per database
+   * @return a list of tables
+   */
+  private static List<TestTable> createTables(int dbs, int tbls) {
+    Random rnd = new Random(20230815L);
+    List<Integer> dbis = new ArrayList<>(dbs);
+    for(int i = 0; i < dbs; ++i) {
+      dbis.add(i);
+    }
+    Collections.shuffle(dbis, rnd);
+    List<Integer> tblis = new ArrayList<>(tbls);
+    for(int i = 0; i < tbls; ++i) {
+      tblis.add(i);
+    }
+    if (SHUFFLE_TABLENAMES) {
+      Collections.shuffle(tblis, rnd);
+    }
+
+    List<TestTable> tables  = new ArrayList<>(dbs * tbls);
+    for(int d : dbis) {
+      for(int t : tblis) {
+        String dbname = String.format("db%05x", d);
+        String tblName = String.format("tbl%05x", t);
+        tables.add(new TestTable(dbname, tblName));
+      }
+    }
+    return tables;
+  }
+
+  /**
+   * Allows timing code blocks execution precisely.
+   */
+  static class Chrono {
+    long start;
+    long end;
+
+    void start() {
+      start = System.currentTimeMillis();
+    }
+    void stop() {
+      end = System.currentTimeMillis();
+    }
+    String elapse() {
+      return String.format("%.3f", (double) (end - start) / 1000d);
+    }
+  }
+
+  /**
+   * Specialized for TestTable purpose.
+   */
+  private static class TestLookup extends 
HivePrivilegeObjectUtils.TableLookup<TestTable> {
+    protected TestLookup(List<TestTable> tables) {
+      super(tables);
+    }
+
+    @Override protected String getDbName(TestTable o) {
+      return o.getDbName();
+    }
+
+    @Override protected String getTableName(TestTable o) {
+      return o.getTableName();
+    }
+  };
+
+  @Test
+  public void testIndexedLookup() {
+    System.gc();
+    Chrono chrono = new Chrono();
+    chrono.start();
+    int cnt = 0;
+    for(int l = 0; l < NLOOPS; ++l) {
+      TestLookup index = new TestLookup(tables);
+      List<TestTable> tts = l % 2 == 1
+          ? tables
+          : filtered;
+        cnt = 0;
+        // lookup all
+        for (TestTable tt : tts) {
+          if (index.contains(tt)) {
+            cnt += 1;
+          }
+        }
+      Assert.assertEquals(cnt, tts.size());
+    }
+    chrono.stop();
+    LOGGER.info("lookup elapse " + chrono.elapse());
+  }
+
+  private static ImmutablePair<String,String> tableKey(TestTable tt) {
+    return new ImmutablePair<>(tt.getDbName(), tt.getTableName());
+  }
+
+  @Test
+  public void testKeyPairLookup() {
+    System.gc();
+    Chrono chrono = new Chrono();
+    chrono.start();
+    for(int l = 0; l < NLOOPS; ++l) {
+      Set<ImmutablePair<String, String>> index = tables.stream()
+          .map(TestTablePermissionFilterAlgos::tableKey)
+          .collect(Collectors.toSet());
+      List<TestTable> tts = l % 2 == 1
+          ? tables
+          : filtered;
+      int cnt = 0;
+      // lookup all
+      for (TestTable tt : tts) {
+        if (index.contains(tableKey(tt))) {
+          cnt += 1;
+        }
+      }
+      Assert.assertEquals(cnt, tts.size());
+    }
+    chrono.stop();
+    LOGGER.info("set pair elapse " + chrono.elapse());
+  }
+
+  @Test
+  public void testTestTableLookup() {
+    System.gc();
+    Chrono chrono = new Chrono();
+    chrono.start();
+    for(int l = 0; l < NLOOPS; ++l) {
+      Set<TestTable> index = new HashSet<>(tables);
+      List<TestTable> tts = l % 2 == 1
+          ? tables
+          : filtered;
+      int cnt = 0;
+      // lookup all
+      for (TestTable tt : tts) {
+        if (index.contains(tt)) {
+          cnt += 1;
+        }
+      }
+      Assert.assertEquals(cnt, tts.size());
+    }
+    chrono.stop();
+    LOGGER.info("set table elapse " + chrono.elapse());
+  }
+
+
+  private static String tableFullName(TestTable tt) {
+    return tt.getDbName() + "." + tt.getTableName();
+  }
+
+  @Test
+  public void testFullNameLookup() {
+    System.gc();
+    Chrono chrono = new Chrono();
+    chrono.start();
+    for(int l = 0; l < NLOOPS; ++l) {
+      Set<String> index = tables.stream()
+          .map(TestTablePermissionFilterAlgos::tableFullName)
+          .collect(Collectors.toSet());
+      List<TestTable> tts = l % 2 == 1
+          ? tables
+          : filtered;
+      int cnt = 0;
+      // lookup all
+      for (TestTable tt : tts) {
+        if (index.contains(tableFullName(tt))) {
+          cnt += 1;
+        }
+      }
+      Assert.assertEquals(cnt, tts.size());
+    }
+    chrono.stop();
+    LOGGER.info("set fullname elapse " + chrono.elapse());
+  }
+}

Reply via email to