Author: amareshwari
Date: Mon May 13 12:42:41 2013
New Revision: 1481826

URL: http://svn.apache.org/r1481826
Log:
Add configuration for valid fact tables and storage tables
Added:
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java
Modified:
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java
    hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
    hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java Mon May 13 12:42:41 2013
@@ -69,10 +69,8 @@ public class AliasReplacer implements Co
     }
     // Update the aggregate expression set
-    System.out.println("AggrSet Before:" + cubeql.aggregateExprs.toString());
     updateAggregates(selectAST, cubeql);
     updateAggregates(havingAST, cubeql);
-    System.out.println("AggrSet After:" + cubeql.aggregateExprs.toString());
   }
 
   private void replaceAliases(ASTNode node, int nodePos,
       Map<String, String> colToTableAlias) {

Added: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java?rev=1481826&view=auto
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java (added)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java Mon May 13 12:42:41 2013
@@ -0,0 +1,9 @@
+package org.apache.hadoop.hive.ql.cube.parse;
+
+public interface CubeQueryConstants {
+  public static final String VALID_FACT_TABLES = "cube.query.valid.fact.tables";
+  public static final String VALID_STORAGE_FACT_TABLES = "cube.query.valid." +
+      "fact.storagetables";
+  public static final String VALID_STORAGE_DIM_TABLES = "cube.query.valid." +
+      "dim.storagetables";
+}
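For illustration, a minimal sketch of how a client would combine the new keys
(it mirrors the TestCubeDriver changes at the end of this commit; the table
names come from the test setup and the time-range arguments are abbreviated):

    Configuration conf = new Configuration();
    // Only the named fact table is considered when resolving the cube query.
    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact");
    // Only the named storage tables are considered for facts and dimensions.
    conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES, "C1_testFact_HOURLY");
    conf.set(CubeQueryConstants.VALID_STORAGE_DIM_TABLES, "C1_citytable");
    CubeDriver driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
    String hql = driver.compileCubeQuery(
        "select SUM(msr2) from testCube where time_range_in('...', '...')");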
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java Mon May 13 12:42:41 2013
@@ -10,6 +10,7 @@ import static org.apache.hadoop.hive.ql.
 import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -447,9 +448,20 @@ public class CubeQueryContext {
     if (cube != null) {
       // go over the columns accessed in the query and find out which tables
       // can answer the query
+      String str = conf.get(CubeQueryConstants.VALID_FACT_TABLES);
+      List<String> validFactTables = StringUtils.isBlank(str) ? null :
+          Arrays.asList(StringUtils.split(str.toLowerCase()));
       for (Iterator<CubeFactTable> i = candidateFactTables.iterator();
           i.hasNext();) {
         CubeFactTable fact = i.next();
+        if (validFactTables != null) {
+          if (!validFactTables.contains(fact.getName().toLowerCase())) {
+            LOG.info("Not considering the fact table:" + fact + " as it is" +
+                " not a valid fact");
+            i.remove();
+            continue;
+          }
+        }
         List<String> factCols = cubeTabToCols.get(fact);
         List<String> validFactCols = fact.getValidColumns();
         for (String col : cubeColumnsQueried) {
@@ -509,7 +521,7 @@ public class CubeQueryContext {
     return dimensions;
   }
 
-  private String getAliasForTabName(String tabName) {
+  public String getAliasForTabName(String tabName) {
     for (String alias : qb.getTabAliases()) {
       if (qb.getTabNameForAlias(alias).equalsIgnoreCase(tabName)) {
         return alias;
@@ -906,6 +918,11 @@ public class CubeQueryContext {
     Iterator<UpdatePeriod> it = partColMap.keySet().iterator();
     while (it.hasNext()) {
       UpdatePeriod updatePeriod = it.next();
+      if (storageTableMap.get(updatePeriod) == null ||
+          storageTableMap.get(updatePeriod).isEmpty()) {
+        throw new SemanticException("No storage table available for fact " +
+            fact + " for update period " + updatePeriod);
+      }
       String storageTable = storageTableMap.get(updatePeriod).get(0);
       storageTableToQuery.put(getCube(), storageTable);
       query.append(toHQL(storageTable));
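A note on the semantics implemented here, which apply to all three keys: a
blank or unset key means no restriction, the value is a whitespace-separated
list of table names (StringUtils.split with no separator argument splits on
whitespace), and names are compared case-insensitively. The same parsing idiom
recurs in StorageTableResolver below; a hypothetical helper (not part of this
patch) would capture it:

    private static List<String> getValidTables(Configuration conf, String key) {
      String str = conf.get(key);
      // null signals "no restriction"; otherwise match lowercased names
      return StringUtils.isBlank(str) ? null
          : Arrays.asList(StringUtils.split(str.toLowerCase()));
    }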
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java Mon May 13 12:42:41 2013
@@ -1,10 +1,15 @@
 package org.apache.hadoop.hive.ql.cube.parse;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.cube.metadata.CubeDimensionTable;
 import org.apache.hadoop.hive.ql.cube.metadata.CubeFactTable;
@@ -14,8 +19,11 @@ import org.apache.hadoop.hive.ql.cube.me
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 public class StorageTableResolver implements ContextRewriter {
+  public static Log LOG = LogFactory.getLog(StorageTableResolver.class.getName());
+  private final Configuration conf;
 
   public StorageTableResolver(Configuration conf) {
+    this.conf = conf;
   }
 
   @Override
@@ -29,6 +38,10 @@ public class StorageTableResolver implem
         new HashMap<CubeFactTable,
            Map<UpdatePeriod, List<String>>>();
     Map<CubeFactTable, Map<UpdatePeriod, List<String>>> factPartMap =
         cubeql.getFactPartitionMap();
+    String str = conf.get(CubeQueryConstants.VALID_STORAGE_FACT_TABLES);
+    List<String> validFactStorageTables = StringUtils.isBlank(str) ? null :
+        Arrays.asList(StringUtils.split(str.toLowerCase()));
+
     // Find candidate tables wrt supported storages
     for (CubeFactTable fact : factPartMap.keySet()) {
       Map<UpdatePeriod, List<String>> storageTableMap =
@@ -42,36 +55,71 @@ public class StorageTableResolver implem
       for (String storage : fact.getStorages()) {
         if (cubeql.isStorageSupported(storage)) {
           String tableName = MetastoreUtil.getFactStorageTableName(
-              fact.getName(), updatePeriod, Storage.getPrefix(storage));
+              fact.getName(), updatePeriod, Storage.getPrefix(storage))
+              .toLowerCase();
+          if (validFactStorageTables != null && !validFactStorageTables
+              .contains(tableName)) {
+            LOG.info("Not considering the fact storage table:" + tableName +
+                " as it is not a valid fact storage");
+            continue;
+          }
           storageTables.add(tableName);
-          storageTableToWhereClause.put(tableName,
-              getWherePartClause(fact.getCubeName(), parts));
+          storageTableToWhereClause.put(tableName, getWherePartClause(
+              cubeql.getAliasForTabName(fact.getCubeName()), parts));
         } else {
-          System.out.println("Storage:" + storage + " is not supported");
+          LOG.info("Storage:" + storage + " is not supported");
         }
       }
     }
     cubeql.setFactStorageMap(factStorageMap);
 
+    for (Iterator<CubeFactTable> i =
+        cubeql.getCandidateFactTables().iterator(); i.hasNext();) {
+      CubeFactTable fact = i.next();
+      Map<UpdatePeriod, List<String>> storageTableMap = factStorageMap.get(
+          fact);
+      Map<UpdatePeriod, List<String>> partColMap = cubeql.getFactPartitionMap()
+          .get(fact);
+      Iterator<UpdatePeriod> it = partColMap.keySet().iterator();
+      while (it.hasNext()) {
+        UpdatePeriod updatePeriod = it.next();
+        if (storageTableMap.get(updatePeriod) == null ||
+            storageTableMap.get(updatePeriod).isEmpty()) {
+          LOG.info("Removing fact:" + fact +
+              " from candidate fact tables, as it does not have storage tables" +
+              " for update period " + updatePeriod);
+          i.remove();
+          break;
+        }
+      }
+    }
+
     // resolve dimension tables
     Map<CubeDimensionTable, List<String>> dimStorageMap =
         new HashMap<CubeDimensionTable, List<String>>();
+    str = conf.get(CubeQueryConstants.VALID_STORAGE_DIM_TABLES);
+    List<String> validDimTables = StringUtils.isBlank(str) ? null :
+        Arrays.asList(StringUtils.split(str.toLowerCase()));
     for (CubeDimensionTable dim : cubeql.getDimensionTables()) {
       List<String> storageTables = new ArrayList<String>();
       dimStorageMap.put(dim, storageTables);
       for (String storage : dim.getStorages()) {
         if (cubeql.isStorageSupported(storage)) {
           String tableName = MetastoreUtil.getDimStorageTableName(
-              dim.getName(), Storage.getPrefix(storage));
+              dim.getName(), Storage.getPrefix(storage)).toLowerCase();
+          if (validDimTables != null && !validDimTables.contains(tableName)) {
+            LOG.info("Not considering the dim storage table:" + tableName +
+                " as it is not a valid dim storage");
+            continue;
+          }
           storageTables.add(tableName);
           if (dim.hasStorageSnapshots(storage)) {
             storageTableToWhereClause.put(tableName,
-                getWherePartClause(dim.getName(), Storage
-                    .getPartitionsForLatest()));
+                getWherePartClause(cubeql.getAliasForTabName(dim.getName()),
+                    Storage.getPartitionsForLatest()));
           }
         } else {
-          System.out.println("Storage:" + storage + " is not supported");
+          LOG.info("Storage:" + storage + " is not supported");
         }
       }
     }
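The values for the two storage keys have to match the generated storage table
names, lowercased on both sides before comparison. A sketch of the expected
shapes, going by the MetastoreUtil calls above and the names used in the tests
below (the concrete names are test fixtures, not a guaranteed contract):

    // Fact storage tables combine storage prefix, fact name and update period:
    String factTable = MetastoreUtil.getFactStorageTableName(
        "testFact", UpdatePeriod.HOURLY, Storage.getPrefix("C1"));
    // -> "C1_testFact_HOURLY" (compared after toLowerCase())
    // Dimension storage tables combine storage prefix and dimension name:
    String dimTable = MetastoreUtil.getDimStorageTableName(
        "citytable", Storage.getPrefix("C1"));
    // -> "C1_citytable"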
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java Mon May 13 12:42:41 2013
@@ -115,6 +115,10 @@ public class CubeTestSetup {
         TextInputFormat.class.getCanonicalName(),
         HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
     storageAggregatePeriods.put(hdfsStorage, updates);
+    Storage hdfsStorage2 = new HDFSStorage("C2",
+        TextInputFormat.class.getCanonicalName(),
+        HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+    storageAggregatePeriods.put(hdfsStorage2, updates);
 
     // create cube fact
     client.createCubeFactTable(cubeName, factName, factColumns,

Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java Mon May 13 12:42:41 2013
@@ -6,6 +6,7 @@ import java.util.Date;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.cube.parse.CubeQueryConstants;
 import org.apache.hadoop.hive.ql.cube.parse.CubeTestSetup;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.junit.Assert;
@@ -102,6 +103,64 @@ public class TestCubeDriver {
         " where time_range_in('" + getDateUptoHours(twodaysBack) +
         "','" + getDateUptoHours(now) + "')");
     System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack) +
+        "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "C2");
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack) +
+        "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "C1");
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact2");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack) +
+        "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "");
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "C1");
+    conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
+        "C1_testFact2_HOURLY");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack) +
+        "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    // TODO fix following cases
+    try {
+      conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
+          "C1_testFact_HOURLY");
+      driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+      hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+          " where time_range_in('" + getDateUptoHours(twodaysBack) +
+          "','" + getDateUptoHours(now) + "')");
+      System.out.println("cube hql:" + hqlQuery);
+
+      conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(),
+          "C2");
+      conf.set(CubeQueryConstants.VALID_FACT_TABLES, "");
+      conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
+          "C2_testFact_HOURLY");
+      driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+      hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+          " where time_range_in('" + getDateUptoHours(twodaysBack) +
+          "','" + getDateUptoHours(now) + "')");
+      System.out.println("cube hql:" + hqlQuery);
+    } catch (SemanticException e) {
+      e.printStackTrace();
+    }
     //Assert.assertEquals(queries[1], cubeql.toHQL());
   }
 
@@ -278,6 +337,13 @@ public class TestCubeDriver {
     System.out.println("cube hql:" + hqlQuery);
 
     conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "");
+    conf.set(CubeQueryConstants.VALID_STORAGE_DIM_TABLES, "C1_citytable");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select name, stateid from citytable");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "");
+    conf.set(CubeQueryConstants.VALID_STORAGE_DIM_TABLES, "C2_citytable");
     driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     hqlQuery = driver.compileCubeQuery("select name, stateid from citytable");
     System.out.println("cube hql:" + hqlQuery);
@@ -363,8 +429,6 @@ public class TestCubeDriver {
       Assert.assertNotNull(exc);
       exc.printStackTrace();
     }
-
-
   }
 
   @Test
@@ -444,8 +508,8 @@ public class TestCubeDriver {
         " where time_range_in('" + getDateUptoHours(twodaysBack) +
         "','" + getDateUptoHours(now) + "')");
     System.out.println("cube hql:" + hqlQuery);
-    hqlQuery = driver.compileCubeQuery("select dim1, dim2, COUNT(msr1), SUM(msr2)," +
-        " msr3 from testCube" +
+    hqlQuery = driver.compileCubeQuery("select dim1, dim2, COUNT(msr1)," +
+        " SUM(msr2), msr3 from testCube" +
         " where time_range_in('" + getDateUptoHours(twodaysBack) +
         "','" + getDateUptoHours(now) + "')");
     System.out.println("cube hql:" + hqlQuery);