Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Tue Nov 18 00:48:40 2014 @@ -32,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; +import javax.jdo.JDODataStoreException; import javax.jdo.PersistenceManager; import javax.jdo.Query; import javax.jdo.Transaction; @@ -40,6 +41,10 @@ import javax.jdo.datastore.JDOConnection import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.derby.iapi.error.StandardException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -80,64 +85,108 @@ import com.google.common.collect.Lists; * to SQL stores only. There's always a way to do without direct SQL. */ class MetaStoreDirectSql { - private static final Log LOG = LogFactory.getLog(MetaStoreDirectSql.class); + private static enum DB { + MYSQL, + ORACLE, + MSSQL, + DERBY, + OTHER + } + + private static final int NO_BATCHING = -1, DETECT_BATCHING = 0; + private static final Log LOG = LogFactory.getLog(MetaStoreDirectSql.class); private final PersistenceManager pm; /** - * We want to avoid db-specific code in this class and stick with ANSI SQL. However, mysql - * and postgres are differently ansi-incompatible (mysql by default doesn't support quoted - * identifiers, and postgres contravenes ANSI by coercing unquoted ones to lower case). + * We want to avoid db-specific code in this class and stick with ANSI SQL. However: + * 1) mysql and postgres are differently ansi-incompatible (mysql by default doesn't support + * quoted identifiers, and postgres contravenes ANSI by coercing unquoted ones to lower case). * MySQL's way of working around this is simpler (just set ansi quotes mode on), so we will - * use that. MySQL detection is done by actually issuing the set-ansi-quotes command. + * use that. MySQL detection is done by actually issuing the set-ansi-quotes command; + * + * Use sparingly, we don't want to devolve into another DataNucleus... */ - private final boolean isMySql; + private final DB dbType; + private final int batchSize; /** * Whether direct SQL can be used with the current datastore backing {@link #pm}. */ private final boolean isCompatibleDatastore; - - public MetaStoreDirectSql(PersistenceManager pm) { + + public MetaStoreDirectSql(PersistenceManager pm, Configuration conf) { this.pm = pm; - Transaction tx = pm.currentTransaction(); - tx.begin(); - boolean isMySql = false; + this.dbType = determineDbType(); + int batchSize = HiveConf.getIntVar(conf, ConfVars.METASTORE_DIRECT_SQL_PARTITION_BATCH_SIZE); + if (batchSize == DETECT_BATCHING) { + batchSize = (dbType == DB.ORACLE || dbType == DB.MSSQL) ? 
1000 : NO_BATCHING; + } + this.batchSize = batchSize; + + this.isCompatibleDatastore = ensureDbInit() && runTestQuery(); + if (isCompatibleDatastore) { + LOG.info("Using direct SQL, underlying DB is " + dbType); + } + } + + private DB determineDbType() { + DB dbType = DB.OTHER; + if (runDbCheck("SET @@session.sql_mode=ANSI_QUOTES", "MySql")) { + dbType = DB.MYSQL; + } else if (runDbCheck("SELECT version FROM v$instance", "Oracle")) { + dbType = DB.ORACLE; + } else if (runDbCheck("SELECT @@version", "MSSQL")) { + dbType = DB.MSSQL; + } else { + // TODO: maybe we should use getProductName to identify all the DBs + String productName = getProductName(); + if (productName != null && productName.toLowerCase().contains("derby")) { + dbType = DB.DERBY; + } + } + return dbType; + } + + private String getProductName() { + JDOConnection jdoConn = pm.getDataStoreConnection(); try { - trySetAnsiQuotesForMysql(); - isMySql = true; - } catch (SQLException sqlEx) { - LOG.info("MySQL check failed, assuming we are not on mysql: " + sqlEx.getMessage()); - tx.rollback(); - tx = pm.currentTransaction(); - tx.begin(); + return ((Connection)jdoConn.getNativeConnection()).getMetaData().getDatabaseProductName(); + } catch (Throwable t) { + LOG.warn("Error retrieving product name", t); + return null; + } finally { + jdoConn.close(); // We must release the connection before we call other pm methods. } + } - boolean isCompatibleDatastore = true; + private boolean ensureDbInit() { + Transaction tx = pm.currentTransaction(); try { // Force the underlying db to initialize. pm.newQuery(MDatabase.class, "name == ''").execute(); pm.newQuery(MTableColumnStatistics.class, "dbName == ''").execute(); pm.newQuery(MPartitionColumnStatistics.class, "dbName == ''").execute(); + return true; } catch (Exception ex) { - isCompatibleDatastore = false; - LOG.error("Database initialization failed; direct SQL is disabled", ex); + LOG.warn("Database initialization failed; direct SQL is disabled", ex); tx.rollback(); + return false; } - if (isCompatibleDatastore) { - // Self-test query. If it doesn't work, we will self-disable. What a PITA... - String selfTestQuery = "select \"DB_ID\" from \"DBS\""; - try { - pm.newQuery("javax.jdo.query.SQL", selfTestQuery).execute(); - tx.commit(); - } catch (Exception ex) { - isCompatibleDatastore = false; - LOG.error("Self-test query [" + selfTestQuery + "] failed; direct SQL is disabled", ex); - tx.rollback(); - } - } + } - this.isCompatibleDatastore = isCompatibleDatastore; - this.isMySql = isMySql; + private boolean runTestQuery() { + Transaction tx = pm.currentTransaction(); + // Run a self-test query. If it doesn't work, we will self-disable. What a PITA... 
+ String selfTestQuery = "select \"DB_ID\" from \"DBS\""; + try { + pm.newQuery("javax.jdo.query.SQL", selfTestQuery).execute(); + tx.commit(); + return true; + } catch (Exception ex) { + LOG.warn("Self-test query [" + selfTestQuery + "] failed; direct SQL is disabled", ex); + tx.rollback(); + return false; + } } public boolean isCompatibleDatastore() { @@ -150,22 +199,16 @@ class MetaStoreDirectSql { * here - for eg., for MySQL, we signal that we want to use ANSI SQL quoting behaviour */ private void doDbSpecificInitializationsBeforeQuery() throws MetaException { - if (!isMySql) return; + if (dbType != DB.MYSQL) return; try { assert pm.currentTransaction().isActive(); // must be inside tx together with queries - trySetAnsiQuotesForMysql(); + executeNoResult("SET @@session.sql_mode=ANSI_QUOTES"); } catch (SQLException sqlEx) { throw new MetaException("Error setting ansi quotes: " + sqlEx.getMessage()); } } - /** - * MySQL, by default, doesn't recognize ANSI quotes which we need to have for Postgres. - * Try to set the ANSI quotes mode on for the session. Due to connection pooling, needs - * to be called in the same transaction as the actual queries. - */ - private void trySetAnsiQuotesForMysql() throws SQLException { - final String queryText = "SET @@session.sql_mode=ANSI_QUOTES"; + private void executeNoResult(final String queryText) throws SQLException { JDOConnection jdoConn = pm.getDataStoreConnection(); boolean doTrace = LOG.isDebugEnabled(); try { @@ -177,6 +220,23 @@ class MetaStoreDirectSql { } } + private boolean runDbCheck(String queryText, String name) { + Transaction tx = pm.currentTransaction(); + if (!tx.isActive()) { + tx.begin(); + } + try { + executeNoResult(queryText); + return true; + } catch (Throwable t) { + LOG.debug(name + " check failed, assuming we are not on " + name + ": " + t.getMessage()); + tx.rollback(); + tx = pm.currentTransaction(); + tx.begin(); + return false; + } + } + public Database getDatabase(String dbName) throws MetaException{ Query queryDbSelector = null; Query queryDbParams = null; @@ -197,8 +257,8 @@ class MetaStoreDirectSql { + " with param [" + params[0] + "]"); } - @SuppressWarnings("unchecked") - List<Object[]> sqlResult = (List<Object[]>)queryDbSelector.executeWithArray(params); + List<Object[]> sqlResult = executeWithArray( + queryDbSelector, params, queryTextDbSelector); if ((sqlResult == null) || sqlResult.isEmpty()) { return null; } @@ -209,7 +269,7 @@ class MetaStoreDirectSql { } Object[] dbline = sqlResult.get(0); - Long dbid = StatObjectConverter.extractSqlLong(dbline[0]); + Long dbid = extractSqlLong(dbline[0]); String queryTextDbParams = "select \"PARAM_KEY\", \"PARAM_VALUE\" " + " FROM \"DATABASE_PARAMS\" " @@ -223,10 +283,11 @@ class MetaStoreDirectSql { } Map<String,String> dbParams = new HashMap<String,String>(); - List<Object[]> sqlResult2 = ensureList(queryDbParams.executeWithArray(params)); + List<Object[]> sqlResult2 = ensureList(executeWithArray( + queryDbParams, params, queryTextDbParams)); if (!sqlResult2.isEmpty()) { for (Object[] line : sqlResult2) { - dbParams.put(extractSqlString(line[0]),extractSqlString(line[1])); + dbParams.put(extractSqlString(line[0]), extractSqlString(line[1])); } } Database db = new Database(); @@ -256,20 +317,20 @@ class MetaStoreDirectSql { /** * Gets partitions by using direct SQL queries. + * Note that batching is not needed for this method - list of names implies the batch size; * @param dbName Metastore db name. * @param tblName Metastore table name. 
* @param partNames Partition names to get. - * @param max The maximum number of partitions to return. * @return List of partitions. */ public List<Partition> getPartitionsViaSqlFilter( - String dbName, String tblName, List<String> partNames, Integer max) throws MetaException { + String dbName, String tblName, List<String> partNames) throws MetaException { if (partNames.isEmpty()) { return new ArrayList<Partition>(); } return getPartitionsViaSqlFilterInternal(dbName, tblName, null, "\"PARTITIONS\".\"PART_NAME\" in (" + makeParams(partNames.size()) + ")", - partNames, new ArrayList<String>(), max); + partNames, new ArrayList<String>(), null); } /** @@ -284,12 +345,16 @@ class MetaStoreDirectSql { assert tree != null; List<Object> params = new ArrayList<Object>(); List<String> joins = new ArrayList<String>(); - String sqlFilter = PartitionFilterGenerator.generateSqlFilter(table, tree, params, joins); + // Derby and Oracle do not interpret filters ANSI-properly in some cases and need a workaround. + boolean dbHasJoinCastBug = (dbType == DB.DERBY || dbType == DB.ORACLE); + String sqlFilter = PartitionFilterGenerator.generateSqlFilter( + table, tree, params, joins, dbHasJoinCastBug); if (sqlFilter == null) { return null; // Cannot make SQL filter to push down. } + Boolean isViewTable = isViewTable(table); return getPartitionsViaSqlFilterInternal(table.getDbName(), table.getTableName(), - isViewTable(table), sqlFilter, params, joins, max); + isViewTable, sqlFilter, params, joins, max); } /** @@ -317,7 +382,7 @@ class MetaStoreDirectSql { Object[] params = new Object[] { tblName, dbName }; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); query.setUnique(true); - Object result = query.executeWithArray(params); + Object result = executeWithArray(query, params, queryText); return (result != null) && result.toString().equals(TableType.VIRTUAL_VIEW.toString()); } @@ -374,26 +439,46 @@ class MetaStoreDirectSql { if (max != null) { query.setRange(0, max.shortValue()); } - @SuppressWarnings("unchecked") - List<Object> sqlResult = (List<Object>)query.executeWithArray(params); + List<Object> sqlResult = executeWithArray(query, params, queryText); long queryTime = doTrace ? System.nanoTime() : 0; if (sqlResult.isEmpty()) { timingTrace(doTrace, queryText, start, queryTime); return new ArrayList<Partition>(); // no partitions, bail early. } + // Get full objects. For Oracle, do it in batches. + List<Partition> result = null; + if (batchSize != NO_BATCHING && batchSize < sqlResult.size()) { + result = new ArrayList<Partition>(sqlResult.size()); + while (result.size() < sqlResult.size()) { + int toIndex = Math.min(result.size() + batchSize, sqlResult.size()); + List<Object> batchedSqlResult = sqlResult.subList(result.size(), toIndex); + result.addAll(getPartitionsFromPartitionIds(dbName, tblName, isView, batchedSqlResult)); + } + } else { + result = getPartitionsFromPartitionIds(dbName, tblName, isView, sqlResult); + } + + timingTrace(doTrace, queryText, start, queryTime); + query.closeAll(); + return result; + } + + private List<Partition> getPartitionsFromPartitionIds(String dbName, String tblName, + Boolean isView, List<Object> partIdList) throws MetaException { + boolean doTrace = LOG.isDebugEnabled(); + int idStringWidth = (int)Math.ceil(Math.log10(partIdList.size())) + 1; // 1 for comma + int sbCapacity = partIdList.size() * idStringWidth; // Prepare StringBuilder for "PART_ID in (...)" to use in future queries. 
- int sbCapacity = sqlResult.size() * 7; // if there are 100k things => 6 chars, plus comma StringBuilder partSb = new StringBuilder(sbCapacity); - // Assume db and table names are the same for all partition, that's what we're selecting for. - for (Object partitionId : sqlResult) { - partSb.append(StatObjectConverter.extractSqlLong(partitionId)).append(","); + for (Object partitionId : partIdList) { + partSb.append(extractSqlLong(partitionId)).append(","); } String partIds = trimCommaList(partSb); - timingTrace(doTrace, queryText, start, queryTime); - // Now get most of the other fields. - queryText = + // Get most of the fields for the IDs provided. + // Assume db and table names are the same for all partition, as provided in arguments. + String queryText = "select \"PARTITIONS\".\"PART_ID\", \"SDS\".\"SD_ID\", \"SDS\".\"CD_ID\"," + " \"SERDES\".\"SERDE_ID\", \"PARTITIONS\".\"CREATE_TIME\"," + " \"PARTITIONS\".\"LAST_ACCESS_TIME\", \"SDS\".\"INPUT_FORMAT\", \"SDS\".\"IS_COMPRESSED\"," @@ -403,11 +488,11 @@ class MetaStoreDirectSql { + " left outer join \"SDS\" on \"PARTITIONS\".\"SD_ID\" = \"SDS\".\"SD_ID\" " + " left outer join \"SERDES\" on \"SDS\".\"SERDE_ID\" = \"SERDES\".\"SERDE_ID\" " + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc"; - start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", queryText); + long start = doTrace ? System.nanoTime() : 0; + Query query = pm.newQuery("javax.jdo.query.SQL", queryText); @SuppressWarnings("unchecked") - List<Object[]> sqlResult2 = (List<Object[]>)query.executeWithArray(params); - queryTime = doTrace ? System.nanoTime() : 0; + List<Object[]> sqlResult = executeWithArray(query, null, queryText); + long queryTime = doTrace ? System.nanoTime() : 0; // Read all the fields and create partitions, SDs and serdes. TreeMap<Long, Partition> partitions = new TreeMap<Long, Partition>(); @@ -415,19 +500,19 @@ class MetaStoreDirectSql { TreeMap<Long, SerDeInfo> serdes = new TreeMap<Long, SerDeInfo>(); TreeMap<Long, List<FieldSchema>> colss = new TreeMap<Long, List<FieldSchema>>(); // Keep order by name, consistent with JDO. - ArrayList<Partition> orderedResult = new ArrayList<Partition>(sqlResult.size()); + ArrayList<Partition> orderedResult = new ArrayList<Partition>(partIdList.size()); // Prepare StringBuilder-s for "in (...)" lists to use in one-to-many queries. StringBuilder sdSb = new StringBuilder(sbCapacity), serdeSb = new StringBuilder(sbCapacity); StringBuilder colsSb = new StringBuilder(7); // We expect that there's only one field schema. tblName = tblName.toLowerCase(); dbName = dbName.toLowerCase(); - for (Object[] fields : sqlResult2) { + for (Object[] fields : sqlResult) { // Here comes the ugly part... - long partitionId = StatObjectConverter.extractSqlLong(fields[0]); - Long sdId = StatObjectConverter.extractSqlLong(fields[1]); - Long colId = StatObjectConverter.extractSqlLong(fields[2]); - Long serdeId = StatObjectConverter.extractSqlLong(fields[3]); + long partitionId = extractSqlLong(fields[0]); + Long sdId = extractSqlLong(fields[1]); + Long colId = extractSqlLong(fields[2]); + Long serdeId = extractSqlLong(fields[3]); // A partition must have either everything set, or nothing set if it's a view. 
if (sdId == null || colId == null || serdeId == null) { if (isView == null) { @@ -596,7 +681,7 @@ class MetaStoreDirectSql { currentListId = null; t.getSkewedInfo().addToSkewedColValues(new ArrayList<String>()); } else { - long fieldsListId = StatObjectConverter.extractSqlLong(fields[1]); + long fieldsListId = extractSqlLong(fields[1]); if (currentListId == null || fieldsListId != currentListId) { currentList = new ArrayList<String>(); currentListId = fieldsListId; @@ -638,7 +723,7 @@ class MetaStoreDirectSql { currentList = new ArrayList<String>(); // left outer join produced a list with no values currentListId = null; } else { - long fieldsListId = StatObjectConverter.extractSqlLong(fields[1]); + long fieldsListId = extractSqlLong(fields[1]); if (currentListId == null || fieldsListId != currentListId) { currentList = new ArrayList<String>(); currentListId = fieldsListId; @@ -683,6 +768,14 @@ class MetaStoreDirectSql { (System.nanoTime() - queryTime) / 1000000.0 + "ms, the query is [" + queryText + "]"); } + static Long extractSqlLong(Object obj) throws MetaException { + if (obj == null) return null; + if (!(obj instanceof Number)) { + throw new MetaException("Expected numeric type but got " + obj.getClass().getName()); + } + return ((Number)obj).longValue(); + } + private static Boolean extractSqlBoolean(Object value) throws MetaException { // MySQL has booleans, but e.g. Derby uses 'Y'/'N' mapping. People using derby probably // don't care about performance anyway, but let's cover the common case. @@ -749,7 +842,7 @@ class MetaStoreDirectSql { if (fields == null) { fields = iter.next(); } - long nestedId = StatObjectConverter.extractSqlLong(fields[keyIndex]); + long nestedId = extractSqlLong(fields[keyIndex]); if (nestedId < id) throw new MetaException("Found entries for unknown ID " + nestedId); if (nestedId > id) break; // fields belong to one of the next entries func.apply(entry.getValue(), fields); @@ -767,12 +860,14 @@ class MetaStoreDirectSql { private final FilterBuilder filterBuffer; private final List<Object> params; private final List<String> joins; + private final boolean dbHasJoinCastBug; private PartitionFilterGenerator( - Table table, List<Object> params, List<String> joins) { + Table table, List<Object> params, List<String> joins, boolean dbHasJoinCastBug) { this.table = table; this.params = params; this.joins = joins; + this.dbHasJoinCastBug = dbHasJoinCastBug; this.filterBuffer = new FilterBuilder(false); } @@ -783,13 +878,14 @@ class MetaStoreDirectSql { * @param joins the joins necessary for the resulting expression * @return the string representation of the expression tree */ - public static String generateSqlFilter(Table table, - ExpressionTree tree, List<Object> params, List<String> joins) throws MetaException { + private static String generateSqlFilter(Table table, ExpressionTree tree, + List<Object> params, List<String> joins, boolean dbHasJoinCastBug) throws MetaException { assert table != null; if (tree.getRoot() == null) { return ""; } - PartitionFilterGenerator visitor = new PartitionFilterGenerator(table, params, joins); + PartitionFilterGenerator visitor = new PartitionFilterGenerator( + table, params, joins, dbHasJoinCastBug); tree.accept(visitor); if (visitor.filterBuffer.hasError()) { LOG.info("Unable to push down SQL filter: " + visitor.filterBuffer.getErrorMessage()); @@ -928,11 +1024,15 @@ class MetaStoreDirectSql { tableValue = "cast(" + tableValue + " as date)"; } - // This is a workaround for DERBY-6358; as such, it is pretty horrible. 
- tableValue = "(case when \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? then " - + tableValue + " else null end)"; - params.add(table.getTableName().toLowerCase()); - params.add(table.getDbName().toLowerCase()); + if (dbHasJoinCastBug) { + // This is a workaround for DERBY-6358 and Oracle bug; it is pretty horrible. + tableValue = "(case when \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? and " + + "\"FILTER" + partColIndex + "\".\"PART_ID\" = \"PARTITIONS\".\"PART_ID\" and " + + "\"FILTER" + partColIndex + "\".\"INTEGER_IDX\" = " + partColIndex + " then " + + tableValue + " else null end)"; + params.add(table.getTableName().toLowerCase()); + params.add(table.getDbName().toLowerCase()); + } } if (!node.isReverseOrder) { params.add(nodeValue); @@ -961,7 +1061,7 @@ class MetaStoreDirectSql { for (int i = 0; i < colNames.size(); ++i) { params[i + 2] = colNames.get(i); } - Object qResult = query.executeWithArray(params); + Object qResult = executeWithArray(query, params, queryText); long queryTime = doTrace ? System.nanoTime() : 0; if (qResult == null) { query.closeAll(); @@ -978,8 +1078,7 @@ class MetaStoreDirectSql { public AggrStats aggrColStatsForPartitions(String dbName, String tableName, List<String> partNames, List<String> colNames) throws MetaException { - long partsFound = partsFoundForPartitions(dbName, tableName, partNames, - colNames); + long partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames); List<ColumnStatisticsObj> stats = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, partsFound); return new AggrStats(stats, partsFound); @@ -989,21 +1088,21 @@ class MetaStoreDirectSql { List<String> partNames, List<String> colNames) throws MetaException { long partsFound = 0; boolean doTrace = LOG.isDebugEnabled(); - String qText = "select count(\"COLUMN_NAME\") from \"PART_COL_STATS\"" + String queryText = "select count(\"COLUMN_NAME\") from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"PARTITION_NAME\""; long start = doTrace ? System.nanoTime() : 0; - Query query = pm.newQuery("javax.jdo.query.SQL", qText); - Object qResult = query.executeWithArray(prepareParams(dbName, tableName, - partNames, colNames)); + Query query = pm.newQuery("javax.jdo.query.SQL", queryText); + Object qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, colNames), queryText); long end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, qText, start, end); + timingTrace(doTrace, queryText, start, end); ForwardQueryResult fqr = (ForwardQueryResult) qResult; Iterator<?> iter = fqr.iterator(); while (iter.hasNext()) { - if (StatObjectConverter.extractSqlLong(iter.next()) == colNames.size()) { + if (extractSqlLong(iter.next()) == colNames.size()) { partsFound++; } } @@ -1013,12 +1112,14 @@ class MetaStoreDirectSql { private List<ColumnStatisticsObj> columnStatisticsObjForPartitions( String dbName, String tableName, List<String> partNames, List<String> colNames, long partsFound) throws MetaException { + // TODO: all the extrapolation logic should be moved out of this class, + // only mechanical data retrieval should remain here. 
String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", " + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), " + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "; - String qText = null; + String queryText = null; long start = 0; long end = 0; Query query = null; @@ -1028,20 +1129,20 @@ class MetaStoreDirectSql { // Check if the status of all the columns of all the partitions exists // Extrapolation is not needed. if (partsFound == partNames.size()) { - qText = commonPrefix + queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", qText); - qResult = query.executeWithArray(prepareParams(dbName, tableName, - partNames, colNames)); + query = pm.newQuery("javax.jdo.query.SQL", queryText); + qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, colNames), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); } end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, qText, start, end); + timingTrace(doTrace, queryText, start, end); List<Object[]> list = ensureList(qResult); List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>( list.size()); @@ -1056,18 +1157,18 @@ class MetaStoreDirectSql { // We need to extrapolate this partition based on the other partitions List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>( colNames.size()); - qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", qText); - qResult = query.executeWithArray(prepareParams(dbName, tableName, - partNames, colNames)); + query = pm.newQuery("javax.jdo.query.SQL", queryText); + qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, colNames), queryText); end = doTrace ? 
System.nanoTime() : 0; - timingTrace(doTrace, qText, start, end); + timingTrace(doTrace, queryText, start, end); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); @@ -1082,7 +1183,7 @@ class MetaStoreDirectSql { // count(\"PARTITION_NAME\")==partNames.size() // Or, extrapolation is not possible for this column if // count(\"PARTITION_NAME\")<2 - Long count = StatObjectConverter.extractSqlLong(row[2]); + Long count = extractSqlLong(row[2]); if (count == partNames.size() || count < 2) { noExtraColumnNames.add(colName); } else { @@ -1092,14 +1193,14 @@ class MetaStoreDirectSql { query.closeAll(); // Extrapolation is not needed for columns noExtraColumnNames if (noExtraColumnNames.size() != 0) { - qText = commonPrefix + queryText = commonPrefix + " and \"COLUMN_NAME\" in ("+ makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in ("+ makeParams(partNames.size()) +")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", qText); - qResult = query.executeWithArray(prepareParams(dbName, tableName, - partNames, noExtraColumnNames)); + query = pm.newQuery("javax.jdo.query.SQL", queryText); + qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, noExtraColumnNames), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); @@ -1109,7 +1210,7 @@ class MetaStoreDirectSql { colStats.add(prepareCSObj(row, 0)); } end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, qText, start, end); + timingTrace(doTrace, queryText, start, end); query.closeAll(); } // Extrapolation is needed for extraColumnNames. @@ -1121,18 +1222,18 @@ class MetaStoreDirectSql { } // get sum for all columns to reduce the number of queries Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>(); - qText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\")" + queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" +makeParams(extraColumnNameTypeParts.size())+ ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\""; start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", qText); + query = pm.newQuery("javax.jdo.query.SQL", queryText); List<String> extraColumnNames = new ArrayList<String>(); extraColumnNames.addAll(extraColumnNameTypeParts.keySet()); - qResult = query.executeWithArray(prepareParams(dbName, tableName, - partNames, extraColumnNames)); + qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, extraColumnNames), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); @@ -1148,7 +1249,7 @@ class MetaStoreDirectSql { sumMap.put((String) row[0], indexToObject); } end = doTrace ? 
System.nanoTime() : 0; - timingTrace(doTrace, qText, start, end); + timingTrace(doTrace, queryText, start, end); query.closeAll(); for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts .entrySet()) { @@ -1177,23 +1278,23 @@ class MetaStoreDirectSql { if (o == null) { row[2 + colStatIndex] = null; } else { - Long val = StatObjectConverter.extractSqlLong(o); + Long val = extractSqlLong(o); row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size())); } } else { // if the aggregation type is min/max, we extrapolate from the // left/right borders - qText = "select \"" + queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" - + " and \"COLUMN_NAME\" in (" +makeParams(1)+ ")" + + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by \'" + colStatName + "\'"; start = doTrace ? System.nanoTime() : 0; - query = pm.newQuery("javax.jdo.query.SQL", qText); - qResult = query.executeWithArray(prepareParams(dbName, - tableName, partNames, Arrays.asList(colName))); + query = pm.newQuery("javax.jdo.query.SQL", queryText); + qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, Arrays.asList(colName)), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); @@ -1202,7 +1303,7 @@ class MetaStoreDirectSql { Object[] min = (Object[]) (fqr.get(0)); Object[] max = (Object[]) (fqr.get(fqr.size() - 1)); end = doTrace ? System.nanoTime() : 0; - timingTrace(doTrace, qText, start, end); + timingTrace(doTrace, queryText, start, end); query.closeAll(); if (min[0] == null || max[0] == null) { row[2 + colStatIndex] = null; @@ -1260,7 +1361,8 @@ class MetaStoreDirectSql { + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\""; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); - Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames)); + Object qResult = executeWithArray(query, prepareParams( + dbName, tableName, partNames, colNames), queryText); long queryTime = doTrace ? System.nanoTime() : 0; if (qResult == null) { query.closeAll(); @@ -1306,8 +1408,8 @@ class MetaStoreDirectSql { // LastAnalyzed is stored per column but thrift has it per several; // get the lowest for now as nobody actually uses this field. Object laObj = row[offset + 14]; - if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > StatObjectConverter.extractSqlLong(laObj))) { - csd.setLastAnalyzed(StatObjectConverter.extractSqlLong(laObj)); + if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) { + csd.setLastAnalyzed(extractSqlLong(laObj)); } csos.add(prepareCSObj(row, offset)); } @@ -1327,4 +1429,23 @@ class MetaStoreDirectSql { // W/ size 0, query will fail, but at least we'd get to see the query in debug output. return (size == 0) ? "" : repeat(",?", size).substring(1); } + + @SuppressWarnings("unchecked") + private <T> T executeWithArray(Query query, Object[] params, String sql) throws MetaException { + try { + return (T)((params == null) ? query.execute() : query.executeWithArray(params)); + } catch (Exception ex) { + String error = "Failed to execute [" + sql + "] with parameters ["; + if (params != null) { + boolean isFirst = true; + for (Object param : params) { + error += (isFirst ? 
"" : ", ") + param; + isFirst = false; + } + } + LOG.warn(error + "]", ex); + // We just logged an exception with (in case of JDO) a humongous callstack. Make a new one. + throw new MetaException("See previous errors; " + ex.getMessage()); + } + } }
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java Tue Nov 18 00:48:40 2014 @@ -20,6 +20,8 @@ package org.apache.hadoop.hive.metastore import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; +import java.util.concurrent.atomic.AtomicBoolean; + /** * A thread that runs in the metastore, separate from the threads in the thrift service. */ @@ -49,21 +51,12 @@ public interface MetaStoreThread { * thread should then assure that the loop has been gone completely through at * least once. */ - void init(BooleanPointer stop, BooleanPointer looped) throws MetaException; + void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException; /** * Run the thread in the background. This must not be called until - * {@link #init(org.apache.hadoop.hive.metastore.MetaStoreThread.BooleanPointer)} has + * {@link ##init(java.util.concurrent.atomic.AtomicBoolean, java.util.concurrent.atomic.AtomicBoolean)} has * been called. */ void start(); - - class BooleanPointer { - public boolean boolVal; - - public BooleanPointer() { - boolVal = false; - } - } - } Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Nov 18 00:48:40 2014 @@ -132,6 +132,8 @@ import org.apache.hadoop.hive.metastore. 
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; import org.apache.hadoop.hive.metastore.parser.FilterLexer; import org.apache.hadoop.hive.metastore.parser.FilterParser; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.util.StringUtils; @@ -265,7 +267,7 @@ public class ObjectStore implements RawS isInitialized = pm != null; if (isInitialized) { expressionProxy = createExpressionProxy(hiveConf); - directSql = new MetaStoreDirectSql(pm); + directSql = new MetaStoreDirectSql(pm, hiveConf); } LOG.debug("RawStore: " + this + ", with PersistenceManager: " + pm + " created in the thread with id: " + Thread.currentThread().getId()); @@ -1999,7 +2001,7 @@ public class ObjectStore implements RawS return new GetListHelper<Partition>(dbName, tblName, allowSql, allowJdo) { @Override protected List<Partition> getSqlResult(GetHelper<List<Partition>> ctx) throws MetaException { - return directSql.getPartitionsViaSqlFilter(dbName, tblName, partNames, null); + return directSql.getPartitionsViaSqlFilter(dbName, tblName, partNames); } @Override protected List<Partition> getJdoResult( @@ -2052,7 +2054,7 @@ public class ObjectStore implements RawS List<String> partNames = new LinkedList<String>(); hasUnknownPartitions.set(getPartitionNamesPrunedByExprNoTxn( ctx.getTable(), expr, defaultPartitionName, maxParts, partNames)); - result = directSql.getPartitionsViaSqlFilter(dbName, tblName, partNames, null); + result = directSql.getPartitionsViaSqlFilter(dbName, tblName, partNames); } return result; } @@ -2136,14 +2138,16 @@ public class ObjectStore implements RawS result.addAll(getPartitionNamesNoTxn( table.getDbName(), table.getTableName(), maxParts)); List<String> columnNames = new ArrayList<String>(); + List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>(); for (FieldSchema fs : table.getPartitionKeys()) { columnNames.add(fs.getName()); + typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType())); } if (defaultPartName == null || defaultPartName.isEmpty()) { defaultPartName = HiveConf.getVar(getConf(), HiveConf.ConfVars.DEFAULTPARTITIONNAME); } return expressionProxy.filterPartitionsByExpr( - columnNames, expr, defaultPartName, result); + columnNames, typeInfos, expr, defaultPartName, result); } /** Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java Tue Nov 18 00:48:40 2014 @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore import java.util.List; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; /** * The proxy interface that metastore uses to manipulate and apply @@ -37,12 +38,14 @@ public interface PartitionExpressionProx /** * Filters the partition names via serialized Hive expression. 
- * @param columnNames Partition column names in the underlying table. + * @param partColumnNames Partition column names in the underlying table. + * @param partColumnTypeInfos Partition column types in the underlying table * @param expr Serialized expression. * @param defaultPartitionName Default partition name from job or server configuration. * @param partitionNames Partition names; the list is modified in place. * @return Whether there were any unknown partitions preserved in the name list. */ - public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr, + public boolean filterPartitionsByExpr(List<String> partColumnNames, + List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName, List<String> partitionNames) throws MetaException; } Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java Tue Nov 18 00:48:40 2014 @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.common.cla import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.datanucleus.exceptions.NucleusException; @InterfaceAudience.Private @InterfaceStability.Evolving @@ -132,8 +133,9 @@ public class RetryingHMSHandler implemen } throw e.getCause(); } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null - && e.getCause().getCause() instanceof javax.jdo.JDOException) { - // The JDOException may be wrapped further in a MetaException + && (e.getCause().getCause() instanceof javax.jdo.JDOException || + e.getCause().getCause() instanceof NucleusException)) { + // The JDOException or the Nucleus Exception may be wrapped further in a MetaException caughtException = e.getCause().getCause(); } else { LOG.error(ExceptionUtils.getStackTrace(e.getCause())); Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java Tue Nov 18 00:48:40 2014 @@ -420,58 +420,58 @@ public class StatObjectConverter { colType = colType.toLowerCase(); if (colType.equals("boolean")) { BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); - boolStats.setNumFalses(extractSqlLong(falses)); - boolStats.setNumTrues(extractSqlLong(trues)); - boolStats.setNumNulls(extractSqlLong(nulls)); + boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses)); + boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues)); + boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); data.setBooleanStats(boolStats); } else if (colType.equals("string") || 
colType.startsWith("varchar") || colType.startsWith("char")) { StringColumnStatsData stringStats = new StringColumnStatsData(); - stringStats.setNumNulls(extractSqlLong(nulls)); + stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); stringStats.setAvgColLen((Double)avglen); - stringStats.setMaxColLen(extractSqlLong(maxlen)); - stringStats.setNumDVs(extractSqlLong(dist)); + stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); + stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); data.setStringStats(stringStats); } else if (colType.equals("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); - binaryStats.setNumNulls(extractSqlLong(nulls)); + binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); binaryStats.setAvgColLen((Double)avglen); - binaryStats.setMaxColLen(extractSqlLong(maxlen)); + binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); data.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) { LongColumnStatsData longStats = new LongColumnStatsData(); - longStats.setNumNulls(extractSqlLong(nulls)); + longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (lhigh != null) { - longStats.setHighValue(extractSqlLong(lhigh)); + longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh)); } if (llow != null) { - longStats.setLowValue(extractSqlLong(llow)); + longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow)); } - longStats.setNumDVs(extractSqlLong(dist)); + longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); data.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); - doubleStats.setNumNulls(extractSqlLong(nulls)); + doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (dhigh != null) { doubleStats.setHighValue((Double)dhigh); } if (dlow != null) { doubleStats.setLowValue((Double)dlow); } - doubleStats.setNumDVs(extractSqlLong(dist)); + doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); data.setDoubleStats(doubleStats); } else if (colType.startsWith("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); - decimalStats.setNumNulls(extractSqlLong(nulls)); + decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); if (dechigh != null) { decimalStats.setHighValue(createThriftDecimal((String)dechigh)); } if (declow != null) { decimalStats.setLowValue(createThriftDecimal((String)declow)); } - decimalStats.setNumDVs(extractSqlLong(dist)); + decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); data.setDecimalStats(decimalStats); } } @@ -484,12 +484,4 @@ public class StatObjectConverter { private static String createJdoDecimalString(Decimal d) { return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString(); } - - static Long extractSqlLong(Object obj) throws MetaException { - if (obj == null) return null; - if (!(obj instanceof Number)) { - throw new MetaException("Expected numeric type but got " + obj.getClass().getName()); - } - return ((Number)obj).longValue(); - } } Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java URL: 
http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java Tue Nov 18 00:48:40 2014 @@ -367,20 +367,8 @@ public class ExpressionTree { partitionColumnIndex, partitionColumnCount, isOpEquals, filterBuilder); return; } - - String keyEqual = FileUtils.escapePathName(keyName) + "="; - String valString = "partitionName.substring("; - String indexOfKeyStr = ""; - if (partitionColumnIndex != 0) { - keyEqual = "/" + keyEqual; - indexOfKeyStr = "partitionName.indexOf(\"" + keyEqual + "\") + "; - valString += indexOfKeyStr; - } - valString += keyEqual.length(); - if (partitionColumnIndex != (partitionColumnCount - 1)) { - valString += ", partitionName.concat(\"/\").indexOf(\"/\", " + indexOfKeyStr + keyEqual.length() + ")"; - } - valString += ")"; + //get the value for a partition key form MPartition.values (PARTITION_KEY_VALUES) + String valString = "values.get(" + partitionColumnIndex + ")"; if (operator == Operator.LIKE) { if (isReverseOrder) { Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/CompositePartitionSpecProxy.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/CompositePartitionSpecProxy.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/CompositePartitionSpecProxy.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/CompositePartitionSpecProxy.java Tue Nov 18 00:48:40 2014 @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.metastore.partition.spec; import org.apache.commons.logging.Log; Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionListComposingSpecProxy.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionListComposingSpecProxy.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionListComposingSpecProxy.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionListComposingSpecProxy.java Tue Nov 18 00:48:40 2014 @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hive.metastore.partition.spec; import org.apache.hadoop.hive.metastore.api.MetaException; Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecProxy.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecProxy.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecProxy.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecProxy.java Tue Nov 18 00:48:40 2014 @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.metastore.partition.spec; import org.apache.hadoop.hive.metastore.api.MetaException; Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecWithSharedSDProxy.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecWithSharedSDProxy.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecWithSharedSDProxy.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/partition/spec/PartitionSpecWithSharedSDProxy.java Tue Nov 18 00:48:40 2014 @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hive.metastore.partition.spec; import org.apache.hadoop.hive.metastore.api.MetaException; Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original) +++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Tue Nov 18 00:48:40 2014 @@ -911,8 +911,9 @@ public class TxnHandler { // If you change this function, remove the @Ignore from TestTxnHandler.deadlockIsDetected() // to test these changes. // MySQL and MSSQL use 40001 as the state code for rollback. Postgres uses 40001 and 40P01. - // Oracle seems to return different SQLStates each time, but the message always contains - // "deadlock detected", so I've used that instead. + // Oracle seems to return different SQLStates and messages each time, + // so I've tried to capture the different error messages (there appear to be fewer different + // error messages than SQL states). 
// Derby and newer MySQL driver use the new SQLTransactionRollbackException if (dbProduct == null) { determineDatabaseProduct(conn); @@ -921,7 +922,8 @@ public class TxnHandler { ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES || dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) || (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) || - (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")))) { + (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected") + || e.getMessage().contains("can't serialize access for this transaction")))) { if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) { LOG.warn("Deadlock detected in " + caller + ", trying again."); throw new DeadlockException(); Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java (original) +++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java Tue Nov 18 00:48:40 2014 @@ -1,6 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.metastore; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import java.util.List; @@ -14,7 +33,9 @@ public class MockPartitionExpressionForM } @Override - public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr, String defaultPartitionName, List<String> partitionNames) throws MetaException { + public boolean filterPartitionsByExpr(List<String> partColumnNames, + List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName, + List<String> partitionNames) throws MetaException { return false; } } Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java (original) +++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStorePartitionSpecs.java Tue Nov 18 00:48:40 2014 @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hive.metastore; import org.apache.hadoop.hive.conf.HiveConf; Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java (original) +++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java Tue Nov 18 00:48:40 2014 @@ -35,6 +35,7 @@ import java.sql.Statement; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import static junit.framework.Assert.*; @@ -1104,7 +1105,7 @@ public class TestTxnHandler { conn.commit(); txnHandler.closeDbConn(conn); - final MetaStoreThread.BooleanPointer sawDeadlock = new MetaStoreThread.BooleanPointer(); + final AtomicBoolean sawDeadlock = new AtomicBoolean(); final Connection conn1 = txnHandler.getDbConn(Connection.TRANSACTION_SERIALIZABLE); final Connection conn2 = txnHandler.getDbConn(Connection.TRANSACTION_SERIALIZABLE); @@ -1131,7 +1132,7 @@ public class TestTxnHandler { LOG.debug("Forced a deadlock, SQLState is " + e.getSQLState() + " class of " + "exception is " + e.getClass().getName() + " msg is <" + e .getMessage() + ">"); - sawDeadlock.boolVal = true; + sawDeadlock.set(true); } } conn1.rollback(); @@ -1161,7 +1162,7 @@ public class TestTxnHandler { LOG.debug("Forced a deadlock, SQLState is " + e.getSQLState() + " class of " + "exception is " + e.getClass().getName() + " msg is <" + e .getMessage() + ">"); - sawDeadlock.boolVal = true; + sawDeadlock.set(true); } } conn2.rollback(); @@ -1175,9 +1176,9 @@ public class TestTxnHandler { t2.start(); t1.join(); t2.join(); - if (sawDeadlock.boolVal) break; + if (sawDeadlock.get()) break; } - assertTrue(sawDeadlock.boolVal); + assertTrue(sawDeadlock.get()); } finally { conn1.rollback(); txnHandler.closeDbConn(conn1); Modified: hive/branches/spark/packaging/src/main/assembly/bin.xml URL: http://svn.apache.org/viewvc/hive/branches/spark/packaging/src/main/assembly/bin.xml?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/packaging/src/main/assembly/bin.xml (original) +++ hive/branches/spark/packaging/src/main/assembly/bin.xml Tue Nov 18 00:48:40 2014 @@ -39,8 +39,9 @@ <useStrictFiltering>true</useStrictFiltering> <useTransitiveFiltering>true</useTransitiveFiltering> <excludes> + <exclude>org.apache.hadoop:*</exclude> <exclude>org.apache.hive.hcatalog:*</exclude> - <exclude>org.slf4j:*</exclude> + <exclude>org.slf4j:*</exclude> </excludes> </dependencySet> <dependencySet> @@ -328,6 +329,11 @@ <destName>hive-exec-log4j.properties.template</destName> </file> <file> + <source>${project.parent.basedir}/beeline/src/main/resources/beeline-log4j.properties</source> + <outputDirectory>conf</outputDirectory> + <destName>beeline-log4j.properties.template</destName> + </file> + <file> <source>${project.parent.basedir}/hcatalog/README.txt</source> <outputDirectory>hcatalog/share/doc/hcatalog</outputDirectory> </file> Modified: hive/branches/spark/packaging/src/main/assembly/src.xml URL: 
http://svn.apache.org/viewvc/hive/branches/spark/packaging/src/main/assembly/src.xml?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/packaging/src/main/assembly/src.xml (original) +++ hive/branches/spark/packaging/src/main/assembly/src.xml Tue Nov 18 00:48:40 2014 @@ -48,6 +48,7 @@ <include>NOTICE</include> <include>CHANGELOG</include> <include>RELEASE_NOTES.txt</include> + <include>accumulo-handler/**/*</include> <include>ant/**/*</include> <include>beeline/**/*</include> <include>bin/**/*</include> Modified: hive/branches/spark/pom.xml URL: http://svn.apache.org/viewvc/hive/branches/spark/pom.xml?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/pom.xml (original) +++ hive/branches/spark/pom.xml Tue Nov 18 00:48:40 2014 @@ -72,6 +72,9 @@ <test.warehouse.dir>${project.build.directory}/warehouse</test.warehouse.dir> <test.warehouse.scheme>pfile://</test.warehouse.scheme> + <!-- To add additional exclude patterns set this property --> + <test.excludes.additional></test.excludes.additional> + <!-- Plugin and Plugin Dependency Versions --> <ant.contrib.version>1.0b3</ant.contrib.version> <datanucleus.maven.plugin.version>3.3.0-release</datanucleus.maven.plugin.version> @@ -797,6 +800,7 @@ <exclude>**/ql/exec/vector/udf/generic/*.java</exclude> <exclude>**/TestHiveServer2Concurrency.java</exclude> <exclude>**/TestHiveMetaStore.java</exclude> + <exclude>${test.excludes.additional}</exclude> </excludes> <redirectTestOutputToFile>true</redirectTestOutputToFile> <reuseForks>false</reuseForks> @@ -881,6 +885,11 @@ <exclude>**/gen-java/**</exclude> <exclude>**/testdata/**</exclude> <exclude>**/ptest2/*.md</exclude> + <exclude>**/test/org/apache/hadoop/hive/hbase/avro/**</exclude> + <exclude>**/avro_test.avpr</exclude> + <exclude>**/xmlReport.pl</exclude> + <exclude>**/*.html</exclude> + <exclude>**/sit</exclude> </excludes> </configuration> </plugin> Modified: hive/branches/spark/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original) +++ hive/branches/spark/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Tue Nov 18 00:48:40 2014 @@ -16333,14 +16333,32 @@ public final class OrcProto { // repeated uint32 version = 4 [packed = true]; /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ java.util.List<java.lang.Integer> getVersionList(); /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ int getVersionCount(); /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ int getVersion(int index); @@ -16354,6 +16372,28 @@ public final class OrcProto { */ long getMetadataLength(); + // optional uint32 writerVersion = 6; + /** + * <code>optional uint32 
writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + boolean hasWriterVersion(); + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + int getWriterVersion(); + // optional string magic = 8000; /** * <code>optional string magic = 8000;</code> @@ -16483,8 +16523,13 @@ public final class OrcProto { metadataLength_ = input.readUInt64(); break; } - case 64002: { + case 48: { bitField0_ |= 0x00000010; + writerVersion_ = input.readUInt32(); + break; + } + case 64002: { + bitField0_ |= 0x00000020; magic_ = input.readBytes(); break; } @@ -16584,6 +16629,12 @@ public final class OrcProto { private java.util.List<java.lang.Integer> version_; /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public java.util.List<java.lang.Integer> getVersionList() { @@ -16591,12 +16642,24 @@ public final class OrcProto { } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public int getVersionCount() { return version_.size(); } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public int getVersion(int index) { return version_.get(index); @@ -16619,6 +16682,34 @@ public final class OrcProto { return metadataLength_; } + // optional uint32 writerVersion = 6; + public static final int WRITERVERSION_FIELD_NUMBER = 6; + private int writerVersion_; + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + public boolean hasWriterVersion() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + public int getWriterVersion() { + return writerVersion_; + } + // optional string magic = 8000; public static final int MAGIC_FIELD_NUMBER = 8000; private java.lang.Object magic_; @@ -16630,7 +16721,7 @@ public final class OrcProto { * </pre> */ public boolean hasMagic() { - return ((bitField0_ & 0x00000010) == 0x00000010); + return ((bitField0_ & 0x00000020) == 0x00000020); } /** * <code>optional string magic = 8000;</code> @@ -16680,6 +16771,7 @@ public final class OrcProto { compressionBlockSize_ = 0L; version_ = java.util.Collections.emptyList(); metadataLength_ = 0L; + writerVersion_ = 0; magic_ = ""; } private byte memoizedIsInitialized = -1; @@ -16714,6 +16806,9 @@ public final class OrcProto { output.writeUInt64(5, metadataLength_); } if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeUInt32(6, writerVersion_); + } + if (((bitField0_ & 0x00000020) == 0x00000020)) { output.writeBytes(8000, getMagicBytes()); } getUnknownFields().writeTo(output); @@ -16757,6 +16852,10 @@ public final class OrcProto { } if (((bitField0_ & 0x00000010) == 0x00000010)) { size += com.google.protobuf.CodedOutputStream + .computeUInt32Size(6, writerVersion_); + } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + size += com.google.protobuf.CodedOutputStream .computeBytesSize(8000, 
getMagicBytes()); } size += getUnknownFields().getSerializedSize(); @@ -16889,8 +16988,10 @@ public final class OrcProto { bitField0_ = (bitField0_ & ~0x00000008); metadataLength_ = 0L; bitField0_ = (bitField0_ & ~0x00000010); - magic_ = ""; + writerVersion_ = 0; bitField0_ = (bitField0_ & ~0x00000020); + magic_ = ""; + bitField0_ = (bitField0_ & ~0x00000040); return this; } @@ -16943,6 +17044,10 @@ public final class OrcProto { if (((from_bitField0_ & 0x00000020) == 0x00000020)) { to_bitField0_ |= 0x00000010; } + result.writerVersion_ = writerVersion_; + if (((from_bitField0_ & 0x00000040) == 0x00000040)) { + to_bitField0_ |= 0x00000020; + } result.magic_ = magic_; result.bitField0_ = to_bitField0_; onBuilt(); @@ -16982,8 +17087,11 @@ public final class OrcProto { if (other.hasMetadataLength()) { setMetadataLength(other.getMetadataLength()); } + if (other.hasWriterVersion()) { + setWriterVersion(other.getWriterVersion()); + } if (other.hasMagic()) { - bitField0_ |= 0x00000020; + bitField0_ |= 0x00000040; magic_ = other.magic_; onChanged(); } @@ -17126,6 +17234,12 @@ public final class OrcProto { } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public java.util.List<java.lang.Integer> getVersionList() { @@ -17133,18 +17247,36 @@ public final class OrcProto { } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public int getVersionCount() { return version_.size(); } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public int getVersion(int index) { return version_.get(index); } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public Builder setVersion( int index, int value) { @@ -17155,6 +17287,12 @@ public final class OrcProto { } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public Builder addVersion(int value) { ensureVersionIsMutable(); @@ -17164,6 +17302,12 @@ public final class OrcProto { } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public Builder addAllVersion( java.lang.Iterable<? 
extends java.lang.Integer> values) { @@ -17174,6 +17318,12 @@ public final class OrcProto { } /** * <code>repeated uint32 version = 4 [packed = true];</code> + * + * <pre> + * the version of the file format + * [0, 11] = Hive 0.11 + * [0, 12] = Hive 0.12 + * </pre> */ public Builder clearVersion() { version_ = java.util.Collections.emptyList(); @@ -17215,6 +17365,63 @@ public final class OrcProto { return this; } + // optional uint32 writerVersion = 6; + private int writerVersion_ ; + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + public boolean hasWriterVersion() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + public int getWriterVersion() { + return writerVersion_; + } + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + public Builder setWriterVersion(int value) { + bitField0_ |= 0x00000020; + writerVersion_ = value; + onChanged(); + return this; + } + /** + * <code>optional uint32 writerVersion = 6;</code> + * + * <pre> + * Version of the writer: + * 0 (or missing) = original + * 1 = HIVE-8732 fixed + * </pre> + */ + public Builder clearWriterVersion() { + bitField0_ = (bitField0_ & ~0x00000020); + writerVersion_ = 0; + onChanged(); + return this; + } + // optional string magic = 8000; private java.lang.Object magic_ = ""; /** @@ -17225,7 +17432,7 @@ public final class OrcProto { * </pre> */ public boolean hasMagic() { - return ((bitField0_ & 0x00000020) == 0x00000020); + return ((bitField0_ & 0x00000040) == 0x00000040); } /** * <code>optional string magic = 8000;</code> @@ -17277,7 +17484,7 @@ public final class OrcProto { if (value == null) { throw new NullPointerException(); } - bitField0_ |= 0x00000020; + bitField0_ |= 0x00000040; magic_ = value; onChanged(); return this; @@ -17290,7 +17497,7 @@ public final class OrcProto { * </pre> */ public Builder clearMagic() { - bitField0_ = (bitField0_ & ~0x00000020); + bitField0_ = (bitField0_ & ~0x00000040); magic_ = getDefaultInstance().getMagic(); onChanged(); return this; @@ -17307,7 +17514,7 @@ public final class OrcProto { if (value == null) { throw new NullPointerException(); } - bitField0_ |= 0x00000020; + bitField0_ |= 0x00000040; magic_ = value; onChanged(); return this; @@ -17513,13 +17720,14 @@ public final class OrcProto { "em\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nstatistics\030\007" + " \003(\01322.org.apache.hadoop.hive.ql.io.orc." 
+ "ColumnStatistics\022\026\n\016rowIndexStride\030\010 \001(\r" + - "\"\305\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" + + "\"\334\001\n\nPostScript\022\024\n\014footerLength\030\001 \001(\004\022F\n" + "\013compression\030\002 \001(\01621.org.apache.hadoop.h" + "ive.ql.io.orc.CompressionKind\022\034\n\024compres" + "sionBlockSize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001" + - "\022\026\n\016metadataLength\030\005 \001(\004\022\016\n\005magic\030\300> \001(\t", - "*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022" + - "\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" + "\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\rwriterVersio", + "n\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKi" + + "nd\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZ" + + "O\020\003" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -17651,7 +17859,7 @@ public final class OrcProto { internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor, - new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "MetadataLength", "Magic", }); + new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "MetadataLength", "WriterVersion", "Magic", }); return null; } }; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java Tue Nov 18 00:48:40 2014 @@ -212,14 +212,13 @@ public abstract class AbstractFileMergeO // move any incompatible files to final path if (!incompatFileSet.isEmpty()) { for (Path incompatFile : incompatFileSet) { - String fileName = incompatFile.getName(); - Path destFile = new Path(finalPath.getParent(), fileName); + Path destDir = finalPath.getParent(); try { - Utilities.renameOrMoveFiles(fs, incompatFile, destFile); + Utilities.renameOrMoveFiles(fs, incompatFile, destDir); LOG.info("Moved incompatible file " + incompatFile + " to " + - destFile); + destDir); } catch (HiveException e) { - LOG.error("Unable to move " + incompatFile + " to " + destFile); + LOG.error("Unable to move " + incompatFile + " to " + destDir); throw new IOException(e); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java Tue Nov 18 00:48:40 2014 @@ 
-438,8 +438,11 @@ public class CommonMergeJoinOperator ext WritableComparable key_1 = (WritableComparable) k1.get(i); WritableComparable key_2 = (WritableComparable) k2.get(i); if (key_1 == null && key_2 == null) { - return nullsafes != null && nullsafes[i] ? 0 : -1; // just return k1 is - // smaller than k2 + if (nullsafes != null && nullsafes[i]) { + continue; + } else { + return -1; + } } else if (key_1 == null) { return -1; } else if (key_2 == null) { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1640263&r1=1640262&r2=1640263&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Nov 18 00:48:40 2014 @@ -741,8 +741,7 @@ public class DDLTask extends Task<DDLWor Collections.sort(entries); StringBuilder sb = new StringBuilder(); for(String entry : entries){ - sb.append(entry); - sb.append((char)terminator); + appendNonNull(sb, entry, true); } writeToFile(sb.toString(), resFile); }
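A note on the CommonMergeJoinOperator hunk above: before this change, two null key parts caused the comparator to return immediately (0 if the column was null-safe, -1 otherwise), so a null-safe match on one column short-circuited the comparison of the remaining key columns. The fix continues to the next column instead. The following is a minimal, self-contained sketch of that semantics only; it is not the Hive class, and it assumes the WritableComparable key parts have been reduced to plain Comparable objects for illustration.

```java
public class NullSafeKeyComparison {

  /**
   * Compares two join keys column by column.
   * nullsafes[i] marks columns joined with the null-safe operator (<=>);
   * the array may be null when no column is null-safe.
   */
  @SuppressWarnings({"rawtypes", "unchecked"})
  static int compareKeys(Object[] k1, Object[] k2, boolean[] nullsafes) {
    for (int i = 0; i < k1.length; i++) {
      Object key1 = k1[i];
      Object key2 = k2[i];
      if (key1 == null && key2 == null) {
        if (nullsafes != null && nullsafes[i]) {
          continue;   // null <=> null: equal on this column, keep checking the rest
        }
        return -1;    // plain equality: nulls never match, treat k1 as smaller
      } else if (key1 == null) {
        return -1;
      } else if (key2 == null) {
        return 1;
      }
      int cmp = ((Comparable) key1).compareTo(key2);
      if (cmp != 0) {
        return cmp;
      }
    }
    return 0;         // every column compared equal
  }

  public static void main(String[] args) {
    Object[] left = {null, "a"};
    Object[] right = {null, "a"};
    // Null-safe on column 0: the keys match, because column 1 is also compared.
    System.out.println(compareKeys(left, right, new boolean[] {true, false}));   // 0
    // Not null-safe: the nulls do not match, left sorts lower.
    System.out.println(compareKeys(left, right, new boolean[] {false, false}));  // -1
  }
}
```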