Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java Fri Aug 22 21:36:47 2014 @@ -16,6 +16,7 @@ package org.apache.hadoop.hive.ql.io.par import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.Writable; @@ -36,19 +37,21 @@ public class DataWritableGroupConverter private final Object[] currentArr; private Writable[] rootMap; - public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) { - this(requestedSchema, null, 0, tableSchema); + public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema, + final List<TypeInfo> hiveSchemaTypeInfos) { + this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos); final int fieldCount = tableSchema.getFieldCount(); this.rootMap = new Writable[fieldCount]; } public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, - final int index) { - this(groupType, parent, index, groupType); + final int index, final List<TypeInfo> hiveSchemaTypeInfos) { + this(groupType, parent, index, groupType, hiveSchemaTypeInfos); } public DataWritableGroupConverter(final GroupType selectedGroupType, - final HiveGroupConverter parent, final int index, final GroupType containingGroupType) { + final HiveGroupConverter parent, final int index, final GroupType containingGroupType, + final List<TypeInfo> hiveSchemaTypeInfos) { this.parent = parent; this.index = index; final int totalFieldCount = containingGroupType.getFieldCount(); @@ -62,7 +65,8 @@ public class DataWritableGroupConverter Type subtype = selectedFields.get(i); if (containingGroupType.getFields().contains(subtype)) { converters[i] = getConverterFromDescription(subtype, - containingGroupType.getFieldIndex(subtype.getName()), this); + containingGroupType.getFieldIndex(subtype.getName()), this, + hiveSchemaTypeInfos); } else { throw new IllegalStateException("Group type [" + containingGroupType + "] does not contain requested field: " + subtype);
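The constructor changes above thread the list of Hive column TypeInfos down the converter tree; ETypeConverter (below) consults that list so a UTF8-annotated Parquet binary column that is declared as char or varchar in the Hive schema is materialized as HiveCharWritable or HiveVarcharWritable instead of a plain string writable. A minimal standalone sketch of that dispatch follows; ConverterKind and pickConverter are illustrative names only, and the literal prefixes stand in for serdeConstants.CHAR_TYPE_NAME and serdeConstants.VARCHAR_TYPE_NAME.

import java.util.Arrays;
import java.util.List;

public class ConverterDispatchSketch {
  enum ConverterKind { ECHAR, EVARCHAR, ESTRING }

  // Mirrors the new branch in ETypeConverter.getNewConverter: the Hive type name at the
  // requested column index decides whether the binary column becomes char, varchar or string.
  static ConverterKind pickConverter(List<String> hiveTypeNames, int index) {
    String typeName = hiveTypeNames.get(index);
    if (typeName.startsWith("char")) {           // serdeConstants.CHAR_TYPE_NAME
      return ConverterKind.ECHAR;
    } else if (typeName.startsWith("varchar")) { // serdeConstants.VARCHAR_TYPE_NAME
      return ConverterKind.EVARCHAR;
    }
    return ConverterKind.ESTRING;
  }

  public static void main(String[] args) {
    List<String> types = Arrays.asList("int", "char(10)", "varchar(32)", "string");
    System.out.println(pickConverter(types, 1)); // ECHAR
    System.out.println(pickConverter(types, 2)); // EVARCHAR
    System.out.println(pickConverter(types, 3)); // ESTRING
  }
}

Because the lookup is index based, DataWritableReadSupport (further below) strips virtual columns from the configured column types before handing the list to the record converter, which keeps those positions aligned with the table columns.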
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Fri Aug 22 21:36:47 2014 @@ -13,6 +13,9 @@ */ package org.apache.hadoop.hive.ql.io.parquet.convert; +import java.util.List; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.ArrayWritable; import parquet.io.api.GroupConverter; @@ -28,8 +31,10 @@ public class DataWritableRecordConverter private final DataWritableGroupConverter root; - public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) { - this.root = new DataWritableGroupConverter(requestedSchema, tableSchema); + public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema, + final List<TypeInfo> hiveColumnTypeInfos) { + this.root = new DataWritableGroupConverter(requestedSchema, tableSchema, + hiveColumnTypeInfos); } @Override @@ -41,4 +46,4 @@ public class DataWritableRecordConverter public GroupConverter getRootConverter() { return root; } -} +} \ No newline at end of file Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Fri Aug 22 21:36:47 2014 @@ -16,12 +16,19 @@ package org.apache.hadoop.hive.ql.io.par import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -145,6 +152,32 @@ public enum ETypeConverter { } }; } + }, + ECHAR_CONVERTER(HiveCharWritable.class) { + @Override + Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + return new BinaryConverter<HiveCharWritable>(type, parent, index) { + @Override + protected HiveCharWritable convert(Binary binary) { + HiveChar hiveChar = new HiveChar(); + 
hiveChar.setValue(binary.toStringUsingUTF8()); + return new HiveCharWritable(hiveChar); + } + }; + } + }, + EVARCHAR_CONVERTER(HiveVarcharWritable.class) { + @Override + Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + return new BinaryConverter<HiveVarcharWritable>(type, parent, index) { + @Override + protected HiveVarcharWritable convert(Binary binary) { + HiveVarchar hiveVarchar = new HiveVarchar(); + hiveVarchar.setValue(binary.toStringUsingUTF8()); + return new HiveVarcharWritable(hiveVarchar); + } + }; + } }; final Class<?> _type; @@ -159,7 +192,8 @@ public enum ETypeConverter { abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent); - public static Converter getNewConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + public static Converter getNewConverter(final PrimitiveType type, final int index, + final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) { if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) { //TODO- cleanup once parquet support Timestamp type annotation. return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent); @@ -167,7 +201,15 @@ public enum ETypeConverter { if (OriginalType.DECIMAL == type.getOriginalType()) { return EDECIMAL_CONVERTER.getConverter(type, index, parent); } else if (OriginalType.UTF8 == type.getOriginalType()) { - return ESTRING_CONVERTER.getConverter(type, index, parent); + if (hiveSchemaTypeInfos.get(index).getTypeName() + .startsWith(serdeConstants.CHAR_TYPE_NAME)) { + return ECHAR_CONVERTER.getConverter(type, index, parent); + } else if (hiveSchemaTypeInfos.get(index).getTypeName() + .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { + return EVARCHAR_CONVERTER.getConverter(type, index, parent); + } else if (type.isPrimitive()) { + return ESTRING_CONVERTER.getConverter(type, index, parent); + } } Class<?> javaType = type.getPrimitiveTypeName().javaType; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Fri Aug 22 21:36:47 2014 @@ -13,6 +13,9 @@ */ package org.apache.hadoop.hive.ql.io.parquet.convert; +import java.util.List; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.Writable; import parquet.io.api.Converter; @@ -23,17 +26,20 @@ import parquet.schema.Type.Repetition; public abstract class HiveGroupConverter extends GroupConverter { protected static Converter getConverterFromDescription(final Type type, final int index, - final HiveGroupConverter parent) { + final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) { if (type == null) { return null; } if (type.isPrimitive()) { - return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent); + return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent, + hiveSchemaTypeInfos); } else { if (type.asGroupType().getRepetition() == Repetition.REPEATED) { - 
return new ArrayWritableGroupConverter(type.asGroupType(), parent, index); + return new ArrayWritableGroupConverter(type.asGroupType(), parent, index, + hiveSchemaTypeInfos); } else { - return new DataWritableGroupConverter(type.asGroupType(), parent, index); + return new DataWritableGroupConverter(type.asGroupType(), parent, index, + hiveSchemaTypeInfos); } } } @@ -42,4 +48,4 @@ public abstract class HiveGroupConverter protected abstract void add(int index, Writable value); -} +} \ No newline at end of file Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Fri Aug 22 21:36:47 2014 @@ -16,6 +16,7 @@ package org.apache.hadoop.hive.ql.io.par import java.util.List; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -25,7 +26,6 @@ import org.apache.hadoop.hive.serde2.typ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import parquet.schema.ConversionPatterns; -import parquet.schema.DecimalMetadata; import parquet.schema.GroupType; import parquet.schema.MessageType; import parquet.schema.OriginalType; @@ -81,6 +81,14 @@ public class HiveSchemaConverter { return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name); } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) { throw new UnsupportedOperationException("Void type not implemented"); + } else if (typeInfo.getTypeName().toLowerCase().startsWith( + serdeConstants.CHAR_TYPE_NAME)) { + return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8) + .named(name); + } else if (typeInfo.getTypeName().toLowerCase().startsWith( + serdeConstants.VARCHAR_TYPE_NAME)) { + return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8) + .named(name); } else if (typeInfo instanceof DecimalTypeInfo) { DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; int prec = decimalTypeInfo.precision(); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Fri Aug 22 21:36:47 2014 @@ -14,6 +14,7 @@ package org.apache.hadoop.hive.ql.io.parquet.read; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -23,6 +24,8 @@ import org.apache.hadoop.hive.ql.io.IOCo import 
org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.util.StringUtils; @@ -60,6 +63,28 @@ public class DataWritableReadSupport ext return (List<String>) VirtualColumn. removeVirtualColumns(StringUtils.getStringCollection(columns)); } + + private static List<TypeInfo> getColumnTypes(Configuration configuration) { + + List<String> columnNames; + String columnNamesProperty = configuration.get(IOConstants.COLUMNS); + if (columnNamesProperty.length() == 0) { + columnNames = new ArrayList<String>(); + } else { + columnNames = Arrays.asList(columnNamesProperty.split(",")); + } + List<TypeInfo> columnTypes; + String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES); + if (columnTypesProperty.length() == 0) { + columnTypes = new ArrayList<TypeInfo>(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty); + } + + columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes); + return columnTypes; + } + /** * * It creates the readContext for Parquet side with the requested schema during the init phase. @@ -100,20 +125,22 @@ public class DataWritableReadSupport ext final List<Type> typeListWanted = new ArrayList<Type>(); final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false); for (final Integer idx : indexColumnsWanted) { - String col = listColumns.get(idx); - if (indexAccess) { - typeListWanted.add(tableSchema.getType(col)); - } else { - col = col.toLowerCase(); - if (lowerCaseFileSchemaColumns.containsKey(col)) { - typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col))); + if (idx < listColumns.size()) { + String col = listColumns.get(idx); + if (indexAccess) { + typeListWanted.add(tableSchema.getType(col)); } else { - // should never occur? - String msg = "Column " + col + " at index " + idx + " does not exist in " + + col = col.toLowerCase(); + if (lowerCaseFileSchemaColumns.containsKey(col)) { + typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col))); + } else { + // should never occur? + String msg = "Column " + col + " at index " + idx + " does not exist in " + lowerCaseFileSchemaColumns; - throw new IllegalStateException(msg); + throw new IllegalStateException(msg); + } } - } + } } requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(), typeListWanted), fileSchema, configuration); @@ -146,7 +173,8 @@ public class DataWritableReadSupport ext } final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser. 
parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration); - return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema); + return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema, + getColumnTypes(configuration)); } /** Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Fri Aug 22 21:36:47 2014 @@ -25,12 +25,14 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.ArrayWritable; /** @@ -102,12 +104,10 @@ public class ArrayWritableObjectInspecto return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) { throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384"); - } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { - throw new UnsupportedOperationException("Parquet does not support decimal. See HIVE-6384"); } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME)) { - throw new UnsupportedOperationException("Parquet does not support char. See HIVE-6384"); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((CharTypeInfo) typeInfo); } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { - throw new UnsupportedOperationException("Parquet does not support varchar. 
See HIVE-6384"); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((VarcharTypeInfo) typeInfo); } else { throw new UnsupportedOperationException("Unknown field type: " + typeInfo); } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Fri Aug 22 21:36:47 2014 @@ -42,6 +42,8 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; @@ -60,6 +62,7 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import parquet.io.api.Binary; /** * @@ -280,6 +283,12 @@ public class ParquetHiveSerDe extends Ab return new BytesWritable(tgt); case TIMESTAMP: return new TimestampWritable(((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj)); + case CHAR: + String strippedValue = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(obj).getStrippedValue(); + return new BytesWritable(Binary.fromString(strippedValue).getBytes()); + case VARCHAR: + String value = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(obj).getValue(); + return new BytesWritable(Binary.fromString(value).getBytes()); default: throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory()); } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Fri Aug 22 21:36:47 2014 @@ -18,14 +18,6 @@ package org.apache.hadoop.hive.ql.io.sarg; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; @@ -57,6 +49,15 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import java.math.BigDecimal; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * The implementation of SearchArguments. */ @@ -947,7 +948,8 @@ final class SearchArgumentImpl implement literal instanceof Long || literal instanceof Double || literal instanceof DateWritable || - literal instanceof HiveDecimal) { + literal instanceof HiveDecimal || + literal instanceof BigDecimal) { return literal; } else if (literal instanceof HiveChar || literal instanceof HiveVarchar) { @@ -979,7 +981,8 @@ final class SearchArgumentImpl implement return PredicateLeaf.Type.FLOAT; } else if (literal instanceof DateWritable) { return PredicateLeaf.Type.DATE; - } else if (literal instanceof HiveDecimal) { + } else if (literal instanceof HiveDecimal || + literal instanceof BigDecimal) { return PredicateLeaf.Type.DECIMAL; } throw new IllegalArgumentException("Unknown type for literal " + literal); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java Fri Aug 22 21:36:47 2014 @@ -165,13 +165,13 @@ public class DbTxnManager extends HiveTx break; case TABLE: + case DUMMYPARTITION: // in case of dynamic partitioning lock the table t = output.getTable(); compBuilder.setDbName(t.getDbName()); compBuilder.setTableName(t.getTableName()); break; case PARTITION: - case DUMMYPARTITION: compBuilder.setPartitionName(output.getPartition().getName()); t = output.getPartition().getTable(); compBuilder.setDbName(t.getDbName()); @@ -301,7 +301,10 @@ public class DbTxnManager extends HiveTx try { if (txnId > 0) rollbackTxn(); if (lockMgr != null) lockMgr.close(); + if (client != null) client.close(); } catch (Exception e) { + LOG.error("Caught exception " + e.getClass().getName() + " with message <" + e.getMessage() + + ">, swallowing as there is nothing we can do with it."); // Not much we can do about it here. } } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Fri Aug 22 21:36:47 2014 @@ -89,6 +89,7 @@ import org.apache.hadoop.hive.metastore. 
import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant; import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.ShowCompactResponse; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; @@ -979,7 +980,7 @@ public class Hive { tTable = getMSC().getTable(dbName, tableName); } catch (NoSuchObjectException e) { if (throwException) { - LOG.error(StringUtils.stringifyException(e)); + LOG.error("Table " + tableName + " not found: " + e.getMessage()); throw new InvalidTableException(tableName); } return null; @@ -2553,6 +2554,15 @@ private void constructOneLBLocationMap(F throw new HiveException(e); } } + + public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException { + try { + return getMSC().setPartitionColumnStatistics(request); + } catch (Exception e) { + LOG.debug(StringUtils.stringifyException(e)); + throw new HiveException(e); + } + } public List<ColumnStatisticsObj> getTableColumnStatistics( String dbName, String tableName, List<String> colNames) throws HiveException { @@ -2716,4 +2726,19 @@ private void constructOneLBLocationMap(F } } + public void setMetaConf(String propName, String propValue) throws HiveException { + try { + getMSC().setMetaConf(propName, propValue); + } catch (TException te) { + throw new HiveException(te); + } + } + + public String getMetaConf(String propName) throws HiveException { + try { + return getMSC().getMetaConf(propName); + } catch (TException te) { + throw new HiveException(te); + } + } }; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java Fri Aug 22 21:36:47 2014 @@ -249,6 +249,8 @@ public class SessionHiveMetaStoreClient + " is not a directory or unable to create one"); } } + // Make sure location string is in proper format + tbl.getSd().setLocation(tblPath.toString()); } // Add temp table info to current session Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java Fri Aug 22 21:36:47 2014 @@ -22,25 +22,36 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.ListIterator; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.conf.HiveConf; 
+import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +@InterfaceAudience.Private public class VirtualColumn implements Serializable { private static final long serialVersionUID = 1L; - public static VirtualColumn FILENAME = new VirtualColumn("INPUT__FILE__NAME", (PrimitiveTypeInfo)TypeInfoFactory.stringTypeInfo); - public static VirtualColumn BLOCKOFFSET = new VirtualColumn("BLOCK__OFFSET__INSIDE__FILE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); - public static VirtualColumn ROWOFFSET = new VirtualColumn("ROW__OFFSET__INSIDE__BLOCK", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); + public static final VirtualColumn FILENAME = new VirtualColumn("INPUT__FILE__NAME", (PrimitiveTypeInfo)TypeInfoFactory.stringTypeInfo); + public static final VirtualColumn BLOCKOFFSET = new VirtualColumn("BLOCK__OFFSET__INSIDE__FILE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); + public static final VirtualColumn ROWOFFSET = new VirtualColumn("ROW__OFFSET__INSIDE__BLOCK", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); - public static VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); + public static final VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); + /** + * {@link org.apache.hadoop.hive.ql.io.RecordIdentifier} + */ + public static final VirtualColumn ROWID = new VirtualColumn("ROW__ID", RecordIdentifier.StructInfo.typeInfo, true, RecordIdentifier.StructInfo.oi); /** * GROUPINGID is used with GROUP BY GROUPINGS SETS, ROLLUP and CUBE. @@ -49,27 +60,28 @@ public class VirtualColumn implements Se * set if that column has been aggregated in that row. Otherwise the * value is "0". Returns the decimal representation of the bit vector. 
*/ - public static VirtualColumn GROUPINGID = + public static final VirtualColumn GROUPINGID = new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo); - public static VirtualColumn[] VIRTUAL_COLUMNS = - new VirtualColumn[] {FILENAME, BLOCKOFFSET, ROWOFFSET, RAWDATASIZE, GROUPINGID}; - - private String name; - private PrimitiveTypeInfo typeInfo; - private boolean isHidden = true; + public static ImmutableSet<String> VIRTUAL_COLUMN_NAMES = + ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(), + RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName()); - public VirtualColumn() { - } + private final String name; + private final TypeInfo typeInfo; + private final boolean isHidden; + private final ObjectInspector oi; - public VirtualColumn(String name, PrimitiveTypeInfo typeInfo) { - this(name, typeInfo, true); + private VirtualColumn(String name, PrimitiveTypeInfo typeInfo) { + this(name, typeInfo, true, + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo)); } - VirtualColumn(String name, PrimitiveTypeInfo typeInfo, boolean isHidden) { + private VirtualColumn(String name, TypeInfo typeInfo, boolean isHidden, ObjectInspector oi) { this.name = name; this.typeInfo = typeInfo; this.isHidden = isHidden; + this.oi = oi; } public static List<VirtualColumn> getStatsRegistry(Configuration conf) { @@ -87,26 +99,19 @@ public class VirtualColumn implements Se if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEROWOFFSET)) { l.add(ROWOFFSET); } + l.add(ROWID); return l; } - public PrimitiveTypeInfo getTypeInfo() { + public TypeInfo getTypeInfo() { return typeInfo; } - public void setTypeInfo(PrimitiveTypeInfo typeInfo) { - this.typeInfo = typeInfo; - } - public String getName() { return this.name; } - public void setName(String name) { - this.name = name; - } - public boolean isHidden() { return isHidden; } @@ -115,37 +120,58 @@ public class VirtualColumn implements Se return isHidden; } - public void setIsHidden(boolean isHidden) { - this.isHidden = isHidden; + public ObjectInspector getObjectInspector() { + return oi; } @Override public boolean equals(Object o) { - if (o == null) { - return false; - } if (this == o) { return true; } + if(!(o instanceof VirtualColumn)) { + return false; + } VirtualColumn c = (VirtualColumn) o; return this.name.equals(c.name) && this.typeInfo.getTypeName().equals(c.getTypeInfo().getTypeName()); } - + @Override + public int hashCode() { + int c = 19; + c = 31 * name.hashCode() + c; + return 31 * typeInfo.getTypeName().hashCode() + c; + } public static Collection<String> removeVirtualColumns(final Collection<String> columns) { - for(VirtualColumn vcol : VIRTUAL_COLUMNS) { - columns.remove(vcol.getName()); - } + Iterables.removeAll(columns, VIRTUAL_COLUMN_NAMES); return columns; } + public static List<TypeInfo> removeVirtualColumnTypes(final List<String> columnNames, + final List<TypeInfo> columnTypes) { + if (columnNames.size() != columnTypes.size()) { + throw new IllegalArgumentException("Number of column names in configuration " + + columnNames.size() + " differs from column types " + columnTypes.size()); + } + + int i = 0; + ListIterator<TypeInfo> it = columnTypes.listIterator(); + while(it.hasNext()) { + it.next(); + if (VIRTUAL_COLUMN_NAMES.contains(columnNames.get(i))) { + it.remove(); + } + ++i; + } + return columnTypes; + } + public static StructObjectInspector getVCSObjectInspector(List<VirtualColumn> vcs) { List<String> names = new ArrayList<String>(vcs.size()); 
List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(vcs.size()); for (VirtualColumn vc : vcs) { names.add(vc.getName()); - inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - vc.getTypeInfo())); + inspectors.add(vc.oi); } return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Aug 22 21:36:47 2014 @@ -927,11 +927,9 @@ public class Vectorizer implements Physi if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; // Currently, we do not support vectorized virtual columns (see HIVE-5570). - for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) { - if (c.getColumn().equals(vc.getName())) { - LOG.info("Cannot vectorize virtual column " + c.getColumn()); - return false; - } + if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) { + LOG.info("Cannot vectorize virtual column " + c.getColumn()); + return false; } } String typeName = desc.getTypeInfo().getTypeName(); @@ -1076,10 +1074,8 @@ public class Vectorizer implements Physi // Not using method column.getIsVirtualCol() because partitioning columns are also // treated as virtual columns in ColumnInfo. - for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) { - if (column.getInternalName().equals(vc.getName())) { + if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) { return true; - } } return false; } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Fri Aug 22 21:36:47 2014 @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimi import com.google.common.collect.Lists; import com.google.common.collect.Maps; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -67,8 +68,10 @@ import org.apache.hadoop.hive.ql.udf.gen import org.apache.hadoop.hive.serde.serdeConstants; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Stack; public class StatsRulesProcFactory { @@ -803,12 +806,13 @@ public class StatsRulesProcFactory { // statistics object that is combination of statistics from all // relations involved in JOIN Statistics stats = new Statistics(); - List<Long> rowCountParents = Lists.newArrayList(); + Map<String, Long> rowCountParents = new HashMap<String, Long>(); List<Long> distinctVals = Lists.newArrayList(); // 2 
relations, multiple attributes boolean multiAttr = false; int numAttr = 1; + int numParent = parents.size(); Map<String, ColStatistics> joinedColStats = Maps.newHashMap(); Map<Integer, List<String>> joinKeys = Maps.newHashMap(); @@ -818,9 +822,20 @@ public class StatsRulesProcFactory { ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); Statistics parentStats = parent.getStatistics(); - rowCountParents.add(parentStats.getNumRows()); List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols(); + // Parent RS may have column statistics from multiple parents. + // Populate table alias to row count map, this will be used later to + // scale down/up column statistics based on new row count + // NOTE: JOIN with UNION as parent of RS will not have table alias + // propagated properly. UNION operator does not propagate the table + // alias of subqueries properly to expression nodes. Hence union20.q + // will have wrong number of rows. + Set<String> tableAliases = StatsUtils.getAllTableAlias(parent.getColumnExprMap()); + for (String tabAlias : tableAliases) { + rowCountParents.put(tabAlias, parentStats.getNumRows()); + } + // multi-attribute join key if (keyExprs.size() > 1) { multiAttr = true; @@ -860,12 +875,19 @@ public class StatsRulesProcFactory { perAttrDVs.add(cs.getCountDistint()); } } + distinctVals.add(getDenominator(perAttrDVs)); perAttrDVs.clear(); } - for (Long l : distinctVals) { - denom *= l; + if (numAttr > numParent) { + // To avoid denominator getting larger and aggressively reducing + // number of rows, we will ease out denominator. + denom = getEasedOutDenominator(distinctVals); + } else { + for (Long l : distinctVals) { + denom *= l; + } } } else { for (List<String> jkeys : joinKeys.values()) { @@ -890,6 +912,7 @@ public class StatsRulesProcFactory { Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap(); RowSchema rs = jop.getSchema(); List<ColStatistics> outColStats = Lists.newArrayList(); + Map<String, String> outInTabAlias = new HashMap<String, String>(); for (ColumnInfo ci : rs.getSignature()) { String key = ci.getInternalName(); ExprNodeDesc end = colExprMap.get(key); @@ -901,6 +924,7 @@ public class StatsRulesProcFactory { ColStatistics cs = joinedColStats.get(fqColName); String outColName = key; String outTabAlias = ci.getTabAlias(); + outInTabAlias.put(outTabAlias, tabAlias); if (cs != null) { cs.setColumnName(outColName); cs.setTableAlias(outTabAlias); @@ -911,7 +935,8 @@ public class StatsRulesProcFactory { // update join statistics stats.setColumnStats(outColStats); - long newRowCount = computeNewRowCount(rowCountParents, denom); + long newRowCount = computeNewRowCount( + Lists.newArrayList(rowCountParents.values()), denom); if (newRowCount <= 0 && LOG.isDebugEnabled()) { newRowCount = 0; @@ -920,7 +945,8 @@ public class StatsRulesProcFactory { + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom); } - updateStatsForJoinType(stats, newRowCount, true, jop.getConf()); + updateStatsForJoinType(stats, newRowCount, jop.getConf(), + rowCountParents, outInTabAlias); jop.setStatistics(stats); if (LOG.isDebugEnabled()) { @@ -966,37 +992,54 @@ public class StatsRulesProcFactory { return null; } + private Long getEasedOutDenominator(List<Long> distinctVals) { + // Exponential back-off for NDVs. + // 1) Descending order sort of NDVs + // 2) denominator = NDV1 * (NDV2 ^ (1/2)) * (NDV3 ^ (1/4))) * .... 
+ Collections.sort(distinctVals, Collections.reverseOrder()); + + long denom = distinctVals.get(0); + for (int i = 1; i < distinctVals.size(); i++) { + denom = (long) (denom * Math.pow(distinctVals.get(i), 1.0 / (1 << i))); + } + + return denom; + } + private void updateStatsForJoinType(Statistics stats, long newNumRows, - boolean useColStats, JoinDesc conf) { - long oldRowCount = stats.getNumRows(); - double ratio = (double) newNumRows / (double) oldRowCount; + JoinDesc conf, Map<String, Long> rowCountParents, + Map<String, String> outInTabAlias) { stats.setNumRows(newNumRows); - if (useColStats) { - List<ColStatistics> colStats = stats.getColumnStats(); - for (ColStatistics cs : colStats) { - long oldDV = cs.getCountDistint(); - long newDV = oldDV; - - // if ratio is greater than 1, then number of rows increases. This can happen - // when some operators like GROUPBY duplicates the input rows in which case - // number of distincts should not change. Update the distinct count only when - // the output number of rows is less than input number of rows. - if (ratio <= 1.0) { - newDV = (long) Math.ceil(ratio * oldDV); - } - // Assumes inner join - // TODO: HIVE-5579 will handle different join types - cs.setNumNulls(0); - cs.setCountDistint(newDV); - } - stats.setColumnStats(colStats); - long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats); - stats.setDataSize(newDataSize); - } else { - long newDataSize = (long) (ratio * stats.getDataSize()); - stats.setDataSize(newDataSize); + // scale down/up the column statistics based on the changes in number of + // rows from each parent. For ex: If there are 2 parents for JOIN operator + // with 1st parent having 200 rows and 2nd parent having 2000 rows. Now if + // the new number of rows after applying join rule is 10, then the column + // stats for columns from 1st parent should be scaled down by 200/10 = 20x + // and stats for columns from 2nd parent should be scaled down by 200x + List<ColStatistics> colStats = stats.getColumnStats(); + for (ColStatistics cs : colStats) { + long oldRowCount = rowCountParents.get(outInTabAlias.get(cs.getTableAlias())); + double ratio = (double) newNumRows / (double) oldRowCount; + long oldDV = cs.getCountDistint(); + long newDV = oldDV; + + // if ratio is greater than 1, then number of rows increases. This can happen + // when some operators like GROUPBY duplicates the input rows in which case + // number of distincts should not change. Update the distinct count only when + // the output number of rows is less than input number of rows. 
+ if (ratio <= 1.0) { + newDV = (long) Math.ceil(ratio * oldDV); + } + // Assumes inner join + // TODO: HIVE-5579 will handle different join types + cs.setNumNulls(0); + cs.setCountDistint(newDV); } + stats.setColumnStats(colStats); + long newDataSize = StatsUtils + .getDataSizeFromColumnStats(newNumRows, colStats); + stats.setDataSize(newDataSize); } private long computeNewRowCount(List<Long> rowCountParents, long denom) { Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Fri Aug 22 21:36:47 2014 @@ -25,6 +25,7 @@ import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -57,6 +58,7 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ArchiveUtils; +import org.apache.hadoop.hive.ql.exec.ColumnStatsUpdateTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Task; @@ -91,6 +93,8 @@ import org.apache.hadoop.hive.ql.plan.Al import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition; import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -276,6 +280,8 @@ public class DDLSemanticAnalyzer extends analyzeAlterTableClusterSort(ast, tableName, partSpec); } else if (ast.getToken().getType() == HiveParser.TOK_COMPACT) { analyzeAlterTableCompact(ast, tableName, partSpec); + } else if(ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS){ + analyzeAlterTableUpdateStats(ast,tblPart); } break; } @@ -378,6 +384,9 @@ public class DDLSemanticAnalyzer extends case HiveParser.TOK_ALTERTABLE_RENAME: analyzeAlterTableRename(ast, false); break; + case HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS: + analyzeAlterTableUpdateStats(ast, null); + break; case HiveParser.TOK_ALTERTABLE_TOUCH: analyzeAlterTableTouch(ast); break; @@ -507,6 +516,57 @@ public class DDLSemanticAnalyzer extends } } + private void analyzeAlterTableUpdateStats(ASTNode ast, TablePartition tblPart) + throws SemanticException { + String tblName = null; + String colName = null; + Map<String, String> mapProp = null; + Map<String, String> partSpec = null; + String partName = null; + if (tblPart == null) { + tblName = getUnescapedName((ASTNode) ast.getChild(0)); + colName = getUnescapedName((ASTNode) ast.getChild(1)); + mapProp = getProps((ASTNode) (ast.getChild(2)).getChild(0)); + } else { + tblName = tblPart.tableName; + partSpec = tblPart.partSpec; + try { + partName = Warehouse.makePartName(partSpec, false); + } catch (MetaException e) { 
+ // TODO Auto-generated catch block + throw new SemanticException("partition " + partSpec.toString() + + " not found"); + } + colName = getUnescapedName((ASTNode) ast.getChild(0)); + mapProp = getProps((ASTNode) (ast.getChild(1)).getChild(0)); + } + + Table tbl = null; + try { + tbl = db.getTable(tblName); + } catch (HiveException e) { + throw new SemanticException("table " + tbl + " not found"); + } + + String colType = null; + List<FieldSchema> cols = tbl.getCols(); + for (FieldSchema col : cols) { + if (colName.equalsIgnoreCase(col.getName())) { + colType = col.getType(); + break; + } + } + + if (colType == null) + throw new SemanticException("column type not found"); + + ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tbl.getTableName(), + Arrays.asList(colName), Arrays.asList(colType), partSpec == null); + ColumnStatsUpdateTask cStatsUpdateTask = (ColumnStatsUpdateTask) TaskFactory + .get(new ColumnStatsUpdateWork(cStatsDesc, partName, mapProp), conf); + rootTasks.add(cStatsUpdateTask); + } + private void analyzeSetShowRole(ASTNode ast) throws SemanticException { switch (ast.getChildCount()) { case 0: @@ -745,6 +805,8 @@ public class DDLSemanticAnalyzer extends if (dbProps != null) { createDatabaseDesc.setDatabaseProperties(dbProps); } + Database database = new Database(dbName, dbComment, dbLocation, dbProps); + outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_NO_LOCK)); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), createDatabaseDesc), conf)); @@ -795,8 +857,12 @@ public class DDLSemanticAnalyzer extends rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropDatabaseDesc), conf)); } - private void analyzeSwitchDatabase(ASTNode ast) { + private void analyzeSwitchDatabase(ASTNode ast) throws SemanticException { String dbName = unescapeIdentifier(ast.getChild(0).getText()); + Database database = getDatabase(dbName, true); + ReadEntity dbReadEntity = new ReadEntity(database); + dbReadEntity.noLockNeeded(); + inputs.add(dbReadEntity); SwitchDatabaseDesc switchDatabaseDesc = new SwitchDatabaseDesc(dbName); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), switchDatabaseDesc), conf)); @@ -1015,7 +1081,7 @@ public class DDLSemanticAnalyzer extends private void analyzeCreateIndex(ASTNode ast) throws SemanticException { String indexName = unescapeIdentifier(ast.getChild(0).getText()); String typeName = unescapeSQLString(ast.getChild(1).getText()); - String[] qualified = getQualifiedTableName((ASTNode) ast.getChild(2)); + String[] qTabName = getQualifiedTableName((ASTNode) ast.getChild(2)); List<String> indexedCols = getColumnNames((ASTNode) ast.getChild(3)); IndexType indexType = HiveIndex.getIndexType(typeName); @@ -1080,15 +1146,15 @@ public class DDLSemanticAnalyzer extends } storageFormat.fillDefaultStorageFormat(); - if (indexTableName == null) { - indexTableName = MetaStoreUtils.getIndexTableName(qualified[0], qualified[1], indexName); - indexTableName = qualified[0] + "." + indexTableName; // on same database with base table + indexTableName = MetaStoreUtils.getIndexTableName(qTabName[0], qTabName[1], indexName); + indexTableName = qTabName[0] + "." 
+ indexTableName; // on same database with base table } else { indexTableName = getDotName(Utilities.getDbTableName(indexTableName)); } + inputs.add(new ReadEntity(getTable(qTabName))); - CreateIndexDesc crtIndexDesc = new CreateIndexDesc(getDotName(qualified), indexName, + CreateIndexDesc crtIndexDesc = new CreateIndexDesc(getDotName(qTabName), indexName, indexedCols, indexTableName, deferredRebuild, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), storageFormat.getStorageHandler(), typeName, location, idxProps, tblProps, @@ -1116,6 +1182,8 @@ public class DDLSemanticAnalyzer extends } } + inputs.add(new ReadEntity(getTable(tableName))); + DropIndexDesc dropIdxDesc = new DropIndexDesc(indexName, tableName); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropIdxDesc), conf)); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Fri Aug 22 21:36:47 2014 @@ -144,7 +144,7 @@ fromSource @init { gParent.pushMsg("from source", state); } @after { gParent.popMsg(state); } : - ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource) (lateralView^)* + ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)* ; tableBucketSample @@ -256,3 +256,46 @@ searchCondition ; //----------------------------------------------------------------------------------- + +//-------- Row Constructor ---------------------------------------------------------- +//in support of SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as FOO(a,b,c) and +// INSERT INTO <table> (col1,col2,...) VALUES(...),(...),... +// INSERT INTO <table> (col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c) +valueRowConstructor + : + LPAREN atomExpression (COMMA atomExpression)* RPAREN -> ^(TOK_VALUE_ROW atomExpression+) + ; + +valuesTableConstructor + : + valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+) + ; + +/* +VALUES(1),(2) means 2 rows, 1 column each. +VALUES(1,2),(3,4) means 2 rows, 2 columns each. +VALUES(1,2,3) means 1 row, 3 columns +*/ +valuesClause + : + KW_VALUES valuesTableConstructor -> valuesTableConstructor + ; + +/* +This represents a clause like this: +(VALUES(1,2),(2,3)) as VirtTable(col1,col2) +*/ +virtualTableSource + : + LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause) + ; +/* +e.g. as VirtTable(col1,col2) +Note that we only want literals as column names +*/ +tableNameColList + : + KW_AS? 
identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+)) + ; + +//----------------------------------------------------------------------------------- \ No newline at end of file Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java Fri Aug 22 21:36:47 2014 @@ -169,6 +169,7 @@ public class FunctionSemanticAnalyzer ex try { String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(functionName); String dbName = qualifiedNameParts[0]; + functionName = qualifiedNameParts[1]; database = getDatabase(dbName); } catch (HiveException e) { LOG.error(e); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Fri Aug 22 21:36:47 2014 @@ -292,6 +292,7 @@ KW_TRANSACTIONS: 'TRANSACTIONS'; KW_REWRITE : 'REWRITE'; KW_AUTHORIZATION: 'AUTHORIZATION'; KW_CONF: 'CONF'; +KW_VALUES: 'VALUES'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. 
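The lexer now recognizes KW_VALUES, and the FromClauseParser rules above accept a VALUES table constructor as a from-source wrapped in a virtual table; HiveParser (next) additionally rewrites INSERT INTO ... VALUES ... into a query over an anonymous virtual table. The row and column semantics spelled out in the grammar comments, for example that VALUES(1,2),(3,4) denotes two rows of two columns, can be sketched outside ANTLR in a few lines of Java; parseValues below is purely illustrative and not part of Hive, the real parsing is done by the valuesTableConstructor and valueRowConstructor rules.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ValuesClauseSketch {
  // Toy reading of a VALUES body such as "(1,2),(3,4)": each parenthesized group is a row,
  // each comma-separated item inside it is a column value.
  static List<List<String>> parseValues(String valuesBody) {
    List<List<String>> rows = new ArrayList<>();
    Matcher m = Pattern.compile("\\(([^)]*)\\)").matcher(valuesBody);
    while (m.find()) {
      rows.add(Arrays.asList(m.group(1).split("\\s*,\\s*")));
    }
    return rows;
  }

  public static void main(String[] args) {
    System.out.println(parseValues("(1,2),(3,4)")); // [[1, 2], [3, 4]]  -> 2 rows, 2 columns each
    System.out.println(parseValues("(1,2,3)"));     // [[1, 2, 3]]       -> 1 row, 3 columns
  }
}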
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Aug 22 21:36:47 2014 @@ -146,6 +146,7 @@ TOK_ALTERTABLE_ARCHIVE; TOK_ALTERTABLE_UNARCHIVE; TOK_ALTERTABLE_SERDEPROPERTIES; TOK_ALTERTABLE_SERIALIZER; +TOK_ALTERTABLE_UPDATECOLSTATS; TOK_TABLE_PARTITION; TOK_ALTERTABLE_FILEFORMAT; TOK_ALTERTABLE_LOCATION; @@ -330,6 +331,15 @@ TOK_RESOURCE_LIST; TOK_COMPACT; TOK_SHOW_COMPACTIONS; TOK_SHOW_TRANSACTIONS; +TOK_DELETE_FROM; +TOK_UPDATE_TABLE; +TOK_SET_COLUMNS_CLAUSE; +TOK_VALUE_ROW; +TOK_VALUES_TABLE; +TOK_VIRTUAL_TABLE; +TOK_VIRTUAL_TABREF; +TOK_ANONYMOUS; +TOK_COL_NAME; } @@ -468,6 +478,9 @@ import java.util.HashMap; xlateMap.put("KW_DEFINED", "DEFINED"); xlateMap.put("KW_SUBQUERY", "SUBQUERY"); xlateMap.put("KW_REWRITE", "REWRITE"); + xlateMap.put("KW_UPDATE", "UPDATE"); + + xlateMap.put("KW_VALUES", "VALUES"); // Operators xlateMap.put("DOT", "."); @@ -637,6 +650,8 @@ execStatement | exportStatement | importStatement | ddlStatement + | deleteStatement + | updateStatement ; loadStatement @@ -938,6 +953,7 @@ alterTableStatementSuffix : alterStatementSuffixRename | alterStatementSuffixAddCol | alterStatementSuffixRenameCol + | alterStatementSuffixUpdateStatsCol | alterStatementSuffixDropPartitions | alterStatementSuffixAddPartitions | alterStatementSuffixTouch @@ -1028,6 +1044,13 @@ alterStatementSuffixRenameCol ->^(TOK_ALTERTABLE_RENAMECOL tableName $oldName $newName colType $comment? alterStatementChangeColPosition?) ; +alterStatementSuffixUpdateStatsCol +@init { pushMsg("update column statistics", state); } +@after { popMsg(state); } + : identifier KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? + ->^(TOK_ALTERTABLE_UPDATECOLSTATS identifier $colName tableProperties $comment?) + ; + alterStatementChangeColPosition : first=KW_FIRST|KW_AFTER afterCol=identifier ->{$first != null}? ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION ) @@ -1130,6 +1153,7 @@ alterTblPartitionStatementSuffix | alterStatementSuffixMergeFiles | alterStatementSuffixSerdeProperties | alterStatementSuffixRenamePart + | alterStatementSuffixStatsPart | alterStatementSuffixBucketNum | alterTblPartitionStatementSuffixSkewedLocation | alterStatementSuffixClusterbySortby @@ -1221,6 +1245,13 @@ alterStatementSuffixRenamePart ->^(TOK_ALTERTABLE_RENAMEPART partitionSpec) ; +alterStatementSuffixStatsPart +@init { pushMsg("alter table stats partition statement", state); } +@after { popMsg(state); } + : KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? + ->^(TOK_ALTERTABLE_UPDATECOLSTATS $colName tableProperties $comment?) + ; + alterStatementSuffixMergeFiles @init { pushMsg("", state); } @after { popMsg(state); } @@ -1300,6 +1331,7 @@ descStatement | (KW_DESCRIBE|KW_DESC) (KW_DATABASE|KW_SCHEMA) KW_EXTENDED? (dbName=identifier) -> ^(TOK_DESCDATABASE $dbName KW_EXTENDED?) 
; + analyzeStatement @init { pushMsg("analyze statement", state); } @after { popMsg(state); } @@ -2077,11 +2109,28 @@ singleFromStatement ( b+=body )+ -> ^(TOK_QUERY fromClause body+) ; +/* +The valuesClause rule below ensures that the parse tree for +"insert into table FOO values (1,2),(3,4)" looks the same as +"insert into table FOO select a,b from (values(1,2),(3,4)) as BAR(a,b)" which itself is made to look +very similar to the tree for "insert into table FOO select a,b from BAR". Since virtual table name +is implicit, it's represented as TOK_ANONYMOUS. +*/ regularBody[boolean topLevel] : i=insertClause + ( s=selectStatement[topLevel] {$s.tree.getChild(1).replaceChildren(0, 0, $i.tree);} -> {$s.tree} + | + valuesClause + -> ^(TOK_QUERY + ^(TOK_FROM + ^(TOK_VIRTUAL_TABLE ^(TOK_VIRTUAL_TABREF ^(TOK_ANONYMOUS)) valuesClause) + ) + ^(TOK_INSERT {$i.tree} ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))) + ) + ) | selectStatement[topLevel] ; @@ -2190,3 +2239,34 @@ limitClause : KW_LIMIT num=Number -> ^(TOK_LIMIT $num) ; + +//DELETE FROM <tableName> WHERE ...; +deleteStatement +@init { pushMsg("delete statement", state); } +@after { popMsg(state); } + : + KW_DELETE KW_FROM tableName (whereClause)? -> ^(TOK_DELETE_FROM tableName whereClause?) + ; + +/*SET <columName> = (3 + col2)*/ +columnAssignmentClause + : + tableOrColumn EQUAL^ atomExpression + ; + +/*SET col1 = 5, col2 = (4 + col4), ...*/ +setColumnsClause + : + KW_SET columnAssignmentClause (COMMA columnAssignmentClause)* -> ^(TOK_SET_COLUMNS_CLAUSE columnAssignmentClause* ) + ; + +/* + UPDATE <table> + SET col1 = val1, col2 = val2... WHERE ... +*/ +updateStatement +@init { pushMsg("update statement", state); } +@after { popMsg(state); } + : + KW_UPDATE tableName setColumnsClause whereClause? -> ^(TOK_UPDATE_TABLE tableName setColumnsClause whereClause?) 
+ ; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Fri Aug 22 21:36:47 2014 @@ -538,5 +538,5 @@ functionIdentifier nonReserved : - KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION + KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK |
KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES ; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Aug 22 21:36:47 2014 @@ -1091,6 +1091,8 @@ public class SemanticAnalyzer extends Ba ASTNode frm = (ASTNode) ast.getChild(0); if (frm.getToken().getType() == HiveParser.TOK_TABREF) { processTable(qb, frm); + } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) { + throw new RuntimeException("VALUES() clause is not fully supported yet..."); } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) { processSubQuery(qb, frm); } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW || @@ -1282,6 +1284,10 @@ public class SemanticAnalyzer extends Ba case HiveParser.TOK_CTE: processCTE(qb, ast); break; + case HiveParser.TOK_DELETE_FROM: + throw new
RuntimeException("DELETE is not (yet) implemented..."); + case HiveParser.TOK_UPDATE_TABLE: + throw new RuntimeException("UPDATE is not (yet) implemented..."); default: skipRecursion = false; break; @@ -10353,6 +10359,7 @@ public class SemanticAnalyzer extends Ba String dbName = qualified.length == 1 ? SessionState.get().getCurrentDatabase() : qualified[0]; Database database = getDatabase(dbName); outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_SHARED)); + outputs.add(new WriteEntity(new Table(dbName, tableName), WriteEntity.WriteType.DDL_NO_LOCK)); if (isTemporary) { if (partCols.size() > 0) { @@ -10526,6 +10533,19 @@ public class SemanticAnalyzer extends Ba try { Table oldView = getTable(createVwDesc.getViewName(), false); + // Do not allow view to be defined on temp table + Set<String> tableAliases = qb.getTabAliases(); + for (String alias : tableAliases) { + try { + Table table = db.getTable(qb.getTabNameForAlias(alias)); + if (table.isTemporary()) { + throw new SemanticException("View definition references temporary table " + alias); + } + } catch (HiveException ex) { + throw new SemanticException(ex); + } + } + // ALTER VIEW AS SELECT requires the view must exist if (createVwDesc.getIsAlterViewAs() && oldView == null) { String viewNotExistErrorMsg = Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Fri Aug 22 21:36:47 2014 @@ -109,6 +109,7 @@ public final class SemanticAnalyzerFacto commandType.put(HiveParser.TOK_ALTERTABLE_PARTCOLTYPE, HiveOperation.ALTERTABLE_PARTCOLTYPE); commandType.put(HiveParser.TOK_SHOW_COMPACTIONS, HiveOperation.SHOW_COMPACTIONS); commandType.put(HiveParser.TOK_SHOW_TRANSACTIONS, HiveOperation.SHOW_TRANSACTIONS); + commandType.put(HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS, HiveOperation.ALTERTABLE_UPDATETABLESTATS); } static { @@ -231,12 +232,14 @@ public final class SemanticAnalyzerFacto case HiveParser.TOK_TRUNCATETABLE: case HiveParser.TOK_EXCHANGEPARTITION: case HiveParser.TOK_SHOW_SET_ROLE: - + case HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS: return new DDLSemanticAnalyzer(conf); case HiveParser.TOK_ALTERTABLE_PARTITION: HiveOperation commandType = null; Integer type = ((ASTNode) tree.getChild(1)).getToken().getType(); - if (tree.getChild(0).getChildCount() > 1) { + if (type == HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS) { + commandType = HiveOperation.ALTERTABLE_UPDATEPARTSTATS; + } else if (tree.getChild(0).getChildCount() > 1) { commandType = tablePartitionCommandType.get(type)[1]; } else { commandType = tablePartitionCommandType.get(type)[0]; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java Fri Aug 22 
21:36:47 2014 @@ -27,7 +27,7 @@ public enum HiveOperation { IMPORT("IMPORT", null, new Privilege[]{Privilege.ALTER_METADATA, Privilege.ALTER_DATA}), CREATEDATABASE("CREATEDATABASE", null, new Privilege[]{Privilege.CREATE}), DROPDATABASE("DROPDATABASE", null, new Privilege[]{Privilege.DROP}), - SWITCHDATABASE("SWITCHDATABASE", new Privilege[]{Privilege.SELECT}, null), + SWITCHDATABASE("SWITCHDATABASE", null, null), LOCKDB("LOCKDATABASE", new Privilege[]{Privilege.LOCK}, null), UNLOCKDB("UNLOCKDATABASE", new Privilege[]{Privilege.LOCK}, null), DROPTABLE ("DROPTABLE", null, new Privilege[]{Privilege.DROP}), @@ -38,6 +38,8 @@ public enum HiveOperation { ALTERTABLE_REPLACECOLS("ALTERTABLE_REPLACECOLS", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_RENAMECOL("ALTERTABLE_RENAMECOL", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_RENAMEPART("ALTERTABLE_RENAMEPART", new Privilege[]{Privilege.DROP}, new Privilege[]{Privilege.CREATE}), + ALTERTABLE_UPDATEPARTSTATS("ALTERTABLE_UPDATEPARTSTATS", new Privilege[]{Privilege.ALTER_METADATA}, null), + ALTERTABLE_UPDATETABLESTATS("ALTERTABLE_UPDATETABLESTATS", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_RENAME("ALTERTABLE_RENAME", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_DROPPARTS("ALTERTABLE_DROPPARTS", new Privilege[]{Privilege.DROP}, null), // The location is input and table is output for alter-table add partitions Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Fri Aug 22 21:36:47 2014 @@ -157,7 +157,7 @@ public final class PlanUtils { } catch (ClassNotFoundException e) { // mimicking behaviour in CreateTableDesc tableDesc creation // returning null table description for output. 
- e.printStackTrace(); + LOG.warn("Unable to find class in getDefaultTableDesc: " + e.getMessage(), e); return null; } return ret; @@ -364,8 +364,7 @@ public final class PlanUtils { ret.setInputFileFormatClass(in_class); ret.setOutputFileFormatClass(out_class); } catch (ClassNotFoundException e) { - e.printStackTrace(); - return null; + throw new RuntimeException("Unable to find class in getTableDesc: " + e.getMessage(), e); } return ret; } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/CommandUtil.java Fri Aug 22 21:36:47 2014 @@ -22,11 +22,14 @@ import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.session.SessionState; +import com.google.common.base.Joiner; + class CommandUtil { /** @@ -68,7 +71,10 @@ class CommandUtil { static void authorizeCommandThrowEx(SessionState ss, HiveOperationType type, List<String> command) throws HiveAuthzPluginException, HiveAccessControlException { HivePrivilegeObject commandObj = HivePrivilegeObject.createHivePrivilegeObject(command); - ss.getAuthorizerV2().checkPrivileges(type, Arrays.asList(commandObj), null, null); + HiveAuthzContext.Builder ctxBuilder = new HiveAuthzContext.Builder(); + ctxBuilder.setCommandString(Joiner.on(' ').join(command)); + ctxBuilder.setUserIpAddress(ss.getUserIpAddress()); + ss.getAuthorizerV2().checkPrivileges(type, Arrays.asList(commandObj), null, ctxBuilder.build()); } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java Fri Aug 22 21:36:47 2014 @@ -49,6 +49,9 @@ public enum HiveCommand { if (command.length > 1 && "role".equalsIgnoreCase(command[1])) { // special handling for set role r1 statement return null; + } else if(command.length > 1 && "from".equalsIgnoreCase(command[1])) { + //special handling for SQL "delete from <table> where..." 
+ return null; } else if (COMMANDS.contains(cmd)) { return HiveCommand.valueOf(cmd); } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java Fri Aug 22 21:36:47 2014 @@ -32,6 +32,7 @@ import java.util.TreeMap; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Schema; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.parse.VariableSubstitution; import org.apache.hadoop.hive.ql.session.SessionState; @@ -104,11 +105,12 @@ public class SetProcessor implements Com try { return new CommandProcessorResponse(setVariable(varname, varvalue)); } catch (Exception e) { - return new CommandProcessorResponse(1, e.getMessage(), "42000"); + return new CommandProcessorResponse(1, e.getMessage(), "42000", + e instanceof IllegalArgumentException ? null : e); } } - public static int setVariable(String varname, String varvalue) throws IllegalArgumentException { + public static int setVariable(String varname, String varvalue) throws Exception { SessionState ss = SessionState.get(); if (varvalue.contains("\n")){ ss.err.println("Warning: Value had a \\n character in it."); @@ -126,6 +128,10 @@ public class SetProcessor implements Com } else if (varname.startsWith(HIVEVAR_PREFIX)) { String propName = varname.substring(HIVEVAR_PREFIX.length()); ss.getHiveVariables().put(propName, new VariableSubstitution().substitute(ss.getConf(),varvalue)); + } else if (varname.startsWith(METACONF_PREFIX)) { + String propName = varname.substring(METACONF_PREFIX.length()); + Hive hive = Hive.get(ss.getConf()); + hive.setMetaConf(propName, new VariableSubstitution().substitute(ss.getConf(), varvalue)); } else { setConf(varname, varname, varvalue, true); } @@ -178,8 +184,7 @@ public class SetProcessor implements Com return sortedEnvMap; } - - private CommandProcessorResponse getVariable(String varname) { + private CommandProcessorResponse getVariable(String varname) throws Exception { SessionState ss = SessionState.get(); if (varname.equals("silent")){ ss.out.println("silent" + "=" + ss.getIsSilent()); @@ -222,6 +227,17 @@ public class SetProcessor implements Com ss.out.println(varname + " is undefined as a hive variable"); return new CommandProcessorResponse(1); } + } else if (varname.indexOf(METACONF_PREFIX) == 0) { + String var = varname.substring(METACONF_PREFIX.length()); + Hive hive = Hive.get(ss.getConf()); + String value = hive.getMetaConf(var); + if (value != null) { + ss.out.println(METACONF_PREFIX + var + "=" + value); + return createProcessorSuccessResponse(); + } else { + ss.out.println(varname + " is undefined as a hive meta variable"); + return new CommandProcessorResponse(1); + } } else { dumpOption(varname); return createProcessorSuccessResponse(); @@ -263,10 +279,12 @@ public class SetProcessor implements Com return new CommandProcessorResponse(0); } return executeSetVariable(part[0],part[1]); - } else { + } + try { return getVariable(nwcmd); + } catch (Exception e) { + return new CommandProcessorResponse(1, e.getMessage(), 
"42000", e); } - } // create a Schema object containing the give column Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java?rev=1619936&r1=1619935&r2=1619936&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java Fri Aug 22 21:36:47 2014 @@ -148,22 +148,19 @@ public class StorageBasedAuthorizationPr public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { - // Table path can be null in the case of a new create table - in this case, - // we try to determine what the path would be after the create table is issued. - Path path = null; + // To create/drop/alter a table, the owner should have WRITE permission on the database directory + authorize(hive_db.getDatabase(table.getDbName()), readRequiredPriv, writeRequiredPriv); + + // If the user has specified a location - external or not, check if the user has the try { initWh(); String location = table.getTTable().getSd().getLocation(); - if (location == null || location.isEmpty()) { - path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName()); - } else { - path = new Path(location); + if (location != null && !location.isEmpty()) { + authorize(new Path(location), readRequiredPriv, writeRequiredPriv); } } catch (MetaException ex) { throw hiveException(ex); } - - authorize(path, readRequiredPriv, writeRequiredPriv); } @Override