Author: cws Date: Fri Jan 20 21:32:24 2012 New Revision: 1234150 URL: http://svn.apache.org/viewvc?rev=1234150&view=rev Log: HIVE-2203. Extend concat_ws() UDF to support arrays of strings (Zhenxiao Luo via cws)
Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java?rev=1234150&r1=1234149&r2=1234150&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java Fri Jan 20 21:32:24 2012 @@ -24,24 +24,28 @@ import org.apache.hadoop.hive.ql.exec.UD import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; /** * Generic UDF for string function - * <code>CONCAT_WS(sep,str1,str2,str3,...)</code>. This mimics the function from + * <code>CONCAT_WS(sep, [string | array(string)]+)<code>. + * This mimics the function from * MySQL http://dev.mysql.com/doc/refman/5.0/en/string-functions.html# * function_concat-ws - * + * * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF */ -@Description(name = "concat_ws", value = "_FUNC_(separator, str1, str2, ...) - " +@Description(name = "concat_ws", + value = "_FUNC_(separator, [string | array(string)]+) - " + "returns the concatenation of the strings separated by the separator.", extended = "Example:\n" - + " > SELECT _FUNC_('ce', 'fa', 'book') FROM src LIMIT 1;\n" - + " 'facebook'") + + " > SELECT _FUNC_('.', 'www', array('facebook', 'com')) FROM src LIMIT 1;\n" + + " 'www.facebook.com'") public class GenericUDFConcatWS extends GenericUDF { private ObjectInspector[] argumentOIs; @@ -49,15 +53,28 @@ public class GenericUDFConcatWS extends public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length < 2) { throw new UDFArgumentLengthException( - "The function CONCAT_WS(separator,str1,str2,str3,...) needs at least two arguments."); + "The function CONCAT_WS(separator,[string | array(string)]+) " + + "needs at least two arguments."); } + // check if argument is a string or an array of strings for (int i = 0; i < arguments.length; i++) { - if (arguments[i].getTypeName() != Constants.STRING_TYPE_NAME - && arguments[i].getTypeName() != Constants.VOID_TYPE_NAME) { - throw new UDFArgumentTypeException(i, "Argument " + (i + 1) + switch(arguments[i].getCategory()) { + case LIST: + if (((ListObjectInspector)arguments[i]).getListElementObjectInspector() + .getTypeName().equals(Constants.STRING_TYPE_NAME) + || ((ListObjectInspector)arguments[i]).getListElementObjectInspector() + .getTypeName().equals(Constants.VOID_TYPE_NAME)) + break; + case PRIMITIVE: + if (arguments[i].getTypeName().equals(Constants.STRING_TYPE_NAME) + || arguments[i].getTypeName().equals(Constants.VOID_TYPE_NAME)) + break; + default: + throw new UDFArgumentTypeException(i, "Argument " + (i + 1) + " of function CONCAT_WS must be \"" + Constants.STRING_TYPE_NAME - + "\", but \"" + arguments[i].getTypeName() + "\" was found."); + + " or " + Constants.LIST_TYPE_NAME + "<" + Constants.STRING_TYPE_NAME + + ">\", but \"" + arguments[i].getTypeName() + "\" was found."); } } @@ -84,8 +101,22 @@ public class GenericUDFConcatWS extends } else { sb.append(separator); } - sb.append(((StringObjectInspector) argumentOIs[i]) + if (argumentOIs[i].getCategory().equals(Category.LIST)) { + Object strArray = arguments[i].get(); + ListObjectInspector strArrayOI = (ListObjectInspector) argumentOIs[i]; + boolean strArrayFirst = true; + for (int j = 0; j < strArrayOI.getListLength(strArray); j++) { + if (strArrayFirst) { + strArrayFirst = false; + } else { + sb.append(separator); + } + sb.append(strArrayOI.getListElement(strArray, j)); + } + } else { + sb.append(((StringObjectInspector) argumentOIs[i]) .getPrimitiveJavaObject(arguments[i].get())); + } } } Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q?rev=1234150&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q (added) +++ hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q Fri Jan 20 21:32:24 2012 @@ -0,0 +1,2 @@ +-- invalid argument number +SELECT concat_ws('-') FROM src LIMIT 1; Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q?rev=1234150&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q (added) +++ hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q Fri Jan 20 21:32:24 2012 @@ -0,0 +1,2 @@ +-- invalid argument type +SELECT concat_ws('[]', array(100, 200, 50)) FROM src LIMIT 1; Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q?rev=1234150&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q (added) +++ hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q Fri Jan 20 21:32:24 2012 @@ -0,0 +1,2 @@ +-- invalid argument type +SELECT concat_ws(1234, array('www', 'facebook', 'com')) FROM src LIMIT 1; Modified: hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q?rev=1234150&r1=1234149&r2=1234150&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q Fri Jan 20 21:32:24 2012 @@ -16,3 +16,28 @@ SELECT concat_ws(dest1.c1, dest1.c2, des concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3), concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1; +-- evalutes function for array of strings +EXPLAIN +SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), + concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), + concat_ws('F', 'www', array('face', 'book', 'com', '1234')), + concat_ws('_', array('www', 'face'), array('book', 'com', '1234')), + concat_ws('**', 'www', array('face'), array('book', 'com', '1234')), + concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), + concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1; + +SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), + concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), + concat_ws('F', 'www', array('face', 'book', 'com', '1234')), + concat_ws('_', array('www', 'face'), array('book', 'com', '1234')), + concat_ws('**', 'www', array('face'), array('book', 'com', '1234')), + concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), + concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1; + +SELECT concat_ws(NULL, array('www', 'face', 'book', 'com'), '1234'), + concat_ws(NULL, 'www', array('face', 'book', 'com'), '1234'), + concat_ws(NULL, 'www', array('face', 'book', 'com', '1234')), + concat_ws(NULL, array('www', 'face'), array('book', 'com', '1234')), + concat_ws(NULL, 'www', array('face'), array('book', 'com', '1234')), + concat_ws(NULL, array('www'), 'face', array('book', 'com', '1234')), + concat_ws(NULL, array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1; Added: hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out?rev=1234150&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out (added) +++ hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out Fri Jan 20 21:32:24 2012 @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 2:7 Arguments length mismatch ''-'': The function CONCAT_WS(separator,[string | array(string)]+) needs at least two arguments. Added: hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out?rev=1234150&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out (added) +++ hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out Fri Jan 20 21:32:24 2012 @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 2:23 Argument type mismatch '50': Argument 2 of function CONCAT_WS must be "string or array<string>", but "array<int>" was found. Added: hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out?rev=1234150&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out (added) +++ hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out Fri Jan 20 21:32:24 2012 @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 2:17 Argument type mismatch '1234': Argument 1 of function CONCAT_WS must be "string or array<string>", but "int" was found. Modified: hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out?rev=1234150&r1=1234149&r2=1234150&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out Fri Jan 20 21:32:24 2012 @@ -2,15 +2,15 @@ PREHOOK: query: DESCRIBE FUNCTION concat PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION concat_ws POSTHOOK: type: DESCFUNCTION -concat_ws(separator, str1, str2, ...) - returns the concatenation of the strings separated by the separator. +concat_ws(separator, [string | array(string)]+) - returns the concatenation of the strings separated by the separator. PREHOOK: query: DESCRIBE FUNCTION EXTENDED concat_ws PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED concat_ws POSTHOOK: type: DESCFUNCTION -concat_ws(separator, str1, str2, ...) - returns the concatenation of the strings separated by the separator. +concat_ws(separator, [string | array(string)]+) - returns the concatenation of the strings separated by the separator. Example: - > SELECT concat_ws('ce', 'fa', 'book') FROM src LIMIT 1; - 'facebook' + > SELECT concat_ws('.', 'www', array('facebook', 'com')) FROM src LIMIT 1; + 'www.facebook.com' PREHOOK: query: CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING) @@ -97,3 +97,118 @@ POSTHOOK: Lineage: dest1.c1 SIMPLE [] POSTHOOK: Lineage: dest1.c2 SIMPLE [] POSTHOOK: Lineage: dest1.c3 SIMPLE [] xyzabc8675309 abc,xyz,8675309 NULL abc**8675309 +PREHOOK: query: -- evalutes function for array of strings +EXPLAIN +SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), + concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), + concat_ws('F', 'www', array('face', 'book', 'com', '1234')), + concat_ws('_', array('www', 'face'), array('book', 'com', '1234')), + concat_ws('**', 'www', array('face'), array('book', 'com', '1234')), + concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), + concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- evalutes function for array of strings +EXPLAIN +SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), + concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), + concat_ws('F', 'www', array('face', 'book', 'com', '1234')), + concat_ws('_', array('www', 'face'), array('book', 'com', '1234')), + concat_ws('**', 'www', array('face'), array('book', 'com', '1234')), + concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), + concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest1.c2 SIMPLE [] +POSTHOOK: Lineage: dest1.c3 SIMPLE [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION concat_ws '.' (TOK_FUNCTION array 'www' 'face' 'book' 'com') '1234')) (TOK_SELEXPR (TOK_FUNCTION concat_ws '-' 'www' (TOK_FUNCTION array 'face' 'book' 'com') '1234')) (TOK_SELEXPR (TOK_FUNCTION concat_ws 'F' 'www' (TOK_FUNCTION array 'face' 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '_' (TOK_FUNCTION array 'www' 'face') (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '**' 'www' (TOK_FUNCTION array 'face') (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '[]' (TOK_FUNCTION array 'www') 'face' (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws 'AAA' (TOK_FUNCTION array 'www') (TOK_FUNCTION array 'face' 'book' 'com') '1234'))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + dest1 + TableScan + alias: dest1 + Select Operator + expressions: + expr: concat_ws('.', array('www','face','book','com'), '1234') + type: string + expr: concat_ws('-', 'www', array('face','book','com'), '1234') + type: string + expr: concat_ws('F', 'www', array('face','book','com','1234')) + type: string + expr: concat_ws('_', array('www','face'), array('book','com','1234')) + type: string + expr: concat_ws('**', 'www', array('face'), array('book','com','1234')) + type: string + expr: concat_ws('[]', array('www'), 'face', array('book','com','1234')) + type: string + expr: concat_ws('AAA', array('www'), array('face','book','com'), '1234') + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), + concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), + concat_ws('F', 'www', array('face', 'book', 'com', '1234')), + concat_ws('_', array('www', 'face'), array('book', 'com', '1234')), + concat_ws('**', 'www', array('face'), array('book', 'com', '1234')), + concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), + concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), + concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), + concat_ws('F', 'www', array('face', 'book', 'com', '1234')), + concat_ws('_', array('www', 'face'), array('book', 'com', '1234')), + concat_ws('**', 'www', array('face'), array('book', 'com', '1234')), + concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), + concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest1.c2 SIMPLE [] +POSTHOOK: Lineage: dest1.c3 SIMPLE [] +www.face.book.com.1234 www-face-book-com-1234 wwwFfaceFbookFcomF1234 www_face_book_com_1234 www**face**book**com**1234 www[]face[]book[]com[]1234 wwwAAAfaceAAAbookAAAcomAAA1234 +PREHOOK: query: SELECT concat_ws(NULL, array('www', 'face', 'book', 'com'), '1234'), + concat_ws(NULL, 'www', array('face', 'book', 'com'), '1234'), + concat_ws(NULL, 'www', array('face', 'book', 'com', '1234')), + concat_ws(NULL, array('www', 'face'), array('book', 'com', '1234')), + concat_ws(NULL, 'www', array('face'), array('book', 'com', '1234')), + concat_ws(NULL, array('www'), 'face', array('book', 'com', '1234')), + concat_ws(NULL, array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT concat_ws(NULL, array('www', 'face', 'book', 'com'), '1234'), + concat_ws(NULL, 'www', array('face', 'book', 'com'), '1234'), + concat_ws(NULL, 'www', array('face', 'book', 'com', '1234')), + concat_ws(NULL, array('www', 'face'), array('book', 'com', '1234')), + concat_ws(NULL, 'www', array('face'), array('book', 'com', '1234')), + concat_ws(NULL, array('www'), 'face', array('book', 'com', '1234')), + concat_ws(NULL, array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest1.c2 SIMPLE [] +POSTHOOK: Lineage: dest1.c3 SIMPLE [] +NULL NULL NULL NULL NULL NULL NULL