Author: cws
Date: Fri Jan 20 21:32:24 2012
New Revision: 1234150

URL: http://svn.apache.org/viewvc?rev=1234150&view=rev
Log:
HIVE-2203. Extend concat_ws() UDF to support arrays of strings (Zhenxiao Luo 
via cws)

Added:
    hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q
    hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q
    hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q
    hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out
    hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out
    hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out
Modified:
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
    hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q
    hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java?rev=1234150&r1=1234149&r2=1234150&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
 Fri Jan 20 21:32:24 2012
@@ -24,24 +24,28 @@ import org.apache.hadoop.hive.ql.exec.UD
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.io.Text;
 
 /**
  * Generic UDF for string function
- * <code>CONCAT_WS(sep,str1,str2,str3,...)</code>. This mimics the function 
from
+ * <code>CONCAT_WS(sep, [string | array(string)]+)<code>.
+ * This mimics the function from
  * MySQL http://dev.mysql.com/doc/refman/5.0/en/string-functions.html#
  * function_concat-ws
- * 
+ *
  * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF
  */
-@Description(name = "concat_ws", value = "_FUNC_(separator, str1, str2, ...) - 
"
+@Description(name = "concat_ws",
+    value = "_FUNC_(separator, [string | array(string)]+) - "
     + "returns the concatenation of the strings separated by the separator.",
     extended = "Example:\n"
-    + "  > SELECT _FUNC_('ce', 'fa', 'book') FROM src LIMIT 1;\n"
-    + "  'facebook'")
+    + "  > SELECT _FUNC_('.', 'www', array('facebook', 'com')) FROM src LIMIT 
1;\n"
+    + "  'www.facebook.com'")
 public class GenericUDFConcatWS extends GenericUDF {
   private ObjectInspector[] argumentOIs;
 
@@ -49,15 +53,28 @@ public class GenericUDFConcatWS extends 
   public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
     if (arguments.length < 2) {
       throw new UDFArgumentLengthException(
-          "The function CONCAT_WS(separator,str1,str2,str3,...) needs at least 
two arguments.");
+          "The function CONCAT_WS(separator,[string | array(string)]+) "
+            + "needs at least two arguments.");
     }
 
+    // check if argument is a string or an array of strings
     for (int i = 0; i < arguments.length; i++) {
-      if (arguments[i].getTypeName() != Constants.STRING_TYPE_NAME
-          && arguments[i].getTypeName() != Constants.VOID_TYPE_NAME) {
-        throw new UDFArgumentTypeException(i, "Argument " + (i + 1)
+      switch(arguments[i].getCategory()) {
+        case LIST:
+          if 
(((ListObjectInspector)arguments[i]).getListElementObjectInspector()
+            .getTypeName().equals(Constants.STRING_TYPE_NAME)
+            || 
((ListObjectInspector)arguments[i]).getListElementObjectInspector()
+            .getTypeName().equals(Constants.VOID_TYPE_NAME))
+          break;
+        case PRIMITIVE:
+          if (arguments[i].getTypeName().equals(Constants.STRING_TYPE_NAME)
+            || arguments[i].getTypeName().equals(Constants.VOID_TYPE_NAME))
+          break;
+        default:
+          throw new UDFArgumentTypeException(i, "Argument " + (i + 1)
             + " of function CONCAT_WS must be \"" + Constants.STRING_TYPE_NAME
-            + "\", but \"" + arguments[i].getTypeName() + "\" was found.");
+            + " or " + Constants.LIST_TYPE_NAME + "<" + 
Constants.STRING_TYPE_NAME
+            + ">\", but \"" + arguments[i].getTypeName() + "\" was found.");
       }
     }
 
@@ -84,8 +101,22 @@ public class GenericUDFConcatWS extends 
         } else {
           sb.append(separator);
         }
-        sb.append(((StringObjectInspector) argumentOIs[i])
+        if (argumentOIs[i].getCategory().equals(Category.LIST)) {
+          Object strArray = arguments[i].get();
+          ListObjectInspector strArrayOI = (ListObjectInspector) 
argumentOIs[i];
+          boolean strArrayFirst = true;
+          for (int j = 0; j < strArrayOI.getListLength(strArray); j++) {
+            if (strArrayFirst) {
+              strArrayFirst = false;
+            } else {
+              sb.append(separator);
+            }
+            sb.append(strArrayOI.getListElement(strArray, j));
+          }
+        } else {
+          sb.append(((StringObjectInspector) argumentOIs[i])
             .getPrimitiveJavaObject(arguments[i].get()));
+        }
       }
     }
 

Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q?rev=1234150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong1.q Fri 
Jan 20 21:32:24 2012
@@ -0,0 +1,2 @@
+-- invalid argument number
+SELECT concat_ws('-') FROM src LIMIT 1;

Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q?rev=1234150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong2.q Fri 
Jan 20 21:32:24 2012
@@ -0,0 +1,2 @@
+-- invalid argument type
+SELECT concat_ws('[]', array(100, 200, 50)) FROM src LIMIT 1;

Added: hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q?rev=1234150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/udf_concat_ws_wrong3.q Fri 
Jan 20 21:32:24 2012
@@ -0,0 +1,2 @@
+-- invalid argument type
+SELECT concat_ws(1234, array('www', 'facebook', 'com')) FROM src LIMIT 1;

Modified: hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q?rev=1234150&r1=1234149&r2=1234150&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q Fri Jan 20 
21:32:24 2012
@@ -16,3 +16,28 @@ SELECT concat_ws(dest1.c1, dest1.c2, des
        concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
        concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1;
 
+-- evalutes function for array of strings
+EXPLAIN
+SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'),
+       concat_ws('-', 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws('F', 'www', array('face', 'book', 'com', '1234')),
+       concat_ws('_', array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws('**', 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1;
+
+SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'),
+       concat_ws('-', 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws('F', 'www', array('face', 'book', 'com', '1234')),
+       concat_ws('_', array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws('**', 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1;
+
+SELECT concat_ws(NULL, array('www', 'face', 'book', 'com'), '1234'),
+       concat_ws(NULL, 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws(NULL, 'www', array('face', 'book', 'com', '1234')),
+       concat_ws(NULL, array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws(NULL, 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws(NULL, array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws(NULL, array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1;

Added: hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out?rev=1234150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong1.q.out 
Fri Jan 20 21:32:24 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:7 Arguments length mismatch ''-'': 
The function CONCAT_WS(separator,[string | array(string)]+) needs at least two 
arguments.

Added: hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out?rev=1234150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong2.q.out 
Fri Jan 20 21:32:24 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:23 Argument type mismatch '50': 
Argument 2 of function CONCAT_WS must be "string or array<string>", but 
"array<int>" was found.

Added: hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out?rev=1234150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientnegative/udf_concat_ws_wrong3.q.out 
Fri Jan 20 21:32:24 2012
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: Line 2:17 Argument type mismatch '1234': 
Argument 1 of function CONCAT_WS must be "string or array<string>", but "int" 
was found.

Modified: hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out?rev=1234150&r1=1234149&r2=1234150&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out Fri Jan 
20 21:32:24 2012
@@ -2,15 +2,15 @@ PREHOOK: query: DESCRIBE FUNCTION concat
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION concat_ws
 POSTHOOK: type: DESCFUNCTION
-concat_ws(separator, str1, str2, ...) - returns the concatenation of the 
strings separated by the separator.
+concat_ws(separator, [string | array(string)]+) - returns the concatenation of 
the strings separated by the separator.
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED concat_ws
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED concat_ws
 POSTHOOK: type: DESCFUNCTION
-concat_ws(separator, str1, str2, ...) - returns the concatenation of the 
strings separated by the separator.
+concat_ws(separator, [string | array(string)]+) - returns the concatenation of 
the strings separated by the separator.
 Example:
-  > SELECT concat_ws('ce', 'fa', 'book') FROM src LIMIT 1;
-  'facebook'
+  > SELECT concat_ws('.', 'www', array('facebook', 'com')) FROM src LIMIT 1;
+  'www.facebook.com'
 PREHOOK: query: CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING)
 PREHOOK: type: CREATETABLE
 POSTHOOK: query: CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING)
@@ -97,3 +97,118 @@ POSTHOOK: Lineage: dest1.c1 SIMPLE []
 POSTHOOK: Lineage: dest1.c2 SIMPLE []
 POSTHOOK: Lineage: dest1.c3 SIMPLE []
 xyzabc8675309  abc,xyz,8675309 NULL    abc**8675309
+PREHOOK: query: -- evalutes function for array of strings
+EXPLAIN
+SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'),
+       concat_ws('-', 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws('F', 'www', array('face', 'book', 'com', '1234')),
+       concat_ws('_', array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws('**', 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- evalutes function for array of strings
+EXPLAIN
+SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'),
+       concat_ws('-', 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws('F', 'www', array('face', 'book', 'com', '1234')),
+       concat_ws('_', array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws('**', 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest1.c2 SIMPLE []
+POSTHOOK: Lineage: dest1.c3 SIMPLE []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT 
(TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION 
concat_ws '.' (TOK_FUNCTION array 'www' 'face' 'book' 'com') '1234')) 
(TOK_SELEXPR (TOK_FUNCTION concat_ws '-' 'www' (TOK_FUNCTION array 'face' 
'book' 'com') '1234')) (TOK_SELEXPR (TOK_FUNCTION concat_ws 'F' 'www' 
(TOK_FUNCTION array 'face' 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION 
concat_ws '_' (TOK_FUNCTION array 'www' 'face') (TOK_FUNCTION array 'book' 
'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '**' 'www' (TOK_FUNCTION 
array 'face') (TOK_FUNCTION array 'book' 'com' '1234'))) (TOK_SELEXPR 
(TOK_FUNCTION concat_ws '[]' (TOK_FUNCTION array 'www') 'face' (TOK_FUNCTION 
array 'book' 'com' '1234'))) (TOK_SELEXPR (TOK_FUNCTION concat_ws 'AAA' 
(TOK_FUNCTION array 'www') (TOK_FUNCTION array 'face' 'book' 'com') '1234'))) 
(TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dest1 
+          TableScan
+            alias: dest1
+            Select Operator
+              expressions:
+                    expr: concat_ws('.', array('www','face','book','com'), 
'1234')
+                    type: string
+                    expr: concat_ws('-', 'www', array('face','book','com'), 
'1234')
+                    type: string
+                    expr: concat_ws('F', 'www', 
array('face','book','com','1234'))
+                    type: string
+                    expr: concat_ws('_', array('www','face'), 
array('book','com','1234'))
+                    type: string
+                    expr: concat_ws('**', 'www', array('face'), 
array('book','com','1234'))
+                    type: string
+                    expr: concat_ws('[]', array('www'), 'face', 
array('book','com','1234'))
+                    type: string
+                    expr: concat_ws('AAA', array('www'), 
array('face','book','com'), '1234')
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT concat_ws('.', array('www', 'face', 'book', 'com'), 
'1234'),
+       concat_ws('-', 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws('F', 'www', array('face', 'book', 'com', '1234')),
+       concat_ws('_', array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws('**', 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT concat_ws('.', array('www', 'face', 'book', 'com'), 
'1234'),
+       concat_ws('-', 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws('F', 'www', array('face', 'book', 'com', '1234')),
+       concat_ws('_', array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws('**', 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest1.c2 SIMPLE []
+POSTHOOK: Lineage: dest1.c3 SIMPLE []
+www.face.book.com.1234 www-face-book-com-1234  wwwFfaceFbookFcomF1234  
www_face_book_com_1234  www**face**book**com**1234      
www[]face[]book[]com[]1234      wwwAAAfaceAAAbookAAAcomAAA1234
+PREHOOK: query: SELECT concat_ws(NULL, array('www', 'face', 'book', 'com'), 
'1234'),
+       concat_ws(NULL, 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws(NULL, 'www', array('face', 'book', 'com', '1234')),
+       concat_ws(NULL, array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws(NULL, 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws(NULL, array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws(NULL, array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT concat_ws(NULL, array('www', 'face', 'book', 'com'), 
'1234'),
+       concat_ws(NULL, 'www', array('face', 'book', 'com'), '1234'),
+       concat_ws(NULL, 'www', array('face', 'book', 'com', '1234')),
+       concat_ws(NULL, array('www', 'face'), array('book', 'com', '1234')),
+       concat_ws(NULL, 'www', array('face'), array('book', 'com', '1234')),
+       concat_ws(NULL, array('www'), 'face', array('book', 'com', '1234')),
+       concat_ws(NULL, array('www'), array('face', 'book', 'com'), '1234') 
FROM dest1 LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest1.c2 SIMPLE []
+POSTHOOK: Lineage: dest1.c3 SIMPLE []
+NULL   NULL    NULL    NULL    NULL    NULL    NULL


Reply via email to