Author: jdere
Date: Thu Mar 26 18:12:50 2015
New Revision: 1669394

URL: http://svn.apache.org/r1669394
Log:
HIVE-1575: get_json_object does not support JSON array at the root level (Alex 
Pivovarov via Jason Dere)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFJson.java
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
    hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q
    hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java?rev=1669394&r1=1669393&r2=1669394&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java Thu Mar 
26 18:12:50 2015
@@ -69,6 +69,7 @@ public class UDFJson extends UDF {
   }
   private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
   private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
+  private static final JavaType LIST_TYPE = TypeFactory.fromClass(List.class);
 
   // An LRU cache using a linked hash map
   static class HashCache<K, V> extends LinkedHashMap<K, V> {
@@ -124,10 +125,24 @@ public class UDFJson extends UDF {
   public Text evaluate(String jsonString, String pathString) {
 
     if (jsonString == null || jsonString.isEmpty() || pathString == null
-        || pathString.isEmpty()) {
+        || pathString.isEmpty() || pathString.charAt(0) != '$') {
       return null;
     }
 
+    int pathExprStart = 1;
+    boolean isRootArray = false;
+
+    if (pathString.length() > 1) {
+      if (pathString.charAt(1) == '[') {
+        pathExprStart = 0;
+        isRootArray = true;
+      } else if (pathString.charAt(1) == '.') {
+        isRootArray = pathString.length() > 2 && pathString.charAt(2) == '[';
+      } else {
+        return null;
+      }
+    }
+
     // Cache pathExpr
     String[] pathExpr = pathExprCache.get(pathString);
     if (pathExpr == null) {
@@ -135,24 +150,22 @@ public class UDFJson extends UDF {
       pathExprCache.put(pathString, pathExpr);
     }
 
-    if (!pathExpr[0].equalsIgnoreCase("$")) {
-      return null;
-    }
     // Cache extractObject
     Object extractObject = extractObjectCache.get(jsonString);
     if (extractObject == null) {
+      JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE;
       try {
-        extractObject = MAPPER.readValue(jsonString, MAP_TYPE);
+        extractObject = MAPPER.readValue(jsonString, javaType);
       } catch (Exception e) {
         return null;
       }
       extractObjectCache.put(jsonString, extractObject);
     }
-    for (int i = 1; i < pathExpr.length; i++) {
+    for (int i = pathExprStart; i < pathExpr.length; i++) {
       if (extractObject == null) {
           return null;
       }
-      extractObject = extract(extractObject, pathExpr[i]);
+      extractObject = extract(extractObject, pathExpr[i], i == pathExprStart 
&& isRootArray);
     }
     if (extractObject instanceof Map || extractObject instanceof List) {
       try {
@@ -168,36 +181,37 @@ public class UDFJson extends UDF {
     return result;
   }
 
-  private Object extract(Object json, String path) {
-
-    // Cache patternkey.matcher(path).matches()
-    Matcher mKey = null;
-    Boolean mKeyMatches = mKeyMatchesCache.get(path);
-    if (mKeyMatches == null) {
-      mKey = patternKey.matcher(path);
-      mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
-      mKeyMatchesCache.put(path, mKeyMatches);
-    }
-    if (!mKeyMatches.booleanValue()) {
-      return null;
-    }
-
-    // Cache mkey.group(1)
-    String mKeyGroup1 = mKeyGroup1Cache.get(path);
-    if (mKeyGroup1 == null) {
-      if (mKey == null) {
+  private Object extract(Object json, String path, boolean skipMapProc) {
+    // skip MAP processing for the first path element if root is array
+    if (!skipMapProc) {
+      // Cache patternkey.matcher(path).matches()
+      Matcher mKey = null;
+      Boolean mKeyMatches = mKeyMatchesCache.get(path);
+      if (mKeyMatches == null) {
         mKey = patternKey.matcher(path);
         mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
         mKeyMatchesCache.put(path, mKeyMatches);
-        if (!mKeyMatches.booleanValue()) {
-          return null;
+      }
+      if (!mKeyMatches.booleanValue()) {
+        return null;
+      }
+
+      // Cache mkey.group(1)
+      String mKeyGroup1 = mKeyGroup1Cache.get(path);
+      if (mKeyGroup1 == null) {
+        if (mKey == null) {
+          mKey = patternKey.matcher(path);
+          mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
+          mKeyMatchesCache.put(path, mKeyMatches);
+          if (!mKeyMatches.booleanValue()) {
+            return null;
+          }
         }
+        mKeyGroup1 = mKey.group(1);
+        mKeyGroup1Cache.put(path, mKeyGroup1);
       }
-      mKeyGroup1 = mKey.group(1);
-      mKeyGroup1Cache.put(path, mKeyGroup1);
+      json = extract_json_withkey(json, mKeyGroup1);
     }
-    json = extract_json_withkey(json, mKeyGroup1);
-
     // Cache indexList
     ArrayList<String> indexList = indexListCache.get(path);
     if (indexList == null) {

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFJson.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFJson.java?rev=1669394&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFJson.java 
(added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFJson.java Thu 
Mar 26 18:12:50 2015
@@ -0,0 +1,104 @@
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestUDFJson {
+
+  @Test
+  public void testJson() throws HiveException {
+
+    String book0 = "{\"author\":\"Nigel Rees\",\"title\":\"Sayings of the 
Century\""
+        + ",\"category\":\"reference\",\"price\":8.95}";
+    String backet0 = "[1,2,{\"b\":\"y\",\"a\":\"x\"}]";
+    String backet = "[" + backet0 + ",[3,4],[5,6]]";
+    String backetFlat = backet0.substring(0, backet0.length() - 1) + 
",3,4,5,6]";
+
+    String book = "[" + book0 + ",{\"author\":\"Herman 
Melville\",\"title\":\"Moby Dick\","
+        + "\"category\":\"fiction\",\"price\":8.99"
+        + ",\"isbn\":\"0-553-21311-3\"},{\"author\":\"J. R. R. Tolkien\""
+        + ",\"title\":\"The Lord of the Rings\",\"category\":\"fiction\""
+        + 
",\"reader\":[{\"age\":25,\"name\":\"bob\"},{\"age\":26,\"name\":\"jack\"}]"
+        + ",\"price\":22.99,\"isbn\":\"0-395-19395-8\"}]";
+
+    String json = "{\"store\":{\"fruit\":[{\"weight\":8,\"type\":\"apple\"},"
+        + "{\"weight\":9,\"type\":\"pear\"}],\"basket\":" + backet + 
",\"book\":" + book
+        + ",\"bicycle\":{\"price\":19.95,\"color\":\"red\"}}"
+        + ",\"email\":\"amy@only_for_json_udf_test.net\""
+        + ",\"owner\":\"amy\",\"zip code\":\"94025\",\"fb:testid\":\"1234\"}";
+
+    UDFJson udf = new UDFJson();
+
+    runTest(json, "$.owner", "amy", udf);
+    runTest(json, "$.store.bicycle", "{\"price\":19.95,\"color\":\"red\"}", 
udf);
+    runTest(json, "$.store.book", book, udf);
+    runTest(json, "$.store.book[0]", book0, udf);
+    runTest(json, "$.store.book[*]", book, udf);
+    runTest(json, "$.store.book[0].category", "reference", udf);
+    runTest(json, "$.store.book[*].category", 
"[\"reference\",\"fiction\",\"fiction\"]", udf);
+    runTest(json, "$.store.book[*].reader[0].age", "25", udf);
+    runTest(json, "$.store.book[*].reader[*].age", "[25,26]", udf);
+    runTest(json, "$.store.basket[0][1]", "2", udf);
+    runTest(json, "$.store.basket[*]", backet, udf);
+    runTest(json, "$.store.basket[*][0]", "[1,3,5]", udf);
+    runTest(json, "$.store.basket[0][*]", backet0, udf);
+    runTest(json, "$.store.basket[*][*]", backetFlat, udf);
+    runTest(json, "$.store.basket[0][2].b", "y", udf);
+    runTest(json, "$.store.basket[0][*].b", "[\"y\"]", udf);
+    runTest(json, "$.non_exist_key", null, udf);
+    runTest(json, "$.store.book[10]", null, udf);
+    runTest(json, "$.store.book[0].non_exist_key", null, udf);
+    runTest(json, "$.store.basket[*].non_exist_key", null, udf);
+    runTest(json, "$.store.basket[0][*].non_exist_key", null, udf);
+    runTest(json, "$.store.basket[*][*].non_exist_key", null, udf);
+    runTest(json, "$.zip code", "94025", udf);
+    runTest(json, "$.fb:testid", "1234", udf);
+    runTest("{\"a\":\"b\nc\"}", "$.a", "b\nc", udf);
+  }
+
+  @Test
+  public void testRootArray() throws HiveException {
+    UDFJson udf = new UDFJson();
+
+    runTest("[1,2,3]", "$[0]", "1", udf);
+    runTest("[1,2,3]", "$.[0]", "1", udf);
+    runTest("[1,2,3]", "$.[1]", "2", udf);
+    runTest("[1,2,3]", "$[1]", "2", udf);
+
+    runTest("[1,2,3]", "$[3]", null, udf);
+    runTest("[1,2,3]", "$.[*]", "[1,2,3]", udf);
+    runTest("[1,2,3]", "$[*]", "[1,2,3]", udf);
+    runTest("[1,2,3]", "$", "[1,2,3]", udf);
+    runTest("[{\"k1\":\"v1\"},{\"k2\":\"v2\"},{\"k3\":\"v3\"}]", "$[2]", 
"{\"k3\":\"v3\"}", udf);
+    runTest("[{\"k1\":\"v1\"},{\"k2\":\"v2\"},{\"k3\":\"v3\"}]", "$[2].k3", 
"v3", udf);
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1[0].k11[1]", "2", udf);
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1[0].k11", "[1,2,3]", 
udf);
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1[0]", 
"{\"k11\":[1,2,3]}", udf);
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0].k1", 
"[{\"k11\":[1,2,3]}]", udf);
+    runTest("[{\"k1\":[{\"k11\":[1,2,3]}]}]", "$[0]", 
"{\"k1\":[{\"k11\":[1,2,3]}]}", udf);
+    runTest("[[1,2,3],[4,5,6],[7,8,9]]", "$[1]", "[4,5,6]", udf);
+    runTest("[[1,2,3],[4,5,6],[7,8,9]]", "$[1][0]", "4", udf);
+    runTest("[\"a\",\"b\"]", "$[1]", "b", udf);
+    runTest("[[\"a\",\"b\"]]", "$[0][1]", "b", udf);
+
+    runTest("[1,2,3]", "[0]", null, udf);
+    runTest("[1,2,3]", "$0", null, udf);
+    runTest("[1,2,3]", "0", null, udf);
+    runTest("[1,2,3]", "$.", null, udf);
+
+    runTest("[1,2,3]", "$", "[1,2,3]", udf);
+    runTest("{\"a\":4}", "$", "{\"a\":4}", udf);
+  }
+
+  protected void runTest(String json, String path, String exp, UDFJson udf) {
+    Text res = udf.evaluate(json, path);
+    if (exp == null) {
+      Assert.assertNull(res);
+    } else {
+      Assert.assertNotNull(res);
+      Assert.assertEquals("get_json_object test", exp, res.toString());
+    }
+  }
+}

Modified: hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q?rev=1669394&r1=1669393&r2=1669394&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q 
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q Thu Mar 
26 18:12:50 2015
@@ -42,3 +42,24 @@ INSERT OVERWRITE TABLE dest2 SELECT '{"a
 SELECT * FROM dest2;
 
 SELECT get_json_object(c1, '$.a') FROM dest2;
+
+--root is array
+SELECT
+get_json_object('[1,2,3]', '$[0]'),
+get_json_object('[1,2,3]', '$.[0]'),
+get_json_object('[1,2,3]', '$.[1]'),
+get_json_object('[1,2,3]', '$[1]'),
+get_json_object('[1,2,3]', '$[2]'),
+get_json_object('[1,2,3]', '$[*]'),
+get_json_object('[1,2,3]', '$'),
+get_json_object('[{"k1":"v1"},{"k2":"v2"},{"k3":"v3"}]', '$[2]'),
+get_json_object('[{"k1":"v1"},{"k2":"v2"},{"k3":"v3"}]', '$[2].k3'),
+get_json_object('[[1,2,3],[4,5,6],[7,8,9]]', '$[1]'),
+get_json_object('[[1,2,3],[4,5,6],[7,8,9]]', '$[1][0]'),
+get_json_object('[{"k1":[{"k11":[1,2,3]}]}]', '$[0].k1[0].k11[1]');
+
+--null
+SELECT
+get_json_object('[1,2,3]', '[2]'),
+get_json_object('[1,2,3]', '$0'),
+get_json_object('[1,2,3]', '$[3]');
\ No newline at end of file

Modified: 
hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out?rev=1669394&r1=1669393&r2=1669394&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out Thu 
Mar 26 18:12:50 2015
@@ -191,3 +191,55 @@ POSTHOOK: Input: default@dest2
 #### A masked pattern was here ####
 b
 c
+PREHOOK: query: --root is array
+SELECT
+get_json_object('[1,2,3]', '$[0]'),
+get_json_object('[1,2,3]', '$.[0]'),
+get_json_object('[1,2,3]', '$.[1]'),
+get_json_object('[1,2,3]', '$[1]'),
+get_json_object('[1,2,3]', '$[2]'),
+get_json_object('[1,2,3]', '$[*]'),
+get_json_object('[1,2,3]', '$'),
+get_json_object('[{"k1":"v1"},{"k2":"v2"},{"k3":"v3"}]', '$[2]'),
+get_json_object('[{"k1":"v1"},{"k2":"v2"},{"k3":"v3"}]', '$[2].k3'),
+get_json_object('[[1,2,3],[4,5,6],[7,8,9]]', '$[1]'),
+get_json_object('[[1,2,3],[4,5,6],[7,8,9]]', '$[1][0]'),
+get_json_object('[{"k1":[{"k11":[1,2,3]}]}]', '$[0].k1[0].k11[1]')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: --root is array
+SELECT
+get_json_object('[1,2,3]', '$[0]'),
+get_json_object('[1,2,3]', '$.[0]'),
+get_json_object('[1,2,3]', '$.[1]'),
+get_json_object('[1,2,3]', '$[1]'),
+get_json_object('[1,2,3]', '$[2]'),
+get_json_object('[1,2,3]', '$[*]'),
+get_json_object('[1,2,3]', '$'),
+get_json_object('[{"k1":"v1"},{"k2":"v2"},{"k3":"v3"}]', '$[2]'),
+get_json_object('[{"k1":"v1"},{"k2":"v2"},{"k3":"v3"}]', '$[2].k3'),
+get_json_object('[[1,2,3],[4,5,6],[7,8,9]]', '$[1]'),
+get_json_object('[[1,2,3],[4,5,6],[7,8,9]]', '$[1][0]'),
+get_json_object('[{"k1":[{"k11":[1,2,3]}]}]', '$[0].k1[0].k11[1]')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1      1       2       2       3       [1,2,3] [1,2,3] {"k3":"v3"}     v3      
[4,5,6] 4       2
+PREHOOK: query: --null
+SELECT
+get_json_object('[1,2,3]', '[2]'),
+get_json_object('[1,2,3]', '$0'),
+get_json_object('[1,2,3]', '$[3]')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: --null
+SELECT
+get_json_object('[1,2,3]', '[2]'),
+get_json_object('[1,2,3]', '$0'),
+get_json_object('[1,2,3]', '$[3]')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+NULL   NULL    NULL


Reply via email to