[ https://issues.apache.org/jira/browse/PHOENIX-628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14512144#comment-14512144 ]
ASF GitHub Bot commented on PHOENIX-628: ---------------------------------------- Github user twdsilva commented on a diff in the pull request: https://github.com/apache/phoenix/pull/76#discussion_r29096341 --- Diff: phoenix-core/src/main/java/org/apache/phoenix/schema/json/PhoenixJson.java --- @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.schema.json; + +import java.io.IOException; +import java.sql.SQLException; + +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.phoenix.exception.SQLExceptionCode; +import org.apache.phoenix.exception.SQLExceptionInfo; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonParser.Feature; +import org.codehaus.jackson.JsonProcessingException; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.node.ValueNode; + +import com.google.common.base.Preconditions; + +/** + * The {@link PhoenixJson} wraps json and uses Jackson library to parse and traverse the json. It + * should be used to represent the JSON data type and also should be used to parse Json data and + * read the value from it. 
It always considers the last value if the same key exists more than once. + */ +public class PhoenixJson implements Comparable<PhoenixJson> { + private final JsonNode node; + private String jsonAsString; + + /** + * Static Factory method to get an {@link PhoenixJson} object. It also validates the json and + * throws {@link SQLException} if it is invalid with line number and character. + * @param data Buffer that contains data to parse + * @param offset Offset of the first data byte within buffer + * @param length Length of contents to parse within buffer + * @return {@link PhoenixJson}. + * @throws SQLException + */ + public static PhoenixJson getPhoenixJson(byte[] jsonData, int offset, int length) + throws SQLException { + + String jsonDataStr = Bytes.toString(jsonData, offset, length); + return getPhoenixJson(jsonDataStr); + + } + + /** + * Static Factory method to get an {@link PhoenixJson} object. It also validates the json and + * throws {@link SQLException} if it is invalid with line number and character. + * @param jsonData Json data as {@link String}. + * @return {@link PhoenixJson}. + * @throws SQLException + */ + public static PhoenixJson getPhoenixJson(String jsonData) throws SQLException { + try { + JsonFactory jsonFactory = new JsonFactory(); + JsonParser jsonParser = jsonFactory.createJsonParser(jsonData); + PhoenixJson phoenixJson = getPhoneixJson(jsonParser); + /* + * input data has been stored as it is, since some data is lost when json parser runs, + * for example if a JSON object within the value contains the same key more than once + * then only last one is stored rest all of them are ignored, which will defy the + * contract of PJsonDataType of keeping user data as it is. 
+ */ + phoenixJson.setJsonAsString(jsonData); + return phoenixJson; + } catch (IOException x) { + throw new SQLExceptionInfo.Builder(SQLExceptionCode.INVALID_JSON_DATA).setRootCause(x) + .setMessage(x.getMessage()).build().buildException(); + } + + } + + private static PhoenixJson getPhoneixJson(JsonParser jsonParser) throws IOException, + JsonProcessingException { + jsonParser.configure(Feature.ALLOW_COMMENTS, true); + ObjectMapper objectMapper = new ObjectMapper(); + try { + JsonNode rootNode = objectMapper.readTree(jsonParser); + PhoenixJson phoenixJson = new PhoenixJson(rootNode); + return phoenixJson; + } finally { + jsonParser.close(); + } + } + + /* Default for unit testing */PhoenixJson(final JsonNode node) { + Preconditions.checkNotNull(node, "root node cannot be null for json"); + this.node = node; + } + + /** + * Get {@link PhoenixJson} for a given json paths. For example : + * <p> + * <code> + * {"f2":{"f3":1},"f4":{"f5":99,"f6":{"f7":"2"}}}' + * </code> + * <p> + * for this source json, if we want to know the json at path {'f4','f6'} it will return + * {@link PhoenixJson} object for json {"f7":"2"}. It always returns the last key if same key + * exist more than once. + * <p> + * If the given path is unreachable then it throws {@link PhoenixJsonException} with the message + * having information about not found path. It is caller responsibility to wrap it in + * {@link SQLException} or catch it and return null to client. + * @param paths {@link String []} of path in the same order as they appear in json. + * @return {@link PhoenixJson} for the json against @paths. 
+ * @throws PhoenixJsonException + */ + public PhoenixJson getPhoenixJson(String[] paths) throws PhoenixJsonException { + JsonNode node = this.node; + for (String path : paths) { + JsonNode nodeTemp = null; + if (node.isArray()) { + try { + int index = Integer.parseInt(path); + nodeTemp = node.path(index); + } catch (NumberFormatException nfe) { + throw new PhoenixJsonException("path: " + path + " not found", nfe); + } + } else { + nodeTemp = node.path(path); + } + if (nodeTemp == null || nodeTemp.isMissingNode()) { + throw new PhoenixJsonException("path: " + path + " not found"); + } + node = nodeTemp; + } + return new PhoenixJson(node); + } + + /** + * Get {@link PhoenixJson} for a given json paths. For example : + * <p> + * <code> + * {"f2":{"f3":1},"f4":{"f5":99,"f6":{"f7":"2"}}}' + * </code> + * <p> + * for this source json, if we want to know the json at path {'f4','f6'} it will return + * {@link PhoenixJson} object for json {"f7":"2"}. It always returns the last key if same key + * exist more than once. + * <p> + * If the given path is unreachable then it return null. + * @param paths {@link String []} of path in the same order as they appear in json. + * @return {@link PhoenixJson} for the json against @paths. + */ + public PhoenixJson getNullablePhoenixJson(String[] paths) { --- End diff -- Since getPhoenixJson and getNullablePhoenixJson are almost the same, maybe have a helper function that takes a nullable argument in order to prevent code duplication. > Support native JSON data type > ----------------------------- > > Key: PHOENIX-628 > URL: https://issues.apache.org/jira/browse/PHOENIX-628 > Project: Phoenix > Issue Type: Task > Affects Versions: 4.4.0 > Reporter: James Taylor > Assignee: Aakash Pradeep > Labels: JSON, Java, SQL > Fix For: 4.4.0 > > > MongoDB and PostGres do some interesting things with JSON. We should look at > adding similar support. 
For a detailed description, see JSONB support in > Postgres: > http://www.craigkerstiens.com/2014/03/24/Postgres-9.4-Looking-up > http://www.depesz.com/2014/03/25/waiting-for-9-4-introduce-jsonb-a-structured-format-for-storing-json/ > http://michael.otacoo.com/postgresql-2/manipulating-jsonb-data-with-key-unique/ -- This message was sent by Atlassian JIRA (v6.3.4#6332)