[ https://issues.apache.org/jira/browse/HAWQ-178?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15131830#comment-15131830 ]
ASF GitHub Bot commented on HAWQ-178: ------------------------------------- Github user hornn commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/302#discussion_r51835237 --- Diff: pxf/pxf-json/src/test/java/org/apache/hawq/pxf/plugins/json/JsonExtensionTest.java --- @@ -0,0 +1,288 @@ +package org.apache.hawq.pxf.plugins.json; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hawq.pxf.api.Fragmenter; +import org.apache.hawq.pxf.api.ReadAccessor; +import org.apache.hawq.pxf.api.ReadResolver; +import org.apache.hawq.pxf.api.io.DataType; +import org.apache.hawq.pxf.plugins.hdfs.HdfsDataFragmenter; +import org.apache.hawq.pxf.plugins.json.JsonAccessor; +import org.apache.hawq.pxf.plugins.json.JsonResolver; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class JsonExtensionTest extends PxfUnit { + + private List<Pair<String, DataType>> columnDefs = null; + private List<Pair<String, String>> extraParams = new ArrayList<Pair<String, String>>(); + + @Before + public void before() { + + columnDefs = new ArrayList<Pair<String, DataType>>(); + + columnDefs.add(new Pair<String, DataType>("created_at", DataType.TEXT)); + columnDefs.add(new Pair<String, DataType>("id", DataType.BIGINT)); + columnDefs.add(new Pair<String, DataType>("text", DataType.TEXT)); + columnDefs.add(new Pair<String, DataType>("user.screen_name", DataType.TEXT)); + columnDefs.add(new Pair<String, DataType>("entities.hashtags[0]", DataType.TEXT)); + columnDefs.add(new Pair<String, DataType>("coordinates.coordinates[0]", DataType.FLOAT8)); + columnDefs.add(new Pair<String, DataType>("coordinates.coordinates[1]", DataType.FLOAT8)); + } + + @After + public void cleanup() throws Exception { + columnDefs.clear(); + extraParams.clear(); + } + + @Test + public void testCompressedMultilineJsonFile() throws Exception { + + extraParams.add(new Pair<String, String>("IDENTIFIER", "created_at")); + + List<String> output = new ArrayList<String>(); + + output.add("Fri Jun 07 22:45:02 +0000 2013,343136547115253761,text1,SpreadButter,tweetCongress,,"); + output.add("Fri Jun 07 22:45:02 +0000 2013,343136547123646465,text2,patronusdeadly,,,"); + output.add("Fri Jun 07 22:45:02 +0000 
2013,343136547136233472,text3,NoSecrets_Vagas,,,"); + + super.assertOutput(new Path(System.getProperty("user.dir") + File.separator + + "src/test/resources/tweets.tar.gz"), output); + } + + @Test + public void testMaxRecordLength() throws Exception { + + // variable-size-objects.json contains 3 json objects but only 2 of them fit in the 27 byte length limitation + + extraParams.add(new Pair<String, String>("IDENTIFIER", "key666")); + extraParams.add(new Pair<String, String>("MAXLENGTH", "27")); + + columnDefs.clear(); --- End diff -- I think the clear is not necessary since it's also done in the @After method > Add JSON plugin support in code base > ------------------------------------ > > Key: HAWQ-178 > URL: https://issues.apache.org/jira/browse/HAWQ-178 > Project: Apache HAWQ > Issue Type: New Feature > Components: PXF > Reporter: Goden Yao > Assignee: Christian Tzolov > Fix For: backlog > > Attachments: PXFJSONPluginforHAWQ2.0andPXF3.0.0.pdf, > PXFJSONPluginforHAWQ2.0andPXF3.0.0v.2.pdf, > PXFJSONPluginforHAWQ2.0andPXF3.0.0v.3.pdf > > > JSON has been a popular format used in HDFS as well as in the community, > there have been a few JSON PXF plugins developed by the community and we'd > like to see them incorporated into the code base as an optional package. -- This message was sent by Atlassian JIRA (v6.3.4#6332)