[ 
https://issues.apache.org/jira/browse/DATAFU-62?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14093789#comment-14093789
 ] 

Matthew Hayes commented on DATAFU-62:
-------------------------------------

I've tried reworking the test case.  Have a look at this below.  I had to break 
out the expected query params into a separate field because Pig was having 
trouble parsing the tuple when there were no query params.  Please incorporate 
this into your patch if you agree.

{code}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package datafu.test.pig.urls;

import org.adrianwalker.multilinestring.Multiline;
import org.apache.pig.data.Tuple;
import org.apache.pig.pigunit.PigTest;
import org.testng.annotations.Test;
import static org.testng.Assert.*;

import datafu.test.pig.PigTests;

/**
 * PigUnit test for the {@code datafu.pig.urls.URLInfo} UDF.
 *
 * <p>Every input row carries a case id, a URL, the expected tuple of URL
 * components, and the expected query-parameter map in a column of its own.
 * The test runs the UDF on the URL and compares its output with those
 * expectations field by field.
 */
public class URLInfoTest extends PigTests
{
    // Names of the first five URLInfo output fields, in the order declared by the
    // "expected" tuple schema in the script below. Used only for failure messages.
    private static final String[] EXPECTED_FIELDS = { "domain", "host", "protocol", "path", "port" };

    // NOTE: the Javadoc body below is the Pig script itself, not documentation.
    // It is handed to createPigTestFromString() through the @Multiline field,
    // so its text must not be edited for cosmetic reasons.
    /**
     define URLInfo datafu.pig.urls.URLInfo();

     data = load 'input' using PigStorage('|') as (i:int, 
url:chararray,expected:tuple(domain:chararray,host:chararray,protocol:chararray,path:chararray,port:int),
 queryparams:map[chararray]);
     data_out = foreach data generate i, URLInfo(url) as url_info, expected, 
queryparams;

     store data_out into 'output';
     */
    @Multiline private String urlInfoTest;

    /**
     * Runs the script over a fixed set of URLs and checks each URLInfo result
     * against the expectations stored alongside it in the same input row.
     *
     * @throws Exception if the script cannot be created or run, or a tuple
     *                   field cannot be read
     */
    @Test
    public void urlInfoTest() throws Exception
    {
        PigTest test = createPigTestFromString(urlInfoTest);

        // Columns: id | url | expected (domain,host,protocol,path,port) | expected query params.
        // The port and the last column are left empty when no value is expected
        // (presumably loaded by Pig as null -- confirm against PigStorage behaviour).
        // The stray ')' that used to trail rows 2-5 has been removed so that every
        // row follows the same format as row 1.
        String[] input = {
            "1|http://roger.bar.com/marketing/brand.html?x=foo|(bar.com,roger.bar.com,http,/marketing/brand.html,)|[x#foo]",
            "2|https://hello.world.org:90/products/data/data.html|(world.org,hello.world.org,https,/products/data/data.html,90)|",
            "3|ftp://roger.bar.com/eng/hello.jsp?x=foo&y=bar|(bar.com,roger.bar.com,ftp,/eng/hello.jsp,)|[x#foo,y#bar]",
            "4|http://hello.world.org:90/products/data/data.html|(world.org,hello.world.org,http,/products/data/data.html,90)|",
            "5|http://roger.bar.com/eng/hello.jsp?x=foo&y=bar&x=baz|(bar.com,roger.bar.com,http,/eng/hello.jsp,)|[x#foo,y#bar]"
        };

        writeLinesToFile("input",input);

        test.runScript();

        // Count validated rows so that an empty result cannot pass silently.
        int validated = 0;
        for (Tuple t : getLinesForAlias(test, "data_out"))
        {
            String caseLabel = "case " + t.get(0);
            Tuple actual = (Tuple)t.get(1);
            Tuple expected = (Tuple)t.get(2);

            for (int i = 0; i < EXPECTED_FIELDS.length; i++)
            {
                assertEquals(actual.get(i), expected.get(i), caseLabel + ": " + EXPECTED_FIELDS[i]);
            }

            // The query params are the sixth URLInfo field; their expectation
            // is the separate fourth column of the output row.
            assertEquals(actual.get(5), t.get(3), caseLabel + ": query params");
            validated++;
        }

        assertEquals(validated, input.length, "number of validated output rows");
    }
}
{code}

> URL Info
> --------
>
>                 Key: DATAFU-62
>                 URL: https://issues.apache.org/jira/browse/DATAFU-62
>             Project: DataFu
>          Issue Type: New Feature
>    Affects Versions: 1.3.0
>            Reporter: Joydeep Banerjee
>         Attachments: DATAFU-62-v2.patch, DATAFU-62-v3.patch, 
> DATAFU-62-v4.patch, DATAFU-62.patch, DATAFU-62.patch, DATAFU-62.patch
>
>
> [This is work-in-progress]
> Given a valid URL, provide the following information about that URL - 
> domain name
> host name
> protocol
> path
> file name
> query parameters and their values



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to