[
https://issues.apache.org/jira/browse/DATAFU-62?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14093789#comment-14093789
]
Matthew Hayes commented on DATAFU-62:
-------------------------------------
I've tried reworking the test case. Have a look at this below. I had to break
out the expected query params into a separate field because Pig was having
trouble parsing the tuple when there were no query params. Please incorporate
this into your patch if you agree.
{code}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package datafu.test.pig.urls;
import org.adrianwalker.multilinestring.Multiline;
import org.apache.pig.data.Tuple;
import org.apache.pig.pigunit.PigTest;
import org.testng.annotations.Test;
import static org.testng.Assert.*;
import datafu.test.pig.PigTests;
/**
 * Exercises the {@code datafu.pig.urls.URLInfo} UDF through a small Pig script.
 *
 * Each input row carries a URL together with the values the test expects back,
 * so the script only has to apply the UDF and pass the expectations through.
 */
public class URLInfoTest extends PigTests
{
  // Labels for the first five positions of the URLInfo result, in the same order
  // as the 'expected' tuple declared in the script. Used only in failure messages.
  private static final String[] EXPECTED_FIELDS = {"domain", "host", "protocol", "path", "port"};

  // The comment below is the Pig script handed to createPigTestFromString via the
  // @Multiline field that follows it. Its text is kept exactly as written (including
  // column-0 placement) because it is consumed as data, not as documentation.
/**
define URLInfo datafu.pig.urls.URLInfo();
data = load 'input' using PigStorage('|') as (i:int,
url:chararray,expected:tuple(domain:chararray,host:chararray,protocol:chararray,path:chararray,port:int),
queryparams:map[chararray]);
data_out = foreach data generate i, URLInfo(url) as url_info, expected,
queryparams;
store data_out into 'output';
*/
  @Multiline private String urlInfoTest;

  /**
   * Runs the script over five sample URLs and compares every URLInfo field with
   * the expectation carried in the same row.
   *
   * The expected query parameters live in their own column (position 3 of the
   * output row) rather than inside the 'expected' tuple, and are compared with
   * position 5 of the URLInfo result.
   *
   * @throws Exception propagated from the Pig test helpers or from tuple access
   */
  @Test
  public void urlInfoTest() throws Exception
  {
    PigTest test = createPigTestFromString(urlInfoTest);

    // Columns: case id | url | expected(domain,host,protocol,path,port) | expected query params.
    // NOTE(review): rows 2-5 end with a ')' in the query-params column, which looks like a
    // leftover from splitting the expected tuple. Left untouched here because removing it
    // could change what Pig parses for that column - confirm and clean up separately.
    String[] input = {
      "1|http://roger.bar.com/marketing/brand.html?x=foo|(bar.com,roger.bar.com,http,/marketing/brand.html,)|[x#foo]",
      "2|https://hello.world.org:90/products/data/data.html|(world.org,hello.world.org,https,/products/data/data.html,90)|)",
      "3|ftp://roger.bar.com/eng/hello.jsp?x=foo&y=bar|(bar.com,roger.bar.com,ftp,/eng/hello.jsp,)|[x#foo,y#bar])",
      "4|http://hello.world.org:90/products/data/data.html|(world.org,hello.world.org,http,/products/data/data.html,90)|)",
      "5|http://roger.bar.com/eng/hello.jsp?x=foo&y=bar&x=baz|(bar.com,roger.bar.com,http,/eng/hello.jsp,)|[x#foo,y#bar])"
    };

    writeLinesToFile("input",input);
    test.runScript();

    int validated = 0;
    for (Tuple t : getLinesForAlias(test, "data_out"))
    {
      Object caseId = t.get(0);
      System.out.println("Validating case " + caseId);

      Tuple actual = (Tuple)t.get(1);
      Tuple expected = (Tuple)t.get(2);

      // TestNG argument order is (actual, expected, message).
      for (int i = 0; i < EXPECTED_FIELDS.length; i++)
      {
        assertEquals(actual.get(i), expected.get(i),
                     "case " + caseId + ", field " + EXPECTED_FIELDS[i]);
      }

      // The query parameters follow the five scalar fields in the URLInfo result.
      assertEquals(actual.get(EXPECTED_FIELDS.length), t.get(3),
                   "case " + caseId + ", query params");

      validated++;
    }

    // Without this check an empty data_out would skip the loop and pass silently.
    assertEquals(validated, input.length, "number of rows validated from data_out");
  }
}
{code}
> URL Info
> --------
>
> Key: DATAFU-62
> URL: https://issues.apache.org/jira/browse/DATAFU-62
> Project: DataFu
> Issue Type: New Feature
> Affects Versions: 1.3.0
> Reporter: Joydeep Banerjee
> Attachments: DATAFU-62-v2.patch, DATAFU-62-v3.patch,
> DATAFU-62-v4.patch, DATAFU-62.patch, DATAFU-62.patch, DATAFU-62.patch
>
>
> [This is work-in-progress]
> Given a valid URL, provide the following information about that URL -
> domain name
> host name
> protocol
> path
> file name
> query parameters and their values
--
This message was sent by Atlassian JIRA
(v6.2#6252)