Author: gates Date: Mon Oct 13 09:26:21 2008 New Revision: 704154 URL: http://svn.apache.org/viewvc?rev=704154&view=rev Log: PIG-488: Added SearchTermExtractor, a piggybank eval func that, for many search engines, recognizes the search term in the URL and returns it to the caller.
Added: incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchTermExtractor.java incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java Modified: incubator/pig/trunk/CHANGES.txt Modified: incubator/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=704154&r1=704153&r2=704154&view=diff ============================================================================== --- incubator/pig/trunk/CHANGES.txt (original) +++ incubator/pig/trunk/CHANGES.txt Mon Oct 13 09:26:21 2008 @@ -367,3 +367,7 @@ PIG-487: Added HostExtractor, a piggybank eval func that, given a URL, determines the host (spackest via gates). + + PIG-488: Added SearchTermExtractor, a piggybank eval func that, for many + search engines, recognizes the search term in the URL returns it to the + caller (spackest via gates). Added: incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchTermExtractor.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchTermExtractor.java?rev=704154&view=auto ============================================================================== --- incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchTermExtractor.java (added) +++ incubator/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchTermExtractor.java Mon Oct 13 09:26:21 2008 @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. 
The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ + +package org.apache.pig.piggybank.evaluation.util.apachelogparser; + +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLDecoder; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.DataAtom; +import org.apache.pig.data.Tuple; + + +/** + * SearchTermExtractor takes a url string and extracts the search terms. For example, given + * + * http://www.google.com/search?hl=en&safe=active&rls=GGLG,GGLG:2005-24,GGLG:en&q=purpose+of+life&btnG=Search + * + * then + * + * purpose of life + * + * would be extracted. 
+ * + * From pig latin, usage looks something like + * + * searchTerm = FOREACH row GENERATE + * org.apache.pig.piggybank.evaluation.util.apachelogparser.SearchTermExtractor(referer); + * + * Supported search engines include alltheweb.com, altavista.com, aolsearch.aol.com, arianna.libero.it, + * as.starware.com, ask.com, blogs.icerocket.com, blueyonder.co.uk, busca.orange.es, buscador.lycos.es, + * buscador.terra.es, buscar.ozu.es, categorico.it, cerca.lycos.it, cuil.com, excite.it, godado.com, + * godado.it, gps.virgin.net, hotbot.com, ilmotore.com, it.altavista.com, ithaki.net, libero.it, lycos.es, + * lycos.it, mamma.com, megasearching.net, mirago.co.uk, netscape.com, ozu.es, ricerca.alice.it, + * search.aol.co.uk, search.bbc.co.uk, search.conduit.com, search.icq.com, search.live.com, + * search.lycos.co.uk, search.lycos.com, search.msn.co.uk, search.msn.com, search.myway.com, + * search.mywebsearch.com, search.ntlworld.com, search.orange.co.uk, search.sweetim.com, + * search.virginmedia.com, simpatico.ws, soso.com, suche.fireball.de, suche.web.de, terra.es, tesco.net, + * thespider.it, tiscali.co.uk, uk.altavista.com, uk.ask.com + * + * Thanks to Spiros Denaxas for his URI::ParseSearchString, which is the basis for the lookups. 
+ */ +public class SearchTermExtractor extends EvalFunc<DataAtom> { + private static Matcher TERM_MATCHER = null; + private static Matcher P_TERM_MATCHER = null; + + static { + TERM_MATCHER = Pattern.compile("\\b(?:q|buscar|key|qry|qs|query|s|searchfor|su|w)=([^&]+)").matcher(""); + P_TERM_MATCHER = Pattern.compile("\\bp=([^&]+)").matcher(""); + } + + private String myDecode(String string) { + try { + string = URLDecoder.decode(string, "UTF-8"); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + return string; + } + + private static HashMap<String, Boolean> HOSTS = new HashMap<String, Boolean>(); + static { + HOSTS.put("alltheweb.com", true); + HOSTS.put("altavista.com", true); + HOSTS.put("aolsearch.aol.com", true); + HOSTS.put("arianna.libero.it", true); + HOSTS.put("as.starware.com", true); + HOSTS.put("ask.com", true); + HOSTS.put("blogs.icerocket.com", true); + HOSTS.put("blueyonder.co.uk", true); + HOSTS.put("busca.orange.es", true); + HOSTS.put("buscador.lycos.es", true); + HOSTS.put("buscador.terra.es", true); + HOSTS.put("buscar.ozu.es", true); + HOSTS.put("categorico.it", true); + HOSTS.put("cerca.lycos.it", true); + HOSTS.put("cuil.com", true); + HOSTS.put("excite.it", true); + HOSTS.put("godado.com", true); + HOSTS.put("godado.it", true); + HOSTS.put("gps.virgin.net", true); + HOSTS.put("hotbot.com", true); + HOSTS.put("ilmotore.com", true); + HOSTS.put("it.altavista.com", true); + HOSTS.put("ithaki.net", true); + HOSTS.put("libero.it", true); + HOSTS.put("lycos.es", true); + HOSTS.put("lycos.it", true); + HOSTS.put("mamma.com", true); + HOSTS.put("megasearching.net", true); + HOSTS.put("mirago.co.uk", true); + HOSTS.put("netscape.com", true); + HOSTS.put("ozu.es", true); + HOSTS.put("ricerca.alice.it", true); + HOSTS.put("search.aol.co.uk", true); + HOSTS.put("search.bbc.co.uk", true); + HOSTS.put("search.conduit.com", true); + HOSTS.put("search.icq.com", true); + HOSTS.put("search.live.com", true); + 
HOSTS.put("search.lycos.co.uk", true); + HOSTS.put("search.lycos.com", true); + HOSTS.put("search.msn.co.uk", true); + HOSTS.put("search.msn.com", true); + HOSTS.put("search.myway.com", true); + HOSTS.put("search.mywebsearch.com", true); + HOSTS.put("search.ntlworld.com", true); + HOSTS.put("search.orange.co.uk", true); + HOSTS.put("search.sweetim.com", true); + HOSTS.put("search.virginmedia.com", true); + HOSTS.put("simpatico.ws", true); + HOSTS.put("soso.com", true); + HOSTS.put("suche.fireball.de", true); + HOSTS.put("suche.web.de", true); + HOSTS.put("terra.es", true); + HOSTS.put("tesco.net", true); + HOSTS.put("thespider.it", true); + HOSTS.put("tiscali.co.uk", true); + HOSTS.put("uk.altavista.com", true); + HOSTS.put("uk.ask.com", true); + } + + @Override + public void exec(Tuple input, DataAtom output) { + String url = input.getAtomField(0).strval(); + + if (url == null) + return; + + URL urlObject = null; + try { + urlObject = new URL(url); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + if (urlObject == null) + return; + + String host = urlObject.getHost(); + if (host == null) + return; + + host = host.replaceFirst("^www\\.", ""); + if (host == null) + return; + + host = host.toLowerCase(); + + if (HOSTS.containsKey(host) || host.contains("google.co") || host.contains("search.yahoo")) { + String queryString = urlObject.getQuery(); + + TERM_MATCHER.reset(queryString); + if (TERM_MATCHER.find()) { + String terms = TERM_MATCHER.group(1); + output.setValue(myDecode(terms)); + + // at least once, a p= comes before a q= when p= isn't tied to the search terms + } else { + P_TERM_MATCHER.reset(queryString); + if (P_TERM_MATCHER.find()) { + String terms = P_TERM_MATCHER.group(1); + output.setValue(myDecode(terms)); + } + } + return; + } + + if (host.endsWith("feedster.com") || host.endsWith("technorati.com")) { + String path = urlObject.getPath(); + if (path == null) + return; + + path = path.replaceFirst("^/search/", ""); + 
output.setValue(myDecode(path)); + } + } +} Added: incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java URL: http://svn.apache.org/viewvc/incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java?rev=704154&view=auto ============================================================================== --- incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java (added) +++ incubator/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestSearchTermExtractor.java Mon Oct 13 09:26:21 2008 @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the + * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. 
+ */ + +package org.apache.pig.piggybank.test.evaluation.util.apachelogparser; + +import java.util.ArrayList; +import java.util.HashMap; + +import junit.framework.TestCase; + +import org.apache.pig.data.DataAtom; +import org.apache.pig.data.Datum; +import org.apache.pig.data.Tuple; +import org.apache.pig.piggybank.evaluation.util.apachelogparser.SearchEngineExtractor; +import org.apache.pig.piggybank.evaluation.util.apachelogparser.SearchTermExtractor; +import org.junit.Test; + +public class TestSearchTermExtractor extends TestCase { + private static HashMap<String, String> tests = new HashMap<String, String>(); + static { + tests.put("http://www.google.com/search?hl=en&q=a+simple+test&btnG=Google+Search", "a simple test"); + tests.put("http://www.google.co.uk/search?hl=en&q=a+simple+test&btnG=Google+Search&meta=", "a simple test"); + tests.put("http://www.google.co.jp/search?hl=ja&q=a+simple+test&btnG=Google+%E6%A4%9C%E7%B4%A2&lr=", "a simple test"); + tests.put("http://search.msn.co.uk/results.aspx?q=a+simple+test&geovar=56&FORM=REDIR", "a simple test"); + tests.put("http://search.msn.com/results.aspx?q=a+simple+test&geovar=56&FORM=REDIR", "a simple test"); + tests.put("http://www.altavista.com/web/results?itag=ody&q=a+simple+test&kgs=1&kls=0", "a simple test"); + tests.put("http://uk.altavista.com/web/results?itag=ody&q=a+simple+test&kgs=1&kls=0", "a simple test"); + tests.put("http://www.blueyonder.co.uk/blueyonder/searches/search.jsp?q=a+simple+test&cr=&sitesearch=&x=0&y=0", "a simple test"); + tests.put("http://www.alltheweb.com/search?cat=web&cs=iso88591&q=a+simple+test&rys=0&itag=crv&_sb_lang=pref", "a simple test"); + tests.put("http://search.lycos.com/?query=a+simple+test&x=0&y=0", "a simple test"); + tests.put("http://search.lycos.co.uk/cgi-bin/pursuit?query=a+simple+test&enc=utf-8&cat=slim_loc&sc=blue", "a simple test"); + tests.put("http://www.hotbot.com/index.php?query=a+simple+test&ps=&loc=searchbox&tab=web&mode=search&currProv=msn", "a simple 
test"); + tests.put("http://search.yahoo.com/search?p=a+simple+test&fr=FP-tab-web-t400&toggle=1&cop=&ei=UTF-8", "a simple test"); + tests.put("http://uk.search.yahoo.com/search?p=a+simple+test&fr=FP-tab-web-t340&ei=UTF-8&meta=vc%3D", "a simple test"); + tests.put("http://uk.ask.com/web?q=a+simple+test&qsrc=0&o=0&l=dir&dm=all", "a simple test"); + tests.put("http://www.mirago.co.uk/scripts/qhandler.aspx?qry=a+simple+test&x=0&y=0", "a simple test"); + tests.put("http://www.netscape.com/search/?s=a+simple+test", "a simple test"); + tests.put("http://search.aol.co.uk/web?invocationType=ns_uk&query=a%20simple%20test", "a simple test"); + tests.put("http://www.tiscali.co.uk/search/results.php?section=&from=&query=a+simple+test", "a simple test"); + tests.put("http://www.mamma.com/Mamma?utfout=1&qtype=0&query=a+simple+test&Submit=%C2%A0%C2%A0Search%C2%A0%C2%A0", "a simple test"); + tests.put("http://blogs.icerocket.com/search?q=a+simple+test", "a simple test"); + tests.put("http://blogsearch.google.com/blogsearch?hl=en&ie=UTF-8&q=a+simple+test&btnG=Search+Blogs", "a simple test"); + tests.put("http://suche.fireball.de/cgi-bin/pursuit?query=a+simple+test&x=0&y=0&cat=fb_loc&enc=utf-8", "a simple test"); + tests.put("http://suche.web.de/search/web/?allparams=&smode=&su=a+simple+test&webRb=de", "a simple test"); + tests.put("http://www.technorati.com/search/a%20simple%20test", "a simple test"); + tests.put("http://www.feedster.com/search/a%20simple%20test", "a simple test"); + tests.put("http://www.tesco.net/google/searchresults.asp?q=a+simple+test&cr=", "a simple test"); + tests + .put( + "http://gps.virgin.net/search/sitesearch?submit.x=1&start=0&format=1&num=10&restrict=site&sitefilter=site%2Fsite_filter.hts&siteresults=site%2Fsite_results.hts&sitescorethreshold=28&q=a+simple+test&scope=UK&x=0&y=0", + "a simple test"); + tests.put("http://search.bbc.co.uk/cgi-bin/search/results.pl?tab=web&go=homepage&q=a+simple+test&Search.x=0&Search.y=0&Search=Search&scope=all", + "a 
simple test"); + tests.put("http://search.live.com/results.aspx?q=a+simple+test&mkt=en-us&FORM=LVSP&go.x=0&go.y=0&go=Search", "a simple test"); + tests.put("http://search.mywebsearch.com/mywebsearch/AJmain.jhtml?searchfor=a+simple+test", "a simple test"); + tests.put("http://www.megasearching.net/m/search.aspx?s=a+simple+test&mkt=&orig=1", "a simple test"); + tests.put("http://www.blueyonder.co.uk/blueyonder/searches/search.jsp?q=a+simple+test&cr=&sitesearch=&x=0&y=0", "a simple test"); + tests.put("http://search.ntlworld.com/ntlworld/search.php?q=a+simple+test&cr=&x=0&y=0", "a simple test"); + tests.put("http://search.orange.co.uk/all?p=_searchbox&pt=resultgo&brand=ouk&tab=web&q=a+simple+test", "a simple test"); + tests.put("http://search.virginmedia.com/results/index.php?channel=other&q=a+simple+test&cr=&x=0&y=0", "a simple test"); + tests.put("http://as.starware.com/dp/search?src_id=305&product=unknown&qry=a+simple+test&z=Find+It", "a simple test"); + tests.put("http://aolsearch.aol.com/aol/search?invocationType=topsearchbox.webhome&query=a+simple+test", "a simple test"); + tests.put("http://www.ask.com/web?q=a+simple+test&qsrc=0&o=0&l=dir", "a simple test"); + tests.put("http://buscador.terra.es/Default.aspx?source=Search&ca=s&query=a%20simple%20test", "a simple test"); + tests.put("http://busca.orange.es/search?origen=home&destino=web&buscar=a+simple+test", "a simple test"); + tests.put("http://search.sweetim.com/search.asp?ln=en&q=a%20simple%20test", "a simple test"); + tests.put("http://search.conduit.com/Results.aspx?q=a+simple+test&hl=en&SelfSearch=1&SearchSourceOrigin=1&ctid=WEBSITE", "a simple test"); + tests.put("http://buscar.ozu.es/index.php?etq=web&q=a+simple+test", "a simple test"); + tests.put("http://buscador.lycos.es/cgi-bin/pursuit?query=a+simple+test&websearchCat=loc&cat=loc&SITE=de&enc=utf-8&ref=sboxlink", "a simple test"); + tests.put("http://search.icq.com/search/results.php?q=a+simple+test&ch_id=st&search_mode=web", "a simple test"); + 
tests.put("http://search.yahoo.co.jp/search?ei=UTF-8&fr=sfp_as&p=a+simple+test&meta=vc%3D", "a simple test"); + tests.put("http://www.soso.com/q?pid=s.idx&w=a+simple+test", "a simple test"); + tests.put("http://search.myway.com/search/AJmain.jhtml?searchfor=a+simple+test", "a simple test"); + tests.put("http://www.ilmotore.com/newsearch/?query=a+simple+test&where=web", "a simple test"); + tests.put("http://www.ithaki.net/ricerca.cgi?where=italia&query=a+simple+test", "a simple test"); + tests.put("http://ricerca.alice.it/ricerca?f=hpn&qs=a+simple+test", "a simple test"); + tests.put("http://it.search.yahoo.com/search?p=a+simple+test&fr=yfp-t-501&ei=UTF-8&rd=r1", "a simple test"); + tests.put("http://www.excite.it/search/web/results?l=&q=a+simple+test", "a simple test"); + tests.put("http://it.altavista.com/web/results?itag=ody&q=a+simple+test&kgs=1&kls=0", "a simple test"); + tests.put("http://cerca.lycos.it/cgi-bin/pursuit?query=a+simple+test&cat=web", "a simple test"); + tests.put("http://arianna.libero.it/search/abin/integrata.cgi?query=a+simple+test®ione=8&x=0&y=0", "a simple test"); + tests.put("http://www.thespider.it/dir/index.php?q=a+simple+test&search-btn.x=0&search-btn.y=0", "a simple test"); + tests.put("http://godado.it/engine.php?l=it&key=a+simple+test&x=0&y=0", "a simple test"); + tests.put("http://www.simpatico.ws/cgi-bin/links/search.cgi?query=a+simple+test&Vai=Go", "a simple test"); + tests + .put( + "http://www.categorico.it/ricerca.html?domains=Categorico.it&q=a+simple+test&sa=Cerca+con+Google&sitesearch=&client=pub-0499722654836507&forid=1&channel=7983145815&ie=ISO-8859-1&oe=ISO-8859-1&cof=GALT%3A%23008000%3BGL%3A1%3BDIV%3A%23336699%3BVLC%3A663399%3BAH%3Acenter%3BBGC%3AFFFFFF%3BLBGC%3A336699%3BALC%3A0000FF%3BLC%3A0000FF%3BT%3A000000%3BGFNT%3A0000FF%3BGIMP%3A0000FF%3BFORID%3A11&hl=it", + "a simple test"); + tests.put("http://www.cuil.com/search?q=a+simple+test", "a simple test"); + 
tests.put("http://www.google.com/search?hl=en&lr=&q=a+more%21+complex_+search%24&btnG=Search", "a more! complex_ search$"); + tests.put("http://www.google.co.uk/search?hl=en&q=a+more%21+complex_+search%24&btnG=Google+Search&meta=", "a more! complex_ search$"); + tests.put("http://www.google.co.jp/search?hl=ja&q=a+more%21+complex_+search%24&btnG=Google+%E6%A4%9C%E7%B4%A2&lr=", "a more! complex_ search$"); + tests.put("http://search.msn.com/results.aspx?q=a+more%21+complex_+search%24&FORM=QBHP", "a more! complex_ search$"); + tests.put("http://search.msn.co.uk/results.aspx?q=a+more%21+complex_+search%24&FORM=MSNH&srch_type=0&cp=65001", "a more! complex_ search$"); + tests.put("http://www.altavista.com/web/results?itag=ody&q=a+more%21+complex_+search%24&kgs=1&kls=0", "a more! complex_ search$"); + tests.put("http://uk.altavista.com/web/results?itag=ody&q=a+more%21+complex_+search%24&kgs=1&kls=0", "a more! complex_ search$"); + tests.put("http://www.blueyonder.co.uk/blueyonder/searches/search.jsp?q=a+more%21+complex_+search%24&cr=&sitesearch=&x=0&y=0", + "a more! complex_ search$"); + tests + .put("http://www.alltheweb.com/search?cat=web&cs=iso88591&q=a+more%21+complex_+search%24&rys=0&itag=crv&_sb_lang=pref", "a more! complex_ search$"); + tests.put("http://search.lycos.com/?query=a+more%21+complex_+search%24&x=0&y=0", "a more! complex_ search$"); + tests.put("http://search.lycos.co.uk/cgi-bin/pursuit?query=a+more%21+complex_+search%24&enc=utf-8&cat=slim_loc&sc=blue", "a more! complex_ search$"); + tests.put("http://www.hotbot.com/index.php?query=a+more%21+complex_+search%24&ps=&loc=searchbox&tab=web&mode=search&currProv=msn", + "a more! complex_ search$"); + tests.put("http://search.yahoo.com/search?p=a+more%21+complex_+search%24&fr=FP-tab-web-t400&toggle=1&cop=&ei=UTF-8", "a more! complex_ search$"); + tests.put("http://uk.search.yahoo.com/search?p=a+more%21+complex_+search%24&fr=FP-tab-web-t340&ei=UTF-8&meta=vc%3D", "a more! 
complex_ search$"); + tests.put("http://uk.ask.com/web?q=a+more%21+complex_+search%24&qsrc=0&o=0&l=dir&dm=all", "a more! complex_ search$"); + tests.put("http://www.mirago.co.uk/scripts/qhandler.aspx?qry=a+more%21+complex_+search%24&x=0&y=0", "a more! complex_ search$"); + tests.put("http://www.netscape.com/search/?s=a+more%21+complex_+search%24", "a more! complex_ search$"); + tests.put("http://search.aol.co.uk/web?query=a+more%21+complex_+search%24&x=0&y=0&isinit=true&restrict=wholeweb", "a more! complex_ search$"); + tests.put("http://www.tiscali.co.uk/search/results.php?section=&from=&query=a+more%21+complex_+search%24", "a more! complex_ search$"); + tests.put("http://www.mamma.com/Mamma?utfout=1&qtype=0&query=a+more%21+complex_+search%24&Submit=%C2%A0%C2%A0Search%C2%A0%C2%A0", + "a more! complex_ search$"); + tests.put("dud", null); + } + + @Test + public void testInstantiation() { + assertNotNull(new SearchEngineExtractor()); + } + + @Test + public void testTests() { + SearchTermExtractor searchTermExtractor = new SearchTermExtractor(); + int testCount = 0; + for (String key : tests.keySet()) { + String expected = tests.get(key); + + ArrayList<Datum> input = new ArrayList<Datum>(); + input.add(new DataAtom(key)); + + DataAtom output = new DataAtom(); + searchTermExtractor.exec(new Tuple(input), output); + if (expected == null) { + assertEquals(0, output.strval().length()); + } else { + assertEquals(expected, output.toString()); + } + testCount++; + } + assertEquals(tests.size(), testCount); + } +}