Update of /cvsroot/nutch/playground/src/test/net/nutch/util
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10313/src/test/net/nutch/util
Added Files:
TestFibonacciHeap.java TestRobotsMetaProcessor.java
TestDOMContentUtils.java TestPrefixStringMatcher.java
TestStringUtil.java TestSuffixStringMatcher.java
TestSoftHashMap.java TestGZIPUtils.java
Log Message:
initial commit
--- NEW FILE: TestFibonacciHeap.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
import java.util.Arrays;
/**
 * Unit tests for FibonacciHeap.
 *
 * <p>Runs a long random sequence of add / decreaseKey / popMin / peekMin
 * operations against a heap while mirroring the expected contents in a
 * plain array, and checks the heap against that mirror after every step.
 */
public class TestFibonacciHeap extends TestCase {

  public TestFibonacciHeap(String name) {
    super(name);
  }

  /** Heap element: an id that identifies it plus the priority it is ordered by. */
  private static class TestItem implements Comparable {
    int id;
    int priority;

    public TestItem(int id, int priority) {
      this.id= id;
      this.priority= priority;
    }

    public String toString() {
      return "<"+id+","+priority+">";
    }

    /** Orders items by priority only; the id does not take part. */
    public int compareTo(Object other) {
      TestItem o= (TestItem) other;
      if (this.priority < o.priority)
        return -1;
      else if (this.priority == o.priority)
        return 0;
      else return 1;
    }
  }

  private final static int NUM_TEST_ITEMS= 200;
  private final static int NUM_TEST_OPERATIONS= 10000;

  // likelihood of doing any of these operations
  private final static double ADD_PROB= .35;
  private final static double DECREASEKEY_PROB= .25;
  private final static double POP_PROB= .30;
  // peek takes whatever probability is left over; kept for documentation
  private final static double PEEK_PROB= .10;

  /**
   * Searches the leading run of <code>sorted[0..count)</code> whose priority
   * equals that of <code>item</code> for the entry with the same id.
   *
   * <p>The caller must have sorted <code>sorted[0..count)</code> by priority,
   * so that run is exactly the set of minimum-priority items whenever
   * <code>item</code> really is a minimum.
   *
   * @return the index of the matching entry, or -1 if <code>item</code> is
   *         not among the minimum-priority entries
   */
  private static int indexAmongMinimums(TestItem[] sorted, int count,
                                        TestItem item) {
    for (int i= 0;
         (i < count) && (sorted[i].priority == item.priority);
         i++) {
      if (sorted[i].id == item.id)
        return i;
    }
    return -1;
  }

  /**
   * Randomized consistency test.
   *
   * <p>Layout of <code>vals</code>: entries <code>[0, numInVal)</code> are
   * currently in the heap, entries <code>[numInVal, NUM_TEST_ITEMS)</code>
   * are not.
   */
  public void testFibHeap() {
    FibonacciHeap h= new FibonacciHeap();

    TestItem[] vals= new TestItem[NUM_TEST_ITEMS];
    for (int i= 0; i < NUM_TEST_ITEMS; i++)
      vals[i]= new TestItem(i,i);

    // the number of vals in the heap
    int numInVal= 0;
    // the number of vals that are not in the heap
    int numOutVal= NUM_TEST_ITEMS;

    // thresholds
    double addMaxP= ADD_PROB;
    double decreaseKeyMaxP= ADD_PROB + DECREASEKEY_PROB;
    double popMaxP= ADD_PROB + DECREASEKEY_PROB + POP_PROB;

    // number of operations we've done
    int numOps= 0;

    // test add/peek/pop/decreaseKey
    while (numOps < NUM_TEST_OPERATIONS) {
      numOps++;

      assertTrue("heap reports wrong size!", numInVal == h.size());

      double randVal= Math.random();

      if (randVal < addMaxP) {

        if (numOutVal == 0) // can't add...
          continue;

        // add: pick a random item from the "out" region and swap it to
        // the boundary so it becomes the last "in" item
        int index= ( (NUM_TEST_ITEMS - 1) -
                     (int) (Math.random() * (double) numOutVal) );
        TestItem tmp= vals[index];
        vals[index]= vals[numInVal];
        vals[numInVal]= tmp;
        numInVal++;
        numOutVal--;
        h.add(tmp, tmp.priority);

      } else if (randVal < decreaseKeyMaxP) {

        // decreaseKey
        if (numInVal == 0) {
          // do nothing
        } else {
          int index= (int) (Math.random() * (double) numInVal);
          TestItem tmp= vals[index];
          // compound assignment narrows the double result back to int
          tmp.priority-= Math.random() * 5.0;
          h.decreaseKey(tmp, tmp.priority);
        }

      } else if (randVal < popMaxP) {

        // pop
        if (numInVal == 0) {
          // The original guarded these checks with "h.size() != 0", which
          // can never hold here (size was just asserted equal to numInVal),
          // so they were dead code.  Check the empty heap unconditionally.
          assertTrue("heap empty, but peekMin() did not return null!",
                     h.peekMin() == null);
          assertTrue("heap empty, but popMin() did not return null!",
                     h.popMin() == null );
        } else {
          Arrays.sort(vals, 0, numInVal);
          TestItem tmp= (TestItem) h.popMin();
          assertTrue("popMin returned null from a non-empty heap!",
                     tmp != null);
          int i= indexAmongMinimums(vals, numInVal, tmp);
          assertTrue("popMin did not return lowest-priority item!",
                     i >= 0);
          assertTrue("popMin did not return lowest-priority item!",
                     tmp == vals[i]);
          // move the popped item into the "out" region
          vals[i]= vals[numInVal - 1];
          vals[numInVal - 1]= tmp;
          numInVal--;
          numOutVal++;
        }

      } else {

        // peek
        if (numInVal == 0) {
          assertTrue("heap reports non-zero size when empty", h.size() == 0);
          assertTrue("heap.peekMin() returns item when empty",
                     h.peekMin() == null);
          assertTrue("heap.popMin() returns item when empty",
                     h.popMin() == null);
        } else {
          Arrays.sort(vals, 0, numInVal);
          TestItem tmp= (TestItem) h.peekMin();
          assertTrue("heap.peekMin() returned null from a non-empty heap",
                     tmp != null);
          int i= indexAmongMinimums(vals, numInVal, tmp);
          assertTrue("heap.peekMin() returns wrong item",
                     i >= 0);
          assertTrue("heap.peekMin() returns wrong item",
                     tmp == vals[i]);
        }

      }
    }
  }
}
--- NEW FILE: TestRobotsMetaProcessor.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
import net.nutch.util.RobotsMetaProcessor.*;
import java.io.ByteArrayInputStream;
import java.net.URL;
import org.cyberneko.html.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import org.apache.html.dom.*;
/**
 * Unit tests for RobotsMetaProcessor.
 *
 * <p>Each entry of {@link #tests} is parsed into a DOM fragment and handed
 * to <code>RobotsMetaProcessor.getRobotsMetaDirectives</code>; the resulting
 * flags and base href are compared with the entry at the same index in
 * {@link #answers} and in the per-test URL table.
 */
public class TestRobotsMetaProcessor extends TestCase {

  public TestRobotsMetaProcessor(String name) {
    super(name);
  }

  /*

  some sample tags:

  <meta name="robots" content="index,follow">
  <meta name="robots" content="noindex,follow">
  <meta name="robots" content="index,nofollow">
  <meta name="robots" content="noindex,nofollow">

  <META HTTP-EQUIV="Pragma" CONTENT="no-cache">

  */

  /** Test pages, one per case; indexes line up with {@link #answers}. */
  public static String[] tests=
  {
    "<html><head><title>test page</title>"
    + "<META NAME=\"ROBOTS\" CONTENT=\"NONE\"> "
    + "<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\" content=\"all\"> "
    + "<meta http-equiv=\"pragma\" content=\"no-cache\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<MeTa NaMe=\"RoBoTs\" CoNtEnT=\"nOnE\"> "
    + "<MeTa HtTp-EqUiV=\"pRaGmA\" cOnTeNt=\"No-CaChE\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\" content=\"none\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\" content=\"noindex,nofollow\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\" content=\"noindex,follow\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\" content=\"index,nofollow\"> "
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\" content=\"index,follow\"> "
    + "<base href=\"http://www.nutch.org/\">"
    + "</head><body>"
    + " some text"
    + "</body></html>",

    "<html><head><title>test page</title>"
    + "<meta name=\"robots\"> "
    + "<base href=\"http://www.nutch.org/base/\">"
    + "</head><body>"
    + " some text"
    + "</body></html>",

  };

  /** Expected {noIndex, noFollow, noCache} for each entry of {@link #tests}. */
  public static final boolean[][] answers= {
    {true, true, true},     // NONE
    {false, false, true},   // all
    {true, true, true},     // nOnE
    {true, true, false},    // none
    {true, true, false},    // noindex,nofollow
    {true, false, false},   // noindex,follow
    {false, true, false},   // index,nofollow
    {false, false, false},  // index,follow
    {false, false, false},  // missing!
  };

  /** Per test: {page URL passed to the processor, expected base href or null}. */
  private URL[][] currURLsAndAnswers;

  public void testRobotsMetaProcessor() {
    DOMFragmentParser parser= new DOMFragmentParser();

    try {
      currURLsAndAnswers= new URL[][] {
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org"), null},
        {new URL("http://www.nutch.org/foo/"),
         new URL("http://www.nutch.org/")},
        {new URL("http://www.nutch.org"),
         new URL("http://www.nutch.org/base/")}
      };
    } catch (Exception e) {
      fail("couldn't make test URLs!");
    }

    for (int i= 0; i < tests.length; i++) {
      byte[] bytes= tests[i].getBytes();

      DocumentFragment node = new HTMLDocumentImpl().createDocumentFragment();

      try {
        parser.parse(new InputSource(new ByteArrayInputStream(bytes)), node);
      } catch (Exception e) {
        // A page that fails to parse would otherwise be checked against an
        // empty fragment and produce a misleading failure (or a false pass
        // for the all-false cases), so stop right here.
        e.printStackTrace();
        fail("could not parse test page " + i + ": " + e);
      }

      RobotsMetaIndicator robotsMeta= new RobotsMetaIndicator();
      RobotsMetaProcessor.getRobotsMetaDirectives(robotsMeta, node,
                                                  currURLsAndAnswers[i][0]);

      assertTrue("got index wrong on test " + i,
                 robotsMeta.getNoIndex() == answers[i][0]);
      assertTrue("got follow wrong on test " + i,
                 robotsMeta.getNoFollow() == answers[i][1]);
      assertTrue("got cache wrong on test " + i,
                 robotsMeta.getNoCache() == answers[i][2]);

      // base href must be absent exactly when none is expected, and
      // otherwise equal to the expected URL
      Object actualBase= robotsMeta.getBaseHref();
      URL expectedBase= currURLsAndAnswers[i][1];
      boolean baseMatches;
      if (actualBase == null)
        baseMatches= (expectedBase == null);
      else
        baseMatches= actualBase.equals(expectedBase);
      assertTrue("got base href wrong on test " + i + " (got "
                 + actualBase + ")",
                 baseMatches);
    }
  }

}
--- NEW FILE: TestDOMContentUtils.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
import net.nutch.fetcher.Outlink;
import java.io.ByteArrayInputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.StringTokenizer;
import org.cyberneko.html.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import org.w3c.dom.html.*;
import org.apache.html.dom.*;
/**
 * Unit tests for DOMContentUtils.
 *
 * <p>A fixed set of small HTML pages is parsed once into DOM fragments; each
 * test then runs one DOMContentUtils extractor (text, title, outlinks) over
 * every fragment and compares the result with the expected-answer table at
 * the same index.
 */
public class TestDOMContentUtils extends TestCase {

  private static final String[] testPages= {
    "<html><head><title> title </title><script> script </script>"
    + "</head><body> body <a href=\"http://www.nutch.org\">"
    + " anchor </a><!--comment-->"
    + "</body></html>",

    "<html><head><title> title </title><script> script </script>"
    + "</head><body> body <a href=\"/\">"
    + " home </a><!--comment-->"
    + "<style> style </style>"
    + " <a href=\"bot.html\">"
    + " bots </a>"
    + "</body></html>",

    "<html><head><title> </title>"
    + "</head><body> "
    + "<a href=\"/\"> separate this "
    + "<a href=\"ok\"> from this"
    + "</a></a>"
    + "</body></html>",

    // this one relies on certain neko fixup behavior, possibly
    // distributing the anchors into the LI's-but not the other
    // anchors (outside of them, instead)!  So you get a tree that
    // looks like:
    // ... <li> <a href=/> home </a> </li>
    //     <li> <a href=/> <a href="1"> 1 </a> </a> </li>
    //     <li> <a href=/> <a href="1"> <a href="2"> 2 </a> </a> </a> </li>
    "<html><head><title> my title </title>"
    + "</head><body> body "
    + "<ul>"
    + "<li> <a href=\"/\"> home"
    + "<li> <a href=\"1\"> 1"
    + "<li> <a href=\"2\"> 2"
    + "</ul>"
    + "</body></html>",
  };

  /** Base URL each page is resolved against; same indexing as testPages. */
  private static String[] testBaseHrefs= {
    "http://www.nutch.org",
    "http://www.nutch.org/docs/foo.html",
    "http://www.nutch.org/docs/",
    "http://www.nutch.org/docs/",
  };

  /** Parsed form of testPages, filled in lazily by setup(). */
  private static final DocumentFragment testDOMs[]=
    new DocumentFragment[testPages.length];

  /** URL form of testBaseHrefs, filled in lazily by setup(). */
  private static URL[] testBaseHrefURLs=
    new URL[testPages.length];

  private static final String[] answerText= {
    "title body anchor",
    "title body home bots",
    "separate this from this",
    "my title body home 1 2",
  };

  private static final String[] answerTitle= {
    "title",
    "title",
    "",
    "my title",
  };

  // note: should be in page-order
  private static final Outlink[][] answerOutlinks= {
    {
      new Outlink("http://www.nutch.org", "anchor"),
    },
    {
      new Outlink("http://www.nutch.org/", "home"),
      new Outlink("http://www.nutch.org/docs/bot.html", "bots"),
    },
    {
      new Outlink("http://www.nutch.org/", "separate this"),
      new Outlink("http://www.nutch.org/docs/ok", "from this"),
    },
    {
      new Outlink("http://www.nutch.org/", "home"),
      new Outlink("http://www.nutch.org/docs/1", "1"),
      new Outlink("http://www.nutch.org/docs/2", "2"),
    },
  };

  public TestDOMContentUtils(String name) {
    super(name);
  }

  /**
   * Parses every test page and builds its base URL.  Called on demand by
   * each test the first time testDOMs is found unpopulated.
   */
  private static void setup() {
    DOMFragmentParser parser= new DOMFragmentParser();
    for (int i= 0; i < testPages.length; i++) {
      DocumentFragment node=
        new HTMLDocumentImpl().createDocumentFragment();
      try {
        parser.parse(
          new InputSource(
            new ByteArrayInputStream(testPages[i].getBytes()) ),
          node);
        testBaseHrefURLs[i]= new URL(testBaseHrefs[i]);
      } catch (Exception e) {
        assertTrue("caught exception: " + e, false);
      }
      testDOMs[i]= node;
    }
  }

  /**
   * Compares two strings token by token, treating any run of whitespace
   * (including leading and trailing) as a single separator.
   */
  private static boolean equalsIgnoreWhitespace(String s1, String s2) {
    StringTokenizer st1= new StringTokenizer(s1);
    StringTokenizer st2= new StringTokenizer(s2);

    while (st1.hasMoreTokens()) {
      if (!st2.hasMoreTokens())
        return false;
      if ( ! st1.nextToken().equals(st2.nextToken()) )
        return false;
    }
    if (st2.hasMoreTokens())
      return false;
    return true;
  }

  public void testGetText() {
    if (testDOMs[0] == null)
      setup();
    for (int i= 0; i < testPages.length; i++) {
      StringBuffer sb= new StringBuffer();
      DOMContentUtils.getText(sb, testDOMs[i]);
      String text= sb.toString();
      assertTrue("expecting text: " + answerText[i]
                 + System.getProperty("line.separator")
                 + System.getProperty("line.separator")
                 + "got text: "+ text,
                 equalsIgnoreWhitespace(answerText[i], text));
    }
  }

  public void testGetTitle() {
    if (testDOMs[0] == null)
      setup();
    for (int i= 0; i < testPages.length; i++) {
      StringBuffer sb= new StringBuffer();
      DOMContentUtils.getTitle(sb, testDOMs[i]);
      String title= sb.toString();
      // report the expected *title*; the original message printed the
      // expected body text, which made failures misleading
      assertTrue("expecting title: " + answerTitle[i]
                 + System.getProperty("line.separator")
                 + System.getProperty("line.separator")
                 + "got title: "+ title,
                 equalsIgnoreWhitespace(answerTitle[i], title));
    }
  }

  public void testGetOutlinks() {
    if (testDOMs[0] == null)
      setup();
    for (int i= 0; i < testPages.length; i++) {
      ArrayList outlinks= new ArrayList();
      DOMContentUtils.getOutlinks(testBaseHrefURLs[i], outlinks, testDOMs[i]);
      Outlink[] outlinkArr= new Outlink[outlinks.size()];
      outlinkArr= (Outlink[]) outlinks.toArray(outlinkArr);
      compareOutlinks(answerOutlinks[i], outlinkArr);
    }
  }

  /** Appends one line per outlink to sb. */
  private static final void appendOutlinks(StringBuffer sb, Outlink[] o) {
    for (int i= 0; i < o.length; i++) {
      sb.append(o[i].toString());
      sb.append(System.getProperty("line.separator"));
    }
  }

  /** Renders the outlinks one per line, for failure messages. */
  private static final String outlinksString(Outlink[] o) {
    StringBuffer sb= new StringBuffer();
    appendOutlinks(sb, o);
    return sb.toString();
  }

  /**
   * Fails the test unless o2 (actual) has the same length as o1 (expected)
   * and equal elements at every position.
   */
  private static final void compareOutlinks(Outlink[] o1, Outlink[] o2) {
    if (o1.length != o2.length) {
      assertTrue("got wrong number of outlinks (expecting " + o1.length
                 + ", got " + o2.length + ")"
                 + System.getProperty("line.separator")
                 + "answer: " + System.getProperty("line.separator")
                 + outlinksString(o1)
                 + System.getProperty("line.separator")
                 + "got: " + System.getProperty("line.separator")
                 + outlinksString(o2)
                 + System.getProperty("line.separator"),
                 false
                 );
    }

    for (int i= 0; i < o1.length; i++) {
      if (!o1[i].equals(o2[i])) {
        assertTrue("got wrong outlinks at position " + i
                   + System.getProperty("line.separator")
                   + "answer: " + System.getProperty("line.separator")
                   + o1[i].toString()
                   + System.getProperty("line.separator")
                   + "got: " + System.getProperty("line.separator")
                   + o2[i].toString(),
                   false
                   );
      }
    }
  }
}
--- NEW FILE: TestPrefixStringMatcher.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
/**
 * Unit tests for PrefixStringMatcher.
 *
 * <p>Builds random prefix sets over a tiny alphabet (so that matches are
 * frequent), then checks the matcher's answers for random inputs against a
 * brute-force scan using <code>String.startsWith</code>.
 */
public class TestPrefixStringMatcher extends TestCase {

  public TestPrefixStringMatcher(String name) {
    super(name);
  }

  private final static int NUM_TEST_ROUNDS= 20;
  private final static int MAX_TEST_PREFIXES= 100;
  private final static int MAX_PREFIX_LEN= 10;
  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
  private final static int MAX_INPUT_LEN= 20;

  // deliberately small so random inputs hit random prefixes often
  private final static char[] alphabet=
    new char[] {
      'a', 'b', 'c', 'd',
//      'e', 'f', 'g', 'h', 'i', 'j',
//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
//      '5', '6', '7', '8', '9', '0'
    };

  /**
   * Returns a random string over {@link #alphabet}.
   *
   * @param minLen smallest length produced (inclusive)
   * @param maxLen upper bound on the length; exclusive, because the random
   *               offset is truncated from a value in [0, maxLen - minLen)
   */
  private String makeRandString(int minLen, int maxLen) {
    int len= minLen + (int) (Math.random() * (maxLen - minLen));
    char[] chars= new char[len];

    for (int pos= 0; pos < len; pos++) {
      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
    }

    return new String(chars);
  }

  public void testPrefixMatcher() {
    int numMatches= 0;
    int numInputsTested= 0;

    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {

      // build list of prefixes
      int numPrefixes= (int) (Math.random() * MAX_TEST_PREFIXES);
      String[] prefixes= new String[numPrefixes];
      for (int i= 0; i < numPrefixes; i++) {
        prefixes[i]= makeRandString(0, MAX_PREFIX_LEN);
      }

      PrefixStringMatcher prematcher= new PrefixStringMatcher(prefixes);

      // test random strings for prefix matches
      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
        String input= makeRandString(0, MAX_INPUT_LEN);

        // brute-force reference answer; empty prefixes never count
        boolean matches= false;
        int longestMatch= -1;
        int shortestMatch= -1;

        for (int j= 0; j < prefixes.length; j++) {

          if ((prefixes[j].length() > 0)
              && input.startsWith(prefixes[j])) {

            matches= true;
            int matchSize= prefixes[j].length();

            if (matchSize > longestMatch)
              longestMatch= matchSize;

            if ( (matchSize < shortestMatch)
                 || (shortestMatch == -1) )
              shortestMatch= matchSize;
          }

        }

        if (matches)
          numMatches++;

        numInputsTested++;

        assertTrue( "'" + input + "' should " + (matches ? "" : "not ")
                    + "match!",
                    matches == prematcher.matches(input) );

        if (matches) {
          // Inputs are random, so every assertion names the input; without
          // that a failure could not be reproduced.
          String shortest= prematcher.shortestMatch(input);
          assertTrue( "shortestMatch returned null for '" + input + "'",
                      shortest != null );
          assertEquals( "wrong shortestMatch length for '" + input + "'",
                        shortestMatch, shortest.length() );
          assertEquals( "wrong shortestMatch for '" + input + "'",
                        input.substring(0, shortestMatch), shortest );

          String longest= prematcher.longestMatch(input);
          assertTrue( "longestMatch returned null for '" + input + "'",
                      longest != null );
          assertEquals( "wrong longestMatch length for '" + input + "'",
                        longestMatch, longest.length() );
          assertEquals( "wrong longestMatch for '" + input + "'",
                        input.substring(0, longestMatch), longest );
        }
      }
    }

    System.out.println("got " + numMatches + " matches out of "
                       + numInputsTested + " tests");
  }

}
--- NEW FILE: TestStringUtil.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
/**
 * Unit tests for StringUtil methods.
 *
 * <p>The padding tests use a 9-character string and check four target
 * widths: below the string length, equal to it, one more, and six more.
 */
public class TestStringUtil extends TestCase {

  public TestStringUtil(String name) {
    super(name);
  }

  public void testRightPad() {
    String s= "my string";   // 9 characters

    // widths that do not exceed the string length leave it unchanged
    String ps= StringUtil.rightPad(s, 0);
    assertEquals("rightPad to width 0", s, ps);

    ps= StringUtil.rightPad(s, 9);
    assertEquals("rightPad to width 9", s, ps);

    ps= StringUtil.rightPad(s, 10);
    assertEquals("rightPad to width 10", s + " ", ps);

    // 15 - 9 = 6 pad characters; the original expected value held a single
    // space, identical to the width-10 case, which cannot be right for both
    ps= StringUtil.rightPad(s, 15);
    assertEquals("rightPad to width 15", s + "      ", ps);
  }

  public void testLeftPad() {
    String s= "my string";   // 9 characters

    // widths that do not exceed the string length leave it unchanged
    String ps= StringUtil.leftPad(s, 0);
    assertEquals("leftPad to width 0", s, ps);

    ps= StringUtil.leftPad(s, 9);
    assertEquals("leftPad to width 9", s, ps);

    ps= StringUtil.leftPad(s, 10);
    assertEquals("leftPad to width 10", " " + s, ps);

    // 15 - 9 = 6 pad characters (see testRightPad)
    ps= StringUtil.leftPad(s, 15);
    assertEquals("leftPad to width 15", "      " + s, ps);
  }

}
--- NEW FILE: TestSuffixStringMatcher.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
/**
 * Unit tests for SuffixStringMatcher.
 *
 * <p>Builds random suffix sets over a tiny alphabet (so that matches are
 * frequent), then checks the matcher's answers for random inputs against a
 * brute-force scan using <code>String.endsWith</code>.
 */
public class TestSuffixStringMatcher extends TestCase {

  public TestSuffixStringMatcher(String name) {
    super(name);
  }

  private final static int NUM_TEST_ROUNDS= 20;
  private final static int MAX_TEST_SUFFIXES= 100;
  private final static int MAX_SUFFIX_LEN= 10;
  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
  private final static int MAX_INPUT_LEN= 20;

  // deliberately small so random inputs hit random suffixes often
  private final static char[] alphabet=
    new char[] {
      'a', 'b', 'c', 'd',
//      'e', 'f', 'g', 'h', 'i', 'j',
//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
//      '5', '6', '7', '8', '9', '0'
    };

  /**
   * Returns a random string over {@link #alphabet}.
   *
   * @param minLen smallest length produced (inclusive)
   * @param maxLen upper bound on the length; exclusive, because the random
   *               offset is truncated from a value in [0, maxLen - minLen)
   */
  private String makeRandString(int minLen, int maxLen) {
    int len= minLen + (int) (Math.random() * (maxLen - minLen));
    char[] chars= new char[len];

    for (int pos= 0; pos < len; pos++) {
      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
    }

    return new String(chars);
  }

  public void testSuffixMatcher() {
    int numMatches= 0;
    int numInputsTested= 0;

    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {

      // build list of suffixes
      int numSuffixes= (int) (Math.random() * MAX_TEST_SUFFIXES);
      String[] suffixes= new String[numSuffixes];
      for (int i= 0; i < numSuffixes; i++) {
        suffixes[i]= makeRandString(0, MAX_SUFFIX_LEN);
      }

      SuffixStringMatcher sufmatcher= new SuffixStringMatcher(suffixes);

      // test random strings for suffix matches
      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
        String input= makeRandString(0, MAX_INPUT_LEN);

        // brute-force reference answer; empty suffixes never count
        boolean matches= false;
        int longestMatch= -1;
        int shortestMatch= -1;

        for (int j= 0; j < suffixes.length; j++) {

          if ((suffixes[j].length() > 0)
              && input.endsWith(suffixes[j])) {

            matches= true;
            int matchSize= suffixes[j].length();

            if (matchSize > longestMatch)
              longestMatch= matchSize;

            if ( (matchSize < shortestMatch)
                 || (shortestMatch == -1) )
              shortestMatch= matchSize;
          }

        }

        if (matches)
          numMatches++;

        numInputsTested++;

        assertTrue( "'" + input + "' should " + (matches ? "" : "not ")
                    + "match!",
                    matches == sufmatcher.matches(input) );

        if (matches) {
          // Inputs are random, so every assertion names the input; without
          // that a failure could not be reproduced.
          String shortest= sufmatcher.shortestMatch(input);
          assertTrue( "shortestMatch returned null for '" + input + "'",
                      shortest != null );
          assertEquals( "wrong shortestMatch length for '" + input + "'",
                        shortestMatch, shortest.length() );
          assertEquals( "wrong shortestMatch for '" + input + "'",
                        input.substring(input.length() - shortestMatch),
                        shortest );

          String longest= sufmatcher.longestMatch(input);
          assertTrue( "longestMatch returned null for '" + input + "'",
                      longest != null );
          assertEquals( "wrong longestMatch length for '" + input + "'",
                        longestMatch, longest.length() );
          assertEquals( "wrong longestMatch for '" + input + "'",
                        input.substring(input.length() - longestMatch),
                        longest );
        }
      }
    }

    System.out.println("got " + numMatches + " matches out of "
                       + numInputsTested + " tests");
  }

}
--- NEW FILE: TestSoftHashMap.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Unit tests for SoftHashMap.
 *
 * <p><code>testBasicOps</code> checks put/get/containsKey/remove/size/clear
 * while holding strong references to every key and value.
 * <code>testExpiry</code> keeps inserting entries it holds no other
 * reference to until the finalizers of a value and of a key have been
 * observed to run.
 */
public class TestSoftHashMap extends TestCase {

  // set to true to get flood of status messages on stderr- useful
  // for seeing when JVM is collecting everything.
  private static final boolean verbose= false;

  // 1kB for int[]
  private static final int TEST_VALUE_ARRAY_SIZE= 1024 / 4;

  private static final int BASIC_OPS_SIZE= 10;

  // Set from finalize() (i.e. on the finalizer thread) and polled by the
  // test thread in testExpiry(); volatile so the polling loops are
  // guaranteed to observe the update.
  private volatile boolean keyHasBeenFinalized;
  private volatile boolean valHasBeenFinalized;

  /** Map key wrapping an Integer; optionally reports its own finalization. */
  private class TestKey {
    Integer key;
    boolean notify;

    TestKey(Integer key, boolean notify) {
      this.key= key;
      this.notify= notify;
    }

    protected void finalize() {
      if (notify)
        TestSoftHashMap.this.keyFinalized(key);
    }

    public int hashCode() {
      return key.hashCode();
    }

    public boolean equals(Object o) {
      if (o == null)
        return false;
      if ( !(o instanceof TestKey) )
        return false;
      TestKey other= (TestKey) o;
      return (this.key.equals(other.key));
    }

    public String toString() {
      return "Key:"+key;
    }
  }

  /**
   * Map value of roughly 1kB whose first array slot records the key it was
   * created for.  On finalization it optionally reports to the test and
   * then calls every registered finalization listener.
   */
  private class TestValue implements SoftHashMap.FinalizationNotifier {
    int[] val;
    boolean notify;
    ArrayList finalizationListeners;

    TestValue(int key, boolean notify) {
      this.val= new int[TEST_VALUE_ARRAY_SIZE];
      this.val[0]= key;
      this.notify= notify;
      this.finalizationListeners= new ArrayList();
    }

    public void addFinalizationListener(SoftHashMap.FinalizationListener
                                        listener) {
      finalizationListeners.add(listener);
    }

    protected void finalize() {
      if (notify)
        TestSoftHashMap.this.valFinalized(val[0]);
      for (Iterator iter= finalizationListeners.iterator();
           iter.hasNext() ; ) {
        SoftHashMap.FinalizationListener l=
          (SoftHashMap.FinalizationListener) iter.next();
        l.finalizationOccurring();
      }
    }

    boolean isMyKey(int key) {
      return key == val[0];
    }

    public String toString() {
      return "Val:"+val[0];
    }
  }

  public TestSoftHashMap(String name) {
    super(name);
  }

  public void testBasicOps() {
    SoftHashMap shm= new SoftHashMap();

    // cache keys & vals so they don't go away
    TestKey[] keys= new TestKey[BASIC_OPS_SIZE];
    TestValue[] vals= new TestValue[BASIC_OPS_SIZE];

    for (int i= 0; i < BASIC_OPS_SIZE; i++) {
      keys[i]= new TestKey(new Integer(i), false);
      vals[i]= new TestValue(i, false);
      shm.put(keys[i], vals[i]);
    }

    // look everything up through fresh, equal key objects
    for (int i= 0; i < BASIC_OPS_SIZE; i++) {
      TestValue v= (TestValue) shm.get(new TestKey(new Integer(i), false));
      assertTrue("got back null, expecting value! (key= "+i+")", v != null);
      assertTrue("got back wrong value (isMyKey())!", v.isMyKey(i));
      assertTrue("got back wrong value (!=)!", v == vals[i]);
      assertTrue("contains key doesn't have " + i,
                 shm.containsKey(new TestKey(new Integer(i), false)));
      assertTrue("isEmpty returns true when it shouldn't",
                 !shm.isEmpty());
    }

    Object removed= shm.remove(
      new TestKey(new Integer(BASIC_OPS_SIZE - 1), false));
    if (verbose)
      System.err.println("removed: " + removed);

    // Look up the key that was just removed.  The original queried
    // BASIC_OPS_SIZE, a key that was never inserted, so the check passed
    // whether or not remove() worked.
    TestValue v= (TestValue)
      shm.get(new TestKey(new Integer(BASIC_OPS_SIZE - 1), false));
    assertTrue("got back val after delete!", v == null);

    int size= shm.size();
    assertTrue("got bad value from size(); returned " + size,
               size == (BASIC_OPS_SIZE - 1) );

    shm.clear();
    assertTrue("isEmpty returns false when it shouldn't",
               shm.isEmpty());
  }

  public void testExpiry() {
    if (verbose)
      System.err.println("entering testExpiry()");
    SoftHashMap shm= new SoftHashMap();
    valHasBeenFinalized= false;
    keyHasBeenFinalized= false;

    int i= 0;
    try {

      // keep inserting until some value has been finalized...
      while (!valHasBeenFinalized) {
        if (verbose)
          System.err.println("(!v) trying to put " + i);
        shm.put(new TestKey(new Integer(i), true), new TestValue(i, true));
        i++;
        if (verbose)
          System.err.println("after adding " + i
                             + " items, size is " + shm.size());
      }

      // ...and then until some key has been finalized too
      while (!keyHasBeenFinalized) {
        if (verbose)
          System.err.println("(!k) trying to put " + i);
        shm.put(new TestKey(new Integer(i), true), new TestValue(i, true));
        i++;
      }

      // sleep and busy loop to see if JVM goes on collecting stuff...
      if (verbose)
        System.err.println("sleeping... ");
      Thread.sleep(20 * 1000);
      if (verbose)
        System.err.println("busy looping...");
      int j;
      for (j= 0; j < 2000000; j++) {
        i+= j;
      }
      if (verbose)
        System.err.println("done, j=" + j);

    } catch (InterruptedException e) {
      // report as before, but keep the interrupt visible to the caller
      System.err.println("caught exception");
      e.printStackTrace();
      Thread.currentThread().interrupt();
    } catch (Exception e) {
      System.err.println("caught exception");
      e.printStackTrace();
    } finally {
      if (verbose)
        System.err.println("out of put loops");
    }

  }

  /** Callback from TestKey.finalize(). */
  void keyFinalized(Integer key) {
    if (verbose)
      System.err.println("notified of finalized key: " + key);
    keyHasBeenFinalized= true;
  }

  /** Callback from TestValue.finalize(). */
  void valFinalized(int key) {
    if (verbose)
      System.err.println("notified of finalized value for: " + key);
    valHasBeenFinalized= true;
  }

  /** Runs only the expiry test, for watching GC behaviour by hand. */
  public static final void main(String[] a) {
    TestSoftHashMap t= new TestSoftHashMap("test");
    t.testExpiry();
  }

}
--- NEW FILE: TestGZIPUtils.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import junit.framework.TestCase;
import java.io.IOException;
/** Unit tests for GZIPUtils methods. */
public class TestGZIPUtils extends TestCase {
public TestGZIPUtils(String name) {
super(name);
}
/* a short, highly compressable, string */
String SHORT_TEST_STRING=
"aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
/* a short, highly compressable, string */
String LONGER_TEST_STRING=
SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+ SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+ SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+ SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING;
/* a snapshot of the nutch webpage */
String WEBPAGE=
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
+ "<html>\n"
+ "<head>\n"
+ " <meta http-equiv=\"content-type\"\n"
+ " content=\"text/html; charset=ISO-8859-1\">\n"
+ " <title>Nutch</title>\n"
+ "</head>\n"
+ "<body>\n"
+ "<h1\n"
+ " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color:
rgb(255, 153, 0);\"><a\n"
+ " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153,
0);\">Nutch</font></a><br>\n"
+ "<small>an open source web-search engine</small></h1>\n"
+ "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+ "<table\n"
+ " style=\"width: 100%; text-align: left; margin-left: auto; margin-right:
auto;\"\n"
+ " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
+ " <tbody>\n"
+ " <tr>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ "
href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
+ " </td>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ " href=\"tutorial.html\">Tutorial</a><br>\n"
+ " </td>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ "
href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
+ " </td>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ " href=\"api/index.html\">Javadoc</a><br>\n"
+ " </td>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ "
href=\"http://sourceforge.net/tracker/?atid=491356&group_id=59548&func=browse\">Bugs</a><br>\n"
+ " </td>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
+ " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ " href=\"policies.html\">Policies</a><br>\n"
+ " </td>\n"
+ " </tr>\n"
+ " </tbody>\n"
+ "</table>\n"
+ "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+ "<h2>Introduction</h2>\n"
+ "Nutch is a nascent effort to implement an open-source web search\n"
+ "engine. Web search is a basic requirement for internet navigation, yet\n"
+ "the number of web search engines is decreasing. Today's oligopoly could\n"
+ "soon be a monopoly, with a single company controlling nearly all web\n"
+ "search for its commercial gain. That would not be good for the\n"
+ "users of internet. Nutch aims to enable anyone to easily and\n"
+ "cost-effectively deploy a world-class web search engine.<br>\n"
+ "<br>\n"
+ "To succeed, the Nutch software must be able to:<br>\n"
+ "<ul>\n"
+ " <li> crawl several billion pages per month</li>\n"
+ " <li>maintain an index of these pages</li>\n"
+ " <li>search that index up to 1000 times per second</li>\n"
+ " <li>provide very high quality search results</li>\n"
+ " <li>operate at minimal cost</li>\n"
+ "</ul>\n"
+ "<h2>Status</h2>\n"
+ "Currently we're just a handful of developers working part-time to put\n"
+ "together a demo. The demo is coded entirely in Java. However\n"
+ "persistent data is written in well-documented formats so that modules\n"
+ "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
+ "project progresses.<br>\n"
+ "<br>\n"
+ "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
+ " href=\"http://sourceforge.net\"> </a>\n"
+ "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
+ " src=\"http://sourceforge.net/sflogo.php?group_id=59548&type=1\"\n"
+ " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
+ " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
+ "</body>\n"
+ "</html>\n";
// tests
public void testZipUnzip() {
byte[] testBytes= SHORT_TEST_STRING.getBytes();
testZipUnzip(testBytes);
testBytes= LONGER_TEST_STRING.getBytes();
testZipUnzip(testBytes);
testBytes= WEBPAGE.getBytes();
testZipUnzip(testBytes);
}
public void testZipUnzipBestEffort() {
byte[] testBytes= SHORT_TEST_STRING.getBytes();
testZipUnzipBestEffort(testBytes);
testBytes= LONGER_TEST_STRING.getBytes();
testZipUnzipBestEffort(testBytes);
testBytes= WEBPAGE.getBytes();
testZipUnzipBestEffort(testBytes);
}
public void testTruncation() {
byte[] testBytes= SHORT_TEST_STRING.getBytes();
testTruncation(testBytes);
testBytes= LONGER_TEST_STRING.getBytes();
testTruncation(testBytes);
testBytes= WEBPAGE.getBytes();
testTruncation(testBytes);
}
public void testLimit() {
byte[] testBytes= SHORT_TEST_STRING.getBytes();
testLimit(testBytes);
testBytes= LONGER_TEST_STRING.getBytes();
testLimit(testBytes);
testBytes= WEBPAGE.getBytes();
testLimit(testBytes);
}
// helpers
public void testZipUnzip(byte[] origBytes) {
byte[] compressedBytes= GZIPUtils.zip(origBytes);
assertTrue("compressed array is not smaller!",
compressedBytes.length < origBytes.length);
byte[] uncompressedBytes= null;
try {
uncompressedBytes= GZIPUtils.unzip(compressedBytes);
} catch (IOException e) {
e.printStackTrace();
assertTrue("caught exception '" + e + "' during unzip()",
false);
}
assertTrue("uncompressedBytes is wrong size",
uncompressedBytes.length == origBytes.length);
for (int i= 0; i < origBytes.length; i++)
if (origBytes[i] != uncompressedBytes[i])
assertTrue("uncompressedBytes does not match origBytes", false);
}
public void testZipUnzipBestEffort(byte[] origBytes) {
byte[] compressedBytes= GZIPUtils.zip(origBytes);
assertTrue("compressed array is not smaller!",
compressedBytes.length < origBytes.length);
byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes);
assertTrue("uncompressedBytes is wrong size",
uncompressedBytes.length == origBytes.length);
for (int i= 0; i < origBytes.length; i++)
if (origBytes[i] != uncompressedBytes[i])
assertTrue("uncompressedBytes does not match origBytes", false);
}
public void testTruncation(byte[] origBytes) {
byte[] compressedBytes= GZIPUtils.zip(origBytes);
System.out.println("original data has len " + origBytes.length);
System.out.println("compressed data has len "
+ compressedBytes.length);
for (int i= compressedBytes.length; i >= 0; i--) {
byte[] truncCompressed= new byte[i];
for (int j= 0; j < i; j++)
truncCompressed[j]= compressedBytes[j];
byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed);
if (trunc == null) {
System.out.println("truncated to len "
+ i + ", trunc is null");
} else {
System.out.println("truncated to len "
+ i + ", trunc.length= "
+ trunc.length);
for (int j= 0; j < trunc.length; j++)
if (trunc[j] != origBytes[j])
assertTrue("truncated/uncompressed array differs at pos "
+ j + " (compressed data had been truncated to len "
+ i + ")", false);
}
}
}
public void testLimit(byte[] origBytes) {
byte[] compressedBytes= GZIPUtils.zip(origBytes);
assertTrue("compressed array is not smaller!",
compressedBytes.length < origBytes.length);
for (int i= 0; i < origBytes.length; i++) {
byte[] uncompressedBytes=
GZIPUtils.unzipBestEffort(compressedBytes, i);
assertTrue("uncompressedBytes is wrong size",
uncompressedBytes.length == i);
for (int j= 0; j < i; j++)
if (origBytes[j] != uncompressedBytes[j])
assertTrue("uncompressedBytes does not match origBytes", false);
}
}
}
-------------------------------------------------------
The SF.Net email is sponsored by EclipseCon 2004
Premiere Conference on Open Tools Development and Integration
See the breadth of Eclipse activity. February 3-5 in Anaheim, CA.
http://www.eclipsecon.org/osdn
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs