Thanks for catching this, Doron. Another option, if you want to keep the case-insensitive feature here, would be to use toUpperCase(Locale.ENGLISH).
It might look bad, but it's actually recommended by the JDK for locale-insensitive strings: http://download.oracle.com/javase/6/docs/api/java/lang/String.html#toUpperCase() On Sun, Feb 6, 2011 at 11:43 AM, <dor...@apache.org> wrote: > Author: doronc > Date: Sun Feb 6 16:43:54 2011 > New Revision: 1067699 > > URL: http://svn.apache.org/viewvc?rev=1067699&view=rev > Log: > LUCENE-1540: Improvements to contrib.benchmark for TREC collections - fix > test failures in some locales due to toUpperCase() > > Modified: > > lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java > > lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/trecdocs.zip > > Modified: > lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java?rev=1067699&r1=1067698&r2=1067699&view=diff > ============================================================================== > --- > lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java > (original) > +++ > lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java > Sun Feb 6 16:43:54 2011 > @@ -29,7 +29,12 @@ import java.util.Map; > public abstract class TrecDocParser { > > /** Types of trec parse paths, */ > - public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES } > + public enum ParsePathType { GOV2("gov2"), FBIS("fbis"), FT("ft"), > FR94("fr94"), LATIMES("latimes"); > + public final String dirName; > + private ParsePathType(String dirName) { > + this.dirName = dirName; > + } > + } > > /** trec parser type used for unknown extensions */ > public static final ParsePathType 
DEFAULT_PATH_TYPE = ParsePathType.GOV2; > @@ -46,7 +51,7 @@ public abstract class TrecDocParser { > static final Map<String,ParsePathType> pathName2Type = new > HashMap<String,ParsePathType>(); > static { > for (ParsePathType ppt : ParsePathType.values()) { > - pathName2Type.put(ppt.name(),ppt); > + pathName2Type.put(ppt.dirName,ppt); > } > } > > @@ -59,7 +64,7 @@ public abstract class TrecDocParser { > public static ParsePathType pathType(File f) { > int pathLength = 0; > while (f != null && ++pathLength < MAX_PATH_LENGTH) { > - ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase()); > + ParsePathType ppt = pathName2Type.get(f.getName()); > if (ppt!=null) { > return ppt; > } > > Modified: > lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/trecdocs.zip > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/trecdocs.zip?rev=1067699&r1=1067698&r2=1067699&view=diff > ============================================================================== > Binary files - no diff available. > > > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org