Mark/All, I'm using your highlighter and have been very happy with it. You have saved me a ton of time.
In my particular use case, I need to get a few fragments from several different sources and then select the best fragment(s) to use. To do this I needed access to the fragment scores, so I made a couple of changes to your code. What do you think about exposing the score of the fragments? I've included my changes as a patch against the current CVS version. My implementation is pretty basic, but I'm interested to see what you think. Thanks, Jason Index: contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene-sandbox/contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java,v retrieving revision 1.2 diff -c -r1.2 Highlighter.java *** contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java 26 Jul 2004 20:39:47 -0000 1.2 --- contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java 11 Aug 2004 17:01:37 -0000 *************** *** 118,123 **** --- 118,179 ---- return (String[]) fragTexts.toArray(new String[0]); } + public final ScoredFragment[] getScoredFragments( + TokenStream tokenStream, + String text, + int maxNumFragments) + throws IOException + { + maxNumFragments = Math.max(1, maxNumFragments); //sanity check + StringBuffer newText = new StringBuffer(); + + TextFragment[] frag =getBestDocFragments(tokenStream,text, newText, maxNumFragments); + + mergeContiguousFragments(frag); + + //Get text + ArrayList scoredfrags = new ArrayList(); + int n = 0; + for (int i = 0; i < frag.length; i++) + { + if ((frag[i] != null) && (frag[i].getScore() > 0)) + { + String fragment = newText.substring( + frag[i].textStartPos, + frag[i].textEndPos); + scoredfrags.add(new ScoredFragment(fragment, frag[i].getScore())); + } + } + return (ScoredFragment[]) scoredfrags.toArray(new ScoredFragment[0]); + } + + public final ScoredFragment getScoredFragments( + TokenStream 
tokenStream, + String text, + int maxNumFragments, + String separator) + throws IOException + + { + ScoredFragment[] frags = getScoredFragments(tokenStream, text, maxNumFragments); + + StringBuffer result = new StringBuffer(); + float totalScore = 0; + for (int i = 0; i < frags.length; i++) + { + if (i > 0) + { + result.append(separator); + } + result.append(frags[i].getFragment()); + totalScore += frags[i].getScore(); + } + + return new ScoredFragment(result.toString(), totalScore); + + + } + /** * Low level api to get the most relevant sections of the document * @param tokenStream Index: contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java =================================================================== RCS file: contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java diff -N contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java *** /dev/null 1 Jan 1970 00:00:00 -0000 --- contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java 1 Jan 1970 00:00:00 -0000 *************** *** 0 **** --- 1,25 ---- + package org.apache.lucene.search.highlight; + + public final class ScoredFragment + { + + private String fragment; + private float score; + + public ScoredFragment(String fragment, float score) + { + this.fragment = fragment; + this.score = score; + } + + public String getFragment() + { + return fragment; + } + + public float getScore() + { + return score; + } + + } --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]