Re[6]: Analyzer don't work with wildcard queries, snowball analyzer.

Sven Duzont Sat, 02 Apr 2005 04:01:40 -0800

Hello,

EH> What about handling BooleanQuery's nested within a BooleanQuery?
EH> You'll need some recursion.
Thanks for all the hints; I've re-coded the method to handle nested
BooleanQueries.


EH> Could you share that filter with the community?
Of course; the code is in the attachment.

>>     // The first clause is required
>>     if(bClauses[0].prohibited != true)
>>       bClauses[0].required = true;
EH> Why do you flip the required flag like this?
On the search interface, next to the keyword field, there is a combo box
with 4 values:
- KW_MODE_OR      : "Search for at least one of the terms"
- KW_MODE_AND     : "Search for all the terms"
- KW_MODE_PHRASE  : "Search for exact phrase"
- KW_MODE_BOOLEAN : "Search using boolean query" (for advanced users)
  I flip the required flag only when the boolean expression mode is selected.
  It forces the first term to be required, so the user does not
  need to specify the "+" or "AND" operator.
  Maybe there is a more elegant way to do this?
  The code follows.

  Thanks
---
 Sven (is not a bersek)

/*-------------------------------- CODE ---------------------------*/
// Keywords that must appear in the CV (resume) body.
// This is a fragment of a larger query-building method: cvSearchBean, bQuery,
// analyzer, the KW_MODE_* constants, FIELD_RESUME_BODY and CreateCustomQuery
// are all defined outside this snippet.
// NOTE(review): bQuery is presumably the enclosing BooleanQuery; each branch
// adds its sub-query with (required = true, prohibited = false).
if (cvSearchBean.keywords != null &&
    cvSearchBean.keywords.length() != 0) {
  // "All of the keywords" (AND) or "At least one of the keywords" (OR)
  final boolean allRequired = cvSearchBean.keywordModeId == KW_MODE_AND;
  if (allRequired || cvSearchBean.keywordModeId == KW_MODE_OR) {
    final Query q = CreateCustomQuery(QueryParser.parse(
           cvSearchBean.keywords, FIELD_RESUME_BODY, analyzer));
    if (q instanceof BooleanQuery) {
      // Override the flags on every top-level clause so the selected mode
      // decides the semantics: nothing is prohibited, and each clause is
      // required only in AND mode.
      final BooleanClause[] terms = ((BooleanQuery) q).getClauses();
      for (int i = 0; i < terms.length; i++) {
        terms[i].prohibited = false;
        terms[i].required = allRequired;
      }
    }
    bQuery.add(q, true, false);
  }
  // Exact phrase
  if (cvSearchBean.keywordModeId == KW_MODE_PHRASE) {
    final PhraseQuery q = new PhraseQuery();
    final TokenStream ts = analyzer.tokenStream(FIELD_RESUME_BODY,
                          new StringReader(cvSearchBean.keywords));
    try {
      // One phrase term per token produced by the analyzer, in order.
      Token token;
      while ((token = ts.next()) != null) {
        q.add(new Term(FIELD_RESUME_BODY, token.termText()));
      }
    } finally {
      // Release the stream even if tokenization fails.
      ts.close();
    }
    bQuery.add(q, true, false);
  }
  // Boolean expression (advanced users)
  if (cvSearchBean.keywordModeId == KW_MODE_BOOLEAN) {
    // Parse the keyword expression, i.e. the same field the enclosing guard
    // validated as non-empty.
    // NOTE(review): this previously parsed cvSearchBean.title, which looks
    // like a copy/paste slip -- confirm against the search form.
    final Query query = QueryParser.parse(cvSearchBean.keywords,
                                        FIELD_RESUME_BODY, analyzer);
    if (query instanceof BooleanQuery) {
      final BooleanClause[] bClauses =
                              ((BooleanQuery) query).getClauses();
      // Make the first clause required unless the user explicitly prohibited
      // it, so a leading "+" / "AND" is not needed. The length check guards
      // against a BooleanQuery that ended up with no clauses.
      if (bClauses.length > 0 && !bClauses[0].prohibited) {
        bClauses[0].required = true;
      }
    }
    bQuery.add(CreateCustomQuery(query), true, false);
  }
} // end of keyword handling

/*--------------------------END OF CODE --------------------------*/
      
      

EH>     Erik

package org.apache.lucene.analysis;

/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

/**
 * A filter that replaces accented characters in the ISO Latin 1 character set
 * by their unaccented equivalents. The case will not be altered.
 * <p>
 * For instance, 'à' will be replaced by 'a'. Ligatures and a few special
 * letters expand to two characters (for example 'Æ' becomes "AE" and 'ß'
 * becomes "ss"), so the filtered term may be longer than the original.
 * <p>
 * Token offsets, type and position increment are carried over unchanged from
 * the wrapped stream.
 */
public class ISOLatin1AccentFilter extends TokenFilter {
        /**
         * Wraps the given stream.
         *
         * @param input the token stream whose terms should have accents removed
         */
        public ISOLatin1AccentFilter(TokenStream input) {
                super(input);
        }

        /**
         * Returns the next token of the wrapped stream with accented characters
         * replaced by unaccented equivalents.
         *
         * @return the filtered token, or <code>null</code> at end of stream
         * @throws java.io.IOException if the wrapped stream fails
         */
        public final Token next() throws java.io.IOException {
                final Token t = input.next();
                if (t == null) {
                        return null;
                }
                // Build a token with filtered characters and the same offsets/type.
                final Token filtered = new Token(RemoveAccents(t.termText()),
                                t.startOffset(), t.endOffset(), t.type());
                // A freshly constructed Token uses the default position increment;
                // copy the source value so gaps left by upstream filters survive.
                filtered.setPositionIncrement(t.getPositionIncrement());
                return filtered;
        }

        /**
         * Replaces accented characters in a String by unaccented equivalents.
         * Characters without a mapping are copied through unchanged.
         *
         * @param input the text to filter; may be <code>null</code>
         * @return the filtered text, or <code>null</code> if <code>input</code>
         *         is <code>null</code>
         */
        public final static String RemoveAccents(String input) {
                if (input == null) {
                        return null;
                }
                final int length = input.length();
                final StringBuffer output = new StringBuffer(length);
                for (int i = 0; i < length; i++) {
                        final char c = input.charAt(i);
                        // Every mapped character is at or above U+00C0, so anything
                        // below it can skip the switch entirely.
                        if (c < '\u00C0') {
                                output.append(c);
                                continue;
                        }
                        switch (c) {
                                case '\u00C0' : // À
                                case '\u00C1' : // Á
                                case '\u00C2' : // Â
                                case '\u00C3' : // Ã
                                case '\u00C4' : // Ä
                                case '\u00C5' : // Å
                                        output.append('A');
                                        break;
                                case '\u00C6' : // Æ
                                        output.append("AE");
                                        break;
                                case '\u00C7' : // Ç
                                        output.append('C');
                                        break;
                                case '\u00C8' : // È
                                case '\u00C9' : // É
                                case '\u00CA' : // Ê
                                case '\u00CB' : // Ë
                                        output.append('E');
                                        break;
                                case '\u00CC' : // Ì
                                case '\u00CD' : // Í
                                case '\u00CE' : // Î
                                case '\u00CF' : // Ï
                                        output.append('I');
                                        break;
                                case '\u00D0' : // Ð
                                        output.append('D');
                                        break;
                                case '\u00D1' : // Ñ
                                        output.append('N');
                                        break;
                                case '\u00D2' : // Ò
                                case '\u00D3' : // Ó
                                case '\u00D4' : // Ô
                                case '\u00D5' : // Õ
                                case '\u00D6' : // Ö
                                case '\u00D8' : // Ø
                                        output.append('O');
                                        break;
                                case '\u0152' : // Œ
                                        output.append("OE");
                                        break;
                                case '\u00DE' : // Þ
                                        output.append("TH");
                                        break;
                                case '\u00D9' : // Ù
                                case '\u00DA' : // Ú
                                case '\u00DB' : // Û
                                case '\u00DC' : // Ü
                                        output.append('U');
                                        break;
                                case '\u00DD' : // Ý
                                case '\u0178' : // Ÿ
                                        output.append('Y');
                                        break;
                                case '\u00E0' : // à
                                case '\u00E1' : // á
                                case '\u00E2' : // â
                                case '\u00E3' : // ã
                                case '\u00E4' : // ä
                                case '\u00E5' : // å
                                        output.append('a');
                                        break;
                                case '\u00E6' : // æ
                                        output.append("ae");
                                        break;
                                case '\u00E7' : // ç
                                        output.append('c');
                                        break;
                                case '\u00E8' : // è
                                case '\u00E9' : // é
                                case '\u00EA' : // ê
                                case '\u00EB' : // ë
                                        output.append('e');
                                        break;
                                case '\u00EC' : // ì
                                case '\u00ED' : // í
                                case '\u00EE' : // î
                                case '\u00EF' : // ï
                                        output.append('i');
                                        break;
                                case '\u00F0' : // ð
                                        output.append('d');
                                        break;
                                case '\u00F1' : // ñ
                                        output.append('n');
                                        break;
                                case '\u00F2' : // ò
                                case '\u00F3' : // ó
                                case '\u00F4' : // ô
                                case '\u00F5' : // õ
                                case '\u00F6' : // ö
                                case '\u00F8' : // ø
                                        output.append('o');
                                        break;
                                case '\u0153' : // œ
                                        output.append("oe");
                                        break;
                                case '\u00DF' : // ß
                                        output.append("ss");
                                        break;
                                case '\u00FE' : // þ
                                        output.append("th");
                                        break;
                                case '\u00F9' : // ù
                                case '\u00FA' : // ú
                                case '\u00FB' : // û
                                case '\u00FC' : // ü
                                        output.append('u');
                                        break;
                                case '\u00FD' : // ý
                                case '\u00FF' : // ÿ
                                        output.append('y');
                                        break;
                                default :
                                        output.append(c);
                                        break;
                        }
                }
                return output.toString();
        }
}

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Re[6]: Analyzer don't work with wildcard queries, snowball analyzer.

Reply via email to