Ok. I'm using positions on the ANALYZED fields, where the search is by
individual terms. The other fields are NOT_ANALYZED and are searched by the
complete term, such as culture code, URL, or document code.
The index has documents in three languages (Spanish, English and Portuguese
(BR)). When performing a search, I apply filters using TermQuery on the
NOT_ANALYZED fields, and I use PhraseQuery, PrefixQuery or TermQuery, added to
a BooleanQuery, on the analyzed fields. The same code works with
StandardAnalyzer. When using BrazilianAnalyzer, most terms return no results
from the documents indexed with this analyzer.
In the attached code, "searchExpression" is the phrase containing the words
entered by the end user. I search for documents containing all of the words,
any of the words, or the whole phrase. This method builds my BooleanQuery
object only for the content (full text).
Below is a sample of the settings of my columns to index. Some values are
listed here with the names of the columns.
Below this code snippet is a snippet of the implementation of the search.
[code]
// Here is the configuration of the fields for each document
// Builds the index document for one node through a fluent chain of AddField
// calls. Each call passes: column name, value, analyzed/not-analyzed flag,
// store flag, and the term-occurrence storage mode.
document = new DocumentIndex();
// Full-text content fields: HTML-converted text, ANALYZED and stored, with
// WITHPOSITIONSOFFSETS.
// NOTE(review): presumably this maps to Lucene term vectors with positions and
// offsets, which the search code's FastVectorHighlighter relies on - confirm
// in DocumentIndex.AddField.
document.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
HtmlConvert.Convert(content), IndexField.ANALYZED, StoreField.YES,
StoreTermOccurrencesField.WITHPOSITIONSOFFSETS)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
HtmlConvert.Convert(nodeXml), IndexField.ANALYZED, StoreField.YES,
StoreTermOccurrencesField.WITHPOSITIONSOFFSETS)
// Exact-match fields: NOTANALYZED, so the whole value is indexed as one term.
// The values are lowercased with ToLower() before indexing, so query-side
// terms must be lowercased the same way to match.
// NOTE(review): ToLower() uses the current thread culture - confirm the query
// side applies the same casing rule.
.AddField(ColumnsIndexedDocuments.COLUMN_SITENAME,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_SITENAME],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_NODESITEID,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODESITEID],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_CLASSNAME,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_CLASSNAME],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
// Node id and ACL id are indexed as-is (no lowercasing applied here).
.AddField(ColumnsIndexedDocuments.COLUMN_NODEID,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEID], ""),
IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_NODEACLID,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEACLID], ""),
IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
// Path, navigation-flag and culture fields: lowercased, NOTANALYZED.
.AddField(ColumnsIndexedDocuments.COLUMN_NODEALIASPATH,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEALIASPATH],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTMENUITEMHIDEINNAVIGATION,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTMENUITEMHIDEINNAVIGATION],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES,
StoreTermOccurrencesField.NO)
// Name and tag fields: ANALYZED (tokenized by the analyzer) but without
// stored term occurrences.
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME],
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_NODENAME,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODENAME], ""),
IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTTAGS,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTTAGS],
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
// Publish-window dates: converted to Lucene's string form with
// DateTools.DateToString at MILLISECOND resolution and indexed NOTANALYZED.
// NOTE(review): DataHelper.DATETIME_MIN_BD is presumably the fallback when the
// column is empty, and the third GetDateTime argument the culture used for
// parsing - confirm against ValidationHelper. The same minimum value is used
// for both the "from" and the "to" date.
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHFROM,
DateTools.DateToString(ValidationHelper.GetDateTime(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHFROM],
DataHelper.DATETIME_MIN_BD,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
"")), DateTools.Resolution.MILLISECOND), IndexField.NOTANALYZED,
StoreField.YES, StoreTermOccurrencesField.NO)
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHTO,
DateTools.DateToString(ValidationHelper.GetDateTime(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHTO],
DataHelper.DATETIME_MIN_BD,
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
"")), DateTools.Resolution.MILLISECOND), IndexField.NOTANALYZED,
StoreField.YES, StoreTermOccurrencesField.NO);
// The document is written with the current Analyzer. Terms of ANALYZED fields
// are stored in whatever form that analyzer produces, so query terms for those
// fields only match if they are processed the same way at search time.
IndexManagerWriter.AddDocument(document, Analyzer);
[/code]
[code]
// Here is the conjunction of queries
/// <summary>
/// Searches the index for documents matching the search expression, restricted
/// by site, path, culture, publish state and class names.
/// </summary>
/// <remarks>
/// The expression query is rendered with <c>ToString()</c> and re-parsed by a
/// <c>QueryParser</c> bound to the analyzer returned by <c>GetAnalyzer(cultureCode)</c>.
/// The parameter restrictions are applied as a <c>QueryFilter</c>, not as a clause
/// of the main query. The parameters <paramref name="searchChildNodes"/>,
/// <paramref name="filterResultsByReadPermission"/>, <paramref name="whereCondition"/>,
/// <paramref name="orderBy"/> and <paramref name="filterCondition"/> are not read
/// by this method body.
/// </remarks>
/// <param name="siteName">Site name passed to the parameter query.</param>
/// <param name="searchNodePath">Node path passed to the parameter query.</param>
/// <param name="cultureCode">Culture code; also selects the analyzer.</param>
/// <param name="searchExpression">Words typed by the user; may be null or empty.</param>
/// <param name="searchMode">How the words are combined (all words, any word, exact phrase).</param>
/// <param name="classNames">Class names passed to the parameter query.</param>
/// <param name="searchOnlyPublished">Passed to the parameter query.</param>
/// <param name="combineWithDefaultCulture">Passed to the parameter query.</param>
/// <param name="searchDocumentHistory">Whether the node-XML content field is searched.</param>
/// <param name="onlyDocumentHistory">Whether the main content field is skipped.</param>
/// <returns>The result of <c>IndexManagerReader.Search</c> for the built query and filter.</returns>
public override DataSet Search(
    string siteName,
    string searchNodePath,
    string cultureCode,
    string searchExpression,
    SearchModeEnum searchMode,
    bool searchChildNodes,
    string classNames,
    bool filterResultsByReadPermission,
    bool searchOnlyPublished,
    string whereCondition,
    string orderBy,
    bool combineWithDefaultCulture,
    string filterCondition,
    bool searchDocumentHistory,
    bool onlyDocumentHistory)
{
    // Main boolean query; it stays empty when no search expression is given.
    BooleanQuery rootQuery = new BooleanQuery();

    // Restrict by the search expression.
    if (!string.IsNullOrEmpty(searchExpression))
    {
        rootQuery.Add(
            GetExpressionBooleanQuery(searchExpression, searchMode, searchDocumentHistory, onlyDocumentHistory),
            BooleanClause.Occur.MUST);
    }

    // Restrict by the parameters; applied as a filter instead of a MUST clause.
    Filter parameterFilter = new QueryFilter(
        GetParametersBooleanQuery(
            siteName,
            searchNodePath,
            cultureCode,
            combineWithDefaultCulture,
            searchOnlyPublished,
            classNames));

    // Select the analyzer for the requested culture.
    Analyzer = GetAnalyzer(cultureCode);

    // Create the parser, using the main content column as the default field.
    QueryParser contentParser = new QueryParser(
        Lucene.Net.Util.Version.LUCENE_29,
        ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
        Analyzer);

    // Open the index for reading.
    IndexManagerReader.OpenReader(SearchProvider.DirectoryPathReaderIndex);

    string[] returnedColumns =
    {
        ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
        ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
        ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME,
        ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH,
        ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH,
        ColumnsIndexedDocuments.COLUMN_NODEALIASPATH,
        ColumnsIndexedDocuments.COLUMN_NODEID,
        ColumnsIndexedDocuments.COLUMN_NODESITEID,
        ColumnsIndexedDocuments.COLUMN_NODEACLID,
        ColumnsIndexedDocuments.COLUMN_SITENAME,
        ColumnsIndexedDocuments.COLUMN_CLASSNAME
    };

    // On the live site the published content is highlighted; in every other
    // view mode the node-XML content is highlighted instead.
    string highlightColumn;
    if (CMSContext.ViewMode == ViewModeEnum.LiveSite)
    {
        highlightColumn = ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT;
    }
    else
    {
        highlightColumn = ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML;
    }

    return IndexManagerReader.Search(
        contentParser.Parse(rootQuery.ToString()),
        parameterFilter,
        ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
        Analyzer,
        NumberMaxSearchResults,
        returnedColumns,
        highlightColumn,
        searchExpression);
}
[/code]
[code]
// Here is the method that receives the query and the filter, performs the
// search and processes the result
/// <summary>
/// Runs <paramref name="query"/> (restricted by <paramref name="filter"/>) against the
/// index at <c>directoryPath</c> and returns the top hits as a single-table DataSet.
/// </summary>
/// <remarks>
/// The table is named "SearchResult". Its first column, "Index", holds the Lucene
/// document number. The field named by <paramref name="fieldHighlightName"/> is
/// returned as a highlighted fragment in a column named
/// <paramref name="mainColumnContentName"/>; the raw main content column is not
/// returned. The <paramref name="analyzer"/> and <paramref name="searchExpression"/>
/// parameters are not read by this method body.
/// </remarks>
/// <param name="query">Query to execute; also drives the highlighting.</param>
/// <param name="filter">Filter applied to the search.</param>
/// <param name="mainColumnContentName">Name of the result column that receives the highlighted fragment.</param>
/// <param name="top">Maximum number of hits to return.</param>
/// <param name="fieldsReturn">Stored fields to copy into the result table.</param>
/// <param name="fieldHighlightName">Field whose best fragment is highlighted.</param>
/// <returns>The result set, or <c>null</c> when there are no hits.</returns>
public DataSet Search(
    Lucene.Net.Search.Query query,
    Filter filter,
    string mainColumnContentName,
    Analyzer analyzer,
    int top,
    string[] fieldsReturn,
    string fieldHighlightName,
    string searchExpression)
{
    Lucene.Net.Store.Directory directory = FSDirectory.Open(new DirectoryInfo(directoryPath));
    try
    {
        IndexSearcher searcher = new IndexSearcher(directory, true);
        try
        {
            TopDocs topDocs = searcher.Search(query, filter, top);
            if (topDocs.scoreDocs == null || topDocs.scoreDocs.Length == 0)
            {
                return null;
            }

            DataTable docs = new DataTable("SearchResult");
            docs.Columns.Add("Index");
            for (int i = 0; i < fieldsReturn.Length; i++)
            {
                if (fieldsReturn[i] == fieldHighlightName)
                {
                    // The highlighted fragment is exposed under the main content column name.
                    docs.Columns.Add(mainColumnContentName, typeof(string));
                }
                else if (fieldsReturn[i] != mainColumnContentName)
                {
                    docs.Columns.Add(fieldsReturn[i], typeof(string));
                }
            }

            // One highlighter serves every hit; it was previously re-created per document.
            FastVectorHighlighter highlighter = new FastVectorHighlighter();

            for (int j = 0; j < topDocs.scoreDocs.Length; j++)
            {
                int docId = topDocs.scoreDocs[j].doc;
                Document document = searcher.Doc(docId);

                // Values are appended in the same order the columns were created above.
                List<string> values = new List<string>();
                values.Add(docId.ToString());
                for (int k = 0; k < fieldsReturn.Length; k++)
                {
                    if (fieldsReturn[k] == fieldHighlightName)
                    {
                        string fragment = highlighter.GetBestFragment(
                            highlighter.GetFieldQuery(query),
                            searcher.GetIndexReader(),
                            docId,
                            fieldHighlightName,
                            NumberMaxCaractersHighlight);
                        values.Add(fragment);
                    }
                    else if (fieldsReturn[k] != mainColumnContentName)
                    {
                        values.Add(document.Get(fieldsReturn[k]));
                    }
                }
                docs.Rows.Add(values.ToArray());
            }

            DataSet result = new DataSet();
            result.Tables.Add(docs);
            return result;
        }
        finally
        {
            // The searcher was opened here, so it must be released here;
            // otherwise every search leaks the reader's open file handles.
            searcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
[/code]
It works perfectly with the other analyzers. With BrazilianAnalyzer, I search
for the term "ferramenta" and it returns nothing. There are documents in
"es-ES" and "en" in the index with this term. If I search filtering the
"DocumentCulture" field by the value "es-es" (culture field NOT_ANALYZED, text
field analyzed with SnowballAnalyzer("Spanish")), documents are returned. If I
filter by "pt-BR" (culture field NOT_ANALYZED, text field analyzed with
BrazilianAnalyzer), nothing is returned.
Is this a bug in BrazilianAnalyzer?
-----Mensagem original-----
De: Simon Willnauer [mailto:[email protected]]
Enviada em: quinta-feira, 12 de julho de 2012 04:48
Para: [email protected]
Assunto: Re: BrazilianAnalyzer don't woks with any BooleanQuery
Can you tell us more about the index side of things? Are you using positions
in the index, since I see PhraseQuery in your code?
Where are you passing the text you are searching for to the BrazilianAnalyzer?
I don't see it in your code. You need to process your text at search time too
to get results.
simon
On Wed, Jul 11, 2012 at 5:32 PM, Marcelo Neves <[email protected]>wrote:
> Hi all,****
>
> ** **
>
> I created the method below, which generates my boolean query based on many
> parameters. The queries on NOT_ANALYZED fields work perfectly in
> debug.
>
> When I start a search on any analyzed field with BrazilianAnalyzer, it
> always returns an empty result (zero docs). I tested this in a separate
> solution with a single field, using BrazilianAnalyzer for indexing and
> searching. If I use a BooleanQuery instead of the QueryParser, it doesn't
> work as expected: it returns an empty result.
>
> When I change to StandardAnalyzer for indexing and searching, without
> altering other parts of the code, it works.
>
> ** **
>
> Could anyone help me?****
>
> [Code]****
>
> ** **
>
/// <summary>
/// Builds the full-text part of the search query from the words typed by the user.
/// </summary>
/// <remarks>
/// The expression is lowercased, stripped of double quotes and split on spaces.
/// With a single token, the word is required (MUST) in every searched field.
/// With several tokens, <paramref name="searchMode"/> decides: exact phrase builds
/// one PhraseQuery per searched field, any-word adds SHOULD term clauses, and
/// all-words adds MUST term clauses. The words are not passed through an analyzer
/// here. NOTE(review): if the index analyzer stems or otherwise rewrites tokens,
/// these raw terms only match when the caller re-analyzes the query - confirm.
/// </remarks>
/// <param name="searchExpression">Words typed by the user.</param>
/// <param name="searchMode">How multiple words are combined.</param>
/// <param name="searchDocumentHistory">When true, the node-XML content field is searched.</param>
/// <param name="onlyDocumentHistory">When true, the main content field is not searched.</param>
/// <returns>The boolean query, or <c>null</c> when the expression is null or empty.</returns>
protected virtual BooleanQuery GetExpressionBooleanQuery(
    string searchExpression,
    SearchModeEnum searchMode,
    bool searchDocumentHistory,
    bool onlyDocumentHistory)
{
    if (string.IsNullOrEmpty(searchExpression))
    {
        return null;
    }

    BooleanQuery queryWords = new BooleanQuery();

    // One phrase per field: PhraseQuery.Add throws when a term's field differs
    // from the field of the terms already added, so a shared phrase failed
    // whenever both the content and the node-XML fields were searched.
    PhraseQuery contentPhrase = new PhraseQuery();
    PhraseQuery nodeXmlPhrase = new PhraseQuery();

    string[] expressions = searchExpression.ToLower().Replace("\"", "").Split(' ');

    // Position of the next word inside the phrase. It counts only non-empty
    // words, so repeated spaces no longer leave gaps that stop adjacent words
    // from matching as a phrase.
    int phrasePosition = 0;

    for (int i = 0; i < expressions.Length; i++)
    {
        string word = expressions[i].Trim();
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        if (expressions.Length <= 1)
        {
            // Single-word search: the word is required regardless of search mode.
            AddWordTermClauses(queryWords, word, BooleanClause.Occur.MUST, searchDocumentHistory, onlyDocumentHistory);
            continue;
        }

        switch (searchMode)
        {
            case SearchModeEnum.ExactPhrase:
                if (!onlyDocumentHistory)
                {
                    contentPhrase.Add(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, word), phrasePosition);
                }
                if (searchDocumentHistory)
                {
                    nodeXmlPhrase.Add(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, word), phrasePosition);
                }
                phrasePosition++;
                break;

            case SearchModeEnum.AnyWord:
                AddWordTermClauses(queryWords, word, BooleanClause.Occur.SHOULD, searchDocumentHistory, onlyDocumentHistory);
                break;

            case SearchModeEnum.AllWords:
                AddWordTermClauses(queryWords, word, BooleanClause.Occur.MUST, searchDocumentHistory, onlyDocumentHistory);
                break;

            default:
                break;
        }
    }

    if (contentPhrase.GetTerms().Length > 0)
    {
        queryWords.Add(new BooleanClause(contentPhrase, BooleanClause.Occur.MUST));
    }
    if (nodeXmlPhrase.GetTerms().Length > 0)
    {
        queryWords.Add(new BooleanClause(nodeXmlPhrase, BooleanClause.Occur.MUST));
    }

    return queryWords;
}

/// <summary>
/// Adds a TermQuery clause for <paramref name="word"/> on each searched content field.
/// </summary>
private static void AddWordTermClauses(
    BooleanQuery target,
    string word,
    BooleanClause.Occur occur,
    bool searchDocumentHistory,
    bool onlyDocumentHistory)
{
    if (!onlyDocumentHistory)
    {
        target.Add(new BooleanClause(
            new TermQuery(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, word)),
            occur));
    }
    if (searchDocumentHistory)
    {
        target.Add(new BooleanClause(
            new TermQuery(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, word)),
            occur));
    }
}
>
> ** **
>
> [/Code]****
>
> ** **
>
> I wait for help. Please!****
>
> ** **
>
> Thanks!****
>
> ** **
>
> [image: Descrição: marcelo-neves]****
>
> ** **
>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]