Always gets first row in document instead of search hit ...
Also the search does not work as expected.. for example nodeId should reqire
100% match but it still isn't checked.
All help is much appreciated!!
The code:
import static org.apache.commons.lang.StringUtils.defaultString;
import static org.apache.commons.lang.StringUtils.isBlank;
import static org.apache.commons.lang.StringUtils.isNotBlank;
import static org.apache.commons.lang.StringUtils.remove;
import static org.apache.commons.lang.StringUtils.splitPreserveAllTokens;
import static org.apache.commons.lang.StringUtils.substringBetween;
import static se.dreampark.util.StringTools.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Iterator;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexModifier;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
public class LuceneReader {
private final static int STREET_NAME = 5;
private final static int STREET_NUMBER = 6;
private final static int STREET_LETTER = 7;
private final static int FLOOR = 8;
private final static int ZIP = 9;
private final static int CITY = 10;
private final static int ADDRESS_ID = 1;
private final static int APARTMENT_ID = 11;
private FSDirectory directory;
private IndexSearcher indexSearcher;
private Document doc = null;
protected static Logger logger = Logger.getLogger(LuceneReader.class);
private void indexContent(String dbPath) throws Exception {
IndexModifier modifier = new IndexModifier(directory, new
StandardAnalyzer(), true);
doc = new Document();
for (String filename : (new File(dbPath)).list()) {
// logger.debug(filename);
String nodeId = substringBetween(filename, "coverage", ".csv");
if (isNotBlank(nodeId)) {
// logger.debug("hit! nodeid=" + nodeId);
BufferedReader reader = new BufferedReader(new
FileReader(dbPath + "/" + filename));
String line;
while ((line = reader.readLine()) != null) {
if (line.startsWith(",,,,,")) {
break;
}
String[] fields = splitPreserveAllTokens(line, ",");
String address = fields[STREET_NAME].trim() + " "
+ fields[STREET_NUMBER].trim() +
fields[STREET_LETTER].trim();
address = process(address);
// System.out.println(address);
doc.add(new Field("address", address, Field.Store.YES,
Field.Index.TOKENIZED));
String zip = process(remove(fields[ZIP].trim(), " "));
doc.add(new Field("zip", zip, Field.Store.YES,
Field.Index.TOKENIZED));
doc.add(new Field("city", process(fields[CITY]),
Field.Store.YES,
Field.Index.TOKENIZED));
doc.add(new Field("addressId",
process(fields[ADDRESS_ID]), Field.Store.YES,
Field.Index.UN_TOKENIZED));
doc.add(new Field("apartmentId",
process(fields[APARTMENT_ID]).replace("-", ""),
Field.Store.YES,
Field.Index.TOKENIZED));
doc.add(new Field("nodeId", nodeId, Field.Store.YES,
Field.Index.UN_TOKENIZED));
}
reader.close();
}
}
modifier.addDocument(doc);
modifier.optimize();
modifier.close();
}
private static String process(String str) {
return defaultString(str).trim().replace(" ", "_");
}
public Hit search(File indexDir, String dbPath, String nodeId, String
streetAddress,
String zip, String city, String apartmentId) throws Exception {
if (isBlank(streetAddress) || isBlank(zip) || isBlank(city) ||
isBlank(apartmentId)) {
return null;
}
apartmentId = apartmentId.replace("-", "");
if (doc == null) {
directory = FSDirectory.getDirectory(indexDir, true);
indexContent(dbPath);
indexSearcher = new IndexSearcher(directory);
}
String streetAddress2 = streetAddress.replace("g.",
"gatan").replace("v.", "vägen");
QueryParser parser = new QueryParser("address", new
StandardAnalyzer());
String query = "address:" + process(streetAddress) + "~0.8 AND zip:"
+ Q + process(zip) + Q
+ " AND city:" + process(city) + "~0.8 AND nodeId:" + nodeId
+ " AND apartmentId:" + Q + apartmentId + Q;
//String query = "address:" + process(streetAddress);
logger.debug("query=" + query);
Hits hits = indexSearcher.search(parser.parse(query));
Iterator it = hits.iterator();
it = hits.iterator();
return (it.hasNext()) ? (Hit) it.next() : null;
}
}
--
View this message in context:
http://www.nabble.com/Always-gets-first-row-in-document-instead-of-search-hit-tf3239099.html#a9003111
Sent from the Lucene - Java Developer mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]