Always gets first row in document instead of search hit

stoffeboff Fri, 16 Feb 2007 04:02:48 -0800

Always gets first row in document instead of search hit ... 
Also the search does not work as expected.. for example nodeId should reqire
100% match but it still isn't checked.
All help is much appreciated!!


The code:

import static org.apache.commons.lang.StringUtils.defaultString;
import static org.apache.commons.lang.StringUtils.isBlank;
import static org.apache.commons.lang.StringUtils.isNotBlank;
import static org.apache.commons.lang.StringUtils.remove;
import static org.apache.commons.lang.StringUtils.splitPreserveAllTokens;
import static org.apache.commons.lang.StringUtils.substringBetween;
import static se.dreampark.util.StringTools.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexModifier;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

public class LuceneReader {
    private final static int STREET_NAME = 5;

    private final static int STREET_NUMBER = 6;

    private final static int STREET_LETTER = 7;

    private final static int FLOOR = 8;

    private final static int ZIP = 9;

    private final static int CITY = 10;

    private final static int ADDRESS_ID = 1;

    private final static int APARTMENT_ID = 11;

    private FSDirectory directory;

    private IndexSearcher indexSearcher;

    private Document doc = null;

    protected static Logger logger = Logger.getLogger(LuceneReader.class);

    private void indexContent(String dbPath) throws Exception {
        IndexModifier modifier = new IndexModifier(directory, new
StandardAnalyzer(), true);

        doc = new Document();

        for (String filename : (new File(dbPath)).list()) {
            // logger.debug(filename);
            String nodeId = substringBetween(filename, "coverage", ".csv");
            if (isNotBlank(nodeId)) {
                // logger.debug("hit! nodeid=" + nodeId);
                BufferedReader reader = new BufferedReader(new
FileReader(dbPath + "/" + filename));
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.startsWith(",,,,,")) {
                        break;
                    }
                    String[] fields = splitPreserveAllTokens(line, ",");

                    String address = fields[STREET_NAME].trim() + " "
                            + fields[STREET_NUMBER].trim() +
fields[STREET_LETTER].trim();
                    address = process(address);
                    // System.out.println(address);
                    doc.add(new Field("address", address, Field.Store.YES,
Field.Index.TOKENIZED));

                    String zip = process(remove(fields[ZIP].trim(), " "));

                    doc.add(new Field("zip", zip, Field.Store.YES,
Field.Index.TOKENIZED));

                    doc.add(new Field("city", process(fields[CITY]),
Field.Store.YES,
                            Field.Index.TOKENIZED));
                    doc.add(new Field("addressId",
process(fields[ADDRESS_ID]), Field.Store.YES,
                            Field.Index.UN_TOKENIZED));
                    doc.add(new Field("apartmentId",
                            process(fields[APARTMENT_ID]).replace("-", ""),
Field.Store.YES,
                            Field.Index.TOKENIZED));

                    doc.add(new Field("nodeId", nodeId, Field.Store.YES,
Field.Index.UN_TOKENIZED));

                }
                reader.close();
            }
        }
        modifier.addDocument(doc);
        modifier.optimize();
        modifier.close();
    }

    private static String process(String str) {
        return defaultString(str).trim().replace(" ", "_");
    }

    public Hit search(File indexDir, String dbPath, String nodeId, String
streetAddress,
            String zip, String city, String apartmentId) throws Exception {

        if (isBlank(streetAddress) || isBlank(zip) || isBlank(city) ||
isBlank(apartmentId)) {
            return null;
        }

        apartmentId = apartmentId.replace("-", "");
        if (doc == null) {
            directory = FSDirectory.getDirectory(indexDir, true);
            indexContent(dbPath);
            indexSearcher = new IndexSearcher(directory);
        }

        String streetAddress2 = streetAddress.replace("g.",
"gatan").replace("v.", "vägen");

        QueryParser parser = new QueryParser("address", new
StandardAnalyzer());
     
        String query = "address:" + process(streetAddress) + "~0.8 AND zip:"
+ Q + process(zip) + Q
                + " AND city:" + process(city) + "~0.8 AND nodeId:" + nodeId 
                + " AND apartmentId:" + Q + apartmentId + Q;
                
      
        
        //String query = "address:" + process(streetAddress);
        logger.debug("query=" + query);

        Hits hits = indexSearcher.search(parser.parse(query));
        Iterator it = hits.iterator();

           
        it = hits.iterator();

        return (it.hasNext()) ? (Hit) it.next() : null;
    }
}

-- 
View this message in context: 
http://www.nabble.com/Always-gets-first-row-in-document-instead-of-search-hit-tf3239099.html#a9003111
Sent from the Lucene - Java Developer mailing list archive at Nabble.com.


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Always gets first row in document instead of search hit

Reply via email to