Ralf: Here's an end-to-end Payloads example you can use to compare, although it sounds like you've already figured out your immediate problem.
https://lucidworks.com/blog/end-to-end-payload-example-in-solr/

Best,
Erick
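For anyone wanting a baseline to compare against without the Solr setup from that post, the same end-to-end flow boiled down to plain Lucene might look roughly like this. This is a sketch only, assuming Lucene 4.10.x, an in-memory RAMDirectory, and that MyPayloadAnalyzer / MyPayloadSimilarity from the code quoted further down are reachable (e.g. pulled out as top-level classes):

// Sketch: one document whose "concept" token carries a "term|weight" payload,
// scored with PayloadTermQuery and the same payload-aware Similarity on both sides.
Directory dir = new RAMDirectory();
Analyzer analyzer = new MyPayloadAnalyzer(new FloatEncoder());

IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
cfg.setSimilarity(new MyPayloadSimilarity());        // payload-aware Similarity at index time...
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(new TextField("concept", "red|100.0", Field.Store.YES));
writer.addDocument(doc);
writer.close();

IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(new MyPayloadSimilarity());   // ...and the very same class at query time
PayloadTermQuery query = new PayloadTermQuery(new Term("concept", "red"),
        new AveragePayloadFunction());
TopDocs hits = searcher.search(query, 10);           // the 100.0 payload feeds into the score
reader.close();

The scorePayload() override only takes effect through the Similarity that the IndexSearcher uses, so the payload-aware class has to be set on the searcher as well, not just on the IndexWriterConfig.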
On Thu, Oct 30, 2014 at 1:24 PM, Ralf Bierig <ralf.bie...@gmail.com> wrote:

Found it! I used another class that was still using old code. My bad!

Thanks anyway,
Ralf

On 30.10.2014 20:53, Michael Sokolov wrote:

That's a lot of code to eyeball. Have you tried printing out the input data as you are indexing it (just at doc.add)? I am guessing there is some simple variable aliasing issue that I don't see at a glance ...

-Mike

On 10/30/14 2:03 PM, Ralf Bierig wrote:

I want to implement a Lucene Indexer/Searcher that uses the new Payload feature to add meta information to tokens. Specifically, I add weights (i.e. 0-100) to conceptual tags in order to override the standard Lucene TF-IDF weighting. I am puzzled by the behaviour and believe there is something wrong with the Similarity class that I overrode, but I cannot figure it out.

The complete code for this example is attached below. When I run a query with it (e.g. "concept:red"), every payload comes back as the first number that was passed through MyPayloadSimilarity (1.0 in this example) rather than 1.0, 50.0 and 100.0. As a result, all documents get the same payload and the same score. Given the data, picture #1 (payload 100.0) should rank first, followed by picture #2 and then picture #3, with clearly different scores. I can't get my head around this...

Here are the results of the run:

Query: concept:red
===> docid: 0 payload: 1.0
===> docid: 1 payload: 1.0
===> docid: 2 payload: 1.0
Number of results:3
-> docid: 3.jpg score: 0.2518424
-> docid: 2.jpg score: 0.2518424
-> docid: 1.jpg score: 0.2518424

What is wrong? Did I misunderstand something about Payloads?
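One way to check which payloads were actually written, independent of scoring, is to walk the postings with payloads enabled and print what is stored; this is roughly the kind of check Mike suggests above. A minimal sketch, assuming Lucene 4.10.x and the "concept" field and index directory used in the code below (run it inside a try/catch or a method that throws IOException):

// Sketch only: dump every payload stored for the "concept" field.
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("D:/data/indices/sandbox")));
for (AtomicReaderContext ctx : reader.leaves()) {
    Terms terms = ctx.reader().terms("concept");
    if (terms == null) continue;
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        // ask the postings for positions *and* payloads
        DocsAndPositionsEnum postings =
                termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
        int doc;
        while (postings != null && (doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            for (int i = 0; i < postings.freq(); i++) {
                postings.nextPosition();
                BytesRef payload = postings.getPayload();
                float weight = (payload == null)
                        ? -1f
                        : PayloadHelper.decodeFloat(payload.bytes, payload.offset);
                System.out.println(term.utf8ToString()
                        + " doc=" + (ctx.docBase + doc) + " payload=" + weight);
            }
        }
    }
}
reader.close();

If this prints 100.0, 50.0 and 1.0 for the term "red", the payloads made it into the index and the problem is on the query/Similarity side rather than at indexing time.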
---Start Code---

public class PayloadShowcase {

    public static void main(String s[]) {
        PayloadShowcase p = new PayloadShowcase();
        p.run();
    }

    public void run() {
        // Step 1: indexing
        MyPayloadIndexer indexer = new MyPayloadIndexer();
        indexer.index();
        // Step 2: searching
        MyPayloadSearcher searcher = new MyPayloadSearcher();
        searcher.search("red");
    }

    public class MyPayloadAnalyzer extends Analyzer {

        private PayloadEncoder encoder;

        MyPayloadAnalyzer(PayloadEncoder encoder) {
            this.encoder = encoder;
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new WhitespaceTokenizer(reader);
            TokenStream filter = new LowerCaseFilter(source);
            filter = new DelimitedPayloadTokenFilter(filter, '|', encoder);
            return new TokenStreamComponents(source, filter);
        }
    }

    public class MyPayloadIndexer {

        public MyPayloadIndexer() {}

        public void index() {
            try {
                Directory dir = FSDirectory.open(new File("D:/data/indices/sandbox"));
                Analyzer analyzer = new MyPayloadAnalyzer(new FloatEncoder());
                IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
                iwconfig.setSimilarity(new MyPayloadSimilarity());
                iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

                // load mappings and classifiers
                HashMap<String, String> mappings = this.loadDataMappings();
                HashMap<String, HashMap> cMaps = this.loadData();

                IndexWriter writer = new IndexWriter(dir, iwconfig);
                indexDocuments(writer, mappings, cMaps);
                writer.close();

            } catch (IOException e) {
                System.out.println("Exception while indexing: " + e.getMessage());
            }
        }

        private void indexDocuments(IndexWriter writer, HashMap<String, String> fileMappings,
                                    HashMap<String, HashMap> concepts) throws IOException {

            Set fileSet = fileMappings.keySet();
            Iterator<String> iterator = fileSet.iterator();
            while (iterator.hasNext()) {
                // unique file information
                String fileID = iterator.next();
                String filePath = fileMappings.get(fileID);
                // create a new, empty document
                Document doc = new Document();
                // path of the indexed file
                Field pathField = new StringField("path", filePath, Field.Store.YES);
                doc.add(pathField);
                // lookup all concept probabilities for this fileID
                Iterator<String> conceptIterator = concepts.keySet().iterator();
                while (conceptIterator.hasNext()) {
                    String conceptName = conceptIterator.next();
                    HashMap conceptMap = concepts.get(conceptName);
                    doc.add(new TextField("concept",
                            ("" + conceptName + "|").trim() + (conceptMap.get(fileID) + "").trim(),
                            Field.Store.YES));
                }
                writer.addDocument(doc);
            }
        }

        public HashMap<String, String> loadDataMappings() {
            HashMap<String, String> h = new HashMap<>();
            h.put("1", "1.jpg");
            h.put("2", "2.jpg");
            h.put("3", "3.jpg");
            return h;
        }
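        // Note on the test data below: loadData() maps concept -> (fileID -> payload weight).
        // For "concept:red" the weights are file 1 (1.jpg) = 100.0, file 2 = 50.0, file 3 = 1.0,
        // so the expected ranking is 1.jpg, 2.jpg, 3.jpg -- unlike the identical scores in the
        // output quoted above.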
        public HashMap<String, HashMap> loadData() {
            HashMap<String, HashMap> h = new HashMap<>();
            HashMap<String, String> green = new HashMap<>();
            green.put("1", "50.0");
            green.put("2", "1.0");
            green.put("3", "100.0");
            HashMap<String, String> red = new HashMap<>();
            red.put("1", "100.0");
            red.put("2", "50.0");
            red.put("3", "1.0");
            HashMap<String, String> blue = new HashMap<>();
            blue.put("1", "1.0");
            blue.put("2", "50.0");
            blue.put("3", "100.0");
            h.put("green", green);
            h.put("red", red);
            h.put("blue", blue);
            return h;
        }
    }

    class MyPayloadSimilarity extends DefaultSimilarity {

        @Override
        public float scorePayload(int docID, int start, int end, BytesRef payload) {
            float pload = 1.0f;
            if (payload != null) {
                pload = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            System.out.println("===> docid: " + docID + " payload: " + pload);
            return pload;
        }
    }

    public class MyPayloadSearcher {

        public MyPayloadSearcher() {}

        public void search(String queryString) {
            try {
                IndexReader reader = DirectoryReader.open(
                        FSDirectory.open(new File("D:/data/indices/sandbox")));
                IndexSearcher searcher = new IndexSearcher(reader);
                // note: PayloadSimilarity here is a different class than the MyPayloadSimilarity
                // set at index time (see the "Found it!" reply above)
                searcher.setSimilarity(new PayloadSimilarity());
                PayloadTermQuery query = new PayloadTermQuery(new Term("concept", queryString),
                        new AveragePayloadFunction());
                System.out.println("Query: " + query.toString());
                TopDocs topDocs = searcher.search(query, 999);
                ScoreDoc[] hits = topDocs.scoreDocs;
                System.out.println("Number of results:" + hits.length);

                // output
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.println("-> docid: " + doc.get("path") + " score: " + hits[i].score);
                }
                reader.close();

            } catch (Exception e) {
                System.out.println("Exception while searching: " + e.getMessage());
            }
        }
    }
}

---End Code---

Any ideas? I am very grateful for any help...

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org