Here you are. This is written in Kotlin, but it is similar enough to Java
to be usable.
/**
 * Builds a [WFSTCompletionLookup] from the terms of [field] in [indexReader]
 * and persists the built lookup to [file].
 *
 * The terms are supplied through a [WeightedLuceneDictionary], which receives
 * [bits] unchanged.
 *
 * @param indexReader reader over the index whose terms feed the lookup
 * @param field name of the indexed field to take terms from
 * @param file destination the built lookup is stored to
 * @param bits forwarded to [WeightedLuceneDictionary]
 * @return the freshly built lookup (already stored to [file])
 */
private fun buildTermsFromIndex(
    indexReader: IndexReader,
    field: String,
    file: File,
    bits: Bits
): WFSTCompletionLookup {
    // NOTE(review): the single boolean is presumably Lucene's `exactFirst` flag — confirm.
    val lookup = WFSTCompletionLookup(true)
    lookup.build(WeightedLuceneDictionary(indexReader, field, bits))
    // `use` closes the stream even when store() throws, so the file handle never leaks.
    BufferedOutputStream(FileOutputStream(file)).use { output ->
        lookup.store(output)
    }
    return lookup
}
package org.lakedaemon.queries
import org.apache.lucene.index.IndexReader
import org.apache.lucene.util.BytesRefIterator
import org.apache.lucene.index.MultiFields
import org.apache.lucene.search.spell.Dictionary
import org.apache.lucene.search.suggest.InputIterator
import org.lakedaemon.constants.Lucene
import org.apache.lucene.util.BytesRef
import org.apache.lucene.util.NumericUtils
import org.lakedaemon.L
import org.apache.lucene.search.Filter
import org.apache.lucene.util.OpenBitSet
import org.apache.lucene.util.Bits
/**
 * A Lucene [Dictionary] whose entries are the terms of one field of an index.
 *
 * Entries (and their weights) are produced by a [WeightedLuceneInputIterator]
 * built over the field's terms; [bits] is handed to that iterator unchanged.
 * When the field has no terms, or no terms enumerator can be obtained, the
 * dictionary yields [InputIterator.EMPTY].
 */
public final class WeightedLuceneDictionary(
    private val reader: IndexReader,
    private val field: String,
    private val bits: Bits?
) : Dictionary {

    /** Returns an iterator over the field's terms, or [InputIterator.EMPTY] if there are none. */
    override fun getEntryIterator(): InputIterator {
        val fieldTerms = MultiFields.getTerms(reader, field) ?: return InputIterator.EMPTY
        val enumerator = fieldTerms.iterator(null) ?: return InputIterator.EMPTY
        // Fields listed in Lucene.numericFields get their terms decoded by the iterator.
        val isNumericField = Lucene.numericFields.contains(field)
        return WeightedLuceneInputIterator(enumerator, isNumericField, bits)
    }
}
package org.lakedaemon.queries
import org.apache.lucene.search.suggest.InputIterator
import org.apache.lucene.index.TermsEnum
import java.util.Comparator
import org.apache.lucene.util.BytesRef
import org.apache.lucene.util.NumericUtils
import org.lakedaemon.L
import org.lakedaemon.kotlin.safeGet
import org.apache.lucene.util.Bits
import org.apache.lucene.index.DocsEnum
import org.apache.lucene.search.DocIdSetIterator
/**
 * [InputIterator] that walks a [TermsEnum] and reports, as each term's weight,
 * the number of documents enumerated for that term via `termsEnum.docs(bits, ...)`.
 *
 * Terms for which no document is enumerated are skipped. Payloads are not
 * supported and no comparator is provided.
 *
 * @param termsEnum the terms to iterate over
 * @param hasNumericTerms when `true`, each term is decoded with
 *   [NumericUtils.prefixCodedToInt] and returned as its decimal string
 * @param bits passed as the first argument of `termsEnum.docs`
 *   (presumably restricting which documents are counted — confirm against the Lucene version in use)
 */
final class WeightedLuceneInputIterator(
    private val termsEnum: TermsEnum,
    hasNumericTerms: Boolean = false,
    val bits: Bits?
) : InputIterator {

    /** Scratch buffer reused for every decoded numeric term; callers must not retain it across calls. */
    val bytesRef = BytesRef()

    /** Postings enumerator handed back to `termsEnum.docs` on each call so it can be reused. */
    var docsEnum: DocsEnum? = null

    /** Number of documents counted for the term most recently returned by [next]. */
    var docSize = 0L

    /**
     * Maps a raw term to the value handed to the caller.
     *
     * This is for traversing numeric terms (they are encoded as strings with a prefix):
     * the first byte carries the shift, and a shift outside `0..31` yields `null`,
     * which [next] returns as-is (ending the iteration). Non-numeric terms pass through unchanged.
     */
    val transform: BytesRef.() -> BytesRef? = if (hasNumericTerms) {
        {
            val shift = bytes[offset].toInt() - NumericUtils.SHIFT_START_INT.toInt()
            if (shift > 31 || shift < 0) {
                null
            } else {
                val intString = NumericUtils.prefixCodedToInt(this).toString()
                // Reset the shared buffer before writing the decoded value into it.
                bytesRef.offset = 0
                bytesRef.length = 0
                bytesRef.copyChars(intString)
                bytesRef
            }
        }
    } else {
        { this }
    }

    override fun getComparator(): Comparator<BytesRef>? = null

    /**
     * Advances to the next term that has at least one counted document.
     *
     * @return the (possibly transformed) term, or `null` when the terms are
     *   exhausted or [transform] rejects the term
     */
    override fun next(): BytesRef? {
        docSize = 0L
        while (true) {
            val term = termsEnum.next() ?: return null
            docsEnum = termsEnum.docs(bits, docsEnum)
            val postings = docsEnum ?: continue
            // Count every document enumerated for this term; -1 is checked defensively as well.
            var docId = postings.nextDoc()
            while (docId != -1 && docId != DocIdSetIterator.NO_MORE_DOCS) {
                ++docSize
                docId = postings.nextDoc()
            }
            // docSize is still 0 here whenever we loop again, so counts never carry over between terms.
            if (docSize != 0L) return term.transform()
        }
    }

    override fun weight(): Long = docSize

    override fun payload(): BytesRef? = null

    override fun hasPayloads(): Boolean = false
}
On 10/27/2014 02:08 PM, Clemens Wyss DEV wrote:
Salut Olivier,
would you mind providing me your Suggester-class code (or the relevant
snippets) as an ideal jump-start?
-Clemens
-----Ursprüngliche Nachricht-----
Von: Olivier Binda [mailto:olivier.bi...@wanadoo.fr]
Gesendet: Montag, 27. Oktober 2014 11:51
An: java-user@lucene.apache.org
Betreff: Re: [suggestions] fetch terms from a FilterAtomicReader(subclass)?
On 10/27/2014 07:32 AM, Clemens Wyss DEV wrote:
Is it possible to fetch the terms of a FilterAtomicReader in order to provide
suggestions from a subset of all documents in an index?
Yes, it is possible.
I do it by feeding a custom Dictionary with a custom InputIterator in the
lookup.build() method for WFSTCompletionLookup
Suggestions are preprocessed once at runtime
So my target is to "provide suggestions from a subset of all documents in an
index".
I provide different suggestions relevant to the languages chosen by my users
Note:
I have an equivalent discussion ongoing on the Solr mailing list, but I
thought I might also ask on the core-of-Solr (i.e. Lucene) mailing
list ;)
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org