The class looks OK, except that you shouldn't lower-case the text retrieved from
the resource. Or is there a specific reason why this is done?
Jackrabbit 1.2 will support the functions fn:lower-case() and fn:upper-case(),
so there is no need to lower-case the text when it is indexed.
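With those functions a case-insensitive match can then be expressed directly in
the query, roughly along these lines (just a sketch; I'm assuming the function
can be applied inside jcr:like, the exact syntax may differ):

//element(*, axxia:resource)[jcr:like(fn:lower-case(@jcr:data), 'comp%')]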
The query you mentioned will return nt:resource nodes with content that starts
with 'comp':
//element(*, axxia:resource)[(jcr:like(@jcr:data, 'comp%'))]
Are you sure that the first word in the document starts with 'comp'?
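If you want to double-check what the query actually matches, here is a quick
sketch using the plain JCR API (javax.jcr.query; 'session' is assumed to be an
open session on your workspace) that prints the path of every hit:

QueryManager qm = session.getWorkspace().getQueryManager();
Query query = qm.createQuery(
        "//element(*, axxia:resource)[jcr:like(@jcr:data, 'comp%')]",
        Query.XPATH);
NodeIterator nodes = query.execute().getNodes();
while (nodes.hasNext()) {
    // print the path of each matching node
    System.out.println(nodes.nextNode().getPath());
}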
regards
marcel
thomasg wrote:
Sure, there's probably an obvious error / omission. This is the code:
package com.axxia.dms.indexing.jackrabbit;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.jcr.RepositoryException;
import org.apache.jackrabbit.core.PropertyId;
import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.query.lucene.FieldNames;
import org.apache.jackrabbit.core.query.lucene.NamespaceMappings;
import org.apache.jackrabbit.core.query.lucene.NodeIndexer;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.name.QName;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import com.axxia.dms.indexing.util.IndexingUtil;
public class AxxiaJackrabbitNodeIndexer extends NodeIndexer
{
    /**
     * Creates a new node indexer.
     *
     * @param node the node state to index.
     * @param stateProvider the persistent item state manager to retrieve properties.
     * @param mappings internal namespace mappings.
     * @param textFilters List of {@link org.apache.jackrabbit.core.query.TextFilter}s.
     */
    protected AxxiaJackrabbitNodeIndexer(NodeState node,
                                         ItemStateManager stateProvider,
                                         NamespaceMappings mappings,
                                         List textFilters) {
        super(node, stateProvider, mappings, textFilters);
    }
    /**
     * Creates a lucene Document from a node.
     *
     * @param node the node state to index.
     * @param stateProvider the state provider to retrieve property values.
     * @param mappings internal namespace mappings.
     * @param textFilters list of text filters to use for indexing binary
     *        properties.
     * @return the lucene Document.
     * @throws RepositoryException if an error occurs while reading property
     *         values from the <code>ItemStateProvider</code>.
     */
    public static Document createDocument(NodeState node,
                                          ItemStateManager stateProvider,
                                          NamespaceMappings mappings,
                                          List textFilters)
            throws RepositoryException {
        AxxiaJackrabbitNodeIndexer indexer =
                new AxxiaJackrabbitNodeIndexer(node, stateProvider, mappings, textFilters);
        return indexer.createDoc();
    }
    /**
     * Adds the binary value to the document as the named field.
     * <p/>
     * This implementation checks if this {@link #node} is of type nt:resource
     * and if that is the case, tries to extract text from the data atom using
     * the {@link #textFilters}.
     *
     * @param doc The document to which to add the field
     * @param fieldName The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBinaryValue(Document doc, String fieldName, Object internalValue) {
        // 'check' if node is of type nt:resource
        try {
            String jcrData = mappings.getPrefix(QName.NS_JCR_URI) + ":data";
            if (!jcrData.equals(fieldName)) {
                // don't know how to index
                return;
            }
            // NB: the node variable is of type NodeState
            if (node.hasPropertyName(QName.JCR_MIMETYPE)) {
                PropertyState dataProp = (PropertyState) stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), QName.JCR_DATA));
                PropertyState mimeTypeProp = (PropertyState) stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), QName.JCR_MIMETYPE));
                // jcr:encoding is not mandatory
                String encoding = null;
                if (node.hasPropertyName(QName.JCR_ENCODING)) {
                    PropertyState encodingProp = (PropertyState) stateProvider.getItemState(
                            new PropertyId(node.getNodeId(), QName.JCR_ENCODING));
                    encoding = encodingProp.getValues()[0].internalValue().toString();
                }
                String mimeType = mimeTypeProp.getValues()[0].internalValue().toString();
                Map fields = Collections.EMPTY_MAP;
                for (Iterator it = textFilters.iterator(); it.hasNext();) {
                    TextFilter filter = (TextFilter) it.next();
                    // use the first filter that can handle the mimeType
                    if (filter.canFilter(mimeType)) {
                        fields = filter.doFilter(dataProp, encoding);
                        break;
                    }
                }
                for (Iterator it = fields.keySet().iterator(); it.hasNext();) {
                    String field = (String) it.next();
                    Reader r = (Reader) fields.get(field);
                    doc.add(Field.Text(field, r));
                }
                // After obtaining the map of fields returned by the text filter,
                // look for the Reader that was returned with the key FieldNames.FULLTEXT.
                // That reader is spooled into a string value and passed to addStringValue().
                Reader fullTextReader = (Reader) fields.get(FieldNames.FULLTEXT);
                if (fullTextReader != null) {
                    try {
                        String text = readerToString(fullTextReader);
                        addStringValue(doc, fieldName, text.toLowerCase());
                    } catch (IOException e) {
                        // TODO logging etc.
                        e.printStackTrace();
                    }
                }
            }
        } catch (ItemStateException e) {
            // TODO
            // log.warn("Exception while indexing binary property: " + e.toString());
            // log.debug("Dump: ", e);
        } catch (RepositoryException e) {
            // TODO
            // log.warn("Exception while indexing binary property: " + e.toString());
            // log.debug("Dump: ", e);
        }
    }
    /**
     * Spools a reader object to a string representation.
     *
     * @param reader The reader to convert to a string.
     * @return String representation of the reader
     * @throws IOException
     */
    private String readerToString(Reader reader) throws IOException {
        int charValue = 0;
        StringBuilder sb = new StringBuilder(2024);
        while ((charValue = reader.read()) != -1) {
            sb.append((char) charValue);
        }
        return sb.toString();
    }
}