The class looks OK, except that you shouldn't lower-case the text retrieved from
the resource. Or is there a specific reason why this is done?
Jackrabbit 1.2 will support the functions fn:lower-case() and fn:upper-case(),
so there is no need to lower-case the text when it is indexed.
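With those functions a case-insensitive match can then be expressed directly in
the query, roughly along these lines (just a sketch; I'm assuming the function
can be applied inside jcr:like, the exact syntax may differ):

//element(*, axxia:resource)[jcr:like(fn:lower-case(@jcr:data), 'comp%')]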
The query you mentioned will return nt:resource nodes with content that starts
with 'comp':
//element(*, axxia:resource)[(jcr:like(@jcr:data, 'comp%'))]
Are you sure that the first word in the document starts with 'comp'?
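If you want to double-check what the query actually matches, here is a quick
sketch using the plain JCR API (javax.jcr.query; 'session' is assumed to be an
open session on your workspace) that prints the path of every hit:

QueryManager qm = session.getWorkspace().getQueryManager();
Query query = qm.createQuery(
        "//element(*, axxia:resource)[jcr:like(@jcr:data, 'comp%')]",
        Query.XPATH);
NodeIterator nodes = query.execute().getNodes();
while (nodes.hasNext()) {
    // print the path of each matching node
    System.out.println(nodes.nextNode().getPath());
}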
regards
marcel
thomasg wrote:
Sure, there's probably an obvious error / omission. This is the code:
package com.axxia.dms.indexing.jackrabbit;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.jcr.RepositoryException;
import org.apache.jackrabbit.core.PropertyId;
import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.query.lucene.FieldNames;
import org.apache.jackrabbit.core.query.lucene.NamespaceMappings;
import org.apache.jackrabbit.core.query.lucene.NodeIndexer;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.name.QName;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import com.axxia.dms.indexing.util.IndexingUtil;
public class AxxiaJackrabbitNodeIndexer extends NodeIndexer
{
    /**
     * Creates a new node indexer.
     *
     * @param node the node state to index.
     * @param stateProvider the persistent item state manager to retrieve properties.
     * @param mappings internal namespace mappings.
     * @param textFilters List of {@link org.apache.jackrabbit.core.query.TextFilter}s.
     */
    protected AxxiaJackrabbitNodeIndexer(NodeState node,
                                         ItemStateManager stateProvider,
                                         NamespaceMappings mappings,
                                         List textFilters) {
        super(node, stateProvider, mappings, textFilters);
    }
    /**
     * Creates a lucene Document from a node.
     *
     * @param node the node state to index.
     * @param stateProvider the state provider to retrieve property values.
     * @param mappings internal namespace mappings.
     * @param textFilters list of text filters to use for indexing binary
     *        properties.
     * @return the lucene Document.
     * @throws RepositoryException if an error occurs while reading property
     *         values from the <code>ItemStateProvider</code>.
     */
    public static Document createDocument(NodeState node,
                                          ItemStateManager stateProvider,
                                          NamespaceMappings mappings,
                                          List textFilters)
            throws RepositoryException {
        AxxiaJackrabbitNodeIndexer indexer =
                new AxxiaJackrabbitNodeIndexer(node, stateProvider, mappings, textFilters);
        return indexer.createDoc();
    }
    /**
     * Adds the binary value to the document as the named field.
     * <p/>
     * This implementation checks if this {@link #node} is of type nt:resource
     * and if that is the case, tries to extract text from the data atom using
     * the {@link #textFilters}.
     *
     * @param doc The document to which to add the field
     * @param fieldName The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBinaryValue(Document doc, String fieldName, Object internalValue) {
        // 'check' if node is of type nt:resource
        try {
            String jcrData = mappings.getPrefix(QName.NS_JCR_URI) + ":data";
            if (!jcrData.equals(fieldName)) {
                // don't know how to index
                return;
            }
            // NB: the node variable is of type NodeState
            if (node.hasPropertyName(QName.JCR_MIMETYPE)) {
                PropertyState dataProp = (PropertyState) stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), QName.JCR_DATA));
                PropertyState mimeTypeProp = (PropertyState) stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), QName.JCR_MIMETYPE));
                // jcr:encoding is not mandatory
                String encoding = null;
                if (node.hasPropertyName(QName.JCR_ENCODING)) {
                    PropertyState encodingProp = (PropertyState) stateProvider.getItemState(
                            new PropertyId(node.getNodeId(), QName.JCR_ENCODING));
                    encoding = encodingProp.getValues()[0].internalValue().toString();
                }
                String mimeType = mimeTypeProp.getValues()[0].internalValue().toString();
                Map fields = Collections.EMPTY_MAP;
                for (Iterator it = textFilters.iterator(); it.hasNext();) {
                    TextFilter filter = (TextFilter) it.next();
                    // use the first filter that can handle the mimeType
                    if (filter.canFilter(mimeType)) {
                        fields = filter.doFilter(dataProp, encoding);
                        break;
                    }
                }
                for (Iterator it = fields.keySet().iterator(); it.hasNext();) {
                    String field = (String) it.next();
                    Reader r = (Reader) fields.get(field);
                    doc.add(Field.Text(field, r));
                }
                // After obtaining the map of fields returned by the text filter,
                // look for the Reader that was returned with the key FieldNames.FULLTEXT.
                // That reader is spooled into a string value and passed to addStringValue().
                Reader fullTextReader = (Reader) fields.get(FieldNames.FULLTEXT);
                if (fullTextReader != null) {
                    try {
                        String text = readerToString(fullTextReader);
                        addStringValue(doc, fieldName, text.toLowerCase());
                    } catch (IOException e) {
                        // TODO logging etc.
                        e.printStackTrace();
                    }
                }
            }
        } catch (ItemStateException e) {
            // TODO
            // log.warn("Exception while indexing binary property: " + e.toString());
            // log.debug("Dump: ", e);
        } catch (RepositoryException e) {
            // TODO
            // log.warn("Exception while indexing binary property: " + e.toString());
            // log.debug("Dump: ", e);
        }
    }
    /**
     * Spools a reader object to a string representation.
     *
     * @param reader The reader to convert to a string.
     * @return String representation of the reader
     * @throws IOException
     */
    private String readerToString(Reader reader) throws IOException {
        int charValue = 0;
        StringBuilder sb = new StringBuilder(2024);
        while ((charValue = reader.read()) != -1) {
            sb.append((char) charValue);
        }
        return sb.toString();
    }
}