Source Code of org.apache.jackrabbit.core.query.lucene.NodeIndexer

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;


import org.apache.jackrabbit.core.PropertyId;
import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.name.NoPrefixDeclaredException;
import org.apache.jackrabbit.name.Path;
import org.apache.jackrabbit.name.QName;
import org.apache.jackrabbit.name.NameFormat;
import org.apache.jackrabbit.name.PathFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;


import javax.jcr.NamespaceException;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
import java.io.Reader;
import java.util.Calendar;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;


/**
 * Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}.
 */
public class NodeIndexer {


    /**
     * The logger instance for this class.
     */
    private static final Logger log = LoggerFactory.getLogger(NodeIndexer.class);


    /**
     * The <code>NodeState</code> of the node to index
     */
    protected final NodeState node;


    /**
     * The persistent item state provider
     */
    protected final ItemStateManager stateProvider;


    /**
     * Namespace mappings to use for indexing. This is the internal
     * namespace mapping.
     */
    protected final NamespaceMappings mappings;


    /**
     * List of text filters in use.
     */
    protected final List textFilters;


    /**
     * Creates a new node indexer.
     *
     * @param node          the node state to index.
     * @param stateProvider the persistent item state manager to retrieve properties.
     * @param mappings      internal namespace mappings.
     * @param textFilters   List of {@link org.apache.jackrabbit.core.query.TextFilter}s.
     */
    protected NodeIndexer(NodeState node,
                          ItemStateManager stateProvider,
                          NamespaceMappings mappings,
                          List textFilters) {
        this.node = node;
        this.stateProvider = stateProvider;
        this.mappings = mappings;
        this.textFilters = textFilters;
    }


    /**
     * Creates a lucene Document from a node.
     *
     * @param node          the node state to index.
     * @param stateProvider the state provider to retrieve property values.
     * @param mappings      internal namespace mappings.
     * @param textFilters   list of text filters to use for indexing binary
     *                      properties.
     * @return the lucene Document.
     * @throws RepositoryException if an error occurs while reading property
     *                             values from the <code>ItemStateProvider</code>.
     */
    public static Document createDocument(NodeState node,
                                          ItemStateManager stateProvider,
                                          NamespaceMappings mappings,
                                          List textFilters)
            throws RepositoryException {
        NodeIndexer indexer = new NodeIndexer(node, stateProvider, mappings, textFilters);
        return indexer.createDoc();
    }


    /**
     * Creates a lucene Document.
     *
     * @return the lucene Document with the index layout.
     * @throws RepositoryException if an error occurs while reading property
     *                             values from the <code>ItemStateProvider</code>.
     */
    protected Document createDoc() throws RepositoryException {
        Document doc = new Document();


        // special fields
        // UUID
        doc.add(new Field(FieldNames.UUID, node.getNodeId().getUUID().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
        try {
            // parent UUID
            if (node.getParentId() == null) {
                // root node
                doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
                doc.add(new Field(FieldNames.LABEL, "", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            } else {
                doc.add(new Field(FieldNames.PARENT, node.getParentId().toString(), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
                NodeState parent = (NodeState) stateProvider.getItemState(node.getParentId());
                NodeState.ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId());
                String name = NameFormat.format(child.getName(), mappings);
                doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            }
        } catch (NoSuchItemStateException e) {
            throwRepositoryException(e);
        } catch (ItemStateException e) {
            throwRepositoryException(e);
        } catch (NoPrefixDeclaredException e) {
            // will never happen, because this.mappings will dynamically add
            // unknown uri<->prefix mappings
        }


        Set props = node.getPropertyNames();
        for (Iterator it = props.iterator(); it.hasNext();) {
            QName propName = (QName) it.next();
            PropertyId id = new PropertyId(node.getNodeId(), propName);
            try {
                PropertyState propState = (PropertyState) stateProvider.getItemState(id);
                InternalValue[] values = propState.getValues();
                for (int i = 0; i < values.length; i++) {
                    addValue(doc, values[i], propState.getName());
                }
                if (values.length > 1) {
                    // real multi-valued
                    addMVPName(doc, propState.getName());
                }
            } catch (NoSuchItemStateException e) {
                throwRepositoryException(e);
            } catch (ItemStateException e) {
                throwRepositoryException(e);
            }
        }
        return doc;
    }


    /**
     * Wraps the exception <code>e</code> into a <code>RepositoryException</code>
     * and throws the created exception.
     *
     * @param e the base exception.
     */
    private void throwRepositoryException(Exception e)
            throws RepositoryException {
        String msg = "Error while indexing node: " + node.getNodeId() + " of "
            + "type: " + node.getNodeTypeName();
        throw new RepositoryException(msg, e);
    }


    /**
     * Adds a {@link FieldNames#MVP} field to <code>doc</code> with the resolved
     * <code>name</code> using the internal search index namespace mapping.
     *
     * @param doc  the lucene document.
     * @param name the name of the multi-value property.
     */
    private void addMVPName(Document doc, QName name) {
        try {
            String propName = NameFormat.format(name, mappings);
            doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
        } catch (NoPrefixDeclaredException e) {
            // will never happen, prefixes are created dynamically
        }
    }


    /**
     * Adds a value to the lucene Document.
     *
     * @param doc   the document.
     * @param value the internal jackrabbit value.
     * @param name  the name of the property.
     */
    private void addValue(Document doc, InternalValue value, QName name) {
        String fieldName = name.getLocalName();
        try {
            fieldName = NameFormat.format(name, mappings);
        } catch (NoPrefixDeclaredException e) {
            // will never happen
        }
        Object internalValue = value.internalValue();
        switch (value.getType()) {
            case PropertyType.BINARY:
                addBinaryValue(doc, fieldName, internalValue);
                break;
            case PropertyType.BOOLEAN:
                addBooleanValue(doc, fieldName, internalValue);
                break;
            case PropertyType.DATE:
                addCalendarValue(doc, fieldName, internalValue);
                break;
            case PropertyType.DOUBLE:
                addDoubleValue(doc, fieldName, internalValue);
                break;
            case PropertyType.LONG:
                addLongValue(doc, fieldName, internalValue);
                break;
            case PropertyType.REFERENCE:
                addReferenceValue(doc, fieldName, internalValue);
                break;
            case PropertyType.PATH:
                addPathValue(doc, fieldName, internalValue);
                break;
            case PropertyType.STRING:
                // do not fulltext index jcr:uuid String
                boolean tokenize = !name.equals(QName.JCR_UUID);
                addStringValue(doc, fieldName, internalValue, tokenize);
                break;
            case PropertyType.NAME:
                addNameValue(doc, fieldName, internalValue);
                break;
            default:
                throw new IllegalArgumentException("illegal internal value type");
        }
    }


    /**
     * Adds the binary value to the document as the named field.
     * <p/>
     * This implementation checks if this {@link #node} is of type nt:resource
     * and if that is the case, tries to extract text from the data atom using
     * the {@link #textFilters}.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBinaryValue(Document doc, String fieldName, Object internalValue) {
        // 'check' if node is of type nt:resource
        try {
            String jcrData = mappings.getPrefix(QName.NS_JCR_URI) + ":data";
            if (!jcrData.equals(fieldName)) {
                // don't know how to index
                return;
            }
            if (node.hasPropertyName(QName.JCR_MIMETYPE)) {
                PropertyState dataProp = (PropertyState) stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), QName.JCR_DATA));
                PropertyState mimeTypeProp =
                        (PropertyState) stateProvider.getItemState(
                                new PropertyId(node.getNodeId(), QName.JCR_MIMETYPE));


                // jcr:encoding is not mandatory
                String encoding = null;
                if (node.hasPropertyName(QName.JCR_ENCODING)) {
                    PropertyState encodingProp =
                            (PropertyState) stateProvider.getItemState(
                                    new PropertyId(node.getNodeId(), QName.JCR_ENCODING));
                    encoding = encodingProp.getValues()[0].internalValue().toString();
                }


                String mimeType = mimeTypeProp.getValues()[0].internalValue().toString();
                Map fields = Collections.EMPTY_MAP;
                for (Iterator it = textFilters.iterator(); it.hasNext();) {
                    TextFilter filter = (TextFilter) it.next();
                    // use the first filter that can handle the mimeType
                    if (filter.canFilter(mimeType)) {
                        fields = filter.doFilter(dataProp, encoding);
                        break;
                    }
                }


                for (Iterator it = fields.keySet().iterator(); it.hasNext();) {
                    String field = (String) it.next();
                    Reader r = (Reader) fields.get(field);
                    doc.add(new Field(field, r));
                }
            }
        } catch (Exception e) {
            // TODO: How to recover from a transient indexing failure?
            log.warn("Exception while indexing binary property: " + e.toString());
            log.debug("Dump: ", e);
        }
    }


    /**
     * Adds the string representation of the boolean value to the document as
     * the named field.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBooleanValue(Document doc, String fieldName, Object internalValue) {
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, internalValue.toString()),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }


    /**
     * Adds the calendar value to the document as the named field. The calendar
     * value is converted to an indexable string value using the {@link DateField}
     * class.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addCalendarValue(Document doc, String fieldName, Object internalValue) {
        long millis = ((Calendar) internalValue).getTimeInMillis();
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, DateField.timeToString(millis)),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }


    /**
     * Adds the double value to the document as the named field. The double
     * value is converted to an indexable string value using the
     * {@link DoubleField} class.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addDoubleValue(Document doc, String fieldName, Object internalValue) {
        double doubleVal = ((Double) internalValue).doubleValue();
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, DoubleField.doubleToString(doubleVal)),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }


    /**
     * Adds the long value to the document as the named field. The long
     * value is converted to an indexable string value using the {@link LongField}
     * class.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addLongValue(Document doc, String fieldName, Object internalValue) {
        long longVal = ((Long) internalValue).longValue();
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, LongField.longToString(longVal)),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }


    /**
     * Adds the reference value to the document as the named field. The value's
     * string representation is added as the reference data. Additionally the
     * reference data is stored in the index.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addReferenceValue(Document doc, String fieldName, Object internalValue) {
        String uuid = internalValue.toString();
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, uuid),
                Field.Store.YES, // store
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }


    /**
     * Adds the path value to the document as the named field. The path
     * value is converted to an indexable string value using the name space
     * mappings with which this class has been created.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addPathValue(Document doc, String fieldName, Object internalValue) {
        Path path = (Path) internalValue;
        String pathString = path.toString();
        try {
            pathString = PathFormat.format(path, mappings);
        } catch (NoPrefixDeclaredException e) {
            // will never happen
        }
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, pathString),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }


    /**
     * Adds the string value to the document both as the named field and for
     * full text indexing.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     * @deprecated Use {@link #addStringValue(Document, String, Object, boolean)
     *             addStringValue(Document, String, Object, boolean)} instead.
     */
    protected void addStringValue(Document doc, String fieldName, Object internalValue) {
        addStringValue(doc, fieldName, internalValue, true);
    }


    /**
     * Adds the string value to the document both as the named field and
     * optionally for full text indexing if <code>tokenized</code> is
     * <code>true</code>.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     * @param tokenized     If <code>true</code> the string is also tokenized
     *                      and fulltext indexed.
     */
    protected void addStringValue(Document doc, String fieldName, Object internalValue, boolean tokenized) {
        String stringValue = String.valueOf(internalValue);


        // simple String
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, stringValue),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
        if (tokenized) {
            // also create fulltext index of this value
            doc.add(new Field(FieldNames.FULLTEXT,
                    stringValue,
                    Field.Store.NO,
                    Field.Index.TOKENIZED,
                    Field.TermVector.NO));
            // create fulltext index on property
            int idx = fieldName.indexOf(':');
            fieldName = fieldName.substring(0, idx + 1)
                    + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
            doc.add(new Field(fieldName, stringValue,
                    Field.Store.NO,
                    Field.Index.TOKENIZED,
                    Field.TermVector.NO));
        }
    }


    /**
     * Adds the name value to the document as the named field. The name
     * value is converted to an indexable string treating the internal value
     * as a qualified name and mapping the name space using the name space
     * mappings with which this class has been created.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addNameValue(Document doc, String fieldName, Object internalValue) {
        QName qualiName = (QName) internalValue;
        String normValue = internalValue.toString();
        try {
            normValue = mappings.getPrefix(qualiName.getNamespaceURI())
                    + ":" + qualiName.getLocalName();
        } catch (NamespaceException e) {
            // will never happen
        }
        doc.add(new Field(FieldNames.PROPERTIES,
                FieldNames.createNamedValue(fieldName, normValue),
                Field.Store.NO,
                Field.Index.UN_TOKENIZED,
                Field.TermVector.NO));
    }
}
Source Code of org.apache.jackrabbit.core.query.lucene.NodeIndexer

Related Classes of org.apache.jackrabbit.core.query.lucene.NodeIndexer