Package org.apache.slide.index

Source Code of org.apache.slide.index.XMLContentIndexer

/*
* $Header: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/XMLContentIndexer.java,v 1.1 2004/08/03 15:01:46 unico Exp $
* $Revision: 1.1 $
* $Date: 2004/08/03 15:01:46 $
*
* ====================================================================
*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.slide.common.NamespaceAccessToken;
import org.apache.slide.common.ServiceInitializationFailedException;
import org.apache.slide.content.NodeRevisionContent;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.util.logger.Logger;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
* Extends TextContentIndexer for handling XML content
* by only indexing the actual character data.
*/
public class XMLContentIndexer extends TextContentIndexer {
   
    private SAXParser m_parser;
   
    public void initialize(NamespaceAccessToken token) throws ServiceInitializationFailedException {
        super.initialize(token);
        try {
            m_parser = SAXParserFactory.newInstance().newSAXParser();           
        }
        catch (ParserConfigurationException e) {
            getLogger().log("Error creating parser for indexer", LOG_CHANNEL, Logger.ERROR);
            throw new ServiceInitializationFailedException(this, e);
        }
        catch (SAXException e) {
            getLogger().log("Error creating parser for indexer", LOG_CHANNEL, Logger.ERROR);
            throw new ServiceInitializationFailedException(this, e);
        }
    }

    protected synchronized Reader readContent(NodeRevisionDescriptor revisionDescriptor,
                                 NodeRevisionContent revisionContent) throws IOException {
        if (revisionDescriptor.getContentType().equals("text/xml")) {
            try {
                final XMLContentIndexerHandler handler = new XMLContentIndexerHandler();
                m_parser.parse(new ByteArrayInputStream(revisionContent.getContentBytes()), handler);
                return new StringReader(handler.getText());
            } catch (SAXException e) {
                getLogger().log("Error parsing xml content for indexer", LOG_CHANNEL, Logger.ERROR);
            }
        }
        return super.readContent(revisionDescriptor, revisionContent);
    }
   
    private static final class XMLContentIndexerHandler extends DefaultHandler {

        private final StringBuffer m_text = new StringBuffer();

        public void characters(char[] ch, int start, int length) throws SAXException {
            m_text.append(ch, start, length);
        }

        public void endElement(String uri, String localName, String qName) throws SAXException {
            super.endElement(uri, localName, qName);
            m_text.append(' ');
        }

        public String getText() {
            return m_text.toString();
        }

    }

   
}
TOP

Related Classes of org.apache.slide.index.XMLContentIndexer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.
), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-20639858-1', 'auto'); ga('send', 'pageview');