Package org.hibernate.search.bridge.builtin

Source Code of org.hibernate.search.bridge.builtin.TikaBridge$NoopTikaMetadataProcessor

/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.bridge.builtin;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.URI;
import java.sql.Blob;
import java.sql.SQLException;

import org.apache.lucene.document.Document;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.WriteOutContentHandler;

import org.hibernate.search.bridge.FieldBridge;
import org.hibernate.search.bridge.LuceneOptions;
import org.hibernate.search.bridge.TikaMetadataProcessor;
import org.hibernate.search.bridge.TikaParseContextProvider;
import org.hibernate.search.util.impl.ClassLoaderHelper;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

import static org.apache.tika.io.IOUtils.closeQuietly;

/**
* Bridge implementation which uses Apache Tika to extract data from provided input.
*
* @author Hardy Ferentschik
*/
public class TikaBridge implements FieldBridge {
  private static final Log log = LoggerFactory.make();

  private TikaMetadataProcessor metadataProcessor;
  private TikaParseContextProvider parseContextProvider;

  public TikaBridge() {
    setMetadataProcessorClass( null );
    setParseContextProviderClass( null );
  }

  public void setParseContextProviderClass(Class<?> parseContextProviderClass) {
    if ( parseContextProviderClass == null ) {
      parseContextProvider = new NoopParseContextProvider();
    }
    else {
      parseContextProvider = ClassLoaderHelper.instanceFromClass(
          TikaParseContextProvider.class,
          parseContextProviderClass,
          "Tika metadata processor"
      );
    }
  }

  public void setMetadataProcessorClass(Class<?> metadataProcessorClass) {
    if ( metadataProcessorClass == null ) {
      metadataProcessor = new NoopTikaMetadataProcessor();
    }
    else {
      metadataProcessor = ClassLoaderHelper.instanceFromClass(
          TikaMetadataProcessor.class,
          metadataProcessorClass,
          "Tika parse context provider"
      );
    }
  }

  @Override
  public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
    if ( value == null ) {
      throw new IllegalArgumentException( "null cannot be passed to Tika bridge" );
    }
    InputStream in = getInputStreamForData( value );
    try {
      Metadata metadata = metadataProcessor.prepareMetadata();
      ParseContext parseContext = parseContextProvider.getParseContext( name, value );

      StringWriter writer = new StringWriter();
      WriteOutContentHandler contentHandler = new WriteOutContentHandler( writer );

      Parser parser = new AutoDetectParser();
      parser.parse( in, contentHandler, metadata, parseContext );
      luceneOptions.addFieldToDocument( name, writer.toString(), document );

      // allow for optional indexing of metadata by the user
      metadataProcessor.set( name, value, document, luceneOptions, metadata );
    }
    catch (Exception e) {
      throw log.unableToParseDocument( e );
    }
    finally {
      closeQuietly( in );
    }
  }

  private InputStream getInputStreamForData(Object object) {
    if ( object instanceof Blob ) {
      try {
        return ( (Blob) object ).getBinaryStream();
      }
      catch (SQLException e) {
        throw log.unableToGetInputStreamFromBlob( e );
      }
    }
    else if ( object instanceof byte[] ) {
      byte[] data = (byte[]) object;
      return new ByteArrayInputStream( data );
    }
    else if ( object instanceof String ) {
      String path = (String) object;
      File file = new File( path );
      return openInputStream( file );
    }
    else if ( object instanceof URI ) {
      URI uri = (URI) object;
      File file = new File( uri );
      return openInputStream( file );
    }
    else {
      throw log.unsupportedTikaBridgeType( object != null ? object.getClass() : null );
    }
  }

  private FileInputStream openInputStream(File file) {
    if ( file.exists() ) {
      if ( file.isDirectory() ) {
        throw log.fileIsADirectory( file.toString() );
      }
      if ( !file.canRead() ) {
        throw log.fileIsNotReadable( file.toString() );
      }
    }
    else {
      throw log.fileDoesNotExist( file.toString() );
    }
    try {
      return new FileInputStream( file );
    }
    catch (FileNotFoundException e) {
      throw log.fileDoesNotExist( file.toString() );
    }
  }

  private static class NoopTikaMetadataProcessor implements TikaMetadataProcessor {
    @Override
    public Metadata prepareMetadata() {
      return new Metadata();
    }

    @Override
    public void set(String name, Object value, Document document, LuceneOptions luceneOptions, Metadata metadata) {
    }
  }

  private static class NoopParseContextProvider implements TikaParseContextProvider {
    @Override
    public ParseContext getParseContext(String name, Object value) {
      return new ParseContext();
    }
  }
}
TOP

Related Classes of org.hibernate.search.bridge.builtin.TikaBridge$NoopTikaMetadataProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.