Source Code of org.hibernate.search.indexes.serialization.avro.impl.AvroDeserializer

/*
* Hibernate, Relational Persistence for Idiomatic Java
*
* JBoss, Home of Professional Open Source
* Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @authors tag. All rights reserved.
* See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License, v. 2.1.
* This program is distributed in the hope that it will be useful, but WITHOUT A
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License,
* v.2.1 along with this distribution; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA  02110-1301, USA.
*/
package org.hibernate.search.indexes.serialization.avro.impl;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.Protocol;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.util.Utf8;

import org.hibernate.search.bridge.spi.ConversionContext;
import org.hibernate.search.bridge.util.impl.ContextualExceptionBridgeHelper;
import org.hibernate.search.indexes.serialization.spi.Deserializer;
import org.hibernate.search.indexes.serialization.spi.LuceneWorksBuilder;
import org.hibernate.search.indexes.serialization.spi.SerializableIndex;
import org.hibernate.search.indexes.serialization.spi.SerializableStore;
import org.hibernate.search.indexes.serialization.spi.SerializableTermVector;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
 * Deserializes Avro-encoded messages into Lucene works, delegating the
 * rebuilding of each work to a {@link LuceneWorksBuilder}.
 *
 * @author Emmanuel Bernard &lt;emmanuel@hibernate.org&gt;
 */
public class AvroDeserializer implements Deserializer {

  private static final Log log = LoggerFactory.make();
  private final Protocol protocol;
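  //per-message state: repopulated on every deserialize() call, which makes
  //instances of this class unsafe for concurrent use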
  private List<Utf8> classReferences;

  public AvroDeserializer(Protocol protocol) {
    this.protocol = protocol;
  }

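  /**
   * Reads the two leading protocol version bytes, decodes the remaining
   * Avro payload and replays every encoded operation on the given builder.
   */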
  @Override
  public void deserialize(byte[] data, LuceneWorksBuilder hydrator) {
    ByteArrayInputStream inputStream = new ByteArrayInputStream( data );
    //the first two bytes of the message carry the serialization protocol version
    int majorVersion = inputStream.read();
    int minorVersion = inputStream.read();
    if ( AvroSerializationProvider.getMajorVersion() != majorVersion ) {
      throw log.incompatibleProtocolVersion(
          majorVersion,
          minorVersion,
          AvroSerializationProvider.getMajorVersion(),
          AvroSerializationProvider.getMinorVersion()
      );
    }
    if ( AvroSerializationProvider.getMinorVersion() < minorVersion ) {
      //TODO what to do about it? Log each time? Once?
      log.tracef(
          "Parsing message from a future protocol version. Some features might not be propagated. Message version: %d.%d. Current protocol version: %d.%d",
          majorVersion,
          minorVersion,
          AvroSerializationProvider.getMajorVersion(),
          AvroSerializationProvider.getMinorVersion()
      );
    }

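    //the remainder of the stream is a binary Avro encoding of the "Message" record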
    Decoder decoder = DecoderFactory.get().binaryDecoder( inputStream, null );
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>( protocol.getType( "Message" ) );
    GenericRecord result;
    try {
      result = reader.read( null, decoder );
    }
    catch (IOException e) {
      throw log.unableToDeserializeAvroStream( e );
    }

    classReferences = asListOfString( result, "classReferences" );
    final List<GenericRecord> operations = asListOfGenericRecords( result, "operations" );
    final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
    for ( GenericRecord operation : operations ) {
      String schema = operation.getSchema().getName();
      if ( "OptimizeAll".equals( schema ) ) {
        hydrator.addOptimizeAll();
      }
      else if ( "PurgeAll".equals( schema ) ) {
        hydrator.addPurgeAllLuceneWork( asClass( operation, "class" ) );
      }
      else if ( "Delete".equals( schema ) ) {
        processId( operation, hydrator );
        hydrator.addDeleteLuceneWork(
            asClass( operation, "class" ), conversionContext
        );
      }
      else if ( "Add".equals( schema ) ) {
        buildLuceneDocument( asGenericRecord( operation, "document" ), hydrator );
        Map<String, String> analyzers = getAnalyzers( operation );
        processId( operation, hydrator );
        hydrator.addAddLuceneWork(
            asClass( operation, "class" ),
            analyzers,
            conversionContext
        );
      }
      else if ( "Update".equals( schema ) ) {
        buildLuceneDocument( asGenericRecord( operation, "document" ), hydrator );
        Map<String, String> analyzers = getAnalyzers( operation );
        processId( operation, hydrator );
        hydrator.addUpdateLuceneWork(
            asClass( operation, "class" ),
            analyzers,
            conversionContext
        );
      }
      else {
        throw log.cannotDeserializeOperation( schema );
      }
    }
  }

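  /**
   * Class names are transferred once in the shared classReferences list;
   * each operation only carries an index into that list.
   */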
  private String asClass(GenericRecord operation, String attribute) {
    Integer index = (Integer) operation.get( attribute );
    return classReferences.get( index ).toString();
  }

  private List<Utf8> asListOfString(GenericRecord result, String attribute) {
    return (List<Utf8>) result.get( attribute );
  }

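  /**
   * An id is encoded either as a ByteBuffer (a Java-serialized object),
   * a Utf8 string, or a directly serializable value such as a number.
   */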
  private void processId(GenericRecord operation, LuceneWorksBuilder hydrator) {
    GenericRecord id = (GenericRecord) operation.get( "id" );
    Object value = id.get( "value" );
    if ( value instanceof ByteBuffer ) {
      hydrator.addIdAsJavaSerialized( asByteArray( (ByteBuffer) value ) );
    }
    else if ( value instanceof Utf8 ) {
      hydrator.addId( value.toString() );
    }
    else {
      //the rest are serialized objects
      hydrator.addId( (Serializable) value );
    }
  }

  private Map<String, String> getAnalyzers(GenericRecord operation) {
    Map<?,?> analyzersWithUtf8 = (Map<?,?>) operation.get( "fieldToAnalyzerMap" );
    if ( analyzersWithUtf8 == null ) {
      return null;
    }
    Map<String,String> analyzers = new HashMap<String, String>( analyzersWithUtf8.size() );
    for ( Map.Entry<?,?> entry : analyzersWithUtf8.entrySet() ) {
      analyzers.put( entry.getKey().toString(), entry.getValue().toString() );
    }
    return analyzers;
  }

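  /**
   * Rebuilds the document by dispatching each fieldable on the name of its
   * Avro schema to the matching LuceneWorksBuilder callback.
   */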
  private void buildLuceneDocument(GenericRecord document, LuceneWorksBuilder hydrator) {
    hydrator.defineDocument( asFloat( document, "boost" ) );
    List<GenericRecord> fieldables = asListOfGenericRecords( document, "fieldables" );
    for ( GenericRecord field : fieldables ) {
      String schema = field.getSchema().getName();
      if ( "CustomFieldable".equals( schema ) ) {
        hydrator.addFieldable( asByteArray( field, "instance" ) );
      }
      else if ( "NumericIntField".equals( schema ) ) {
        hydrator.addIntNumericField(
              asInt( field, "value" ),
              asString( field, "name" ),
              asInt( field, "precisionStep" ),
              asStore( field ),
              asBoolean( field, "indexed" ),
              asFloat( field, "boost" ),
              asBoolean( field, "omitNorms" ),
              asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "NumericFloatField".equals( schema ) ) {
        hydrator.addFloatNumericField(
              asFloat( field, "value" ),
              asString( field, "name" ),
              asInt( field, "precisionStep" ),
              asStore( field ),
              asBoolean( field, "indexed" ),
              asFloat( field, "boost" ),
              asBoolean( field, "omitNorms" ),
              asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "NumericLongField".equals( schema ) ) {
        hydrator.addLongNumericField(
              asLong( field, "value" ),
              asString( field, "name" ),
              asInt( field, "precisionStep" ),
              asStore( field ),
              asBoolean( field, "indexed" ),
              asFloat( field, "boost" ),
              asBoolean( field, "omitNorms" ),
              asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "NumericDoubleField".equals( schema ) ) {
        hydrator.addDoubleNumericField(
              asDouble( field, "value" ),
              asString( field, "name" ),
              asInt( field, "precisionStep" ),
              asStore( field ),
              asBoolean( field, "indexed" ),
              asFloat( field, "boost" ),
              asBoolean( field, "omitNorms" ),
              asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "BinaryField".equals( schema ) ) {
        hydrator.addFieldWithBinaryData(
              asString( field, "name" ),
              asByteArray( field, "value" ),
              asInt( field, "offset" ),
              asInt( field, "length" ),
              asFloat( field, "boost" ),
              asBoolean( field, "omitNorms" ),
              asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "StringField".equals( schema ) ) {
        hydrator.addFieldWithStringData(
            asString( field, "name" ),
            asString( field, "value" ),
            asStore( field ),
            asIndex( field ),
            asTermVector( field ),
            asFloat( field, "boost" ),
            asBoolean( field, "omitNorms" ),
            asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "TokenStreamField".equals( schema ) ) {
        buildAttributes( field, "value", hydrator );
        hydrator.addFieldWithTokenStreamData(
            asString( field, "name" ),
            asTermVector( field ),
            asFloat( field, "boost" ),
            asBoolean( field, "omitNorms" ),
            asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else if ( "ReaderField".equals( schema ) ) {
        hydrator.addFieldWithSerializableReaderData(
            asString( field, "name" ),
            asByteArray( field, "value" ),
            asTermVector( field ),
            asFloat( field, "boost" ),
            asBoolean( field, "omitNorms" ),
            asBoolean( field, "omitTermFreqAndPositions" )
        );
      }
      else {
        throw log.cannotDeserializeField( schema );
      }
    }
  }

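  /**
   * A token stream is encoded as a list of tokens, each token being a list
   * of attributes; addToken() marks the end of the current token.
   */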
  private void buildAttributes(GenericRecord record, String field, LuceneWorksBuilder hydrator) {
    List<List<?>> tokens = (List<List<?>>) record.get( field );
    for ( List<?> token : tokens ) {
      for ( Object attribute : token ) {
        buildAttribute( attribute, hydrator );
      }
      hydrator.addToken();
    }
  }

  private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
    if ( element instanceof GenericRecord ) {
      GenericRecord record = (GenericRecord) element;
      String name = record.getSchema().getName();
      if ( "TokenTrackingAttribute".equals( name ) ) {
        hydrator.addTokenTrackingAttribute( (List<Integer>) record.get( "positions" ) );
      }
      else if ( "CharTermAttribute".equals( name ) ) {
        hydrator.addCharTermAttribute( (CharSequence) record.get( "sequence" ) );
      }
      else if ( "PayloadAttribute".equals( name ) ) {
        hydrator.addPayloadAttribute( asByteArray( record, "payload") );
      }
      else if ( "KeywordAttribute".equals( name ) ) {
        hydrator.addKeywordAttribute( asBoolean( record, "isKeyword") );
      }
      else if ( "PositionIncrementAttribute".equals( name ) ) {
        hydrator.addPositionIncrementAttribute( asInt( record, "positionIncrement") );
      }
      else if ( "FlagsAttribute".equals( name ) ) {
        hydrator.addFlagsAttribute( asInt( record, "flags") );
      }
      else if ( "TypeAttribute".equals( name ) ) {
        hydrator.addTypeAttribute( asString( record, "type") );
      }
      else if ( "OffsetAttribute".equals( name ) ) {
        hydrator.addOffsetAttribute( asInt( record, "startOffset"), asInt( record, "endOffset" ) );
      }
      else {
        log.unknownAttributeSerializedRepresentation( name );
      }
    }
    //chained with else: a GenericRecord attribute would otherwise fall through
    //and be wrongly reported as an unknown attribute
    else if ( element instanceof ByteBuffer ) {
      hydrator.addSerializedAttribute( asByteArray( (ByteBuffer) element ) );
    }
    else {
      log.unknownAttributeSerializedRepresentation( element.getClass().getName() );
    }
  }

  private GenericRecord asGenericRecord(GenericRecord operation, String field) {
    return (GenericRecord) operation.get( field );
  }

  private List<GenericRecord> asListOfGenericRecords(GenericRecord result, String field) {
    return (List<GenericRecord>) result.get( field );
  }

  private float asFloat(GenericRecord record, String field) {
    return ( (Float) record.get( field ) ).floatValue();
  }

  private int asInt(GenericRecord record, String field) {
    return ( (Integer) record.get( field ) ).intValue();
  }

  private long asLong(GenericRecord record, String field) {
    return ( (Long) record.get( field ) ).longValue();
  }

  private double asDouble(GenericRecord record, String field) {
    return ( (Double) record.get( field ) ).doubleValue();
  }

  private String asString(GenericRecord record, String field) {
    return record.get( field ).toString();
  }

  private boolean asBoolean(GenericRecord record, String field) {
    return ( (Boolean) record.get( field ) ).booleanValue();
  }

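  //asStore, asIndex and asTermVector map Avro enum symbols back to the SPI enums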
  private SerializableStore asStore(GenericRecord field) {
    String string = field.get( "store" ).toString();
    return SerializableStore.valueOf( string );
  }

  private SerializableIndex asIndex(GenericRecord field) {
    String string = field.get( "index" ).toString();
    return SerializableIndex.valueOf( string );
  }

  private SerializableTermVector asTermVector(GenericRecord field) {
    String string = field.get( "termVector" ).toString();
    return SerializableTermVector.valueOf( string );
  }

  private byte[] asByteArray(GenericRecord operation, String field) {
    ByteBuffer buffer = (ByteBuffer) operation.get( field );
    return asByteArray( buffer );
  }

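  /**
   * Copies the buffer's remaining bytes; note that reading advances the
   * buffer's position, so a given buffer can only be consumed once.
   */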
  private byte[] asByteArray(ByteBuffer buffer) {
    byte[] copy = new byte[buffer.remaining()];
    buffer.get( copy );
    return copy;
  }
}
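
Usage sketch

The following is a minimal, hypothetical example of driving the class above.
The AvroDeserializerUsageSketch name, the idea of loading a self-contained
protocol definition from an InputStream, and the availability of a
LuceneWorksBuilder implementation are all assumptions made for illustration;
the library itself obtains its Protocol elsewhere (see
AvroSerializationProvider).

import java.io.IOException;
import java.io.InputStream;

import org.apache.avro.Protocol;

import org.hibernate.search.indexes.serialization.spi.LuceneWorksBuilder;

public class AvroDeserializerUsageSketch {

  //replays an Avro-encoded message on the given builder; both arguments are
  //assumed to be supplied by the surrounding application
  public static void replay(InputStream protocolDefinition, byte[] message,
      LuceneWorksBuilder hydrator) throws IOException {
    //parse the protocol that was used to serialize the message...
    Protocol protocol = Protocol.parse( protocolDefinition );
    //...and replay every encoded Lucene work on the builder
    new AvroDeserializer( protocol ).deserialize( message, hydrator );
  }
}

The deserializer is cheap to construct, but since it keeps per-message state
in its classReferences field, each concurrent caller needs its own instance
(or external synchronization).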