Package org.solbase.lucenehbase

Source Code of org.solbase.lucenehbase.CompactedTermDocMetadataArray

package org.solbase.lucenehbase;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.solbase.SolbaseByteArrayInputStream;

public class CompactedTermDocMetadataArray implements Serializable {

  private static final long serialVersionUID = 7086849173840377521L;

  // Lower bound, in bytes, on the spare capacity reserved after the used bytes
  // (applied by bufferTermVectorArray). The author's estimate is ~15 bytes per term vector,
  // so this is room for roughly 5 of them.
  // TODO: measure what the average term vector byte size actually is.
  private static final int minTermVectorSize = 15 * 5; // bytes: estimated 15 per term vector, times 5

  // Upper bound, in bytes, on the spare capacity reserved after the used bytes.
  private static final int maxTermVectorSize = 15 * 1000; // bytes: room for about 1000 docs at the same estimate

  // Spare capacity is computed as this percentage of the payload size, then clamped
  // to the two bounds above.
  private static final int bufferedPercentage = 5; // percentage

  // Backing storage: the first termVectorSize bytes are in use, the rest is spare capacity.
  private byte[] termVectorArray;
  // Document count supplied by the caller (constructor / setDocAmount); this class only stores it.
  private int docAmount = 0;
  // Public lock object. No method in this class acquires it, so callers presumably
  // use it to coordinate their own reads and writes -- confirm against call sites.
  public ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock();

  // Number of bytes of termVectorArray currently in use.
  private int termVectorSize;
 
  public void setTermDocMetadataArray(byte[] termVectorArray) {
    this.termVectorArray = termVectorArray;
  }

  public void setDocAmount(int docAmount) {
    this.docAmount = docAmount;
  }
 
  public int getTermVectorSize(){
    return this.termVectorSize;
  }
 
  public void setTermVectorSize(int termVectorSize){
    this.termVectorSize = termVectorSize;
  }

  public CompactedTermDocMetadataArray(ByteArrayOutputStream bos, int docAmount) {
    termVectorSize = bos.size();
    // make this array buffered
    int bufferedTotal = bufferTermVectorArray(bos);
    bos.write(new byte[bufferedTotal-termVectorSize], 0, bufferedTotal-termVectorSize);
   
    this.termVectorArray = bos.toByteArray();
    this.docAmount = docAmount;
  }
 
  /*
   * @return int - remaining buffered size in byte array
   */
  public int bufferedSize(){
    return this.termVectorArray.length - this.termVectorSize;
  }
 
  /*
   * bufferedTermVectorArray
   *
   * find proper buffered array size given input array
   *
   * @param termVectorArray : initial term vector array
   * @return int : length of buffered total size
   */
  public static int bufferTermVectorArray(ByteArrayOutputStream bos){
    int length = bos.size();
   
    int bufferedSize = (int) Math.floor(length * (bufferedPercentage/100.0));
    if(bufferedSize > maxTermVectorSize){
      // ceil buffer size
      bufferedSize = maxTermVectorSize;
    } else if(bufferedSize < minTermVectorSize){
      // min buffer size
      bufferedSize = minTermVectorSize;
    }
   
    return length + bufferedSize;
  }

  public byte[] getTermDocMetadataArray() {
    return termVectorArray;
  }

  public int getDocAmount() {
    return docAmount;
  }
 
  public SolbaseByteArrayInputStream getTermDocMetadataInputStream() {
    return new SolbaseByteArrayInputStream(termVectorArray, termVectorSize);
  }
 
  public void deleteTermVector(int prevPosition, int currentPosition){
    System.arraycopy(termVectorArray, currentPosition, termVectorArray, prevPosition, termVectorSize-currentPosition);
    // adjust term vector size
    termVectorSize = termVectorSize - (currentPosition - prevPosition);
  }
 
  public void updateTermVector(int prevPosition, int currentPosition, byte[] newTdm){
    // prev position is the doc we are trying to update here
    int newSize = newTdm.length;
    int prevSize = currentPosition - prevPosition;
   
    if(prevSize == newTdm.length){
      // update doc is same as previous doc
      System.arraycopy(newTdm, 0, termVectorArray, prevPosition, newTdm.length);
    } else {
      // we need to find difference and move each elements of array accordingly
      if(newSize > prevSize){
        // new size is bigger than previous size
        int diff = newSize - prevSize;
       
        // move over remaining term docs
        System.arraycopy(termVectorArray, currentPosition, termVectorArray, currentPosition+diff, termVectorSize-currentPosition);
        // copy updated term doc
        System.arraycopy(newTdm, 0, termVectorArray, prevPosition, newTdm.length);

        termVectorSize += diff;
      } else {
        // new size is smaller than previous size
        int diff = prevSize - newSize;
       
        // copy updated term doc into term vector array
        System.arraycopy(newTdm, 0, termVectorArray, prevPosition, newTdm.length);
        // move remaining tdm's over
        System.arraycopy(termVectorArray, currentPosition, termVectorArray, prevPosition+newSize, termVectorSize-currentPosition);
       
        termVectorSize -= diff;
      }
    }
  }
 
  public void addTermVector(int prevPosition, int currentPosition, byte[] newTdm){
    // move over remaining bytes
    System.arraycopy(termVectorArray, prevPosition, termVectorArray, prevPosition+newTdm.length, termVectorSize-prevPosition);
    // copy new term doc into term vector array
    System.arraycopy(newTdm, 0, termVectorArray, prevPosition, newTdm.length);
   
    termVectorSize += newTdm.length;
  }
 
  private void writeObject(ObjectOutputStream out) throws IOException {
    /*
    out.write(SolbaseUtil.writeVInt(termVectorArray.length));
    for (TermDocMetadata tdm : termVectorArray) {
      out.write(SolbaseUtil.writeVInt(tdm.getDocId()));
      out.write(Bytes.toBytes(tdm.serialize()));
    }
    */
  }
 
  private void readObject(ObjectInputStream in) throws IOException{
    /*
    int arrayLength = SolbaseUtil.mreadVInt(in);
    termVectorArray = new TermDocMetadata[arrayLength];
   
    for (int i = 0; i < arrayLength; i++) {
      termVectorArray[i] = TermDocMetadataFactory.create(in);
    }
    */
  }
TOP

Related Classes of org.solbase.lucenehbase.CompactedTermDocMetadataArray

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.