package org.solbase.lucenehbase;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.solbase.SolbaseByteArrayInputStream;
/**
 * Holds the term-doc metadata of one term as a single byte array with spare capacity at its end.
 *
 * <p>Only the first {@link #getTermVectorSize()} bytes of the backing array are meaningful; the
 * rest is padding that lets entries be inserted or enlarged in place. When an insertion or
 * update needs more room than the padding provides, the backing array is replaced by a larger
 * one, so references obtained earlier from {@link #getTermDocMetadataArray()} may become stale.
 *
 * <p>This class never acquires {@link #readWriteLock} itself. NOTE(review): presumably callers
 * are expected to guard reads and in-place edits with it - confirm against the call sites.
 */
public class CompactedTermDocMetadataArray implements Serializable {

    private static final long serialVersionUID = 7086849173840377521L;

    // Floor for the padding, in bytes. The original author estimates 15 bytes for an average
    // single term vector, so this leaves room for about 5 of them.
    // TODO: measure the real average term vector byte size.
    private static final int MIN_PADDING_BYTES = 15 * 5;

    // Ceiling for the padding, in bytes (about 1000 docs by the same 15-byte estimate).
    private static final int MAX_PADDING_BYTES = 15 * 1000;

    // Padding is sized as this percentage of the payload before being clamped to the bounds above.
    private static final int PADDING_PERCENTAGE = 5;

    private byte[] termVectorArray;
    private int docAmount = 0;
    public ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock();
    // Number of meaningful bytes at the start of termVectorArray.
    private int termVectorSize;

    /**
     * Builds a padded array from the bytes currently held by {@code bos}.
     *
     * <p>The stream is only read; the padding is added to a private copy rather than being
     * written back into the caller's stream.
     *
     * @param bos stream holding the term-doc metadata bytes
     * @param docAmount number of documents, stored as given
     */
    public CompactedTermDocMetadataArray(ByteArrayOutputStream bos, int docAmount) {
        this.termVectorSize = bos.size();
        byte[] padded = new byte[bufferTermVectorArray(bos)];
        System.arraycopy(bos.toByteArray(), 0, padded, 0, this.termVectorSize);
        this.termVectorArray = padded;
        this.docAmount = docAmount;
    }

    /**
     * Replaces the backing array. The meaningful size is not adjusted; callers set it
     * separately through {@link #setTermVectorSize(int)}.
     *
     * @param termVectorArray new backing array, stored without copying
     */
    public void setTermDocMetadataArray(byte[] termVectorArray) {
        this.termVectorArray = termVectorArray;
    }

    /**
     * @return the backing array itself (not a copy), including the trailing padding
     */
    public byte[] getTermDocMetadataArray() {
        return termVectorArray;
    }

    /**
     * @param docAmount number of documents to record
     */
    public void setDocAmount(int docAmount) {
        this.docAmount = docAmount;
    }

    /**
     * @return the recorded number of documents
     */
    public int getDocAmount() {
        return docAmount;
    }

    /**
     * @return number of meaningful bytes at the start of the backing array
     */
    public int getTermVectorSize() {
        return this.termVectorSize;
    }

    /**
     * @param termVectorSize number of meaningful bytes at the start of the backing array
     */
    public void setTermVectorSize(int termVectorSize) {
        this.termVectorSize = termVectorSize;
    }

    /**
     * @return remaining spare capacity of the backing array, in bytes
     */
    public int bufferedSize() {
        return this.termVectorArray.length - this.termVectorSize;
    }

    /**
     * Computes the total array length (payload plus padding) for the bytes held by a stream.
     *
     * @param bos stream holding the initial term vector bytes
     * @return {@code bos.size()} plus the padding chosen for that size
     */
    public static int bufferTermVectorArray(ByteArrayOutputStream bos) {
        int length = bos.size();
        return length + paddingFor(length);
    }

    /**
     * @return a stream constructed over the backing array and the current meaningful size
     */
    public SolbaseByteArrayInputStream getTermDocMetadataInputStream() {
        return new SolbaseByteArrayInputStream(termVectorArray, termVectorSize);
    }

    /**
     * Removes the bytes in {@code [prevPosition, currentPosition)} by shifting the tail left.
     *
     * <p>The array length is unchanged, so the freed bytes become spare capacity; bytes past the
     * new logical end are not cleared. {@code docAmount} is not modified. The positions are
     * expected to satisfy {@code 0 <= prevPosition <= currentPosition <= getTermVectorSize()};
     * this method does not validate them itself.
     *
     * @param prevPosition offset of the first byte to remove
     * @param currentPosition offset just past the last byte to remove
     */
    public void deleteTermVector(int prevPosition, int currentPosition) {
        int tailLength = termVectorSize - currentPosition;
        System.arraycopy(termVectorArray, currentPosition, termVectorArray, prevPosition, tailLength);
        termVectorSize -= currentPosition - prevPosition;
    }

    /**
     * Replaces the entry occupying {@code [prevPosition, currentPosition)} with {@code newTdm}.
     *
     * <p>When the replacement has a different length, the bytes after {@code currentPosition}
     * are shifted to make room or close the gap. If the array lacks spare capacity for a longer
     * replacement it is reallocated first. {@code docAmount} is not modified, and the positions
     * are not validated by this method.
     *
     * @param prevPosition offset where the entry being replaced starts
     * @param currentPosition offset just past the entry being replaced
     * @param newTdm bytes of the replacement entry
     */
    public void updateTermVector(int prevPosition, int currentPosition, byte[] newTdm) {
        int replacedLength = currentPosition - prevPosition;
        int delta = newTdm.length - replacedLength;
        if (delta > 0) {
            ensureCapacity(termVectorSize + delta);
        }
        if (delta != 0) {
            // Shift the tail first. Its destination starts exactly where the replacement ends,
            // so the copy below never overwrites tail bytes. arraycopy handles the overlap
            // between the tail's old and new location.
            System.arraycopy(termVectorArray, currentPosition, termVectorArray, currentPosition + delta,
                    termVectorSize - currentPosition);
        }
        System.arraycopy(newTdm, 0, termVectorArray, prevPosition, newTdm.length);
        termVectorSize += delta;
    }

    /**
     * Inserts {@code newTdm} at {@code prevPosition}, shifting everything from that offset on
     * to the right.
     *
     * <p>If the array lacks spare capacity it is reallocated first. {@code docAmount} is not
     * modified.
     *
     * @param prevPosition offset at which the new entry is inserted
     * @param currentPosition not used by this method
     * @param newTdm bytes of the entry to insert
     */
    public void addTermVector(int prevPosition, int currentPosition, byte[] newTdm) {
        int insertedLength = newTdm.length;
        ensureCapacity(termVectorSize + insertedLength);
        // Open a gap of insertedLength bytes at prevPosition, then fill it.
        System.arraycopy(termVectorArray, prevPosition, termVectorArray, prevPosition + insertedLength,
                termVectorSize - prevPosition);
        System.arraycopy(newTdm, 0, termVectorArray, prevPosition, insertedLength);
        termVectorSize += insertedLength;
    }

    /** Returns the padding, in bytes, for a payload of the given length: a percentage clamped to the bounds. */
    private static int paddingFor(int length) {
        int padding = (int) Math.floor(length * (PADDING_PERCENTAGE / 100.0));
        return Math.max(MIN_PADDING_BYTES, Math.min(MAX_PADDING_BYTES, padding));
    }

    /** Reallocates the backing array, with fresh padding, when it cannot hold {@code requiredSize} bytes. */
    private void ensureCapacity(int requiredSize) {
        if (requiredSize <= termVectorArray.length) {
            return;
        }
        byte[] grown = new byte[requiredSize + paddingFor(requiredSize)];
        System.arraycopy(termVectorArray, 0, grown, 0, termVectorSize);
        termVectorArray = grown;
    }

    /**
     * Writes the instance fields using default serialization.
     *
     * <p>An earlier version of this hook wrote nothing, so serialized instances carried no
     * field data at all.
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
    }

    /**
     * Restores the instance fields written by {@link #writeObject(ObjectOutputStream)}.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
    }
}