Package org.terrier.compression

Examples of org.terrier.compression.MemorySBOS


   * @param freq the frequency of the term in the document.
   * @return the number of bytes consumed in the buffer
   * @throws IOException if an I/O error occurs.
   */ 
  public int writeFirstDoc(int docId, int freq) throws IOException{   
    docIds = new MemorySBOS();
    TF = freq;     
    Nt = 1;
    //System.err.println("Writing docid="+ (docId+1) + " f=" + freq);
    docIds.writeGamma(docId + 1);
    docIds.writeGamma(freq);
View Full Code Here


  /** Write the posting to the output collector
   */
  @Override
  public void writeTerm(final String term, final Posting post) throws IOException
 
    final MemorySBOS Docs = post.getDocs();
    Docs.pad();
    //get the posting array buffer
    byte[] buffer = new byte[Docs.getMOS().getPos()+1];
    System.arraycopy(Docs.getMOS().getBuffer(), 0,
        buffer, 0,
        Math.min(Docs.getMOS().getBuffer().length, Docs.getMOS().getPos()+1));
   
    //emit the term and its posting list
    outputCollector.collect(
        SplitEmittedTerm.createNewTerm(term, splitId, flushNo),
        MapEmittedPostingList.create_Hadoop_WritableRunPostingData(
View Full Code Here

         
          //if the document is non-empty
          if (p.getDocF() > 0)
          {         
            //obtain the compressed memory posting list
            final MemorySBOS Docs = p.getDocs();
            //There is an obscure problem when reading from memory rather than disk.
            //Padding the posting list with some non-zero bytes solves the problem.
            //Thanks to Roicho for working this one out.
            Docs.writeGamma(1);
            Docs.writeGamma(1);
            Docs.pad();
         
            //use a PostingInRun to decompress the postings stored in memory
            final PostingInRun pir = getPostingReader();
            pir.setDf(p.getDocF());
            pir.setTF(p.getTF());
            pir.setPostingSource(new BitInputStream(new ByteArrayInputStream(
              Docs.getMOS().getBuffer())));
            //System.err.println("temp compressed buffer size="+Docs.getMOS().getPos() + " length="+Docs.getMOS().getBuffer().length);
            //decompress the memory postings and write out to the direct file
            pir.append(bos, -1);
          }
View Full Code Here

  public void writeTerm(final String term, final Posting post) throws IOException{   
    stringDos.writeUTF(term);
    bos.writeGamma(post.getDocF());
    bos.writeGamma(post.getTF());
    //System.err.println("Writing "+term + " TF="+post.getTF()+ " Nt="+post.getDocF());
    final MemorySBOS Docs = post.getDocs();
    Docs.pad();
    /* when reading, ie RunReader and it's children classes
     * an align call is required here. */
    bos.append(Docs.getMOS().getBuffer(), Docs.getMOS().getPos());
  }
View Full Code Here

TOP

Related Classes of org.terrier.compression.MemorySBOS

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.