Package org.terrier.compression

Examples of org.terrier.compression.BitIn


    for(int i=0;i<fieldCount+3;i++)
      documentTerms[i] = new int[df];
    final TIntArrayList blockids = new TIntArrayList(df); //ideally we'd have TF here

    try{
      final BitIn file = this.file[pointer.getFileNumber()].readReset(startOffset, startBitOffset);
 
      if (loadTagInformation) { //if there are tag information to process
        //documentTerms[2] = new int[df];
        documentTerms[0][0] = file.readGamma() - 1;       
        documentTerms[1][0] = file.readUnary();
        for(int fi=0;fi < fieldCount;fi++)
          documentTerms[2+fi][0] = file.readUnary() -1;
        int blockfreq = documentTerms[2+fieldCount][0] = file.readUnary() - DocumentBlockCountDelta;
        int tmpBlocks[] = new int[blockfreq];
        int previousBlockId = -1;
        for(int j=0;j<blockfreq;j++)
        {
          tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
        }
        blockids.add(tmpBlocks);
       
        for (int i = 1; i < df; i++) {         
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
          for(int fi=0;fi < fieldCount;fi++)
            documentTerms[2+fi][0] = file.readUnary() -1;
          blockfreq = documentTerms[2+fieldCount][i] = file.readUnary() - DocumentBlockCountDelta;
          tmpBlocks = new int[blockfreq];
          previousBlockId = -1;
          for(int j=0;j<blockfreq;j++)
          {
            tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
          }
          blockids.add(tmpBlocks);
        }
      } else { //no tag information to process         
       
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
       
        int blockfreq = documentTerms[2][0] = file.readUnary() - DocumentBlockCountDelta;
        int tmpBlocks[] = new int[blockfreq];
        int previousBlockId = -1;
        for(int j=0;j<blockfreq;j++)
        {
          tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
        }
        blockids.add(tmpBlocks);
       
        for (int i = 1; i < df; i++) {         
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();

          blockfreq = documentTerms[2][i] = file.readUnary() - DocumentBlockCountDelta;
          tmpBlocks = new int[blockfreq];
          previousBlockId = -1;
          for(int j=0;j<blockfreq;j++)
          {
            tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
          }
          blockids.add(tmpBlocks);
        }
      }
      documentTerms[documentTerms.length-1] = blockids.toNativeArray();
View Full Code Here


    if (pointer==null)
      return null;
    final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
    final int count = pointer.getNumberOfEntries();
    try{
      final BitIn file = this.file[pointer.getFileNumber()].readReset(pointer.getOffset(), pointer.getOffsetBits());
      int[][] documentTerms = null;
      if (loadTagInformation) { //if there are tag information to process     
        documentTerms = new int[2+fieldCount][count];
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
        for (int f = 0; f < fieldCount; f++) {
          documentTerms[2+f][0] = file.readUnary() - 1;
        }
       
        for (int i = 1; i < count; i++) {         
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
          for (int f = 0; f < fieldCount; f++) {
            documentTerms[2+f][i] = file.readUnary() - 1;
          }
        }       
      } else { //no tag information to process         
        documentTerms = new int[2][count];
        //new   
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
        for(int i = 1; i < count; i++){              
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
        }
      }
      file.close();
      return documentTerms;
    } catch (IOException ioe) {
      logger.error("Problem reading inverted index", ioe);
      return null;
    }
View Full Code Here

  /**
   * {@inheritDoc}
   */
  public IterablePosting getPostings(BitIndexPointer pointer) throws IOException
  {
    final BitIn _file = this.file[pointer.getFileNumber()].readReset(pointer.getOffset(), pointer.getOffsetBits());
    IterablePosting rtr = null;
   
    DocumentIndex fixedDi = pointer instanceof DocumentIndexEntry
      ? new DocidSpecificDocumentIndex(index.getDocumentIndex(), (DocumentIndexEntry)pointer)
      : null;
View Full Code Here

    throw new UnsupportedOperationException("InvIndex.print() is missing. Use IndexUtil instead.");
  }
 
  @Override
  public IterablePosting getPostings(BitIndexPointer pointer) throws IOException {
    final BitIn _file = this.file[pointer.getFileNumber()].readReset(pointer.getOffset(), pointer.getOffsetBits());
    IterablePosting rtr = null;
    try{
      rtr = (fieldCount > 0)
        ? postingConstructor.newInstance(_file, pointer.getNumberOfEntries(), doi, fieldCount)
        : postingConstructor.newInstance(_file, pointer.getNumberOfEntries(), doi);
View Full Code Here

    if (pointer==null)
      return null;
    final boolean loadTagInformation = FieldScore.USE_FIELD_INFORMATION;
    final int count = pointer.getNumberOfEntries();
    try{
      final BitIn file = this.file[pointer.getFileNumber()].readReset(pointer.getOffset(), pointer.getOffsetBits());
      int[][] documentTerms = null;
      if (loadTagInformation) { //if there are tag information to process     
        documentTerms = new int[2+fieldCount][count];
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
        for (int f = 0; f < fieldCount; f++) {
          documentTerms[2+f][0] = file.readUnary() - 1;
        }       

        for (int i = 1; i < count; i++) {         
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
          for (int f = 0; f < fieldCount; f++) {
            documentTerms[2+f][i] = file.readUnary() - 1;
          }
        }       
      } else { //no tag information to process         
        documentTerms = new int[2][count];
        //new   
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
        for(int i = 1; i < count; i++){              
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
        }
      }
      file.close();
      return documentTerms;
    } catch (IOException ioe) {
      logger.error("Problem reading inverted index", ioe);
      return null;
    }
View Full Code Here

    for(int i=0;i<fieldCount+3;i++)
      documentTerms[i] = new int[df];
    final TIntArrayList blockids = new TIntArrayList(df); //ideally we'd have TF here

    try{
      final BitIn file = this.file[pointer.getFileNumber()].readReset(startOffset, startBitOffset);
 
      if (loadTagInformation) { //if there are tag information to process
        //documentTerms[2] = new int[df];
        documentTerms[0][0] = file.readGamma() - 1;       
        documentTerms[1][0] = file.readUnary();
        for(int fi=0;fi < fieldCount;fi++)
          documentTerms[2+fi][0] = file.readUnary() -1;
        int blockfreq = documentTerms[2+fieldCount][0] = file.readUnary() - DocumentBlockCountDelta;
        int tmpBlocks[] = new int[blockfreq];
        int previousBlockId = -1;
        for(int j=0;j<blockfreq;j++)
        {
          tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
        }
        blockids.add(tmpBlocks);
       
        for (int i = 1; i < df; i++) {         
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();
          for(int fi=0;fi < fieldCount;fi++)
            documentTerms[2+fi][0] = file.readUnary() -1;
          blockfreq = documentTerms[2+fieldCount][i] = file.readUnary() - DocumentBlockCountDelta;
          tmpBlocks = new int[blockfreq];
          previousBlockId = -1;
          for(int j=0;j<blockfreq;j++)
          {
            tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
          }
          blockids.add(tmpBlocks);
        }
      } else { //no tag information to process         
       
        documentTerms[0][0] = file.readGamma() - 1;
        documentTerms[1][0] = file.readUnary();
       
        int blockfreq = documentTerms[2][0] = file.readUnary() - DocumentBlockCountDelta;
        int tmpBlocks[] = new int[blockfreq];
        int previousBlockId = -1;
        for(int j=0;j<blockfreq;j++)
        {
          tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
        }
        blockids.add(tmpBlocks);
       
        for (int i = 1; i < df; i++) {         
          documentTerms[0][i] = file.readGamma() + documentTerms[0][i - 1];
          documentTerms[1][i] = file.readUnary();

          blockfreq = documentTerms[2][i] = file.readUnary() - DocumentBlockCountDelta;
          tmpBlocks = new int[blockfreq];
          previousBlockId = -1;
          for(int j=0;j<blockfreq;j++)
          {
            tmpBlocks[j] = previousBlockId = file.readGamma() + previousBlockId;
          }
          blockids.add(tmpBlocks);
        }
      }
      documentTerms[documentTerms.length-1] = blockids.toNativeArray();
View Full Code Here

TOP

Related Classes of org.terrier.compression.BitIn

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.