Package org.terrier.structures

Examples of org.terrier.structures.Index$UpdatingCollectionStatistics


  protected Index writeIndexStructure(int[][] postings) throws Exception
  {
    String path = ApplicationSetup.TERRIER_INDEX_PATH;
    String prefix = ApplicationSetup.TERRIER_INDEX_PREFIX;

    Index index = Index.createNewIndex(path, prefix);
    DirectInvertedDocidOnlyOuptutStream dios = new DirectInvertedDocidOnlyOuptutStream(path + '/'+ prefix + ".direct.bf");
    //FSArrayFile<BitIndexPointer>
    DocumentIndexBuilder dib = new DocumentIndexBuilder(index, "document");
    BitIndexPointer p;
    for(int[] list : postings)
    {
      final int doclen = StaTools.sum(list);
      p = dios.writePostings(new ArrayOfIdsIterablePosting(list));
      DocumentIndexEntry die = new BasicDocumentIndexEntry(doclen, p);
      dib.addEntryToBuffer(die);
    }
    dios.close();
    dib.finishedCollections();
    index.addIndexStructure(
        "direct",
        "org.terrier.structures.DirectIndex",
        "org.terrier.structures.Index,java.lang.String,java.lang.Class",
        "index,structureName,"+ BasicIterablePostingDocidOnly.class.getName());
    index.addIndexStructureInputStream(
        "direct",
        "org.terrier.structures.DirectIndexInputStream",
        "org.terrier.structures.Index,java.lang.String,java.lang.Class",
        "index,structureName,"+ BasicIterablePostingDocidOnly.class.getName());
    index.setIndexProperty("index.direct.fields.count", ""+FieldScore.FIELDS_COUNT );
    index.setIndexProperty("index.direct.fields.names", ArrayUtils.join(FieldScore.FIELD_NAMES, ","));
    index.addIndexStructure("document-factory", BasicDocumentIndexEntry.Factory.class.getName(), "", "");
    index.flush();
    DocumentIndex di = index.getDocumentIndex();
    assertNotNull(di);
    assertEquals(postings.length, di.getNumberOfDocuments());
    return index;
  }
View Full Code Here


 
  @Test public void SingleFileSingleSplit() throws Exception
 
    if (! validPlatform()) return;
    final int[][] postings = new int[][]{new int[]{0,1,2,4,8}, new int[]{0,8,10}};
    Index index = writeIndexStructure(postings);
   
    JobConf jc = new JobConf();
    BitPostingIndexInputFormat bpiif = makeInputFormat(jc, index, 0);
   
    InputSplit[] splits = bpiif.getSplits(jc, 1);
View Full Code Here

// 
  @Test public void SingleFileMultipleSplitsTrailing() throws Exception
  {
    if (! validPlatform()) return;
    final int[][] postings = new int[][]{new int[]{100,200,300,400}, new int[]{0,1,2,4,8}, new int[]{0,8,10}};
    Index index = writeIndexStructure(postings);
   
    JobConf jc = new JobConf();
    BitPostingIndexInputFormat bpiif = makeInputFormat(jc, index, 3);
   
    InputSplit[] splits = bpiif.getSplits(jc, 2);
View Full Code Here

TOP

Related Classes of org.terrier.structures.Index$UpdatingCollectionStatistics

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.