Package org.terrier.structures.indexing

Examples of org.terrier.structures.indexing.CompressingMetaIndexBuilder$KeyedPartitioner


  protected MetaIndexBuilder createMetaIndexBuilder()
  {
    final String[] forwardMetaKeys = ApplicationSetup.getProperty("indexer.meta.forward.keys", "docno").split("\\s*,\\s*");
    final int[] metaKeyLengths = parseInts(ApplicationSetup.getProperty("indexer.meta.forward.keylens", "20").split("\\s*,\\s*"));
    //no reverse metadata during main indexing, pick up as separate job later
    return new CompressingMetaIndexBuilder(currentIndex, forwardMetaKeys, metaKeyLengths, new String[0]);
  }
View Full Code Here


      final String[] metaTags = ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.key-names", "docno"));
      final int[] metaTagLengths = ArrayUtils.parseCommaDelimitedInts(srcIndex1.getIndexProperty("index.meta.value-lengths", "20"));
      final String[] metaReverseTags = MetaReverse
        ? ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.reverse-key-names", "docno"))
        : new String[0];
      final MetaIndexBuilder metaBuilder = new CompressingMetaIndexBuilder(destIndex, metaTags, metaTagLengths, metaReverseTags);
   
      if (! srcIndex1.getIndexProperty("index.meta.key-names", "docno").equals(srcIndex2.getIndexProperty("index.meta.key-names", "docno")))
      {
        throw new Error("Meta fields in source indices must match");
      }
      final BitIndexPointer emptyPointer = new SimpleBitIndexPointer();
     
       
      final int srcFieldCount1 = srcIndex1.getIntIndexProperty("index.direct.fields.count", 0);
      final int srcFieldCount2 = srcIndex1.getIntIndexProperty("index.direct.fields.count", 0);
      if (srcFieldCount1 != srcFieldCount2)
      {
        throw new Error("FieldCounts in source indices must match");
      }
     
      final int fieldCount = srcFieldCount1;
     
     
      for(String property : new String[] {"index.direct.fields.names","index.direct.fields.count" } )
      {
        destIndex.setIndexProperty(property, srcIndex1.getIndexProperty(property, null));
      }
     
      DirectInvertedOutputStream dfOutput = null;
      try{
        dfOutput =
          (fieldCount > 0 ? fieldDirectFileOutputStreamClass : directFileOutputStreamClass)
          .getConstructor(String.class)
          .newInstance(destIndex.getPath() + ApplicationSetup.FILE_SEPARATOR + 
                destIndex.getPrefix() + ".direct" + BitIn.USUAL_EXTENSION);
      } catch (Exception e) {
        logger.error("Couldn't create specified DirectInvertedOutputStream", e);
        return;
      }
     
     
      final Iterator<DocumentIndexEntry> docidInput1 = (Iterator<DocumentIndexEntry>)srcIndex1.getIndexStructureInputStream("document");
      final PostingIndexInputStream dfInput1 = (PostingIndexInputStream)srcIndex1.getIndexStructureInputStream("direct");
      final MetaIndex metaInput1 = srcIndex1.getMetaIndex();
     
      int sourceDocid = 0;
      //traversing the direct index, without any change
      while(docidInput1.hasNext())
      {
        BitIndexPointer pointerDF = emptyPointer;
        DocumentIndexEntry die = docidInput1.next();
        if (die.getDocumentLength() > 0)
        {
          pointerDF = dfOutput.writePostings(dfInput1.next());
        }
        die.setBitIndexPointer(pointerDF);
        docidOutput.addEntryToBuffer(die);
        metaBuilder.writeDocumentEntry(metaInput1.getAllItems(sourceDocid));
        sourceDocid++;
      }
      dfInput1.close();
      metaInput1.close();
      IndexUtil.close(docidInput1);
      final Iterator<DocumentIndexEntry> docidInput2 = (Iterator<DocumentIndexEntry>)srcIndex2.getIndexStructureInputStream("document");
      final PostingIndexInputStream dfInput2 = (PostingIndexInputStream)srcIndex2.getIndexStructureInputStream("direct");
      final MetaIndex metaInput2 = srcIndex2.getMetaIndex();
     
      sourceDocid = 0;
      while (docidInput2.hasNext())
      {
        DocumentIndexEntry die = docidInput2.next();
     
        BitIndexPointer pointerDF = emptyPointer;
        if (die.getDocumentLength() > 0)
        {
          final IterablePosting postings = dfInput2.next();
         
          List<Posting> postingList = new ArrayList<Posting>();
          while(postings.next() != IterablePosting.EOL)
          {
            final Posting p = postings.asWritablePosting();
            p.setId(termcodeHashmap.get(postings.getId()));
            postingList.add(p);
          }
          Collections.sort(postingList, new PostingIdComparator());
          pointerDF = dfOutput.writePostings(postingList.iterator());
        }
        die.setBitIndexPointer(pointerDF);
        docidOutput.addEntryToBuffer(die);
        metaBuilder.writeDocumentEntry(metaInput2.getAllItems(sourceDocid));
        sourceDocid++;
      }
      dfInput2.close();
      IndexUtil.close(docidInput2);
      metaInput2.close();
     
      metaBuilder.close();
      dfOutput.close();
      docidOutput.finishedCollections();
      docidOutput.close();

      destIndex.addIndexStructure(
View Full Code Here

      final String[] metaTags = ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.key-names", "docno"));
      final int[] metaTagLengths = ArrayUtils.parseCommaDelimitedInts(srcIndex1.getIndexProperty("index.meta.value-lengths", "20"));
      final String[] metaReverseTags = MetaReverse
        ? ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.reverse-key-names", "docno"))
        : new String[0];
      final MetaIndexBuilder metaBuilder = new CompressingMetaIndexBuilder(destIndex, metaTags, metaTagLengths, metaReverseTags);
   
      if (! srcIndex1.getIndexProperty("index.meta.key-names", "docno").equals(srcIndex2.getIndexProperty("index.meta.key-names", "docno")))
      {
        throw new Error("Meta fields in source indices must match");
      }
     
      //opening the first set of files.
      final Iterator<DocumentIndexEntry> docidInput1 = (Iterator<DocumentIndexEntry>)srcIndex1.getIndexStructureInputStream("document");
      final Iterator<String[]> metaInput1 = (Iterator<String[]>)srcIndex1.getIndexStructureInputStream("meta");
     
      int srcFieldCount1 = srcIndex1.getIntIndexProperty("index.inverted.fields.count", 0);
      int srcFieldCount2 = srcIndex2.getIntIndexProperty("index.inverted.fields.count", 0);
      if (srcFieldCount1 != srcFieldCount2)
      {
        throw new Error("FieldCounts in source indices must match");
      }
      if (srcIndex1.getIndexProperty("index.document-factory.class", "").equals("org.terrier.structures.SimpleDocumentIndexEntry$Factory")
        || srcIndex1.getIndexProperty("index.document-factory.class", "").equals("org.terrier.structures.BasicDocumentIndexEntry$Factory"))
      {
        //for some reason, the source document index has not fields. so we shouldn't assume that fields are being used.
        srcFieldCount1 = 0;
      }
      final int fieldCount = srcFieldCount1;
     
      //traversing the first set of files, without any change
      while(docidInput1.hasNext())
      {
        metaInput1.hasNext();
        DocumentIndexEntry die = docidInput1.next();
        DocumentIndexEntry dieNew = (fieldCount > 0) ? die : new SimpleDocumentIndexEntry(die);
        docidOutput.addEntryToBuffer(dieNew);
        metaBuilder.writeDocumentEntry(metaInput1.next());
      }
     
      final Iterator<DocumentIndexEntry> docidInput2 = (Iterator<DocumentIndexEntry>)srcIndex2.getIndexStructureInputStream("document");
      final Iterator<String[]> metaInput2 = (Iterator<String[]>)srcIndex2.getIndexStructureInputStream("meta");
      //traversing the 2nd set of files, without any change
      while(docidInput2.hasNext())
      {
        metaInput2.hasNext();
        DocumentIndexEntry die = docidInput2.next();
        DocumentIndexEntry dieNew = (fieldCount > 0) ? die : new SimpleDocumentIndexEntry(die);
        docidOutput.addEntryToBuffer(dieNew);
        metaBuilder.writeDocumentEntry(metaInput2.next());
      }
     
      docidOutput.finishedCollections();
      docidOutput.close();
      metaBuilder.close();
      IndexUtil.close(docidInput1);
      IndexUtil.close(docidInput2);
      //destIndex.setIndexProperty("index.inverted.fields.count", ""+ fieldCount);
      if (fieldCount > 0)
      {
View Full Code Here

  protected MetaIndexBuilder createMetaIndexBuilder()
  {
    final String[] forwardMetaKeys = ApplicationSetup.getProperty("indexer.meta.forward.keys", "docno").split("\\s*,\\s*");
    final int[] metaKeyLengths = parseInts(ApplicationSetup.getProperty("indexer.meta.forward.keylens", "20").split("\\s*,\\s*"));
    final String[] reverseMetaKeys = ApplicationSetup.getProperty("indexer.meta.reverse.keys", "docno").split("\\s*,\\s*");
    return new CompressingMetaIndexBuilder(currentIndex, forwardMetaKeys, metaKeyLengths, reverseMetaKeys);
  }
View Full Code Here

 
  @Test
  public void testNumKeysConfigurationMismatch()
  {
    exception.expect(IllegalArgumentException.class);
    new CompressingMetaIndexBuilder(
        null, new String[]{"docno"}, new int[0], new String[0]);
  }
View Full Code Here

  @Test
  public void testKeysSubsetConfigurationMismatch()
  {
    exception.expect(IllegalArgumentException.class);
    new CompressingMetaIndexBuilder(
        Index.createNewIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX),
        new String[]{"docno"}, new int[]{20}, new String[]{"url"});
  }
View Full Code Here

 
  protected void testBase(String name, String[] keyNames, int[] keyLengths, String[] revKeys, String[][] data) throws Exception
  {
    Index index = Index.createNewIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
    assertNotNull("Index should not be null", index);
    MetaIndexBuilder b = new CompressingMetaIndexBuilder(index, name,
        keyNames, keyLengths, revKeys);
    assertNotNull(b);
    Set<String> rev = new HashSet<String>();
    for(String revKey : revKeys)
    {
      rev.add(revKey);
    }
   
    for(String[] dataOne : data)
    {
      b.writeDocumentEntry(dataOne);
    }
    b.close();
    b = null;
    finishedCreatingMeta(index, name);
    //index.close();  Index.createIndex("/tmp", "test");
   
    int offset = 0;
View Full Code Here

TOP

Related Classes of org.terrier.structures.indexing.CompressingMetaIndexBuilder$KeyedPartitioner

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.