Package org.terrier.structures.indexing

Examples of org.terrier.structures.indexing.MetaIndexBuilder


  @SuppressWarnings("unchecked")
  protected void mergeDocumentIndex(Index[] src) throws IOException
  {
    //logger.info("Merging document and meta indices");
    final DocumentIndexBuilder docidOutput = new DocumentIndexBuilder(currentIndex, "document");
    final MetaIndexBuilder metaBuilder = this.createMetaIndexBuilder();
    int i_index = 0;
    int docCount =-1;
    for (Index srcIndex: src)
    {
      final Iterator<DocumentIndexEntry> docidInput = (Iterator<DocumentIndexEntry>)srcIndex.getIndexStructureInputStream("document");
      final Iterator<String[]> metaInput1 = (Iterator<String[]>)srcIndex.getIndexStructureInputStream("meta");
        while (docidInput.hasNext())
      {
        docCount++;
        docidOutput.addEntryToBuffer(docidInput.next());
            metaBuilder.writeDocumentEntry(metaInput1.next());
            this.lastReporter.progress();
      }
        IndexUtil.close(docidInput);
        IndexUtil.close(metaInput1);
        i_index++;
    }
    metaBuilder.close();
    docidOutput.finishedCollections();
    if (FieldScore.FIELDS_COUNT > 0)
    {
      currentIndex.addIndexStructure("document-factory", FieldDocumentIndexEntry.Factory.class.getName(), "java.lang.String", "${index.inverted.fields.count}");
    }
View Full Code Here


      final String[] metaTags = ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.key-names", "docno"));
      final int[] metaTagLengths = ArrayUtils.parseCommaDelimitedInts(srcIndex1.getIndexProperty("index.meta.value-lengths", "20"));
      final String[] metaReverseTags = MetaReverse
        ? ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.reverse-key-names", "docno"))
        : new String[0];
      final MetaIndexBuilder metaBuilder = new CompressingMetaIndexBuilder(destIndex, metaTags, metaTagLengths, metaReverseTags);
   
      if (! srcIndex1.getIndexProperty("index.meta.key-names", "docno").equals(srcIndex2.getIndexProperty("index.meta.key-names", "docno")))
      {
        throw new Error("Meta fields in source indices must match");
      }
      final BitIndexPointer emptyPointer = new SimpleBitIndexPointer();
     
       
      final int srcFieldCount1 = srcIndex1.getIntIndexProperty("index.direct.fields.count", 0);
      final int srcFieldCount2 = srcIndex1.getIntIndexProperty("index.direct.fields.count", 0);
      if (srcFieldCount1 != srcFieldCount2)
      {
        throw new Error("FieldCounts in source indices must match");
      }
     
      final int fieldCount = srcFieldCount1;
     
     
      for(String property : new String[] {"index.direct.fields.names","index.direct.fields.count" } )
      {
        destIndex.setIndexProperty(property, srcIndex1.getIndexProperty(property, null));
      }
     
      DirectInvertedOutputStream dfOutput = null;
      try{
        dfOutput =
          (fieldCount > 0 ? fieldDirectFileOutputStreamClass : directFileOutputStreamClass)
          .getConstructor(String.class)
          .newInstance(destIndex.getPath() + ApplicationSetup.FILE_SEPARATOR + 
                destIndex.getPrefix() + ".direct" + BitIn.USUAL_EXTENSION);
      } catch (Exception e) {
        logger.error("Couldn't create specified DirectInvertedOutputStream", e);
        return;
      }
     
     
      final Iterator<DocumentIndexEntry> docidInput1 = (Iterator<DocumentIndexEntry>)srcIndex1.getIndexStructureInputStream("document");
      final PostingIndexInputStream dfInput1 = (PostingIndexInputStream)srcIndex1.getIndexStructureInputStream("direct");
      final MetaIndex metaInput1 = srcIndex1.getMetaIndex();
     
      int sourceDocid = 0;
      //traversing the direct index, without any change
      while(docidInput1.hasNext())
      {
        BitIndexPointer pointerDF = emptyPointer;
        DocumentIndexEntry die = docidInput1.next();
        if (die.getDocumentLength() > 0)
        {
          pointerDF = dfOutput.writePostings(dfInput1.next());
        }
        die.setBitIndexPointer(pointerDF);
        docidOutput.addEntryToBuffer(die);
        metaBuilder.writeDocumentEntry(metaInput1.getAllItems(sourceDocid));
        sourceDocid++;
      }
      dfInput1.close();
      metaInput1.close();
      IndexUtil.close(docidInput1);
      final Iterator<DocumentIndexEntry> docidInput2 = (Iterator<DocumentIndexEntry>)srcIndex2.getIndexStructureInputStream("document");
      final PostingIndexInputStream dfInput2 = (PostingIndexInputStream)srcIndex2.getIndexStructureInputStream("direct");
      final MetaIndex metaInput2 = srcIndex2.getMetaIndex();
     
      sourceDocid = 0;
      while (docidInput2.hasNext())
      {
        DocumentIndexEntry die = docidInput2.next();
     
        BitIndexPointer pointerDF = emptyPointer;
        if (die.getDocumentLength() > 0)
        {
          final IterablePosting postings = dfInput2.next();
         
          List<Posting> postingList = new ArrayList<Posting>();
          while(postings.next() != IterablePosting.EOL)
          {
            final Posting p = postings.asWritablePosting();
            p.setId(termcodeHashmap.get(postings.getId()));
            postingList.add(p);
          }
          Collections.sort(postingList, new PostingIdComparator());
          pointerDF = dfOutput.writePostings(postingList.iterator());
        }
        die.setBitIndexPointer(pointerDF);
        docidOutput.addEntryToBuffer(die);
        metaBuilder.writeDocumentEntry(metaInput2.getAllItems(sourceDocid));
        sourceDocid++;
      }
      dfInput2.close();
      IndexUtil.close(docidInput2);
      metaInput2.close();
     
      metaBuilder.close();
      dfOutput.close();
      docidOutput.finishedCollections();
      docidOutput.close();

      destIndex.addIndexStructure(
View Full Code Here

      final String[] metaTags = ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.key-names", "docno"));
      final int[] metaTagLengths = ArrayUtils.parseCommaDelimitedInts(srcIndex1.getIndexProperty("index.meta.value-lengths", "20"));
      final String[] metaReverseTags = MetaReverse
        ? ArrayUtils.parseCommaDelimitedString(srcIndex1.getIndexProperty("index.meta.reverse-key-names", "docno"))
        : new String[0];
      final MetaIndexBuilder metaBuilder = new CompressingMetaIndexBuilder(destIndex, metaTags, metaTagLengths, metaReverseTags);
   
      if (! srcIndex1.getIndexProperty("index.meta.key-names", "docno").equals(srcIndex2.getIndexProperty("index.meta.key-names", "docno")))
      {
        throw new Error("Meta fields in source indices must match");
      }
     
      //opening the first set of files.
      final Iterator<DocumentIndexEntry> docidInput1 = (Iterator<DocumentIndexEntry>)srcIndex1.getIndexStructureInputStream("document");
      final Iterator<String[]> metaInput1 = (Iterator<String[]>)srcIndex1.getIndexStructureInputStream("meta");
     
      int srcFieldCount1 = srcIndex1.getIntIndexProperty("index.inverted.fields.count", 0);
      int srcFieldCount2 = srcIndex2.getIntIndexProperty("index.inverted.fields.count", 0);
      if (srcFieldCount1 != srcFieldCount2)
      {
        throw new Error("FieldCounts in source indices must match");
      }
      if (srcIndex1.getIndexProperty("index.document-factory.class", "").equals("org.terrier.structures.SimpleDocumentIndexEntry$Factory")
        || srcIndex1.getIndexProperty("index.document-factory.class", "").equals("org.terrier.structures.BasicDocumentIndexEntry$Factory"))
      {
        //for some reason, the source document index has not fields. so we shouldn't assume that fields are being used.
        srcFieldCount1 = 0;
      }
      final int fieldCount = srcFieldCount1;
     
      //traversing the first set of files, without any change
      while(docidInput1.hasNext())
      {
        metaInput1.hasNext();
        DocumentIndexEntry die = docidInput1.next();
        DocumentIndexEntry dieNew = (fieldCount > 0) ? die : new SimpleDocumentIndexEntry(die);
        docidOutput.addEntryToBuffer(dieNew);
        metaBuilder.writeDocumentEntry(metaInput1.next());
      }
     
      final Iterator<DocumentIndexEntry> docidInput2 = (Iterator<DocumentIndexEntry>)srcIndex2.getIndexStructureInputStream("document");
      final Iterator<String[]> metaInput2 = (Iterator<String[]>)srcIndex2.getIndexStructureInputStream("meta");
      //traversing the 2nd set of files, without any change
      while(docidInput2.hasNext())
      {
        metaInput2.hasNext();
        DocumentIndexEntry die = docidInput2.next();
        DocumentIndexEntry dieNew = (fieldCount > 0) ? die : new SimpleDocumentIndexEntry(die);
        docidOutput.addEntryToBuffer(dieNew);
        metaBuilder.writeDocumentEntry(metaInput2.next());
      }
     
      docidOutput.finishedCollections();
      docidOutput.close();
      metaBuilder.close();
      IndexUtil.close(docidInput1);
      IndexUtil.close(docidInput2);
      //destIndex.setIndexProperty("index.inverted.fields.count", ""+ fieldCount);
      if (fieldCount > 0)
      {
View Full Code Here

 
  protected void testBase(String name, String[] keyNames, int[] keyLengths, String[] revKeys, String[][] data) throws Exception
  {
    Index index = Index.createNewIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
    assertNotNull("Index should not be null", index);
    MetaIndexBuilder b = new CompressingMetaIndexBuilder(index, name,
        keyNames, keyLengths, revKeys);
    assertNotNull(b);
    Set<String> rev = new HashSet<String>();
    for(String revKey : revKeys)
    {
      rev.add(revKey);
    }
   
    for(String[] dataOne : data)
    {
      b.writeDocumentEntry(dataOne);
    }
    b.close();
    b = null;
    finishedCreatingMeta(index, name);
    //index.close();  Index.createIndex("/tmp", "test");
   
    int offset = 0;
View Full Code Here

TOP

Related Classes of org.terrier.structures.indexing.MetaIndexBuilder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.