Examples of org.terrier.structures.Index$UpdatingCollectionStatistics

org.terrier.structures.Index
This collection statistics implementation parses the associated index properties on every call. It doesn't support fields.

   * Builds the inverted file from scratch, single pass method
   */
  public void createSinglePass(){
    if (Index.existsIndex(path, prefix))
    {
      Index i = Index.createIndex(path, prefix);
      if (i.hasIndexStructure("inverted"))
      {
        logger.fatal("Cannot create an inverted structure while an index with a inverted structure exists at "+path + ","+ prefix);
        return;
      }
    }

View Full Code Here

    * @return null If an error occurred obtaining the matching class
    */
  protected Matching getMatchingModel(Request rq)
  {
    Matching rtr = null;
    Index _index = rq.getIndex();
    String ModelName = rq.getMatchingModel();
    //add the namespace if the modelname is not fully qualified
    
    final String ModelNames[] = ModelName.split("\\s*,\\s*");
    final int modelCount = ModelNames.length;

View Full Code Here

      TRECQuerying trecQuerying = new TRECQuerying(queryexpand);
      trecQuerying.processQueries(c, isParameterValueSpecified);
      trecQuerying.close();
    } else if (printdocid) {
      Index.setIndexLoadingProfileAsRetrieval(false);
      Index i = Index.createIndex();
      if (i == null)
      {
        logger.error("No such index : "+ Index.getLastIndexLoadError());        
      }
      IndexUtil.printDocumentIndex(i, "document");
      i.close();
    } else if (printmeta) {
      Index.setIndexLoadingProfileAsRetrieval(false);
      Index i = Index.createIndex();
      if (i == null)
      {
        logger.error("No such index : "+ Index.getLastIndexLoadError());        
      }
      IndexUtil.printMetaIndex(i, "meta");
      i.close();
    } else if (printlexicon) {
      Index.setIndexLoadingProfileAsRetrieval(false);
      Index i = Index.createIndex();
      if (i == null)
      {
        logger.error("No such index : "+ Index.getLastIndexLoadError());        
      }
      if (! i.hasIndexStructureInputStream("lexicon"))
      {
        //logger.warn("Sorry, no lexicon index structure in index");
      }
      LexiconUtil.printLexicon(i, "lexicon");
    } else if (printdirect) {
      Index.setIndexLoadingProfileAsRetrieval(false);
      Index i = Index.createIndex();
      if (i == null)
      {
        logger.error("No such index : "+ Index.getLastIndexLoadError());        
      }
      if (! i.hasIndexStructureInputStream("direct"))
      {
        //logger.warn("Sorry, no direct index structure in index");
      }
      else
      {
      DirectIndexInputStream dirIndex = (DirectIndexInputStream)(i.getIndexStructureInputStream("direct"));
      dirIndex.print();
      dirIndex.close();
      i.close();
      }
    } else if (printinverted) {
      Index.setIndexLoadingProfileAsRetrieval(false);
      Index i = Index.createIndex();
      if (i == null)
      {
        logger.error("No such index : "+ Index.getLastIndexLoadError());        
      }
      if (i.hasIndexStructureInputStream("inverted"))
      {
        InvertedIndexInputStream invIndex = (InvertedIndexInputStream)(i.getIndexStructureInputStream("inverted"));
        invIndex.print();
        invIndex.close();
      }
      else
      {
        //logger.warn("Sorry, no inverted index inputstream structure in index");
      }
      i.close();
    } else if (printstats) {
      Index.setIndexLoadingProfileAsRetrieval(false);
      Index i = Index.createIndex();
      if (i == null)
      {
        logger.error("No such index : "+ Index.getLastIndexLoadError());        
      }
      else if(logger.isInfoEnabled()){
        //logger.info("Collection statistics:");
        //logger.info("number of indexed documents: " + i.getCollectionStatistics().getNumberOfDocuments());
        //logger.info("size of vocabulary: " +  i.getCollectionStatistics().getNumberOfUniqueTerms());
        //logger.info("number of tokens: " +  i.getCollectionStatistics().getNumberOfTokens());
        //logger.info("number of pointers: " +  i.getCollectionStatistics().getNumberOfPointers());
      }
      i.close();
    } else if (evaluation) {
      Evaluation te = null;
      if (evaluation_type.equals("adhoc"))
        te = new AdhocEvaluation();
      else if (evaluation_type.equals("named"))

View Full Code Here

        existsIndices[i] = false;
        //logger.warn("No reduce "+i+" output : no output index ["+index_path+","+index_prefix+ "]");
      }
    }
    //2. the target index is the first source index
    Index dest = srcIndices[0] != null ? srcIndices[0] : Index.createIndex(index_path, ApplicationSetup.TERRIER_INDEX_PREFIX+"-"+0);
    if (dest == null)
    {
      throw new IllegalArgumentException("No index found at " + index_path + ","+ ApplicationSetup.TERRIER_INDEX_PREFIX+"-"+0);
    }
    
    //3. create the new lexicon
    LexiconOutputStream<String> lexOut = new FSOMapFileLexiconOutputStream(
        dest, tmpLexiconStructure, 
        (FixedSizeWriteableFactory<Text>) dest.getIndexStructure(lexiconStructure + "-keyfactory"),
        (Class<? extends FixedSizeWriteableFactory<LexiconEntry>>) dest.getIndexStructure(lexiconStructure + "-valuefactory").getClass());
    
    //4. append each source lexicon on to the new lexicon, amending the filenumber as we go
    int termId = 0;
    for(int i=0;i<numberOfReducers;i++)
    {
      //the partition did not have any stuff
      if (! existsIndices[i])
      {
        //touch an empty inverted index file for this segment, as BitPostingIndex requires that all of the files exist
        Files.writeFileStream(BitPostingIndexInputStream.getFilename(
            dest, invertedStructure, (byte)numberOfReducers, (byte)i)).close();
        continue;
      }
      //else, append the lexicon
      Iterator<Map.Entry<String,LexiconEntry>> lexIn = (Iterator<Map.Entry<String, LexiconEntry>>) srcIndices[i].getIndexStructureInputStream("lexicon");
      while(lexIn.hasNext())
      {
        Map.Entry<String,LexiconEntry> e = lexIn.next();
        e.getValue().setTermId(termId);
        ((BitIndexPointer)e.getValue()).setFileNumber((byte)i);
        lexOut.writeNextEntry(e.getKey(), e.getValue());
        termId++;
      }
      IndexUtil.close(lexIn);
      //rename the inverted file to be part of the destination index
      Files.rename(
          BitPostingIndexInputStream.getFilename(srcIndices[i], invertedStructure, (byte)1, (byte)1), 
          BitPostingIndexInputStream.getFilename(dest, invertedStructure, (byte)numberOfReducers, (byte)i));
    }
    lexOut.close();
    
    //5. change over lexicon structures
    final String[] structureSuffices = new String[]{"", "-entry-inputstream"};
    //remove old lexicon structures
    for (String suffix : structureSuffices)
    {
    //  if (! IndexUtil.deleteStructure(dest, lexiconStructure + suffix))
        //logger.warn("Structure " + lexiconStructure + suffix + " not found when removing");
    }
    //rename new lexicon structures
    for (String suffix : structureSuffices)
    {
    //  if (! IndexUtil.renameIndexStructure(dest, tmpLexiconStructure + suffix, lexiconStructure + suffix))
        //logger.warn("Structure " + tmpLexiconStructure + suffix + " not found when renaming");
    }
      
    //6. update destination index
    
    if (FieldScore.FIELDS_COUNT > 0)
      dest.addIndexStructure("lexicon-valuefactory", FieldLexiconEntry.Factory.class.getName(), "java.lang.String", "${index.inverted.fields.count}");
    dest.setIndexProperty("index."+invertedStructure+".data-files", ""+numberOfReducers);
    LexiconBuilder.optimise(dest, lexiconStructure);
    dest.flush();
    
    //7. close source and dest indices
    for(Index src: srcIndices) //dest is also closed
    {
      if (src != null)

View Full Code Here

   * @param args
   */
  public static void main(String[]args)
  {
    Index.setIndexLoadingProfileAsRetrieval(false);
    Index index = Index.createIndex();
    if (index == null)
    {
      System.err.println("No such index");
      return;
    }
    DFRDependenceScoreModifier d = new DFRDependenceScoreModifier();
    d.setCollectionStatistics(index.getCollectionStatistics(), index);
    System.out.println(d.scoreFDSD(Integer.parseInt(args[0]), Integer.parseInt(args[1])));  
  }

View Full Code Here

      logger.fatal("Exiting ...");
      return;
    }
    
    Index.setIndexLoadingProfileAsRetrieval(false);
    Index indexSrc1 = Index.createIndex(args[0], args[1]);
    Index indexSrc2 = Index.createIndex(args[2], args[3]);
    Index indexDest = Index.createNewIndex(args[4], args[5]);
    
    StructureMerger sMerger = new StructureMerger(indexSrc1, indexSrc2, indexDest);
    long start = System.currentTimeMillis();
    //logger.info("started at " + (new Date()));
    if (ApplicationSetup.getProperty("merger.onlylexicons","false").equals("true")) {
      System.err.println("Use LexiconMerger");
      return;
    } else if (ApplicationSetup.getProperty("merger.onlydocids","false").equals("true")) {
      sMerger.mergeDocumentIndexFiles();
    } else {
      sMerger.mergeStructures();
    }
    indexSrc1.close();
    indexSrc2.close();
    indexDest.close();
    
    //logger.info("finished at " + (new Date()));
    long end = System.currentTimeMillis();
    //logger.info("time elapsed: " + ((end-start)*1.0d/1000.0d) + " sec.");
  }

View Full Code Here

      logger.fatal("usage: java org.terrier.structures.merging.LexiconMerger srcPath1 srcPrefix1 srcPath2 srcPrefix2 destPath1 destPrefix1 ");
      return;
    }
    Index.setIndexLoadingProfileAsRetrieval(false);
    
    Index indexSrc1 = Index.createIndex(args[0], args[1]);
    Index indexSrc2 = Index.createIndex(args[2], args[3]);
    Index indexDest = Index.createNewIndex(args[4], args[5]);


    LexiconMerger lMerger = new LexiconMerger(indexSrc1, indexSrc2, indexDest);
    long start = System.currentTimeMillis();
    if(logger.isInfoEnabled()){
      //logger.info("started at " + (new Date()));
    }
    lMerger.mergeLexicons();
    indexSrc1.close();
    indexSrc2.close();
    indexDest.close();


    if(logger.isInfoEnabled()){
      //logger.info("finished at " + (new Date()));
      long end = System.currentTimeMillis();
      //logger.info("time elapsed: " + ((end-start)*1.0d/1000.0d) + " sec.");

View Full Code Here

      logger.fatal("usage: java org.terrier.structures.merging.BlockStructureMerger srcPath1 srcPrefix1 srcPath2 srcPrefix2 destPath1 destPrefix1 ");
      logger.fatal("Exiting ...");
      return;
    }
    Index.setIndexLoadingProfileAsRetrieval(false);
    Index indexSrc1 = Index.createIndex(args[0], args[1]);
    Index indexSrc2 = Index.createIndex(args[2], args[3]);
    Index indexDest = Index.createNewIndex(args[4], args[5]);
    
    StructureMerger sMerger = new BlockStructureMerger(indexSrc1, indexSrc2, indexDest);
    long start = System.currentTimeMillis();
    //logger.info("started at " + (new Date()));
    if (ApplicationSetup.getProperty("merger.onlylexicons","false").equals("true")) {

View Full Code Here

    * @param index2 Path/Prefix of source index 2
    * @param outputIndex Path/Prefix of destination index 
    */
  protected static void mergeTwoIndices(String[] index1, String[] index2, String[] outputIndex){
    StructureMerger sMerger = null;
    Index src1 = Index.createIndex(index1[0], index1[1]);
    Index src2 = Index.createIndex(index2[0], index2[1]);
    Index dst = Index.createNewIndex(outputIndex[0], outputIndex[1]);
  //  //logger.info("Merging "+ src1+ " & "+ src2 +" to " + dst);
    if (ApplicationSetup.BLOCK_INDEXING) 
      sMerger = new BlockStructureMerger(src1, src2, dst);
    else 
      sMerger = new StructureMerger(src1, src2, dst);
                      
    //sMerger.setNumberOfBits(FieldScore.FIELDS_COUNT);
    sMerger.mergeStructures();
    try{
      src1.close(); src2.close(); dst.close();
    } catch (IOException ioe) {
  //    logger.error("Problem flushing index dst", ioe);
    }
    //delete old indices  
    try{

View Full Code Here

      Inv2DirectMultiReduce.invertStructure(index, HadoopPlugin.getJobFactory("inv2direct"), 1);
    }


    @Override
    protected void finishIndexing() throws Exception {
      Index i1 = Index.createIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX + "-0");
      Index i2 = Index.createIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX + "-0");
      Index dest = Index.createNewIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
      new StructureMerger(i1, i2, dest).mergeStructures();
    }

View Full Code Here

0 1 2 3

TOP

Related Classes of org.terrier.structures.Index$UpdatingCollectionStatistics

org.terrier.applications.HadoopIndexing

org.terrier.applications.TRECIndexing

org.terrier.applications.TrecTerrier

org.terrier.indexing.hadoop.Hadoop_BasicSinglePassIndexer

org.terrier.indexing.Indexer

org.terrier.indexing.IndexTestUtils

org.terrier.indexing.TestIndexers

org.terrier.matching.dsms.DFRDependenceScoreModifier

org.terrier.matching.TestMatching

org.terrier.matching.TestTRECResultsMatching

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.