Examples of ivory.core.RetrievalEnvironment.writeCollectionDocumentCount()

ivory.core.RetrievalEnvironment.writeCollectionDocumentCount()


      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in " +
          (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
      if (finalNumDocs > 0) {
        LOG.info("Changed doc count: " + env.readCollectionDocumentCount() +" => " + finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }else {
        LOG.info("No document output! Terminating...");
        return -1;
      }
      // set Property.CollectionTermCount to the size of the target vocab. since all docs are translated into that vocab. This property is read by WriteRandomVectors via RunComputeSignatures.

View Full Code Here

    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");


    // Write out number of postings.
    int collectionDocCount = (int) job1.getCounters().findCounter(Docs.Total).getValue();
    env.writeCollectionDocumentCount(collectionDocCount);


    Path dlFile = env.getDoclengthsData();
    if (fs.exists(dlFile)) {
      LOG.info("DocLength data exists: Skipping!");
      return 0;

View Full Code Here

      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in "+(System.currentTimeMillis()-startTime)/1000.0+" seconds");


      int finalNumDocs = weightedIntVectorsTool.run();
      if(finalNumDocs > 0){
        LOG.info("Changed doc count from "+env.readCollectionDocumentCount() + " to = "+finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }
      // set Property.CollectionTermCount to the size of the target vocab. since all docs are translated into that vocab. This property is read by WriteRandomVectors via RunComputeSignatures.
      Vocab engVocabH = null;
      try {
        engVocabH = HadoopAlign.loadVocab(new Path(conf.get("Ivory.FinalVocab")), conf);

View Full Code Here


      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in " +
          (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
      if (finalNumDocs > 0) {
        LOG.info("Changed doc count: " + env.readCollectionDocumentCount() +" => " + finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }else {
        LOG.info("No document output! Terminating...");
        return -1;
      }
      // set Property.CollectionTermCount to the size of the target vocab. since all docs are translated into that vocab. This property is read by WriteRandomVectors via RunComputeSignatures.

View Full Code Here


      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in " +
          (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
      if (finalNumDocs > 0) {
        LOG.info("Changed doc count: " + env.readCollectionDocumentCount() +" => " + finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }else {
        LOG.info("No document output! Terminating...");
        return -1;
      }
      // set Property.CollectionTermCount to the size of the target vocab. since all docs are translated into that vocab. This property is read by WriteRandomVectors via RunComputeSignatures.

View Full Code Here

    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");


    // Write out number of postings.
    int collectionDocCount = (int) job1.getCounters().findCounter(Docs.Total).getValue();
    env.writeCollectionDocumentCount(collectionDocCount);


    Path dlFile = env.getDoclengthsData();
    if (fs.exists(dlFile)) {
      LOG.info("DocLength data exists: Skipping!");
      return 0;

View Full Code Here


    RetrievalEnvironment env = new RetrievalEnvironment(dataOutputPath, fs);


    env.writeCollectionAverageDocumentLength(avgdl);
    env.writeCollectionLength(collectionLength);
    env.writeCollectionDocumentCount(docCount);


    return 0;
  }


  public static void main(String[] args) throws Exception {

View Full Code Here

      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in "+(System.currentTimeMillis()-startTime)/1000.0+" seconds");


      int finalNumDocs = weightedIntVectorsTool.run();
      if(finalNumDocs > 0){
        LOG.info("Changed doc count from "+env.readCollectionDocumentCount() + " to = "+finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }
      // set Property.CollectionTermCount to the size of the target vocab. since all docs are translated into that vocab. This property is read by WriteRandomVectors via RunComputeSignatures.
      Vocab engVocabH = null;
      try {
        engVocabH = HadoopAlign.loadVocab(new Path(conf.get("Ivory.FinalVocab")), conf);

View Full Code Here

    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");


    // Write out number of postings.
    int collectionDocCount = (int) job1.getCounters().findCounter(Docs.Total).getValue();
    env.writeCollectionDocumentCount(collectionDocCount);


    Path dlFile = env.getDoclengthsData();
    if (fs.exists(dlFile)) {
      LOG.info("DocLength data exists: Skipping!");
      return 0;

View Full Code Here

    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");


    // Write out number of postings.
    int collectionDocCount = (int) job1.getCounters().findCounter(Docs.Total).getValue();
    env.writeCollectionDocumentCount(collectionDocCount);


    Path dlFile = env.getDoclengthsData();
    if (fs.exists(dlFile)) {
      LOG.info("DocLength data exists: Skipping!");
      return 0;

View Full Code Here

0 1

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.