Package ivory.core.data.stat

Examples of ivory.core.data.stat.DocLengthTable


    ///////Configuration setup

    conf.set("Ivory.IndexPath", indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    DocLengthTable mDLTable;
    try {
      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average doc len.");
    LOG.info(mDLTable.getDocCount()+" is num docs.");

    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt("Ivory.CollectionDocumentCount", env.readCollectionDocumentCount());
   
    conf.setNumMapTasks(300);     
    conf.setNumReduceTasks(0);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
View Full Code Here


    LOG.info("Input path: " + inputPath);
    LOG.info("Target-language stopwords: " + eStopwords);
    LOG.info("Target-language stemmed stopwords: " + eStemmedStopwords);
    LOG.info("Target-language tokenizer model: " + eTokenizerModel);

    DocLengthTable mDLTable;
    try {
      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average source-language document length.");
    LOG.info(targetEnv.readCollectionDocumentCount()+" is number of target-language docs. We use the target-side DF table so we set #docs to this value in our scoring model.");

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt(Constants.CollectionDocumentCount, targetEnv.readCollectionDocumentCount());
    conf.set(Constants.Language, getConf().get("Ivory.Lang"));
    conf.set("Ivory.Normalize", getConf().get("Ivory.Normalize"));
    conf.set("Ivory.MinNumTerms", getConf().get("Ivory.MinNumTerms"));
View Full Code Here

    LOG.info("Input path: " + inputPath);
    LOG.info("Target-language stopwords: " + eStopwords);
    LOG.info("Target-language stemmed stopwords: " + eStemmedStopwords);
    LOG.info("Target-language tokenizer model: " + eTokenizerModel);

    DocLengthTable mDLTable;
    try {
      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average source-language document length.");
    LOG.info(targetEnv.readCollectionDocumentCount()+" is number of target-language docs. We use the target-side DF table so we set #docs to this value in our scoring model.");

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt(Constants.CollectionDocumentCount, targetEnv.readCollectionDocumentCount());
    conf.set(Constants.Language, getConf().get("Ivory.Lang"));
    conf.set("Ivory.Normalize", getConf().get("Ivory.Normalize"));
    conf.set("Ivory.MinNumTerms", getConf().get("Ivory.MinNumTerms"));
View Full Code Here

    LOG.info("Preparing to build document vectors using " + scoringModel);
    LOG.info("Document vectors to be stored in " + outputPath);
    LOG.info("CollectionName: " + collectionName);
    LOG.info("Input path: " + inputPath);

    DocLengthTable mDLTable;
    try {
      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average source-language document length.");
    LOG.info(targetEnv.readCollectionDocumentCount()+" is number of target-language docs. We use the target-side DF table so we set #docs to this value in our scoring model.");

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt(Constants.CollectionDocumentCount, targetEnv.readCollectionDocumentCount());
    conf.set(Constants.Language, getConf().get("Ivory.Lang"));
    conf.set("Ivory.Normalize", getConf().get("Ivory.Normalize"));
    conf.set("Ivory.MinNumTerms", getConf().get("Ivory.MinNumTerms"));
View Full Code Here

    LOG.info("Preparing to build document vectors using " + scoringModel);
    LOG.info("Document vectors to be stored in " + outputPath);
    LOG.info("CollectionName: " + collectionName);
    LOG.info("Input path: " + inputPath);

    DocLengthTable mDLTable;
    try {
      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average source-language document length.");
    LOG.info(targetEnv.readCollectionDocumentCount()+" is number of target-language docs. We use the target-side DF table so we set #docs to this value in our scoring model.");

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt(Constants.CollectionDocumentCount, targetEnv.readCollectionDocumentCount());
    conf.set(Constants.Language, getConf().get("Ivory.Lang"));
    conf.set("Ivory.Normalize", getConf().get("Ivory.Normalize"));
    conf.set("Ivory.MinNumTerms", getConf().get("Ivory.MinNumTerms"));
View Full Code Here

TOP

Related Classes of ivory.core.data.stat.DocLengthTable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.