Package ivory.core.data.dictionary

Examples of ivory.core.data.dictionary.DefaultFrequencySortedDictionary


        en2DeProbs = new TTable_monolithic_IFAs(fs2, new Path(e2fttableFile), true);
      } catch (IOException e) {
        e.printStackTrace();
     

      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);

      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);

      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
View Full Code Here


        String termidsFile = env.getIndexTermIdsData();
        String idToTermFile = env.getIndexTermIdMappingData();

        // Take a different code path if we're in standalone mode.
        if (conf.get("mapred.job.tracker").equals("local")) {
          dictionary = new DefaultFrequencySortedDictionary(new Path(termsFile),
              new Path(termidsFile), new Path(idToTermFile), FileSystem.getLocal(conf));
        } else {
          // We need to figure out which file in the DistributeCache is which...
          Map<String, Path> pathMapping = Maps.newHashMap();
          Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
          for (Path p : localFiles) {
            LOG.info("In DistributedCache: " + p);
            if (p.toString().contains(termsFile)) {
              pathMapping.put(termsFile, p);
            } else if (p.toString().contains(termidsFile)) {
              pathMapping.put(termidsFile, p);
            } else if (p.toString().contains(idToTermFile)) {
              pathMapping.put(idToTermFile, p);
            }
          }

          LOG.info(" - terms: " + pathMapping.get(termsFile));
          LOG.info(" - id: " + pathMapping.get(termidsFile));
          LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));

          dictionary = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
              pathMapping.get(termidsFile), pathMapping.get(idToTermFile),
              FileSystem.getLocal(context.getConfiguration()));
        }
      } catch (Exception e) {
        e.printStackTrace();
View Full Code Here

      LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));
      LOG.info(" - df data: " + pathMapping.get(dfFile));
      LOG.info(" - dl data: " + pathMapping.get(dlFile));

      try{
        dict = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
            pathMapping.get(termidsFile), pathMapping.get(idToTermFile), FileSystem.getLocal(conf));
        dfTable = new DfTableArray(pathMapping.get(dfFile), FileSystem.getLocal(conf));
      } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Error loading Terms File for dictionary from "+localFiles[0]);
View Full Code Here

        LOG.info(" - id: " + pathMapping.get(termidsFile));
        LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));
        LOG.info(" - df data: " + pathMapping.get(dfFile));

        try{
          dict = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
              pathMapping.get(termidsFile), pathMapping.get(idToTermFile), FileSystem.getLocal(conf));
          dfTable = new DfTableArray(pathMapping.get(dfFile), FileSystem.getLocal(conf));
        } catch (Exception e) {
          e.printStackTrace();
          throw new RuntimeException("Error loading Terms File for dictionary from "+localFiles[0]);
View Full Code Here

        en2DeProbs = new TTable_monolithic_IFAs(fs2, new Path(e2fttableFile), true);
      } catch (IOException e) {
        e.printStackTrace();
     

      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);

      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);

      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
View Full Code Here

        idToTermFile = idToTermFile.substring(idToTermFile.lastIndexOf("/") + 1);

        LOG.info("Looking for the following files in dcache: " + termsFile + ", " + termidsFile + ", " + idToTermFile);
        // Take a different code path if we're in standalone mode.
        if (conf.get("mapred.job.tracker").equals("local")) {
          dictionary = new DefaultFrequencySortedDictionary(new Path(termsFile),
              new Path(termidsFile), new Path(idToTermFile), FileSystem.getLocal(conf));
        } else {
          // We need to figure out which file in the DistributeCache is which...
          Map<String, Path> pathMapping = Maps.newHashMap();
          Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
          for (Path p : localFiles) {
            LOG.info("In DistributedCache: " + p);
            if (p.toString().contains(termsFile)) {
              pathMapping.put(termsFile, p);
            } else if (p.toString().contains(termidsFile)) {
              pathMapping.put(termidsFile, p);
            } else if (p.toString().contains(idToTermFile)) {
              pathMapping.put(idToTermFile, p);
            }
          }

          LOG.info(" - terms: " + pathMapping.get(termsFile));
          LOG.info(" - id: " + pathMapping.get(termidsFile));
          LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));

    String s = localFiles.length + " " + localFiles[0].toString() + " " + localFiles[1].toString() + " " + localFiles[2].toString();
    if (pathMapping.get(termsFile) == null ) {

        throw new RuntimeException(s);
    }

          dictionary = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
              pathMapping.get(termidsFile), pathMapping.get(idToTermFile), FileSystem.getLocal(conf));
        }
      } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Error initializing data!", e);
View Full Code Here

TOP

Related Classes of ivory.core.data.dictionary.DefaultFrequencySortedDictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.