Package ivory.core.data.dictionary

Examples of ivory.core.data.dictionary.DefaultCachedFrequencySortedDictionary


    // Construct the postings forward index from indexPath and fs.
    postingsIndex = new IntPostingsForwardIndex(indexPath, fs);
    LOG.info(" - Number of terms: " + readCollectionTermCount());
    LOG.info("Done!");

    // Build the term to term id mapping from the three index data files (terms, term ids,
    // and term id mapping). NOTE(review): 0.2f is presumably the fraction of the dictionary
    // to keep cached -- confirm against DefaultCachedFrequencySortedDictionary's constructor.
    try {
      termidMap = new DefaultCachedFrequencySortedDictionary(new Path(getIndexTermsData()), new Path(getIndexTermIdsData()),
          new Path(getIndexTermIdMappingData()), 0.2f,  fs);
    } catch (Exception e) {
      // Any failure is rethrown as a ConfigurationException carrying the original exception
      // as its cause.
      throw new ConfigurationException("Error initializing term to term id mapping!", e);
    }
View Full Code Here


        String termidsFile = env.getIndexTermIdsData();
        String idToTermFile = env.getIndexTermIdMappingData();

        // Take a different code path if we're in standalone mode.
        if (conf.get("mapred.job.tracker").equals("local")) {
          dictionary = new DefaultCachedFrequencySortedDictionary(new Path(termsFile),
              new Path(termidsFile), new Path(idToTermFile), 0.3f, FileSystem.getLocal(conf));
        } else {
          // We need to figure out which file in the DistributeCache is which...
          Map<String, Path> pathMapping = Maps.newHashMap();
          Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
          for (Path p : localFiles) {
            LOG.info("In DistributedCache: " + p);
            if (p.toString().contains(termsFile)) {
              pathMapping.put(termsFile, p);
            } else if (p.toString().contains(termidsFile)) {
              pathMapping.put(termidsFile, p);
            } else if (p.toString().contains(idToTermFile)) {
              pathMapping.put(idToTermFile, p);
            }
          }

          LOG.info(" - terms: " + pathMapping.get(termsFile));
          LOG.info(" - id: " + pathMapping.get(termidsFile));
          LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));

          dictionary = new DefaultCachedFrequencySortedDictionary(pathMapping.get(termsFile),
              pathMapping.get(termidsFile), pathMapping.get(idToTermFile),
              0.3f, FileSystem.getLocal(context.getConfiguration()));
        }
      } catch (Exception e) {
        e.printStackTrace();
View Full Code Here

        gs.loadDFStats(localFiles[1], fs);
        gs.loadCFStats(localFiles[2], fs);

        String indexPath = job.get("Ivory.IndexPath");
        sLogger.info("loading TermIdMap from " + indexPath);
        mTermIdMap = new DefaultCachedFrequencySortedDictionary(localFiles[3], localFiles[4],
            localFiles[5], 0.2f, fs);
      } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException("Error loading global term stats!");
      }
View Full Code Here

TOP

Related Classes of ivory.core.data.dictionary.DefaultCachedFrequencySortedDictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.