Examples of FileLineIterable


Examples of org.apache.mahout.common.iterator.FileLineIterable

    FPGrowth<String> fp1 = new FPGrowth<String>();

    final Map<Set<String>,Long> results1 = Maps.newHashMap();
   
    fp1.generateTopKFrequentPatterns(
                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                     fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                     .openStream()), "\\s+"), minSupport), minSupport, 1000000,
                                     returnableFeatures,
                                     new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                       @Override
                                         public void collect(String key, List<Pair<List<String>,Long>> value) {
         
                                         for (Pair<List<String>,Long> v : value) {
                                           List<String> l = v.getFirst();
                                           results1.put(new HashSet<String>(l), v.getSecond());
                                           System.out.println("found pat ["+v.getSecond()+"]: "+ v.getFirst());
                                         }
                                       }
       
                                     }, new StatusUpdater() {
       
                                         @Override
                                           public void update(String status) {}
                                       });

    FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
    final Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
    fp2.generateTopKFrequentPatterns(
                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                     fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                     .openStream()), "\\s+"), minSupport), minSupport, 1000000,
                                     new HashSet<String>(),
                                     new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                       @Override
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

      for (FileStatus fileStatus : fs.listStatus(verticesPath)) {
        InputStream in = null;
        try {
          in = HadoopUtil.openStream(fileStatus.getPath(), getConf());
          for (String line : new FileLineIterable(in)) {
            writer.append(new IntWritable(index++), new IntWritable(Integer.parseInt(line)));
          }
        } finally {
          Closeables.closeQuietly(in);
        }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

    File outputDir2 = getTestTempDir("frequentpatterns2");
    paramsImpl2.set(PFPGrowth.OUTPUT, outputDir2.getAbsolutePath());

    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = Lists.newArrayList();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf(',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf(',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

   
    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);
   
    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf(',');
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

      }

      String messageId = null;
      boolean inBody = false;
      Pattern quotedTextPattern = options.getQuotedTextPattern();
      for (String nextLine : new FileLineIterable(mboxFile, options.getCharset(), false)) {
        if (options.isStripQuotedText() && quotedTextPattern.matcher(nextLine).find()){
          continue;
        }
        for (int i = 0; i < matchers.length; i++) {
          Matcher matcher = matchers[i];
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

  }

  private static List<DummyCandidate> loadPopulation(FileSystem fs, Path f) throws IOException {
    List<DummyCandidate> population = Lists.newArrayList();
    FSDataInputStream in = fs.open(f);
    for (String line : new FileLineIterable(in)) {
      population.add(StringUtils.<DummyCandidate>fromString(line));
    }
    return population;
  }
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
   
    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      long movieID = Long.parseLong(line.substring(0, line.length() - 1)); // strip colon
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        if (++counter % 100000 == 0) {
View Full Code Here

Examples of org.apache.mahout.common.iterator.FileLineIterable

        = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>();
      Collection<String> features = new HashSet<String>();

      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                new ContextStatusUpdater(null));
        } finally {
          Closeables.closeQuietly(writer);
        }
    } else {
      FPGrowth<String> fp = new FPGrowth<String>();
      Collection<String> features = new HashSet<String>();
      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.