Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable


    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
   
    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      long movieID = Long.parseLong(line.substring(0, line.length() - 1)); // strip colon
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        if (++counter % 100000 == 0) {
View Full Code Here


        = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>();
      Collection<String> features = new HashSet<String>();

      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                new ContextStatusUpdater(null));
        } finally {
          Closeables.closeQuietly(writer);
        }
    } else {
      FPGrowth<String> fp = new FPGrowth<String>();
      Collection<String> features = new HashSet<String>();
      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
View Full Code Here

    assertEquals("files Size: " + files.length + " is not: " + 0, 0, files.length);
    BayesFileFormatter.collapse("animal", analyzer, input, Charsets.UTF_8, new File(out, "animal"));
    files = out.listFiles();
    assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
    int count = 0;
    for (String line : new FileLineIterable(files[0])) {
      assertTrue("line does not start with label", line.startsWith("animal"));
      count++;
    }
    assertEquals(count + " does not equal: " + WORDS.length, count, WORDS.length);
  }
View Full Code Here

     */
    HadoopUtil.delete(conf, outPath);
   
    Set<String> categories = new HashSet<String>();
    if (!catFile.isEmpty()) {
      for (String line : new FileLineIterable(new File(catFile))) {
        categories.add(line.trim().toLowerCase(Locale.ENGLISH));
      }
    }
   
    DefaultStringifier<Set<String>> setStringifier =
View Full Code Here

             + "org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this. People should keep track of how Hadoop conf
    // parameters can make or break a piece of code.
   
    Set<String> categories = new HashSet<String>();
    for (String line : new FileLineIterable(new File(catFile))) {
      categories.add(line.trim().toLowerCase(Locale.ENGLISH));
    }
   
    DefaultStringifier<Set<String>> setStringifier =
        new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
View Full Code Here

    indexItemIDMap = TasteHadoopUtils.readIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);

    String itemFilePathString = conf.get(ITEMS_FILE);
    if (itemFilePathString != null) {
      itemsToRecommendFor = new FastIDSet();
      for (String line : new FileLineIterable(HadoopUtil.openStream(new Path(itemFilePathString), conf))) {
        try {
          itemsToRecommendFor.add(Long.parseLong(line));
        } catch (NumberFormatException nfe) {
          log.warn("itemsFile line ignored: {}", line);
        }
View Full Code Here

    }
  }

  private FastByIDMap<String> buildMapping() throws IOException {
    FastByIDMap<String> mapping = new FastByIDMap<String>();
    for (String line : new FileLineIterable(dataFile)) {
      mapping.put(toLongID(line), line);
    }
    lastModified = dataFile.lastModified();
    return mapping;
  }
View Full Code Here

      try {
        inputStream = fs.open(input);
        inputStreamAgain = fs.open(input);
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                new ContextStatusUpdater(null));
      } finally {
        Closeables.close(writer, false);
        Closeables.close(inputStream, true);
        Closeables.close(inputStreamAgain, true);
      }
    } else {
      FPGrowth<String> fp = new FPGrowth<String>();


      inputStream = fs.open(input);
      inputStreamAgain = fs.open(input);
      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
View Full Code Here

        Path unqualifiedUsersFilePath = new Path(usersFilePathString);
        FileSystem fs = FileSystem.get(unqualifiedUsersFilePath.toUri(), jobConf);
        usersToRecommendFor = new FastIDSet();
        Path usersFilePath = unqualifiedUsersFilePath.makeQualified(fs);
        in = fs.open(usersFilePath);
        for (String line : new FileLineIterable(in)) {
          try {
            usersToRecommendFor.add(Long.parseLong(line));
          } catch (NumberFormatException nfe) {
            log.warn("usersFile line ignored: {}", line);
          }
View Full Code Here

      }

      String messageId = null;
      boolean inBody = false;
      Pattern quotedTextPattern = options.getQuotedTextPattern();
      for (String nextLine : new FileLineIterable(mboxFile, options.getCharset(), false)) {
        if (options.isStripQuotedText() && quotedTextPattern.matcher(nextLine).find()) {
          continue;
        }
        for (int i = 0; i < matchers.length; i++) {
          Matcher matcher = matchers[i];
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.iterator.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.