Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable


  /**
   * Test Parallel FPGrowth on retail data using top-level runPFPGrowth() method
   */
  @Test
  public void testRetailDataMinSup100() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
View Full Code Here


  /**
   * Test Parallel FPG on retail data, running various stages individually
   */
  @Test
  public void testRetailDataMinSup100InSteps() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");  
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
View Full Code Here

     */
    HadoopUtil.delete(conf, outPath);
   
    Set<String> categories = Sets.newHashSet();
    if (!catFile.isEmpty()) {
      for (String line : new FileLineIterable(new File(catFile))) {
        categories.add(line.trim().toLowerCase(Locale.ENGLISH));
      }
    }
   
    Stringifier<Set<String>> setStringifier =
View Full Code Here

      String messageId = null;
      boolean inBody = false;
      Pattern quotedTextPattern = options.getQuotedTextPattern();

      for (String nextLine : new FileLineIterable(mailBoxInputStream, options.getCharset(), false, filename)) {
        if (!options.isStripQuotedText() || !quotedTextPattern.matcher(nextLine).find()) {
          for (int i = 0; i < matches.length; i++) {
            Matcher matcher = matches[i];
            matcher.reset(nextLine);
            if (matcher.matches()) {
View Full Code Here

   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (String line : new FileLineIterable(new File(Resources.getResource("wdbc/wdbc.data").getPath()))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          CDMutationTest.assertInRange(dl.getAttribute(index), dataset.getMin(index), dataset
              .getMax(index));
View Full Code Here

        boolean inBody = false;
        Matcher subjectMatcher = SUBJECT_PREFIX.matcher("");
        Matcher messageIdMatcher = MESSAGE_ID_PREFIX.matcher("");
        Matcher messageBoundaryMatcher = MESSAGE_START.matcher("");
       
        for (String nextLine : new FileLineIterable(current, charset, false)) {

          // subject may come before message ID
          subjectMatcher.reset(nextLine);
          if (subjectMatcher.matches()) {
            file.append(subjectMatcher.group(1)).append('\n');
View Full Code Here

      fs.listStatus(fst.getPath(),
                    new SequenceFilesFromCsvFilter(conf, prefix + Path.SEPARATOR + current.getName(),
                        this.options, writer));
    } else {
      InputStream in = fs.open(fst.getPath());
      for (CharSequence aFit : new FileLineIterable(in, charset, false)) {
        String[] columns = TAB.split(aFit);
        log.info("key : {}, value : {}", columns[keyColumn], columns[valueColumn]);
        String key = columns[keyColumn];
        String value = columns[valueColumn];
        writer.write(prefix + key, value);
View Full Code Here

      InputStream in = null;
      try {
        in = fs.open(fst.getPath());

        StringBuilder file = new StringBuilder();
        for (String aFit : new FileLineIterable(in, charset, false)) {
          file.append(aFit).append('\n');
        }
        String name = current.getName().equals(fst.getPath().getName())
            ? current.getName()
            : current.getName() + Path.SEPARATOR + fst.getPath().getName();
View Full Code Here

        Path unqualifiedUsersFilePath = new Path(usersFilePathString);
        FileSystem fs = FileSystem.get(unqualifiedUsersFilePath.toUri(), jobConf);
        usersToRecommendFor = new FastIDSet();
        Path usersFilePath = unqualifiedUsersFilePath.makeQualified(fs);
        in = fs.open(usersFilePath);
        for (String line : new FileLineIterable(in)) {
          usersToRecommendFor.add(Long.parseLong(line));
        }
      } finally {
        IOUtils.closeStream(in);
      }
View Full Code Here

             + "org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this. People should keep track of how Hadoop conf
    // parameters can make or break a piece of code.
   
    Set<String> categories = new HashSet<String>();
    for (String line : new FileLineIterable(new File(catFile))) {
      categories.add(line.trim().toLowerCase(Locale.ENGLISH));
    }
   
    DefaultStringifier<Set<String>> setStringifier =
        new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.iterator.FileLineIterable

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.