Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable


    MathHelper.assertMatrixEquals(expectedAdjacencyMatrix, actualAdjacencyMatrix);
    MathHelper.assertMatrixEquals(expectedTransitionMatrix, actualTransitionMatrix);

    Map<Integer,Double> rankPerVertex = Maps.newHashMapWithExpectedSize(numVertices);
    for (CharSequence line : new FileLineIterable(new File(outputDir, "part-m-00000"))) {
      String[] tokens = Iterables.toArray(Splitter.on("\t").split(line), String.class);
      rankPerVertex.put(Integer.parseInt(tokens[0]), Double.parseDouble(tokens[1]));
    }

    assertEquals(4, rankPerVertex.size());
View Full Code Here


    FPGrowthObj<String> fp = new FPGrowthObj<String>();
   
    String inputFilename = "FPGsynth.dat";
    int minSupport = 50;

    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
                                                                                                  inputFilename).openStream()), "\\s+");
    int patternCnt_10_13_1669 = 0;
    int patternCnt_10_13 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("10") && items.contains("13")) {
        patternCnt_10_13++;
        if (items.contains("1669")) {
          patternCnt_10_13_1669++;
        }
      }
    }
   
    if (patternCnt_10_13_1669 < minSupport)
      throw new IllegalStateException("the test is broken or data is missing ("
                                      + patternCnt_10_13_1669+", "
                                      + patternCnt_10_13+")");

    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> features_10_13 = new HashSet<String>();
    features_10_13.add("10");
    features_10_13.add("13");

    Set<String> returnableFeatures = new HashSet<String>();
    returnableFeatures.add("10");
    returnableFeatures.add("13");
    returnableFeatures.add("1669");
   
    fp.generateTopKFrequentPatterns(
                                    new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                    fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                   .openStream()), "\\s+"), minSupport), minSupport, 100000,
                                    returnableFeatures,
                                    new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                      @Override
View Full Code Here

    FPGrowth<String> fp1 = new FPGrowth<String>();

    final Map<Set<String>,Long> results1 = Maps.newHashMap();
   
    fp1.generateTopKFrequentPatterns(
                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                     fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                     .openStream()), "\\s+"), minSupport), minSupport, 1000000,
                                     returnableFeatures,
                                     new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                       @Override
                                         public void collect(String key, List<Pair<List<String>,Long>> value) {
         
                                         for (Pair<List<String>,Long> v : value) {
                                           List<String> l = v.getFirst();
                                           results1.put(new HashSet<String>(l), v.getSecond());
                                           System.out.println("found pat ["+v.getSecond()+"]: "+ v.getFirst());
                                         }
                                       }
       
                                     }, new StatusUpdater() {
       
                                         @Override
                                           public void update(String status) {}
                                       });

    FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
    final Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
    fp2.generateTopKFrequentPatterns(
                                     new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

                                     fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
                                                                                                     .openStream()), "\\s+"), minSupport), minSupport, 1000000,
                                     new HashSet<String>(),
                                     new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
                                       @Override
View Full Code Here

      for (FileStatus fileStatus : fs.listStatus(verticesPath)) {
        InputStream in = null;
        try {
          in = HadoopUtil.openStream(fileStatus.getPath(), getConf());
          for (String line : new FileLineIterable(in)) {
            writer.append(new IntWritable(index++), new IntWritable(Integer.parseInt(line)));
          }
        } finally {
          Closeables.closeQuietly(in);
        }
View Full Code Here

    File outputDir2 = getTestTempDir("frequentpatterns2");
    paramsImpl2.set(PFPGrowth.OUTPUT, outputDir2.getAbsolutePath());

    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
View Full Code Here

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = Lists.newArrayList();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf(',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf(',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

   
    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);
   
    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf(',');
View Full Code Here

      }

      String messageId = null;
      boolean inBody = false;
      Pattern quotedTextPattern = options.getQuotedTextPattern();
      for (String nextLine : new FileLineIterable(mboxFile, options.getCharset(), false)) {
        if (options.isStripQuotedText() && quotedTextPattern.matcher(nextLine).find()){
          continue;
        }
        for (int i = 0; i < matchers.length; i++) {
          Matcher matcher = matchers[i];
View Full Code Here

          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
View Full Code Here

  }

  private static List<DummyCandidate> loadPopulation(FileSystem fs, Path f) throws IOException {
    List<DummyCandidate> population = Lists.newArrayList();
    FSDataInputStream in = fs.open(f);
    for (String line : new FileLineIterable(in)) {
      population.add(StringUtils.<DummyCandidate>fromString(line));
    }
    return population;
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.iterator.FileLineIterable

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.