Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable


    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    resultFile.delete();
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8));
      for (String line : new FileLineIterable(originalFile, true)) {
        // 0 ratings are basically "no rating", ignore them (thanks h.9000)
        if (line.endsWith("\"0\"")) {
          continue;
        }
        // Delete replace anything that isn't numeric, or a semicolon delimiter. Make comma the delimiter.
View Full Code Here


      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = new ArrayList<Preference>();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf(',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf(',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

   
    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);
   
    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf(',');
View Full Code Here

  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowth<String> fp = new FPGrowth<String>();
   
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }
   
    final Map<Set<String>,Long> results = new HashMap<Set<String>,Long>();
   
    Set<String> returnableFeatures = new HashSet<String>();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");
   
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),

      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
View Full Code Here

  }

  private static List<DummyCandidate> loadPopulation(FileSystem fs, Path f) throws IOException {
    List<DummyCandidate> population = new ArrayList<DummyCandidate>();
    FSDataInputStream in = fs.open(f);
    for (String line : new FileLineIterable(in)) {
      population.add(StringUtils.<DummyCandidate>fromString(line));
    }
    return population;
  }
View Full Code Here

    assertEquals("files Size: " + files.length + " is not: " + 0, 0, files.length);
    BayesFileFormatter.collapse("animal", analyzer, input, Charsets.UTF_8, new File(out, "animal"));
    files = out.listFiles();
    assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
    int count = 0;
    for (String line : new FileLineIterable(files[0])) {
      assertTrue("line does not start with label", line.startsWith("animal"));
      count++;
    }
    assertEquals(count + " does not equal: " + WORDS.length, count, WORDS.length);
  }
View Full Code Here

          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
View Full Code Here

      resultFile.delete();
    }
    Writer writer = null;
    try {
      writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
      for (String line : new FileLineIterable(originalFile, false)) {
        int lastDelimiterStart = line.lastIndexOf(COLON_DELIMTER);
        if (lastDelimiterStart < 0) {
          throw new IOException("Unexpected input format on line: " + line);
        }
        String subLine = line.substring(0, lastDelimiterStart);
View Full Code Here

    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    resultFile.delete();
    Writer writer = null;
    try {
      writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
      for (String line : new FileLineIterable(originalFile, true)) {
        // 0 ratings are basically "no rating", ignore them (thanks h.9000)
        if (line.endsWith("\"0\"")) {
          continue;
        }
        // Delete replace anything that isn't numeric, or a semicolon delimiter. Make comma the delimiter.
View Full Code Here

      FPGrowth<String> fp1 = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();

    Map<Set<String>,Long> results1 = Maps.newHashMap();
   
    fp1.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 100000,
      returnableFeatures,
      new MapCollector(results1), new DummyUpdater());

    FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
    Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
    fp2.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),

      fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 100000,
        Sets.<String>newHashSet(),
      new MapCollector(initialResults2));

    Map<Set<String>, Long> results2;
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.iterator.FileLineIterable

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.