Examples of org.apache.mahout.common.iterator.FileLineIterable

Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable

org.apache.mahout.common.iterator.FileLineIterable
Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines. This assumes the text file's lines are delimited in a manner consistent with how {@link java.io.BufferedReader} defines lines.

      FPGrowth<String> fp1 = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();


    Map<Set<String>,Long> results1 = Maps.newHashMap();
    
    fp1.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),


      fp1.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 100000, 
      returnableFeatures,
      new MapCollector(results1), new DummyUpdater());


    FPGrowthObj<String> fp2 = new FPGrowthObj<String>();
    Map<Set<String>,Long> initialResults2 = Maps.newHashMap();
    fp2.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename).openStream()), "\\s+"),


      fp2.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource(inputFilename)
           .openStream()), "\\s+"), minSupport), minSupport, 100000,
        Sets.<String>newHashSet(),
      new MapCollector(initialResults2), new DummyUpdater());


    Map<Set<String>, Long> results2;

View Full Code Here

      InputStream in = null;
      try {
        in = fs.open(fst.getPath());


        StringBuilder file = new StringBuilder();
        for (String aFit : new FileLineIterable(in, getCharset(), false)) {
          file.append(aFit).append('\n');
        }
        String name = current.getName().equals(fst.getPath().getName())
            ? current.getName()
            : current.getName() + Path.SEPARATOR + fst.getPath().getName();

View Full Code Here

    }


    log.info("Reading probes...");
    List<Preference> probes = Lists.newArrayListWithExpectedSize(2817131);
    long currentMovieID = -1;
    for (String line : new FileLineIterable(new File(qualifyingTxt))) {
      if (line.contains(MOVIE_DENOTER)) {
        currentMovieID = Long.parseLong(line.replaceAll(MOVIE_DENOTER, ""));
      } else {
        long userID = Long.parseLong(SEPARATOR.split(line)[0]);
        probes.add(new GenericPreference(userID, currentMovieID, 0));
      }
    }
    log.info("{} probes read...", probes.size());


    log.info("Reading ratings, creating probe set at {}/probeSet/ratings.tsv ...", outputPath);
    writer = null;
    try {
      FSDataOutputStream outputStream = fs.create(new Path(outputPath, "probeSet/ratings.tsv"));
      writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8));


      int ratingsProcessed = 0;
      for (String line : new FileLineIterable(new File(judgingTxt))) {
        if (line.contains(MOVIE_DENOTER)) {
          currentMovieID = Long.parseLong(line.replaceAll(MOVIE_DENOTER, ""));
        } else {
          float rating = Float.parseFloat(SEPARATOR.split(line)[0]);
          Preference pref = probes.get(ratingsProcessed);

View Full Code Here

      resultFile.delete();
    }
    Writer writer = null;
    try {
      writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
      for (String line : new FileLineIterable(originalFile, false)) {
        int lastDelimiterStart = line.lastIndexOf(COLON_DELIMTER);
        if (lastDelimiterStart < 0) {
          throw new IOException("Unexpected input format on line: " + line);
        }
        String subLine = line.substring(0, lastDelimiterStart);

View Full Code Here

    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    resultFile.delete();
    Writer writer = null;
    try {
      writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
      for (String line : new FileLineIterable(originalFile, true)) {
        // 0 ratings are basically "no rating", ignore them (thanks h.9000)
        if (line.endsWith("\"0\"")) {
          continue;
        }
        // Delete anything that isn't numeric or a semicolon delimiter. Make comma the delimiter.

View Full Code Here


  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowth<String> fp = new FPGrowth<String>();
    
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }
    
    final Map<Set<String>,Long> results = Maps.newHashMap();
    
    Set<String> returnableFeatures = Sets.newHashSet();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");
    
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),


      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
        
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {

View Full Code Here

    File input = new File(inputDir, "test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
      
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();

View Full Code Here

  /**
   * Test Parallel FPGrowth on retail data using top-level runPFPGrowth() method
   */ 
  @Test
  public void testRetailDataMinSup100() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());

View Full Code Here

  /**
   * Test Parallel FPG on retail data, running various stages individually
   */ 
  @Test
  public void testRetailDataMinSup100InSteps() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");    
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());

View Full Code Here

    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());


    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
      
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.mahout.common.iterator.FileLineIterable

mia.recommender.ch05.GenderRescorer

org.apache.mahout.cf.taste.example.bookcrossing.BookCrossingDataModel

org.apache.mahout.cf.taste.example.grouplens.BooleanDataAndDateDataModel

org.apache.mahout.cf.taste.example.grouplens.GroupLensDataModel

org.apache.mahout.cf.taste.example.kddcup.track2.TrackItemSimilarity

org.apache.mahout.cf.taste.example.netflix.NetflixDataModel

org.apache.mahout.cf.taste.example.netflix.NetflixFileDataModel

org.apache.mahout.cf.taste.example.netflix.TransposeToByUser

org.apache.mahout.cf.taste.hadoop.example.als.netflix.NetflixDatasetConverter

org.apache.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.