Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable
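
FileLineIterable exposes a file or InputStream as an Iterable&lt;String&gt; over its lines; every snippet below layers something on top of that, most commonly a StringRecordIterator that splits each line into a transaction. A minimal, self-contained sketch of direct usage (the file name and the counting logic are illustrative only, not taken from the snippets):

import java.io.File;
import java.io.IOException;

import com.google.common.base.Charsets;
import org.apache.mahout.common.iterator.FileLineIterable;

public class FileLineIterableExample {
  public static void main(String[] args) throws IOException {
    // "transactions.dat" is a placeholder path; the third constructor argument
    // controls whether the first line (e.g. a header) is skipped.
    int nonEmptyLines = 0;
    for (String line : new FileLineIterable(new File("transactions.dat"), Charsets.UTF_8, false)) {
      if (!line.isEmpty()) {
        nonEmptyLines++;
      }
    }
    System.out.println(nonEmptyLines + " non-empty lines read");
  }
}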


    File outputDir2 = getTestTempDir("frequentpatterns2");
    paramsImpl2.set(PFPGrowth.OUTPUT, outputDir2.getAbsolutePath());

    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
View Full Code Here


  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowthObj<String> fp = new FPGrowthObj<String>();
   
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    // count the transactions that contain items 41, 36 and 39 together
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }
   
    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> returnableFeatures = Sets.newHashSet();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");
   
    // mine the top patterns over items 41, 36 and 39 (minSupport = 500, maxHeapSize = 1000)
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),
      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
View Full Code Here
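
The call above is cut off inside the anonymous OutputCollector. A hedged sketch of how its body typically finishes, assuming the collected patterns are simply stored in the results map declared earlier (variable names follow the snippet; the real call may also take a trailing status-updater argument, as in the driver example near the end of this page):

        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
          // record every returned pattern together with its support count
          for (Pair<List<String>,Long> pattern : value) {
            results.put(Sets.newHashSet(pattern.getFirst()), pattern.getSecond());
          }
        }
      });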

  // Same scenario as the previous example, but exercising the FPGrowth
  // implementation rather than the fpgrowth2 FPGrowthObj variant.
  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowth<String> fp = new FPGrowth<String>();
   
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }
   
    final Map<Set<String>,Long> results = Maps.newHashMap();
   
    Set<String> returnableFeatures = Sets.newHashSet();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");
   
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),
      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {
       
        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
View Full Code Here

    File input = new File(inputDir, "test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
View Full Code Here
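
Both PFPGrowth setup snippets (this one and the first one on this page) are cut at the same point; after collecting the transactions they write them back out to test.txt so the PFPGrowth job has a plain-text input file. A hedged sketch of that remainder, assuming one whitespace-separated transaction per line (the exact output format is not visible in the excerpt):

        transactions.add(next.getFirst());
      }
      // assumption: items are re-joined with single spaces, one transaction per line
      for (List<String> transaction : transactions) {
        StringBuilder sb = new StringBuilder();
        String sep = "";
        for (String item : transaction) {
          sb.append(sep).append(item);
          sep = " ";
        }
        writer.write(sb.append('\n').toString());
      }
    } finally {
      writer.close();
    }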

  /**
   * Test Parallel FPGrowth on retail data using top-level runPFPGrowth() method
   */
  @Test
  public void testRetailDataMinSup100() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
View Full Code Here
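
This loop and the one in the next snippet are truncated at the same point while building the map of expected patterns. A hedged sketch of the usual continuation, assuming each line of retail_results_with_min_sup_100.dat lists a pattern's items followed by its support count as the last token (the file format itself is not shown in this excerpt):

      // assumption: the last token on each line is the pattern's support count
      String supportToken = items.remove(items.size() - 1);
      long support = Long.parseLong(supportToken.replaceAll("[^0-9]", ""));
      expectedResults.put(Sets.newHashSet(items), support);
    }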

  /**
   * Test Parallel FPG on retail data, running various stages individually
   */
  @Test
  public void testRetailDataMinSup100InSteps() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");   
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
View Full Code Here

    indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);

    String itemFilePathString = conf.get(ITEMS_FILE);
    if (itemFilePathString != null) {
      itemsToRecommendFor = new FastIDSet();
      for (String line : new FileLineIterable(HadoopUtil.openStream(new Path(itemFilePathString), conf))) {
        try {
          itemsToRecommendFor.add(Long.parseLong(line));
        } catch (NumberFormatException nfe) {
          log.warn("itemsFile line ignored: {}", line);
        }
View Full Code Here

    }

    log.info("Reading probes...");
    List<Preference> probes = Lists.newArrayListWithExpectedSize(2817131);
    long currentMovieID = -1;
    for (String line : new FileLineIterable(new File(qualifyingTxt))) {
      if (line.contains(MOVIE_DENOTER)) {
        currentMovieID = Long.parseLong(line.replaceAll(MOVIE_DENOTER, ""));
      } else {
        long userID = Long.parseLong(SEPARATOR.split(line)[0]);
        probes.add(new GenericPreference(userID, currentMovieID, 0));
      }
    }
    log.info("{} probes read...", probes.size());

    log.info("Reading ratings, creating probe set at {}/probeSet/ratings.tsv ...", outputPath);
    writer = null;
    try {
      FSDataOutputStream outputStream = fs.create(new Path(outputPath, "probeSet/ratings.tsv"));
      writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8));

      int ratingsProcessed = 0;
      for (String line : new FileLineIterable(new File(judgingTxt))) {
        if (line.contains(MOVIE_DENOTER)) {
          currentMovieID = Long.parseLong(line.replaceAll(MOVIE_DENOTER, ""));
        } else {
          float rating = Float.parseFloat(SEPARATOR.split(line)[0]);
          Preference pref = probes.get(ratingsProcessed);
View Full Code Here
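
The probe-reading code above relies on two constants defined elsewhere in its class. A hedged sketch of what they plausibly look like, assuming the standard Netflix data layout in which a line such as "123:" introduces movie 123 and rating lines are comma-separated fields:

  // assumed declarations, not shown in the excerpt (Pattern is java.util.regex.Pattern)
  private static final String MOVIE_DENOTER = ":";
  private static final Pattern SEPARATOR = Pattern.compile(",");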

        Path unqualifiedUsersFilePath = new Path(usersFilePathString);
        FileSystem fs = FileSystem.get(unqualifiedUsersFilePath.toUri(), jobConf);
        usersToRecommendFor = new FastIDSet();
        Path usersFilePath = unqualifiedUsersFilePath.makeQualified(fs);
        in = fs.open(usersFilePath);
        for (String line : new FileLineIterable(in)) {
          try {
            usersToRecommendFor.add(Long.parseLong(line));
          } catch (NumberFormatException nfe) {
            log.warn("usersFile line ignored: {}", line);
          }
View Full Code Here

      org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String> fp
        = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>();
      Collection<String> features = new HashSet<String>();

      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                new ContextStatusUpdater(null));
      } finally {
        Closeables.closeQuietly(writer);
      }
    } else {
      FPGrowth<String> fp = new FPGrowth<String>();
      Collection<String> features = new HashSet<String>();
      try {
        fp.generateTopKFrequentPatterns(
                new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                fp.generateFList(
                        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                        minSupport),
                minSupport,
                maxHeapSize,
                features,
                new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
View Full Code Here
