Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.StringRecordIterator
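StringRecordIterator wraps an Iterable of text lines (typically a FileLineIterable) and splits each line on a regular-expression pattern, yielding one Pair<List<String>,Long> per line; the Long is a count of 1 for each raw line. A minimal, self-contained usage sketch ("transactions.dat" is a placeholder resource name, not one of the files used in the snippets below):

    import java.util.List;

    import com.google.common.io.Resources;
    import org.apache.mahout.common.Pair;
    import org.apache.mahout.common.iterator.FileLineIterable;
    import org.apache.mahout.common.iterator.StringRecordIterator;

    public class StringRecordIteratorExample {
      public static void main(String[] args) throws Exception {
        // One whitespace-delimited transaction per line of the classpath resource.
        StringRecordIterator it = new StringRecordIterator(
            new FileLineIterable(Resources.getResource("transactions.dat").openStream()), "\\s+");
        while (it.hasNext()) {
          Pair<List<String>,Long> record = it.next();
          System.out.println(record.getFirst() + " (count " + record.getSecond() + ")");
        }
      }
    }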


    // From a PFPGrowth test: inputDir, outputDir and params are defined by the test's setup.
    File input = new File(inputDir, "test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      // Read the bundled retail.dat resource, one whitespace-delimited transaction per line.
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();

      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
        transactions.add(next.getFirst()); // keep the token list; the paired count is always 1
      }

      // Re-serialize each transaction to the test input file.
      for (List<String> transaction : transactions) {
        String sep = "";
        // (snippet truncated)
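The write loop above is cut off by the listing. A plausible completion, assuming the usual empty-separator join idiom suggested by the sep variable (a sketch, not the original source):

      // Hypothetical completion of the truncated write loop:
      for (List<String> transaction : transactions) {
        String sep = "";
        for (String item : transaction) {
          writer.write(sep + item);
          sep = " ";
        }
        writer.write('\n');
      }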


  /**
   * Test Parallel FPGrowth on retail data using the top-level runPFPGrowth() method
   */
  @Test
  public void testRetailDataMinSup100() throws Exception {
    // Each line of the expected-results file holds one frequent pattern, with the
    // pattern's support count as the final whitespace-delimited token.
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
      // Pull off the support token and strip its single-character wrappers before parsing.
      String supportString = items.remove(items.size() - 1);
      Long support = Long.parseLong(supportString.substring(1, supportString.length() - 1));
      expectedResults.put(Sets.newHashSet(items), support);
    }
    // (snippet truncated)
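For reference, the same support-parsing idiom in isolation; the sample line and its parenthesized support are illustrative assumptions about the .dat format, not taken from the file:

    // Hypothetical results line; the delimiter characters around the support are assumed.
    String line = "48 39 (967)";
    List<String> items = new ArrayList<String>(Arrays.asList(line.split("\\s+")));
    String supportString = items.remove(items.size() - 1);        // "(967)"
    long support = Long.parseLong(
        supportString.substring(1, supportString.length() - 1));  // 967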

  /**
   * Test Parallel FPGrowth on retail data, running the various stages individually
   */
  @Test
  public void testRetailDataMinSup100InSteps() throws Exception {
    // Expected-results parsing, identical to testRetailDataMinSup100() above.
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
      String supportString = items.remove(items.size() - 1);
      Long support = Long.parseLong(supportString.substring(1, supportString.length() - 1));
      expectedResults.put(Sets.newHashSet(items), support);
    }
    // (snippet truncated)

    // input, encoding, pattern, minSupport, maxHeapSize and writer are defined by the
    // surrounding driver code.
    FPGrowth<String> fp = new FPGrowth<String>();
    Collection<String> features = new HashSet<String>();

    fp.generateTopKFrequentPatterns(
        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
        fp.generateFList(
            new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
            minSupport),
        minSupport,
        maxHeapSize,
        features,
        new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
        // (snippet truncated)
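Note that the transaction file is read twice here: generateFList() consumes the iterator it is given while tallying per-item frequencies, so a second, freshly constructed StringRecordIterator is passed in for the mining pass (the iterators are single-use).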

  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowth<String> fp = new FPGrowth<String>();

    // Brute-force pass: count the transactions that contain all of items 41, 36 and 39.
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }

    final Map<Set<String>,Long> results = new HashMap<Set<String>,Long>();

    Set<String> returnableFeatures = new HashSet<String>();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");

    // Mine with minSupport = 500 and maxHeapSize = 1000, returning only patterns
    // over the three features above.
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),
      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {

        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
          // (snippet truncated)
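The collect() body is cut off by the listing. A plausible body (a sketch, not the original source) would record each returned pattern and its support in the results map for comparison against the brute-force count:

          // Hypothetical body for the truncated collect() above:
          for (Pair<List<String>,Long> v : value) {
            results.put(new HashSet<String>(v.getFirst()), v.getSecond());
          }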
