Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.StringRecordIterator
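StringRecordIterator wraps an Iterable of text lines (typically a FileLineIterable) and splits each line on a regular-expression pattern, yielding one Pair<List<String>,Long> per line; the Long is a count of 1 for each raw line. A minimal, self-contained usage sketch ("transactions.dat" is a placeholder resource name, not one of the files used in the snippets below):

    import java.util.List;

    import com.google.common.io.Resources;
    import org.apache.mahout.common.Pair;
    import org.apache.mahout.common.iterator.FileLineIterable;
    import org.apache.mahout.common.iterator.StringRecordIterator;

    public class StringRecordIteratorExample {
      public static void main(String[] args) throws Exception {
        // One whitespace-delimited transaction per line of the classpath resource.
        StringRecordIterator it = new StringRecordIterator(
            new FileLineIterable(Resources.getResource("transactions.dat").openStream()), "\\s+");
        while (it.hasNext()) {
          Pair<List<String>,Long> record = it.next();
          System.out.println(record.getFirst() + " (count " + record.getSecond() + ")");
        }
      }
    }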


    // From a PFPGrowth test: inputDir, outputDir and params are defined by the test's setup.
    File input = new File(inputDir, "test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      // Read the bundled retail.dat resource, one whitespace-delimited transaction per line.
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = Lists.newArrayList();

      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
        transactions.add(next.getFirst()); // keep the token list; the paired count is always 1
      }

      // Re-serialize each transaction to the test input file.
      for (List<String> transaction : transactions) {
        String sep = "";
        // (snippet truncated)
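The write loop above is cut off by the listing. A plausible completion, assuming the usual empty-separator join idiom suggested by the sep variable (a sketch, not the original source):

      // Hypothetical completion of the truncated write loop:
      for (List<String> transaction : transactions) {
        String sep = "";
        for (String item : transaction) {
          writer.write(sep + item);
          sep = " ";
        }
        writer.write('\n');
      }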


  /**
   * Test Parallel FPGrowth on retail data using the top-level runPFPGrowth() method
   */
  @Test
  public void testRetailDataMinSup100() throws Exception {
    // Each line of the expected-results file holds one frequent pattern, with the
    // pattern's support count as the final whitespace-delimited token.
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
      // Pull off the support token and strip its single-character wrappers before parsing.
      String supportString = items.remove(items.size() - 1);
      Long support = Long.parseLong(supportString.substring(1, supportString.length() - 1));
      expectedResults.put(Sets.newHashSet(items), support);
    }
    // (snippet truncated)
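For reference, the same support-parsing idiom in isolation; the sample line and its parenthesized support are illustrative assumptions about the .dat format, not taken from the file:

    // Hypothetical results line; the delimiter characters around the support are assumed.
    String line = "48 39 (967)";
    List<String> items = new ArrayList<String>(Arrays.asList(line.split("\\s+")));
    String supportString = items.remove(items.size() - 1);        // "(967)"
    long support = Long.parseLong(
        supportString.substring(1, supportString.length() - 1));  // 967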

  /**
   * Test Parallel FPGrowth on retail data, running the various stages individually
   */
  @Test
  public void testRetailDataMinSup100InSteps() throws Exception {
    // Expected-results parsing, identical to testRetailDataMinSup100() above.
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
    Map<Set<String>,Long> expectedResults = Maps.newHashMap();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = Lists.newArrayList(next.getFirst());
      String supportString = items.remove(items.size() - 1);
      Long support = Long.parseLong(supportString.substring(1, supportString.length() - 1));
      expectedResults.put(Sets.newHashSet(items), support);
    }
    // (snippet truncated)

    // input, encoding, pattern, minSupport, maxHeapSize and writer are defined by the
    // surrounding driver code.
    FPGrowth<String> fp = new FPGrowth<String>();
    Collection<String> features = new HashSet<String>();

    fp.generateTopKFrequentPatterns(
        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
        fp.generateFList(
            new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
            minSupport),
        minSupport,
        maxHeapSize,
        features,
        new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
        // (snippet truncated)
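Note that the transaction file is read twice here: generateFList() consumes the iterator it is given while tallying per-item frequencies, so a second, freshly constructed StringRecordIterator is passed in for the mining pass (the iterators are single-use).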

  @Test
  public void testSpecificCaseFromRetailDataMinSup500() throws IOException {
    FPGrowth<String> fp = new FPGrowth<String>();

    // Brute-force pass: count the transactions that contain all of items 41, 36 and 39.
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail.dat").openStream()), "\\s+");
    int pattern_41_36_39 = 0;
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
      List<String> items = next.getFirst();
      if (items.contains("41") && items.contains("36") && items.contains("39")) {
        pattern_41_36_39++;
      }
    }

    final Map<Set<String>,Long> results = new HashMap<Set<String>,Long>();

    Set<String> returnableFeatures = new HashSet<String>();
    returnableFeatures.add("41");
    returnableFeatures.add("36");
    returnableFeatures.add("39");

    // Mine with minSupport = 500 and maxHeapSize = 1000, returning only patterns
    // over the three features above.
    fp.generateTopKFrequentPatterns(
      new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat").openStream()), "\\s+"),
      fp.generateFList(new StringRecordIterator(new FileLineIterable(Resources.getResource("retail.dat")
          .openStream()), "\\s+"), 500), 500, 1000, returnableFeatures,
      new OutputCollector<String,List<Pair<List<String>,Long>>>() {

        @Override
        public void collect(String key, List<Pair<List<String>,Long>> value) {
          // (snippet truncated)
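The collect() body is cut off by the listing. A plausible body (a sketch, not the original source) would record each returned pattern and its support in the results map for comparison against the brute-force count:

          // Hypothetical body for the truncated collect() above:
          for (Pair<List<String>,Long> v : value) {
            results.put(new HashSet<String>(v.getFirst()), v.getSecond());
          }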
