Package org.apache.mahout.common.iterator

Examples of org.apache.mahout.common.iterator.FileLineIterable


        Path unqualifiedItemsFilePath = new Path(itemFilePathString);
        FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
        itemsToRecommendFor = new FastIDSet();
        Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
        in = fs.open(itemsFilePath);
        for (String line : new FileLineIterable(in)) {
          itemsToRecommendFor.add(Long.parseLong(line));
        }
      }
    } finally {
      IOUtils.closeStream(in);
View Full Code Here


    }
  }

  private FastByIDMap<String> buildMapping() throws IOException {
    FastByIDMap<String> mapping = new FastByIDMap<String>();
    for (String line : new FileLineIterable(dataFile)) {
      mapping.put(toLongID(line), line);
    }
    lastModified = dataFile.lastModified();
    return mapping;
  }
View Full Code Here

    File input = new File(inputDir, "test.txt");
    params.set(PFPGrowth.INPUT, input.getAbsolutePath());
    params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
    Writer writer = Files.newWriter(input, Charsets.UTF_8);
    try {
      StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
        "retail.dat").openStream()), "\\s+");
      Collection<List<String>> transactions = new ArrayList<List<String>>();
     
      while (it.hasNext()) {
        Pair<List<String>,Long> next = it.next();
View Full Code Here

    }
  }
 
  @Test
  public void testRetailDataMinSup100() throws Exception {
    StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
      "retail_results_with_min_sup_100.dat").openStream()), "\\s+");
   
    Map<Set<String>,Long> expectedResults = new HashMap<Set<String>,Long>();
    while (it.hasNext()) {
      Pair<List<String>,Long> next = it.next();
View Full Code Here

   }


  static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws IOException {
    Map<Long,List<RecommendedItem>> recommendations = new HashMap<Long,List<RecommendedItem>>();
    Iterable<String> lineIterable = new FileLineIterable(file);
    for (String line : lineIterable) {

      String[] keyValue = line.split("\t");
      long userID = Long.parseLong(keyValue[0]);
      String[] tokens = keyValue[1].replaceAll("\\[", "")
View Full Code Here

    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
   
    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      long movieID = Long.parseLong(line.substring(0, line.length() - 1)); // strip colon
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        if (++counter % 100000 == 0) {
View Full Code Here

      resultFile.delete();
    }
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8));
      for (String line : new FileLineIterable(originalFile, false)) {
        int lastDelimiterStart = line.lastIndexOf(COLON_DELIMTER);
        if (lastDelimiterStart < 0) {
          throw new IOException("Unexpected input format on line: " + line);
        }
        String subLine = line.substring(0, lastDelimiterStart);
View Full Code Here

  private final FastByIDMap<TrackData> trackData;

  TrackItemSimilarity(File dataFileDirectory) throws IOException {
    trackData = new FastByIDMap<TrackData>();
    for (String line : new FileLineIterable(KDDCupDataModel.getTrackFile(dataFileDirectory))) {
      TrackData trackDatum = new TrackData(line);
      trackData.put(trackDatum.getTrackID(), trackDatum);
    }
  }
View Full Code Here

     */
    HadoopUtil.delete(conf, outPath);
   
    Set<String> categories = new HashSet<String>();
    if (catFile.length() > 0) {
      for (String line : new FileLineIterable(new File(catFile))) {
        categories.add(line.trim().toLowerCase(Locale.ENGLISH));
      }
    }
   
    DefaultStringifier<Set<String>> setStringifier =
View Full Code Here

   
    FPGrowth<String> fp = new FPGrowth<String>();
    Collection<String> features = new HashSet<String>();
   
    fp.generateTopKFrequentPatterns(
        new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
        fp.generateFList(
            new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
            minSupport),
        minSupport,
        maxHeapSize,
        features,
        new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.iterator.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.