Package org.apache.mahout.common

Examples of org.apache.mahout.common.FileLineIterable


   
    DataSet dataset = FileInfoParser.parseFile(fs, inpath);
    DataSet.initialize(dataset);

    DataLine dl = new DataLine();
    for (String line : new FileLineIterable(new File(Resources.getResource("wdbc/wdbc.data").getPath()))) {
      dl.set(line);
      for (int index = 0; index < dataset.getNbAttributes(); index++) {
        if (dataset.isNumerical(index)) {
          CDMutationTest.assertInRange(dl.getAttribute(index), dataset.getMin(index), dataset
              .getMax(index));
View Full Code Here


        current.listFiles(new PrefixAdditionFilter(prefix + File.separator + current.getName(), writer,
            charset));
      } else {
        try {
          StringBuilder file = new StringBuilder();
          for (String aFit : new FileLineIterable(current, charset, false)) {
            file.append(aFit).append('\n');
          }
          writer.write(prefix + File.separator + current.getName(), file.toString());
         
        } catch (FileNotFoundException e) {
View Full Code Here

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = new ArrayList<Preference>();
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    try {
      for (String line : new FileLineIterable(movieFile, true)) {
        int firstComma = line.indexOf(',');
        Integer userID = Integer.valueOf(line.substring(0, firstComma));
        int secondComma = line.indexOf(',', firstComma + 1);
        float rating = Float.parseFloat(line.substring(firstComma + 1, secondComma));
        prefs.add(new GenericPreference(userID, itemID, rating));
View Full Code Here

   
    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);
   
    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf(',');
View Full Code Here

      resultFile.delete();
    }
    PrintWriter writer = null;
    try {
      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
      for (String line : new FileLineIterable(originalFile, false)) {
        int lastDelimiterStart = line.lastIndexOf(COLON_DELIMTER);
        if (lastDelimiterStart < 0) {
          throw new IOException("Unexpected input format on line: " + line);
        }
        String subLine = line.substring(0, lastDelimiterStart);
View Full Code Here

    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
   
    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      long movieID = Long.parseLong(line.substring(0, line.length() - 1)); // strip colon
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        if (++counter % 100000 == 0) {
View Full Code Here

    }
  }

  private FastByIDMap<String> buildMapping() throws IOException {
    FastByIDMap<String> mapping = new FastByIDMap<String>();
    for (String line : new FileLineIterable(dataFile)) {
      mapping.put(toLongID(line), line);
    }
    lastModified = dataFile.lastModified();
    return mapping;
  }
View Full Code Here

          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
View Full Code Here

     */
    HadoopUtil.overwriteOutput(outPath);   
   
    Set<String> categories = new HashSet<String>();
    if (catFile.length() > 0) {
      for (String line : new FileLineIterable(new File(catFile))) {
        categories.add(line.trim().toLowerCase(Locale.ENGLISH));
      }
    }
   
    DefaultStringifier<Set<String>> setStringifier =
View Full Code Here

             + "org.apache.hadoop.io.serializer.WritableSerialization");
    // Dont ever forget this. People should keep track of how hadoop conf
    // parameters can make or break a piece of code
   
    Set<String> categories = new HashSet<String>();
    for (String line : new FileLineIterable(new File(catFile))) {
      categories.add(line.trim().toLowerCase(Locale.ENGLISH));
    }
   
    DefaultStringifier<Set<String>> setStringifier =
        new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(categories));
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.