Package org.apache.mahout.cf.taste.impl.common

Examples of org.apache.mahout.cf.taste.impl.common.FileLineIterable


  @Override
  protected void reload() {
    bookMap = new FastMap<String, Book>(5001);
    userDataMap = new FastMap<String, String[]>(5001);

    for (String line : new FileLineIterable(booksFile, true)) {
      String[] tokens = tokenizeLine(line, 5);
      if (tokens != null) {
        String id = tokens[0];
        bookMap.put(id, new Book(id, tokens[1], tokens[2], Integer.parseInt(tokens[3]), tokens[4]));
      }
    }
    for (String line : new FileLineIterable(usersFile, true)) {
      String[] tokens = tokenizeLine(line, 3);
      if (tokens != null) {
        String id = tokens[0];
        userDataMap.put(id, new String[] { tokens[1], tokens[2] });
      }
View Full Code Here


    File resultFile = new File(new File(System.getProperty("java.io.tmpdir")), "taste.bookcrossing.txt");
    if (!resultFile.exists()) {
      PrintWriter writer = null;
      try {
        writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
        for (String line : new FileLineIterable(originalFile, true)) {
          if (line.indexOf(',') >= 0) {
            // crude hack to work around corruptions in data file -- some bad lines with commas in them
            continue;
          }
          String convertedLine = line.replace(';', ',').replace("\"", "");
View Full Code Here

    return id + ":" + title;
  }

  static List<NetflixMovie> readMovies(File dataDirectory) {
    List<NetflixMovie> movies = new ArrayList<NetflixMovie>(17770);
    for (String line : new FileLineIterable(new File(dataDirectory, "movie_titles.txt"), false)) {
      int firstComma = line.indexOf((int) ',');
      int id = Integer.parseInt(line.substring(0, firstComma));
      int secondComma = line.indexOf((int) ',', firstComma + 1);
      String title = line.substring(secondComma + 1);
      movies.add(new NetflixMovie(id, title));
View Full Code Here

      itemIDPadded.insert(0, '0');
    }
    List<Preference> prefs = new ArrayList<Preference>();
    Item movie = getItem(itemID);
    File movieFile = new File(new File(dataDirectory, "training_set"), "mv_00" + itemIDPadded + ".txt");
    for (String line : new FileLineIterable(movieFile, true)) {
      int firstComma = line.indexOf((int) ',');
      Integer userID = Integer.valueOf(line.substring(0, firstComma));
      int secondComma = line.indexOf((int) ',', firstComma + 1);
      double rating = Double.parseDouble(line.substring(firstComma + 1, secondComma));
      prefs.add(new GenericPreference(getUser(userID), movie, rating));
View Full Code Here

    Map<String, List<String>> byUserEntryCache = new FastMap<String, List<String>>(100000);

    for (File byItemFile : byItemDirectory.listFiles()) {
      log.info("Processing {}", byItemFile);
      Iterator<String> lineIterator = new FileLineIterable(byItemFile, false).iterator();
      String line = lineIterator.next();
      String movieIDString = line.substring(0, line.length() - 1);
      while (lineIterator.hasNext()) {
        line = lineIterator.next();
        int firstComma = line.indexOf((int) ',');
View Full Code Here

      convertedMoviesFile = convertGLFile(moviesFile, false);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe); // better way?
    }
    movieMap = new FastMap<String, Movie>(5001);
    for (String line : new FileLineIterable(convertedMoviesFile, false)) {
      String[] tokens = line.split(",");
      String id = tokens[0];
      movieMap.put(id, new Movie(id, tokens[1], tokens[2]));
    }
    super.reload();
View Full Code Here

                                        (ratings ? "ratings" : "movies") + ".txt");
    if (!resultFile.exists()) {
      PrintWriter writer = null;
      try {
        writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
        for (String line : new FileLineIterable(originalFile, false)) {
          String convertedLine;
          if (ratings) {
            // toss the last column of data, which is a timestamp we don't want
            convertedLine = line.substring(0, line.lastIndexOf("::")).replace("::", ",");
          } else {
View Full Code Here

    Map<Integer, List<Preference>> userIDPrefMap = new FastMap<Integer, List<Preference>>();

    int counter = 0;
    FilenameFilter filenameFilter = new MovieFilenameFilter();
    for (File movieFile : new File(dataDirectory, "training_set").listFiles(filenameFilter)) {
      Iterator<String> lineIterator = new FileLineIterable(movieFile, false).iterator();
      String line = lineIterator.next();
      int movieID = Integer.parseInt(line.substring(0, line.length() - 1)); // strip colon
      NetflixMovie movie = movies.get(movieID - 1);
      if (movie == null) {
        throw new IllegalArgumentException("No such movie: " + movieID);
View Full Code Here

  protected void processFile(File dataOrUpdateFile, Map<String, List<Preference>> data) {
    log.info("Reading file info...");
    Map<String, Item> itemCache = new FastMap<String, Item>(1001);
    AtomicInteger count = new AtomicInteger();
    for (String line : new FileLineIterable(dataOrUpdateFile, false)) {
      if (line.length() > 0) {
        log.debug("Read line: {}", line);
        if (delimiter == UNKNOWN_DELIMITER) {
          delimiter = determineDelimiter(line);
        }
View Full Code Here

    }
  }

  private void processFile(Map<String, FastSet<Object>> data) {
    log.info("Reading file info...");
    for (String line : new FileLineIterable(dataFile, false)) {
      if (line.length() > 0) {
        log.debug("Read line: {}", line);
        processLine(line, data);
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.cf.taste.impl.common.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.