Package net.myrrix.common.iterator

Examples of net.myrrix.common.iterator.FileLineIterable


    // Scan the data files in dataDirectory and record the first two comma-separated
    // fields of every line as a user ID and an item ID respectively.
    log.info("Reading IDs...");   
    FastIDSet userIDsSet = new FastIDSet();
    FastIDSet itemIDsSet = new FastIDSet();
    Splitter comma = Splitter.on(',');
    // The filter pattern accepts names ending in .csv, .csv.zip or .csv.gz.
    // NOTE(review): File.listFiles returns null when dataDirectory is not a directory
    // or an I/O error occurs, which would make this loop throw NullPointerException --
    // confirm the caller validates dataDirectory first.
    for (File f : dataDirectory.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) {
      // FileLineIterable is iterated line by line; presumably it handles the
      // compressed variants transparently -- TODO confirm.
      for (CharSequence line : new FileLineIterable(f)) {
        Iterator<String> it = comma.split(line).iterator();
        // No hasNext() check and no format validation here: whether a line with fewer
        // than two fields, or a non-numeric field, is reported cleanly depends on the
        // splitter's iterator and Long.parseLong -- confirm the input is always well formed.
        userIDsSet.add(Long.parseLong(it.next()));
        itemIDsSet.add(Long.parseLong(it.next()));
      }
    }
View Full Code Here


    int lines = 0;
    int badLines = 0;
    for (File inputFile : inputFiles) {
      log.info("Reading {}", inputFile);
      for (String line : new FileLineIterable(inputFile)) {
       
        if (badLines > 100) { // Crude check
          throw new IOException("Too many bad lines; aborting");
        }
       
View Full Code Here

  private static Multimap<Long,RecommendedItem> readAndCopyDataFiles(File dataDir, File tempDir) throws IOException {
    Multimap<Long,RecommendedItem> data = ArrayListMultimap.create();
    for (File dataFile : dataDir.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) {
      log.info("Reading {}", dataFile);
      int count = 0;
      for (CharSequence line : new FileLineIterable(dataFile)) {
        Iterator<String> parts = COMMA_TAB_SPLIT.split(line).iterator();
        long userID = Long.parseLong(parts.next());
        long itemID = Long.parseLong(parts.next());
        if (parts.hasNext()) {
          String token = parts.next().trim();
View Full Code Here

  }

  @Override
  public void addItemIDs(File idFile) throws TasteException {
    try {
      addItemIDs(new FileLineIterable(idFile));
    } catch (IOException ioe) {
      throw new TasteException(ioe);
    }
  }
View Full Code Here

    Multimap<String,RecommendedItem> userTags = ArrayListMultimap.create();

    for (File dataFile : dataDir.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) {
      log.info("Reading {}", dataFile);
      int count = 0;
      for (CharSequence line : new FileLineIterable(dataFile)) {
        Iterator<String> parts = COMMA_TAB_SPLIT.split(line).iterator();
        String userIDString = parts.next();
        if (userIDString.hashCode() % 1000000 <= perMillion) {
          String itemIDString = parts.next();
          if (itemIDString.hashCode() % 1000000 <= perMillion) {
View Full Code Here

TOP

Related Classes of net.myrrix.common.iterator.FileLineIterable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.