Package net.myrrix.online.eval

Source Code of net.myrrix.online.eval.ReconstructionEvaluator

/*
* Copyright Myrrix Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package net.myrrix.online.eval;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import com.google.common.base.CharMatcher;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.io.Files;
import com.google.common.io.PatternFilenameFilter;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.util.FastMath;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.myrrix.common.LangUtils;
import net.myrrix.common.iterator.FileLineIterable;
import net.myrrix.common.math.SimpleVectorMath;
import net.myrrix.common.collection.FastByIDMap;
import net.myrrix.common.io.IOUtils;
import net.myrrix.online.ServerRecommender;
import net.myrrix.online.generation.Generation;

/**
* <p>A different kind of evaluator that tests not so much the quality of the recommendations versus input,
* but the quality of the reconstruction of the input by factored matrices. Perfect reconstruction is not
* possible in a low-dimension space (or even desirable). The reconstruction error over the non-zero entries of
* P (input reduced to 0/1), however, should be fairly small, and this provides the means to check that.</p>
*
* <p>The output is the average, over all existing user-item pairs, of the difference between 1 and the
* reconstructed value for that pair. Negative differences (where > 1 was predicted) are counted as 0.</p>
*
* <p>This class can be run as a Java program; the single argument is a directory containing test data.
* The {@link EvaluationResult} is printed to standard out.</p>
*
* @author Sean Owen
* @since 1.0
*/
public final class ReconstructionEvaluator {

  private static final Logger log = LoggerFactory.getLogger(ReconstructionEvaluator.class);

  private static final Splitter COMMA_TAB_SPLIT = Splitter.on(CharMatcher.anyOf(",\t")).omitEmptyStrings();

  public EvaluationResult evaluate(File originalDataDir) throws TasteException, IOException, InterruptedException {

    Preconditions.checkArgument(originalDataDir.exists() && originalDataDir.isDirectory(),
                                "%s is not a directory", originalDataDir);
    File tempDir = Files.createTempDir();

    ServerRecommender recommender = null;
    try {

      Multimap<Long,RecommendedItem> data;
      try {
        data = readAndCopyDataFiles(originalDataDir, tempDir);
      } catch (IOException ioe) {
        throw new TasteException(ioe);
      }

      recommender = new ServerRecommender(tempDir);
      recommender.await();

      Generation generation = recommender.getGenerationManager().getCurrentGeneration();
      FastByIDMap<float[]> X = generation.getX();
      FastByIDMap<float[]> Y = generation.getY();

      Mean averageError = new Mean();
      // Only compute average over existing entries...
      for (Map.Entry<Long,RecommendedItem> entry : data.entries()) {
        long userID = entry.getKey();
        long itemID = entry.getValue().getItemID();
        // Each of which was a "1" in the factor P matrix
        double value = SimpleVectorMath.dot(X.get(userID), Y.get(itemID));
        // So store abs(1-value), except, don't penalize for reconstructing > 1. Error is 0 in this case.
        averageError.increment(FastMath.max(0.0, 1.0 - value));
      }

      return new EvaluationResultImpl(averageError.getResult());
    } finally {
      recommender.close();
      IOUtils.deleteRecursively(tempDir);
    }
  }

  private static Multimap<Long,RecommendedItem> readAndCopyDataFiles(File dataDir, File tempDir) throws IOException {
    Multimap<Long,RecommendedItem> data = ArrayListMultimap.create();
    for (File dataFile : dataDir.listFiles(new PatternFilenameFilter(".+\\.csv(\\.(zip|gz))?"))) {
      log.info("Reading {}", dataFile);
      int count = 0;
      for (CharSequence line : new FileLineIterable(dataFile)) {
        Iterator<String> parts = COMMA_TAB_SPLIT.split(line).iterator();
        long userID = Long.parseLong(parts.next());
        long itemID = Long.parseLong(parts.next());
        if (parts.hasNext()) {
          String token = parts.next().trim();
          if (!token.isEmpty()) {
            data.put(userID, new GenericRecommendedItem(itemID, LangUtils.parseFloat(token)));
          }
          // Ignore remove lines
        } else {
          data.put(userID, new GenericRecommendedItem(itemID, 1.0f));
        }
        if (++count % 1000000 == 0) {
          log.info("Finished {} lines", count);
        }
      }

      Files.copy(dataFile, new File(tempDir, dataFile.getName()));
    }
    return data;
  }

  public static void main(String[] args) throws Exception {
    EvaluationResult result = new ReconstructionEvaluator().evaluate(new File(args[0]));
    log.info(String.valueOf(result));
  }

}
TOP

Related Classes of net.myrrix.online.eval.ReconstructionEvaluator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.