Package org.apache.mahout.cf.taste.impl.recommender

Source Code of org.apache.mahout.cf.taste.impl.recommender.SamplingCandidateItemsStrategy

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.cf.taste.impl.recommender;

import com.google.common.base.Preconditions;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.common.iterator.FixedSizeSamplingIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Iterator;

/**
* <p>Returns all items that have not been rated by the user <em>(3)</em> and that were preferred by another user
* <em>(2)</em> that has preferred at least one item <em>(1)</em> that the current user has preferred too.</p>
*
* <p>This strategy uses sampling to limit the number of items that are considered, by sampling three different
* things, noted above:</p>
*
* <ol>
*   <li>The items that the user has preferred</li>
*   <li>The users who also prefer each of those items</li>
*   <li>The items those users also prefer</li>
* </ol>
*
* <p>There is a maximum associated with each of these three things; if the number of items or users exceeds
* that max, it is sampled so that the expected number of items or users actually used in that part of the
* computation is equal to the max.</p>
*
* <p>Three arguments control these three maxima. Each is a "factor" f, which establishes the max at
* f * log2(n), where n is the number of users or items in the data. For example if factor #2 is 5,
* which controls the number of users sampled per item, then 5 * log2(# users) is the maximum for this
* part of the computation.</p>
*
* <p>Each can be set to not do any limiting with value {@link #NO_LIMIT_FACTOR}.</p>
*/
public class SamplingCandidateItemsStrategy extends AbstractCandidateItemsStrategy {

  private static final Logger log = LoggerFactory.getLogger(SamplingCandidateItemsStrategy.class);

  /**
   * Default factor used if not otherwise specified, for all limits. (30).
   */
  public static final int DEFAULT_FACTOR = 30;
  /**
   * Specify this value as a factor to mean no limit.
   */
  public static final int NO_LIMIT_FACTOR = Integer.MAX_VALUE;
  private static final int MAX_LIMIT = Integer.MAX_VALUE;
  private static final double LOG2 = Math.log(2.0);

  private final int maxItems;
  private final int maxUsersPerItem;
  private final int maxItemsPerUser;

  /**
   * Defaults to using no limit ({@link #NO_LIMIT_FACTOR}) for all factors, except
   * {@code candidatesPerUserFactor} which defaults to {@link #DEFAULT_FACTOR}.
   *
   * @see #SamplingCandidateItemsStrategy(int, int, int, int, int)
   */
  public SamplingCandidateItemsStrategy(int numUsers, int numItems) {
    this(DEFAULT_FACTOR, DEFAULT_FACTOR, DEFAULT_FACTOR, numUsers, numItems);
  }

  /**
   * @param itemsFactor factor controlling max items considered for a user
   * @param usersPerItemFactor factor controlling max users considered for each of those items
   * @param candidatesPerUserFactor factor controlling max candidate items considered from each of those users
   * @param numUsers number of users currently in the data
   * @param numItems number of items in the data
   */
  public SamplingCandidateItemsStrategy(int itemsFactor,
                                        int usersPerItemFactor,
                                        int candidatesPerUserFactor,
                                        int numUsers,
                                        int numItems) {
    Preconditions.checkArgument(itemsFactor > 0);
    Preconditions.checkArgument(usersPerItemFactor > 0);
    Preconditions.checkArgument(candidatesPerUserFactor > 0);
    Preconditions.checkArgument(numUsers > 0);
    Preconditions.checkArgument(numItems > 0);
    maxItems = computeMaxFrom(itemsFactor, numItems);
    maxUsersPerItem = computeMaxFrom(usersPerItemFactor, numUsers);
    maxItemsPerUser = computeMaxFrom(candidatesPerUserFactor, numItems);
    log.debug("maxItems {}, maxUsersPerItem {}, maxItemsPerUser {}", maxItems, maxUsersPerItem, maxItemsPerUser);
  }

  private static int computeMaxFrom(int factor, int numThings) {
    if (factor == NO_LIMIT_FACTOR) {
      return MAX_LIMIT;
    }
    long max = (long) (factor * (1.0 + Math.log(numThings) / LOG2));
    return max > MAX_LIMIT ? MAX_LIMIT : (int) max;
  }

  @Override
  protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) throws TasteException {
    LongPrimitiveIterator preferredItemIDsIterator = new LongPrimitiveArrayIterator(preferredItemIDs);
    if (preferredItemIDs.length > maxItems) {
      double samplingRate = (double) maxItems / preferredItemIDs.length;
//      log.info("preferredItemIDs.length {}, samplingRate {}", preferredItemIDs.length, samplingRate);
      preferredItemIDsIterator =
          new SamplingLongPrimitiveIterator(preferredItemIDsIterator, samplingRate);
    }
    FastIDSet possibleItemsIDs = new FastIDSet();
    while (preferredItemIDsIterator.hasNext()) {
      long itemID = preferredItemIDsIterator.nextLong();
      PreferenceArray prefs = dataModel.getPreferencesForItem(itemID);
      int prefsLength = prefs.length();
      if (prefsLength > maxUsersPerItem) {
        Iterator<Preference> sampledPrefs =
            new FixedSizeSamplingIterator<Preference>(maxUsersPerItem, prefs.iterator());
        while (sampledPrefs.hasNext()) {
          addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID()));
        }
      } else {
        for (int i = 0; i < prefsLength; i++) {
          addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(prefs.getUserID(i)));
        }
      }
    }
    possibleItemsIDs.removeAll(preferredItemIDs);
    return possibleItemsIDs;
  }

  private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
    if (itemIDs.size() > maxItemsPerUser) {
      LongPrimitiveIterator it =
          new SamplingLongPrimitiveIterator(itemIDs.iterator(), (double) maxItemsPerUser / itemIDs.size());
      while (it.hasNext()) {
        possibleItemIDs.add(it.nextLong());
      }
    } else {
      possibleItemIDs.addAll(itemIDs);
    }
  }

}
TOP

Related Classes of org.apache.mahout.cf.taste.impl.recommender.SamplingCandidateItemsStrategy

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.