Package com.flaptor.indextank.index.scorer

Source Code of com.flaptor.indextank.index.scorer.DynamicDataManager

/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.flaptor.indextank.index.scorer;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.log4j.Logger;

import com.flaptor.indextank.index.DocId;
import com.flaptor.indextank.index.scorer.CategoryMaskManager.CategoryInfo;
import com.flaptor.indextank.index.scorer.CategoryMaskManager.CategoryValueInfo;
import com.flaptor.indextank.index.scorer.DynamicBoostsManager.DynamicBoosts;
import com.flaptor.util.Execute;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;

@SuppressWarnings("deprecation")
public class DynamicDataManager implements BoostsManager {
  // Class-wide log4j logger.
  private static final Logger logger = Logger.getLogger(Execute.whoAmI());
  // Name of the old-format backup file; the constructor reads it with Java object
  // serialization, and only when no current-format file exists.
  private static final String OLD_MAIN_FILE_NAME = "dynamicBoosts";
  // Name of the current-format backup file, written by newSyncToDisk() and read by readFromDisk().
  private static final String MAIN_FILE_NAME = "dynamicData";

  // Number of boost slots kept per document; checked to be positive in the constructor.
  private final int numberOfBoosts;
    // Per-document data, keyed by document id.
    private final ConcurrentMap<DocId, DynamicData> dynamicDataMap;
    // Directory that holds the backup files.
    private final File backupDir;
    // Shared instance returned by getBoosts() when a document has no entry.
    private final DynamicData emptyData;
    // Maps category names/values to bitmasks and value codes; constructed with dumpLock.
    private final CategoryMaskManager maskManager;
    // Its write lock is held by dump() while the state is written; also handed to the mask manager.
    private final ReentrantReadWriteLock dumpLock = new ReentrantReadWriteLock();

    /**
     * Build a {@link DynamicDataManager} with a backupDir. If the directory containts a
     * dynamic boosts file, it loads the data from it. Otherwise, creates a new boosts map 
     *
     * @param numberOfBoosts the number of boosting doubles that this Scorer will store.
     * @param backupDir the directory to which the data stored in this Scorer shall be
     */
    @SuppressWarnings("unchecked")
  public DynamicDataManager(int numberOfBoosts, File backupDir) {
    Preconditions.checkArgument(numberOfBoosts > 0);
      checkDirArgument(backupDir);
        this.numberOfBoosts = numberOfBoosts;
        this.backupDir = backupDir;
        this.emptyData = new DynamicData(numberOfBoosts);
        this.maskManager = new CategoryMaskManager(dumpLock);

        File oldFormatFile = new File(this.backupDir, OLD_MAIN_FILE_NAME);
        File newFormatFile = new File(this.backupDir, MAIN_FILE_NAME);
        if (!newFormatFile.exists() && oldFormatFile.exists()) {
            logger.info("Found old format file, loading it.");
            ObjectInputStream is = null;
            try {
                is = new ObjectInputStream(new BufferedInputStream(new FileInputStream(oldFormatFile)));
                int storedNumberOfBoosts = is.readInt();
                if (storedNumberOfBoosts != numberOfBoosts) {
                  throw new IllegalArgumentException("Number of boosts specified in Manager construction differ from the one stored in the backup file (" + numberOfBoosts + " vs. " + storedNumberOfBoosts +")");
                }
                try {
                    ConcurrentMap<?, ?> read = (ConcurrentMap<?, ?>) is.readObject();
                    Set<?> keys = read.keySet();
                    if (keys.isEmpty() || (keys.iterator().next() instanceof DocId && read.values().iterator().next() instanceof DynamicData)) {
                        // last version, assign directly to field
                        dynamicDataMap = (ConcurrentMap<DocId, DynamicData>) read;
                    } else {
                        // values are definitely Boosts, we'll need to transform them
                        // and check whether the keys are strings (v1) or DocIds (v2)
                        dynamicDataMap = new ConcurrentHashMap<DocId, DynamicData>();
                        boolean areDocids = keys.iterator().next() instanceof DocId;
                        for (Map.Entry<?,?> e : read.entrySet()) {
                            // convert key to docid if necessary
                            DocId docId = areDocids ? (DocId)e.getKey() : new DocId((String)e.getKey());
                            // convert value and add to the map
                            dynamicDataMap.put(docId, new DynamicData((DynamicBoosts)e.getValue()));
                        }
                    }
                    logger.info("State loaded.");
                } catch (ClassNotFoundException e) {
                    throw new IllegalStateException(e);
                }
            } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
      } catch (IOException e) {
        throw new RuntimeException(e);
      } finally {
                Execute.close(is);
            }
        } else {
          this.dynamicDataMap = new ConcurrentHashMap<DocId, DynamicData>();
          if (newFormatFile.exists()) {
              readFromDisk();
          }
        }
    }
   
  @Override
  public Boosts getBoosts(DocId documentId) {
      DynamicData data = dynamicDataMap.get(documentId);
      if (data == null) {
          logger.warn("Failed to find boosts for document " + documentId);
          return this.emptyData;
      }
      return data;
  }

    public Map<Integer, Double> getVariablesAsMap(DocId documentId) {
        DynamicData data = dynamicDataMap.get(documentId);
        if (data == null) {
            logger.warn("Failed to find variables for document " + documentId);
            return ImmutableMap.of();
        }
        return data.getVariablesAsMap(numberOfBoosts);
    }

    public Map<String, String> getCategoriesAsMap(DocId documentId) {
        try {
            return getCategoryValues(documentId);
        } catch (IllegalArgumentException e) {
            logger.warn(e.getMessage());
        }
        return ImmutableMap.of();
    }

  public int getNumberOfBoosts() {
    return numberOfBoosts;
  }
 
  @Override
  public int getDocumentCount() {
    return dynamicDataMap.size();
  }

  DynamicData getDynamicData(DocId docId) {
    return dynamicDataMap.get(docId);
  }
 
  @Override
  public void removeBoosts(String documentId) {
    dynamicDataMap.remove(new DocId(documentId));
  }

  @Override
  public void setBoosts(String documentId, Map<Integer, Float> boosts) {
    setBoosts(documentId, null, boosts);
  }
 
    public void setCategoryValues(String documentId, Map<String, String> categories) {
      DynamicData data = getOrCreateData(documentId);
        for (Map.Entry<String, String> entry : categories.entrySet()) {
            CategoryValueInfo catInfo = maskManager.getCategoryValueInfo(entry.getKey(), entry.getValue());
            if (catInfo != null) {
              data.setCategoryValue(catInfo.getBitmask(), catInfo.getValueCode());
            }
        }
    }

    public Map<String, String> getCategoryValues(DocId documentId) {
      DynamicData data = dynamicDataMap.get(documentId);
      if (null == data) {
        throw new IllegalArgumentException("no data for document " + documentId);
      }
      Map<String, String> results = Maps.newHashMap();
     
      Map<String, CategoryInfo> categoryInfos = maskManager.getCategoryInfos();
     
      for (Entry<String, CategoryInfo> entry : categoryInfos.entrySet()) {
        CategoryInfo categoryInfo = entry.getValue();
        int valueCode = data.getCategoryValue(categoryInfo.getBitmask());
        if (valueCode != 0) {
          results.put(entry.getKey(), categoryInfo.getValue(valueCode));
        }
      }
     
      return results;
    }

    public interface FacetsCollector {
      public void addCategoryValue(String category, Integer valueCode);
    }
   
    public void populateCollector(DocId documentId, FacetsCollector collector) {
      DynamicData data = dynamicDataMap.get(documentId);
      if (null == data) {
        throw new IllegalArgumentException("no data for document " + documentId);
      }
      Map<String, CategoryInfo> categoryInfos = maskManager.getCategoryInfos();
     
      for (Entry<String, CategoryInfo> entry : categoryInfos.entrySet()) {
        CategoryInfo categoryInfo = entry.getValue();
        int valueCode = data.getCategoryValue(categoryInfo.getBitmask());
        if (valueCode != 0) {
          collector.addCategoryValue(entry.getKey(), valueCode);
        }
      }
     
    }
   
    public CategoryMaskManager getMaskManager() {
      return maskManager;
    }
   
   
  @Override
  public void setBoosts(String documentId, Integer timestamp, Map<Integer, Float> boosts) {
    Preconditions.checkNotNull(documentId);

    for (Integer index : boosts.keySet()) {
      if (index >= numberOfBoosts || index < 0) {
        throw new IllegalArgumentException("Invalid boost index (" + index + " for a Scorer with a maximum of " + numberOfBoosts + " boosts)");
      }
    }
    DynamicData data = getOrCreateData(documentId);
    for (Entry<Integer, Float> entry : boosts.entrySet()) {
      data.setBoost(entry.getKey(), entry.getValue());
    }
    if (timestamp != null) {
            data.setTimestamp(timestamp);
    }
  }

    private DynamicData getOrCreateData(String docid) {
        DocId key = new DocId(docid);
        DynamicData data = dynamicDataMap.get(key);
    if (data == null) {
      data = new DynamicData(numberOfBoosts);
     
      DynamicData previousValue = dynamicDataMap.putIfAbsent(key, data);
      if (previousValue != null) {
        data = previousValue;
      }
    }
        return data;
    }

    /*
     * Check the synching block
     */
   
    @Override
    public void dump() throws IOException {
        logger.info("Starting DynamicDataManager's dump.");
        dumpLock.writeLock().lock();
        try {
            newSyncToDisk();
        } finally {
            dumpLock.writeLock().unlock();
        }
        logger.info("DynamicDataManager's dump completed.");
    }

    /**
     * Syncs the stored data to disk.
     * This method is non-blocking, and does not ensure that the operation will be completed
     * in time, or at all.
     */
    public void nonBlockingSync() {
        (new SyncerThread()).start();
    }

    private static final int SERIALIZATION_VERSION = 1;
    private synchronized void newSyncToDisk() throws IOException {
        File f = new File(backupDir, MAIN_FILE_NAME);
        DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(f)));
        try {
            dos.writeInt(SERIALIZATION_VERSION);
            dos.writeInt(numberOfBoosts);
           
            for (Entry<DocId, DynamicData> entry : dynamicDataMap.entrySet()) {
                entry.getKey().writeData(dos);
                entry.getValue().writeData(dos);
            }
            DocId.writeNull(dos);
           
            maskManager.writeData(dos);
        } finally {
            Execute.close(dos);
        }
    }
   
    private synchronized void readFromDisk() {
        File f = new File(backupDir, MAIN_FILE_NAME);
        DataInputStream dis = null;
        try {
            dis = new DataInputStream(new BufferedInputStream(new FileInputStream(f)));
            int version = dis.readInt();
            if (version > SERIALIZATION_VERSION) {
                throw new IllegalStateException(String.format("File version is newer than known by this class: %d > %d", version, SERIALIZATION_VERSION));
            }
           
            int fileBoosts = dis.readInt();
            if (numberOfBoosts != fileBoosts) {
                throw new IllegalStateException(String.format("Incorrect number of boosts in file. Actual: %d, Expected: %d", fileBoosts, numberOfBoosts));
            }

            while (true) {
                DocId docid = DocId.readData(dis);
                if (docid == null) {
                    break;
                }
                DynamicData data = DynamicData.readData(numberOfBoosts, dis);
                dynamicDataMap.put(docid, data);
            }
           
            maskManager.readData(dis);
        } catch (IOException e) {
            logger.fatal("Error while loading dynamic data", e);
            throw new RuntimeException(e);
        } finally {
            Execute.close(dis);
        }
       
    }
   
    //----------------------------------------------------------------------------------------
    //STATIC METHODS
    private static void checkDirArgument(File backupDir) {
        Preconditions.checkNotNull(backupDir);
        if (!backupDir.canRead()) {
            String s = "Don't have read permission over the backup directory(" + backupDir.getAbsolutePath() + ").";
            logger.error(s);
            throw new IllegalArgumentException(s);
        }
        if (!backupDir.canWrite()) {
            String s = "Don't have write permission over the backup directory(" + backupDir.getAbsolutePath() + ").";
            logger.error(s);
            throw new IllegalArgumentException(s);
        }
    }

    //----------------------------------------------------------------------------------------
    //PRIVATE CLASSES

    static class DynamicData implements Serializable, Boosts {
        private static final long serialVersionUID = 1L;
       
        private int[] data; // timestamp = data[0], variables = data[1-n], categories = data[n+1, m]
        private int dataBoundary;

        DynamicData(int numberOfBoosts) {
            data = new int[1 + numberOfBoosts];
            dataBoundary = numberOfBoosts + 1;
        }

        DynamicData(DynamicBoosts oldBoosts) {
            data = new int[1 + oldBoosts.boosts.length];
            data[0] = oldBoosts.timestamp;
            for (int i = 0; i < oldBoosts.boosts.length; i++) {
                data[1 + i] = Float.floatToRawIntBits(oldBoosts.boosts[i]);
            }
        }
     
        public DynamicData(int numberOfBoosts, int[] data) {
            this.data = data;
            this.dataBoundary = numberOfBoosts + 1;
        }

        public int[] getData() {
          return data;
        }

        public Map<Integer, Double> getVariablesAsMap(int numberOfVariables) {
            HashMap<Integer, Double> map = new HashMap<Integer, Double>(numberOfVariables);
            for(int id = 0; id < numberOfVariables; id++) {
                map.put(id, Double.valueOf(getBoost(id)));
            }
            return map;
        }
       
        @Override
    public float getBoost(int boostIndex) {
      return Float.intBitsToFloat(data[1 + boostIndex]);
    }

        public void setBoost(int boostIndex, float boostValue) {
            data[1 + boostIndex] = Float.floatToRawIntBits(boostValue);
        }

        public void setCategoryValue(int[] bitmask, int value) {
          if (data.length - dataBoundary < bitmask.length) {
            int[] newData = new int[bitmask.length + dataBoundary];
            System.arraycopy(data, 0, newData, 0, data.length);
            data = newData;
          }
            data = CategoryEncoder.encode(data, dataBoundary, bitmask, value);
        }

        public int getCategoryValue(int[] bitmask) {
            return CategoryEncoder.decode(data, dataBoundary, bitmask);
        }

        @Override
    public int getTimestamp() {
      return data[0];
    }

        public void setTimestamp(int timestamp) {
            data[0] = timestamp;
        }
       
        void writeData(DataOutputStream dos) throws IOException {
            int len = data.length;
            dos.writeInt(len);
            for (int i = 0; i < len; i++) {
                dos.writeInt(data[i]);
            }
        }

        static DynamicData readData(int numberOfBoosts, DataInputStream dis) throws IOException {
            int len = dis.readInt();
            int[] data = new int[len];
            for (int i = 0; i < len; i++) {
                data[i] = dis.readInt();
            }
            return new DynamicData(numberOfBoosts, data);
        }
       
       
    }
   
    private class SyncerThread extends Thread {
        public SyncerThread() {
            setName("DynamicDataManager's syncer thread");
        }
        @Override
        public void run() {
            try {
                newSyncToDisk();
            } catch (Exception e) {
                logger.error(e);
            }
        }
    }
   
    public static void main(String[] args) {
        int boosts = Integer.parseInt(args[0]);
        DynamicDataManager ddm = new DynamicDataManager(boosts, new File(args[1]));
        System.out.println("Count: " + ddm.getDocumentCount());
       
        Scanner in = new Scanner(System.in);

        while (in.hasNextLine()) {
            String line = in.nextLine();
            DocId docId = new DocId(line);
            DynamicData data = ddm.getDynamicData(docId);
            System.out.println("timestamp: " + data.getTimestamp());
            for (int i = 0; i < boosts; i++) {
                System.out.println("var["+i+"]: " + data.getBoost(i));
            }
            System.out.println(ddm.getCategoryValues(docId));
        }

    }

    public Map<String, String> getStats() {
        HashMap<String, String> stats = Maps.newHashMap();
        stats.putAll(maskManager.getStats());
        stats.put("dynamic_data_count", String.valueOf(this.dynamicDataMap.size()));
        stats.put("dynamic_data_variables", String.valueOf(this.numberOfBoosts));
        return stats;
    }

}
TOP

Related Classes of com.flaptor.indextank.index.scorer.DynamicDataManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.