Package edu.brown.markov

Source Code of edu.brown.markov.FeatureSet

package edu.brown.markov;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.commons.collections15.set.ListOrderedSet;
import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONStringer;
import org.voltdb.VoltType;
import org.voltdb.catalog.Database;
import org.voltdb.utils.VoltTypeUtil;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.markov.features.AbstractFeature;
import edu.brown.markov.features.BasePartitionFeature;
import edu.brown.markov.features.FeatureUtil;
import edu.brown.markov.features.TransactionIdFeature;
import edu.brown.statistics.HistogramUtil;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.JSONSerializable;
import edu.brown.utils.JSONUtil;
import edu.brown.workload.TransactionTrace;

public class FeatureSet implements JSONSerializable {
    private static final Logger LOG = Logger.getLogger(FeatureSet.class);
    private final static LoggerBoolean debug = new LoggerBoolean();
    private final static LoggerBoolean trace = new LoggerBoolean();
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
    }
   
    public enum Members {
        TXN_VALUES,
        ATTRIBUTES,
        LAST_NUM_ATTRIBUTES,
        ATTRIBUTE_HISTOGRAMS,
        ATTRIBUTE_TYPES,
    }
   
    public enum Type {
        NUMERIC,
        STRING,
        RANGE,
        BOOLEAN,
    }

    /**
     * The row values for each txn record
     */
    public final Map<Long, Vector<Object>> txn_values = new ListOrderedMap<Long, Vector<Object>>();
   
    /**
     * The list of attributes that each txn should have
     */
    public final ListOrderedMap<String, Type> attributes = new ListOrderedMap<String, Type>();
    public int last_num_attributes = 0;
   
    /**
     * Attribute Value Histograms
     */
    public final Map<String, ObjectHistogram> attribute_histograms = new HashMap<String, ObjectHistogram>();
   
    /**
     *
     */
    public final Map<String, VoltType> attribute_types = new HashMap<String, VoltType>();
   
    /**
     * Constructor
     */
    public FeatureSet() {
        // Nothing for now...
    }

    /**
     * Total number of attributes stored in this FeatureSet
     */
    public int getAttributeCount() {
        return (this.attributes.size());
    }
   
    /**
     * Total number of transactions that we have extracted features for
     */
    public int getTransactionCount() {
        return (this.txn_values.size());
    }
   
    public List<String> getFeatures() {
        return (this.attributes.asList());
    }
   
    public Type getFeatureType(String key) {
        return (this.attributes.get(key));
    }

    public void addFeature(TransactionTrace txn, String key, Object val) {
        this.addFeature(txn, key, val, null);
    }
   
    /**
     * Returns true if the given key has been included in the set of attributes
     * @param key
     * @return
     */
    public boolean hasFeature(String key) {
        return (this.attributes.containsKey(key));
    }

    /**
     * Return the index in the list of attributes for the given key
     * @param key
     * @return
     */
    public Integer getFeatureIndex(String key) {
        int idx = this.attributes.indexOf(key);
        return (idx != -1 ? idx : null);
    }

    /**
     * Return the indexes of all the features for the given prefix
     * @param feature_class
     * @return
     */
    public Set<Integer> getFeatureIndexes(Class<? extends AbstractFeature> feature_class) {
        Set<Integer> ret = new HashSet<Integer>();
        String prefix = FeatureUtil.getFeatureKeyPrefix(feature_class);
        for (int i = 0, cnt = this.attributes.size(); i < cnt; i++) {
            String key = this.attributes.get(i);
            if (key.startsWith(prefix)) ret.add(i);
        } // FOR
        return (ret);
    }
   
    public List<Object> getFeatureValues(Long txn_id) {
        return (this.txn_values.get(txn_id));
    }

    public List<Object> getFeatureValues(TransactionTrace txn_trace) {
        return (this.getFeatureValues(txn_trace.getTransactionId()));
    }

    @SuppressWarnings("unchecked")
    public <T> T getFeatureValue(Long txn_id, String key) {
        int idx = this.attributes.indexOf(key);
        return ((T)this.txn_values.get(txn_id).get(idx));
    }
   
    @SuppressWarnings("unchecked")
    public <T> T getFeatureValue(TransactionTrace txn_trace, String key) {
        return ((T)this.getFeatureValue(txn_trace.getTransactionId(), key));
    }
   
    /**
     *
     * @param txn
     * @param key
     * @param val
     * @param type
     */
    public synchronized void addFeature(TransactionTrace txn, String key, Object val, Type type) {
        long txn_id = txn.getTransactionId();
       
        // Add the attribute if it's new
        if (!this.attributes.containsKey(key)) {
            // Figure out what type it is
            if (type == null) {
                Class<?> valClass = val.getClass();
                if (valClass.equals(Boolean.class) || valClass.equals(boolean.class)) {
                    type = Type.BOOLEAN;
                } else if (ClassUtil.getSuperClasses(valClass).contains(Number.class)) {
                    type = Type.NUMERIC;
                } else if (val instanceof String) {
                    type = Type.STRING;
                } else {
                    type = Type.RANGE;
                }
            }
            if (debug.val) LOG.debug("Adding new attribute " + key + " [" + type + "]");
            this.attributes.put(key, type);
            this.attribute_histograms.put(key, new ObjectHistogram());
            this.attribute_types.put(key, VoltType.NULL);
        }
        // HACK
        if (val != null && (val.getClass().equals(int.class) || val.getClass().equals(Integer.class))) {
            val = new Long((Integer)val);
        }
       
        // Always store the values in a histogram so we can normalize them later on
        try {
            this.attribute_histograms.get(key).put(val);
        } catch (Exception ex) {
            LOG.error("\n" + this.attribute_histograms.get(key));
            LOG.error("Invalid value '" + val + "' for attribute '" + key + "'", ex);
            System.exit(1);
        }

        // Now add the values into this txn's feature vector
        int idx = this.attributes.indexOf(key);
        int num_attributes = this.attributes.size();
        Vector<Object> values = this.txn_values.get(txn_id);
        if (values == null) {
            if (trace.val) LOG.trace("Creating new feature vector for " + txn_id);
            values = new Vector<Object>(num_attributes);
            values.setSize(num_attributes);
            this.txn_values.put(txn_id, values);
        }
        if (num_attributes != this.last_num_attributes) {
            assert(num_attributes > this.last_num_attributes);
            for (Vector<Object> v : this.txn_values.values()) {
                v.setSize(num_attributes);
            } // FOR
            this.last_num_attributes = num_attributes;
            if (trace.val) LOG.trace("Increased FeatureSet size to " + this.last_num_attributes + " attributes");
        }
        this.txn_values.get(txn_id).set(idx, val);
       
        if (val != null && this.attribute_types.get(key) == VoltType.NULL) {
            this.attribute_types.put(key, VoltType.typeFromClass(val.getClass()));
        }
       
        if (trace.val) LOG.trace(txn_id + ": " + key + " => " + val);
    }

    /**
     * Export the FeatureSet to a Weka Instances
     * @param name
     * @return
     */
    public Instances export(String name) {
        return (this.export(name, false, this.attributes.keySet()));
    }

    public Instances export(String name, boolean normalize) {
        return (this.export(name, normalize, this.attributes.keySet()));
    }
   
    /**
     * Export this FeatureSet to a Weka Instances data set
     * @param name
     * @param normalize
     * @param prefix_include
     * @return
     */
    @SuppressWarnings("unchecked")
    public Instances export(String name, boolean normalize, Collection<String> prefix_include) {
        // Figure out what attributes we want to export
        Set<String> export_attrs = new ListOrderedSet<String>();
        for (String key : this.attributes.keySet()) {
            boolean include = false;
            for (String prefix : prefix_include) {
                if (key.startsWith(prefix)) {
                    include = true;
                    break;
                }
            } // FOR
            if (include) export_attrs.add(key);
        } // FOR
        if (debug.val) LOG.debug("# of Attributes to Export: " + export_attrs.size());
       
        List<Map<Object, Double>> normalized_values = null;
        Set<String> normalize_ignore = new HashSet<String>();
        if (normalize) {
            normalize_ignore.add(FeatureUtil.getFeatureKeyPrefix(TransactionIdFeature.class));
            normalize_ignore.add(FeatureUtil.getFeatureKeyPrefix(BasePartitionFeature.class));
           
            if (debug.val) LOG.debug("Normalizing values!");
            normalized_values = new ArrayList<Map<Object,Double>>();
            for (String key : export_attrs) {
                if (normalize_ignore.contains(key) == false) {
                    normalized_values.add(HistogramUtil.normalize(this.attribute_histograms.get(key)));
                } else {
                    normalized_values.add(null);
                }
            } // FOR
        }
       
        // Attributes
        FastVector attrs = new FastVector();
        for (String key : export_attrs) {
            Type type = this.attributes.get(key);
            Attribute a = null;
            boolean normalize_attr = (normalize && normalize_ignore.contains(key) == false);

            // Normalized values will always just be numeric
            if (normalize_attr) {
                a = new Attribute(key);
               
            // Otherwise we can play games with ranges and strings
            } else {
                switch (type) {
                    case RANGE:
                    case BOOLEAN: {
                        FastVector range_values = new FastVector();
                        for (Object v : this.attribute_histograms.get(key).values()) {
                            range_values.addElement(v.toString());
                        } // FOR
                        a = new Attribute(key, range_values);
                        break;
                    }
                    case STRING:
                        a = new Attribute(key, (FastVector)null);
                        break;
                    default:
                        a = new Attribute(key);
                } // SWITCH
            }
            attrs.addElement(a);
        } // FOR
        assert(attrs.size() == export_attrs.size());

        Instances data = new Instances(name, attrs, 0);
       
        // Instance Values
        for (Vector<Object> values : this.txn_values.values()) {
            double instance[] = new double[data.numAttributes()];
            int i = 0;
            for (String key : export_attrs) {
                int attr_idx = this.attributes.indexOf(key);
                Object value = values.get(attr_idx);
                Type type = this.attributes.getValue(attr_idx);
                boolean normalize_attr = (normalize && normalize_ignore.contains(key) == false);

                // Null => Missing Value Placeholder
                if (value == null) {
                    instance[i] = Instance.missingValue();
                // Normalized
                } else if (normalize_attr) {
                    assert(normalized_values != null);
                    try {
                        instance[i] = normalized_values.get(i).get(value);
                    } catch (Exception ex) {
                        System.err.println(normalized_values.get(i));
                        LOG.fatal("Failed to get normalized value '" + value + "' for Attribute '" + key + "'");
                        throw new RuntimeException(ex);
                    }
                // Actual Values
                } else {
                    switch (type) {
                        case NUMERIC:
                            instance[i] = ((Number)value).doubleValue();
                            break;
                        case STRING:
                            instance[i] = data.attribute(i).addStringValue(value.toString());
                            break;
                        case BOOLEAN:
                            instance[i] = data.attribute(i).indexOfValue(Boolean.toString((Boolean)value));
                            break;
                        case RANGE:
                            instance[i] = data.attribute(i).indexOfValue(value.toString());
                            break;
                        default:
                            assert(false) : "Unexpected attribute type " + type;
                    } // SWITCH
                }
                i += 1;
            } // FOR
            data.add(new Instance(1.0, instance));
        } // FOR
       
        return (data);
    }
   
    // -----------------------------------------------------------------
    // SERIALIZATION
    // -----------------------------------------------------------------

    @Override
    public void load(File input_path, Database catalog_db) throws IOException {
        JSONUtil.load(this, catalog_db, input_path);
    }

    @Override
    public void save(File output_path) throws IOException {
        JSONUtil.save(this, output_path);
    }

    @Override
    public String toJSONString() {
        return (JSONUtil.toJSONString(this));
    }

    @Override
    public void toJSON(JSONStringer stringer) throws JSONException {
        JSONUtil.fieldsToJSON(stringer, this, FeatureSet.class, FeatureSet.Members.values());
    }

    @Override
    public void fromJSON(JSONObject json_object, Database catalog_db) throws JSONException {
        // First deserialize all of the fields except for TXN_VALUES
        Set<Members> fields = CollectionUtil.getAllExcluding(FeatureSet.Members.values(), FeatureSet.Members.TXN_VALUES);
        JSONUtil.fieldsFromJSON(json_object, catalog_db, this, FeatureSet.class, fields.toArray(new Members[0]));
       
        // Then we have reconstruct this mofo ourselves because the object types are implicit
        JSONObject inner_obj = json_object.getJSONObject(Members.TXN_VALUES.name());
        assert(inner_obj != null);
        Iterator<String> it = inner_obj.keys();
        while (it.hasNext()) {
            String inner_key = it.next();
            long txn_id = Long.valueOf(inner_key);
            this.txn_values.put(txn_id, new Vector<Object>());
           
            JSONArray inner_arr = inner_obj.getJSONArray(inner_key);
            for (int i = 0, cnt = inner_arr.length(); i < cnt; i++) {
                Object val = null;
                if (inner_arr.isNull(i) == false) {
                    String json_val = inner_arr.getString(i);
                    String attr_key = this.attributes.get(i);
                    Type attr_type = this.attributes.getValue(i);
                    try {
                        switch (attr_type) {
                            case NUMERIC:
                                val = VoltTypeUtil.getObjectFromString(this.attribute_types.get(attr_key), json_val);
                                break;
                            case BOOLEAN:
                                val = Boolean.valueOf(json_val);
                                break;
                            case RANGE: {
                                // Get the value type from the histogram
                                ObjectHistogram h = this.attribute_histograms.get(attr_key);
                                assert(h != null);
                                VoltType volt_type = h.getEstimatedType();
                                assert(volt_type != VoltType.INVALID);
                                val = VoltTypeUtil.getObjectFromString(volt_type, json_val);
                                break;
                            }
                            case STRING:
                                val = json_val;
                                break;
                            default:
                                assert(false) : "Unexpected Type: " + attr_type;
                        } // SWITCH
                    } catch (Exception ex) {
                        LOG.fatal("Failed to deserialize TXN_VALUES-" + inner_key + "-" + i);
                        throw new JSONException(ex);
                    }
                }
                this.txn_values.get(txn_id).add(val);
            } // FOR
        } // WHILE
    }
}
TOP

Related Classes of edu.brown.markov.FeatureSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.