Package edu.brown.markov

Source Code of edu.brown.markov.FeatureExtractor

package edu.brown.markov;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Procedure;

import weka.core.Instances;
import edu.brown.markov.features.AbstractFeature;
import edu.brown.markov.features.BasePartitionFeature;
import edu.brown.markov.features.ParamArrayAllSameHashFeature;
import edu.brown.markov.features.ParamArrayLengthFeature;
import edu.brown.markov.features.ParamHashEqualsBasePartitionFeature;
import edu.brown.markov.features.ParamHashPartitionFeature;
import edu.brown.markov.features.TransactionIdFeature;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.FileUtil;
import edu.brown.utils.PartitionEstimator;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;

/**
*
* @author pavlo
*/
public class FeatureExtractor {
    private static final Logger LOG = Logger.getLogger(FeatureExtractor.class);

    // HACK: What position is the TransactionId in all of our FeatureSets
    public static final int TXNID_ATTRIBUTE_IDX = 0;
   
    private final CatalogContext catalogContext;
    private final PartitionEstimator p_estimator;
    private final Map<Procedure, List<AbstractFeature>> proc_features = new HashMap<Procedure, List<AbstractFeature>>();
   
    private static final Class<?> DEFAULT_FEATURE_CLASSES[] = new Class<?>[] {
        TransactionIdFeature.class,
        BasePartitionFeature.class,
        // ParamNumericValuesFeature.class,
        ParamArrayAllSameHashFeature.class,
        ParamHashPartitionFeature.class,
        ParamArrayLengthFeature.class,
        ParamHashEqualsBasePartitionFeature.class
    };

    /**
     * Full Constructor
     * @param catalog_db
     * @param feature_classes
     */
    public FeatureExtractor(CatalogContext catalogContext, PartitionEstimator p_estimator, Class<? extends AbstractFeature>...feature_classes) {
        this.catalogContext = catalogContext;
        this.p_estimator = p_estimator;
        for (Class<? extends AbstractFeature> fclass : feature_classes) {
            this.addFeatureClass(fclass);
        } // FOR
    }
   
    public FeatureExtractor(CatalogContext catalogContext, Class<? extends AbstractFeature>...feature_classes) {
        this(catalogContext, new PartitionEstimator(catalogContext), feature_classes);
    }
   
    @SuppressWarnings("unchecked")
    public FeatureExtractor(CatalogContext catalogContext, PartitionEstimator p_estimator) {
        this(catalogContext, p_estimator, (Class<? extends AbstractFeature>[])DEFAULT_FEATURE_CLASSES);
    }
   
    /**
     * Constructor
     * @param catalog_db
     */
    @SuppressWarnings("unchecked")
    public FeatureExtractor(CatalogContext catalogContext) {
        this(catalogContext, (Class<? extends AbstractFeature>[])DEFAULT_FEATURE_CLASSES);
    }
   
    /**
     * Add a feature class to this extractor
     * @param feature_class
     */
    public void addFeatureClass(Class<? extends AbstractFeature> feature_class) {
        assert(feature_class != null);
        if (LOG.isDebugEnabled()) LOG.debug("Adding " + feature_class.getSimpleName());

        for (Procedure catalog_proc : catalogContext.database.getProcedures()) {
            if (catalog_proc.getSystemproc()) continue;
            if (!this.proc_features.containsKey(catalog_proc)) {
                this.proc_features.put(catalog_proc, new ArrayList<AbstractFeature>());
            }
            AbstractFeature f = (AbstractFeature)ClassUtil.newInstance(
                                    feature_class,
                                    new Object[]{ this.p_estimator, catalog_proc },
                                    new Class[] { PartitionEstimator.class, Procedure.class });
            this.proc_features.get(catalog_proc).add(f);
        } // fOR
    }
   
    /**
     *
     * @param workload
     * @return
     */
    public Map<Procedure, FeatureSet> calculate(Workload workload) throws Exception {
        Map<Procedure, FeatureSet> fsets = new HashMap<Procedure, FeatureSet>();
       
        for (TransactionTrace txn_trace : workload.getTransactions()) {
            final boolean trace = LOG.isTraceEnabled();
            if (trace) LOG.trace("Processing " + txn_trace);
           
            Procedure catalog_proc = txn_trace.getCatalogItem(catalogContext.database);
            assert(catalog_proc != null) : "Invalid procedure: " + txn_trace.getCatalogItemName();
            FeatureSet fset = fsets.get(catalog_proc);
            if (fset == null) {
                fset = new FeatureSet();
                fsets.put(catalog_proc, fset);
            }
           
            for (AbstractFeature f : this.proc_features.get(catalog_proc)) {
                LOG.trace(txn_trace + " - " + f.getClass().getSimpleName());
                f.extract(fset, txn_trace);
            }
           
            if (trace) LOG.trace(txn_trace + ": " + fset.getFeatureValues(txn_trace));
        } // FOR
        return (fsets);
    }

    public static void main(String[] vargs) throws Exception {
        ArgumentsParser args = ArgumentsParser.load(vargs);
        args.require(
            ArgumentsParser.PARAM_CATALOG,
            ArgumentsParser.PARAM_WORKLOAD,
            ArgumentsParser.PARAM_MAPPINGS
        );
       
        FeatureExtractor extractor = new FeatureExtractor(args.catalogContext);
        Map<Procedure, FeatureSet> fsets = extractor.calculate(args.workload);
       
//        List<String> targets = args.getOptParams();
       
        for (Entry<Procedure, FeatureSet> e : fsets.entrySet()) {
            String proc_name = e.getKey().getName();
//            if (targets.contains(proc_name) == false) continue;
           
//            File path = new File(proc_name + ".fset");
//            e.getValue().save(path.getAbsolutePath());
//            LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", e.getValue().getTransactionCount(), path.getAbsolutePath()));

            File path = new File(proc_name + ".arff");
            Instances data = e.getValue().export(proc_name, false);
            FileUtil.writeStringToFile(path, data.toString());
            LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", data.numInstances(), path.getAbsolutePath()));
        }
       
    }
}
TOP

Related Classes of edu.brown.markov.FeatureExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.