Package weka.filters.unsupervised.attribute

Source Code of weka.filters.unsupervised.attribute.AddValues

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* AddValues.java
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
*
*/

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SingleIndex;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

/**
<!-- globalinfo-start -->
* Adds the labels from the given list to an attribute if they are missing. The labels can also be sorted in an ascending manner. If no labels are provided then only the (optional) sorting applies.
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -C &lt;col&gt;
*  Sets the attribute index
*  (default last).</pre>
*
* <pre> -L &lt;label1,label2,...&gt;
*  Comma-separated list of labels to add.
*  (default: none)</pre>
*
* <pre> -S
*  Turns on the sorting of the labels.</pre>
*
<!-- options-end -->
*
* Based on code from AddValues.
*
* @author  FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 6898 $
* @see     AddValues
*/
public class AddValues
  extends Filter
  implements UnsupervisedFilter, StreamableFilter, OptionHandler {

  /** for serialization */
  private static final long serialVersionUID = -8100622241742393656L;

  /** The attribute's index setting. */
  protected SingleIndex m_AttIndex = new SingleIndex("last");

  /** The values to add. */
  protected Vector m_Labels = new Vector();

  /** Whether to sort the values. */
  protected boolean m_Sort = false;

  /** the array with the sorted label indices */
  protected int[] m_SortedIndices;
 
  /**
   * Returns a string describing this filter
   *
   * @return     a description of the filter suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Adds the labels from the given list to an attribute if they are "
      + "missing. The labels can also be sorted in an ascending manner. "
      + "If no labels are provided then only the (optional) sorting applies.";
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector  result;
   
    result = new Vector();

    result.addElement(new Option(
  "\tSets the attribute index\n"
  + "\t(default last).",
  "C", 1, "-C <col>"));

    result.addElement(new Option(
  "\tComma-separated list of labels to add.\n"
  + "\t(default: none)",
  "L", 1, "-L <label1,label2,...>"));

    result.addElement(new Option(
  "\tTurns on the sorting of the labels.",
  "S", 0, "-S"));

    return result.elements();
  }


  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -C &lt;col&gt;
   *  Sets the attribute index
   *  (default last).</pre>
   *
   * <pre> -L &lt;label1,label2,...&gt;
   *  Comma-separated list of labels to add.
   *  (default: none)</pre>
   *
   * <pre> -S
   *  Turns on the sorting of the labels.</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String  tmpStr;
   
    tmpStr = Utils.getOption('C', options);
    if (tmpStr.length() != 0)
      setAttributeIndex(tmpStr);
    else
      setAttributeIndex("last");

    tmpStr = Utils.getOption('L', options);
    if (tmpStr.length() != 0)
      setLabels(tmpStr);
    else
      setLabels("");

    setSort(Utils.getFlag('S', options));
  
    if (getInputFormat() != null)
      setInputFormat(getInputFormat());
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector  result;
   
    result = new Vector();

    result.add("-C");
    result.add("" + getAttributeIndex());
   
    result.add("-L");
    result.add("" + getLabels());
   
    if (getSort())
      result.add("-S");

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);
   
    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);
   
    return result;
  }

  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo   an Instances object containing the input
   *         instance structure (any instances contained
   *         in the object are ignored - only the
   *         structure is required).
   * @return       true if the outputFormat may be collected
   *         immediately
   * @throws Exception     if the input format can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    Attribute   att;
    Attribute   attNew;
    Vector   allLabels;
    Enumeration enm;
    int   i;
    FastVector  values;
    FastVector  atts;
    Instances  instNew;

    super.setInputFormat(instanceInfo);
   
    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
    att = instanceInfo.attribute(m_AttIndex.getIndex());
    if (!att.isNominal())
      throw new UnsupportedAttributeTypeException("Chosen attribute not nominal.");
   
    // merge labels
    allLabels = new Vector();
    enm = att.enumerateValues();
    while (enm.hasMoreElements())
      allLabels.add(enm.nextElement());
    for (i = 0; i < m_Labels.size(); i++) {
      if (!allLabels.contains(m_Labels.get(i)))
  allLabels.add(m_Labels.get(i));
    }
   
    // generate index array
    if (getSort())
      Collections.sort(allLabels);
    m_SortedIndices = new int[att.numValues()];
    enm             = att.enumerateValues();
    i               = 0;
    while (enm.hasMoreElements()) {
      m_SortedIndices[i] = allLabels.indexOf(enm.nextElement());
      i++;
    }
   
    // generate new header
    values = new FastVector();
    for (i = 0; i < allLabels.size(); i++)
      values.addElement(allLabels.get(i));
    attNew = new Attribute(att.name(), values);

    atts = new FastVector();
    for (i = 0; i < instanceInfo.numAttributes(); i++) {
      if (i == m_AttIndex.getIndex())
  atts.addElement(attNew);
      else
  atts.addElement(instanceInfo.attribute(i));
    }
   
    instNew = new Instances(instanceInfo.relationName(), atts, 0);
    instNew.setClassIndex(instanceInfo.classIndex());
   
    // set new format
    setOutputFormat(instNew);
   
    return true;
  }

  /**
   * Input an instance for filtering. The instance is processed
   * and made available for output immediately.
   *
   * @param instance   the input instance
   * @return     true if the filtered instance may now be
   *       collected with output().
   * @throws IllegalStateException if no input format has been set.
   */
  public boolean input(Instance instance) {
    Instance   newInstance;
    double[]  values;

    if (getInputFormat() == null)
      throw new IllegalStateException("No input instance format defined");

    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }
   
    // generate new Instance
    values = instance.toDoubleArray();
    if (!instance.isMissing(m_AttIndex.getIndex()))
      values[m_AttIndex.getIndex()] = m_SortedIndices[(int) values[m_AttIndex.getIndex()]];
    newInstance = new DenseInstance(instance.weight(), values);

    // copy string values etc. from input to output
    copyValues(instance, false, instance.dataset(), getOutputFormat());
   
    push(newInstance);
   
    return true;
  }

  /**
   * Returns the tip text for this property
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String attributeIndexTipText() {
    return "Sets which attribute to process. This "
      + "attribute must be nominal (\"first\" and \"last\" are valid values)";
  }

  /**
   * Get the index of the attribute used.
   *
   * @return     the index of the attribute
   */
  public String getAttributeIndex() {
    return m_AttIndex.getSingleIndex();
  }

  /**
   * Sets index of the attribute used.
   *
   * @param attIndex   the index of the attribute
   */
  public void setAttributeIndex(String attIndex) {
    m_AttIndex.setSingleIndex(attIndex);
  }

  /**
   * Returns the tip text for this property
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String labelsTipText() {
    return "Comma-separated list of lables to add.";
  }

  /**
   * Get the comma-separated list of labels that are added.
   *
   * @return     the list of labels
   */
  public String getLabels() {
    String  result;
    int    i;

    result = "";
    for (i = 0; i < m_Labels.size(); i++) {
      if (i > 0)
  result += ",";
      result += Utils.quote((String) m_Labels.get(i));
    }
   
    return result;
  }

  /**
   * Sets the comma-separated list of labels.
   *
   * @param value  the list
   */
  public void setLabels(String value) {
    int    i;
    String  label;
    boolean  quoted;
    boolean  add;
   
    m_Labels.clear();
   
    label  = "";
    quoted = false;
    add    = false;
   
    for (i = 0; i < value.length(); i++) {
      // quotes?
      if (value.charAt(i) == '"') {
  quoted = !quoted;
  if (!quoted)
    add = true;
      }
      // comma
      else if ( (value.charAt(i) == ',') && (!quoted) ) {
  add = true;
      }
      // normal character
      else {
  label += value.charAt(i);
  // last character?
  if (i == value.length() - 1)
    add = true;
      }
     
      if (add) {
  if (label.length() != 0)
    m_Labels.add(label);
  label = "";
  add   = false;
      }
    }
  }

  /**
   * Returns the tip text for this property
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String sortTipText() {
    return "Whether to sort the labels alphabetically.";
  }

  /**
   * Gets whether the labels are sorted or not.
   *
   * @return     true if the labels are sorted
   */
  public boolean getSort() {
    return m_Sort;
  }

  /**
   * Sets whether the labels are sorted.
   *
   * @param value  if true the labels are sorted
   */
  public void setSort(boolean value) {
    m_Sort = value;
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 6898 $");
  }
 
  /**
   * Main method for testing and running this class.
   *
   * @param args   should contain arguments to the filter:
   *       use -h for help
   */
  public static void main(String[] args) {
    runFilter(new AddValues(), args);
  }
}
TOP

Related Classes of weka.filters.unsupervised.attribute.AddValues

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.