Package weka.filters.unsupervised.attribute

Source Code of weka.filters.unsupervised.attribute.SortLabels$CaseInsensitiveComparator

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* SortLabels.java
* Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
*/

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.SimpleStreamFilter;

import java.io.Serializable;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Vector;

/**
<!-- globalinfo-start -->
* A simple filter for sorting the labels of nominal attributes.
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
*  Turns on output of debugging information.</pre>
*
* <pre> -R &lt;index1,index2-index4,...&gt;
*  Specify list of string attributes to convert to words.
*  (default: select all relational attributes)</pre>
*
* <pre> -V
*  Inverts the matching sense of the selection.</pre>
*
* <pre> -S &lt;CASE|NON-CASE&gt;
*  Determines the type of sorting:
*  CASE = Case-sensitive
*  NON-CASE = Case-insensitive
*  (default: CASE)</pre>
*
<!-- options-end -->
*
* @author  fracpete (fracpete at waikato dot ac dot nz)
* @version $Revision: 5987 $
*/
public class SortLabels
  extends SimpleStreamFilter {

  /** for serialization. */
  private static final long serialVersionUID = 7815204879694105691L;
 
  /**
   * Represents a case-sensitive comparator for two strings.
   *
   * @author  fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 5987 $
   */
  public static class CaseSensitiveComparator
    implements Comparator, Serializable {

    /** for serialization. */
    private static final long serialVersionUID = 7071450356783873277L;

    /**
     * compares the two strings, returns -1 if o1 is smaller than o2, 0
     * if equal and +1 if greater.
     *
     * @param o1  the first string to compare
     * @param o2  the second string to compare
     * @return    returns -1 if o1 is smaller than o2, 0 if equal and +1
     *      if greater
     */
    public int compare(Object o1, Object o2) {
      String  s1;
      String  s2;
     
      if ((o1 == null) && (o2 == null))
  return 0;
      else if (o1 == null)
  return -1;
      else if (o2 == null)
  return +1;
     
      s1 = (String) o1;
      s2 = (String) o2;
     
      return s1.compareTo(s2);
    }
  }
 
  /**
   * Represents a case-insensitive comparator for two strings.
   *
   * @author  fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 5987 $
   */
  public static class CaseInsensitiveComparator
    implements Comparator, Serializable {

    /** for serialization. */
    private static final long serialVersionUID = -4515292733342486066L;

    /**
     * compares the two strings, returns -1 if o1 is smaller than o2, 0
     * if equal and +1 if greater.
     *
     * @param o1  the first string to compare
     * @param o2  the second string to compare
     * @return    returns -1 if o1 is smaller than o2, 0 if equal and +1
     *      if greater
     */
    public int compare(Object o1, Object o2) {
      String  s1;
      String  s2;
     
      if ((o1 == null) && (o2 == null))
  return 0;
      else if (o1 == null)
  return -1;
      else if (o2 == null)
  return +1;
     
      s1 = (String) o1;
      s2 = (String) o2;
     
      return s1.toLowerCase().compareTo(s2.toLowerCase());
    }
  }
 
  /** sorts the strings case-sensitive. */
  public final static int SORT_CASESENSITIVE = 0;
 
  /** sorts the strings case-insensitive. */
  public final static int SORT_CASEINSENSITIVE = 1;
 
  /** Tag allowing selection of sort type. */
  public final static Tag[] TAGS_SORTTYPE = {
    new Tag(SORT_CASESENSITIVE, "case", "Case-sensitive"),
    new Tag(SORT_CASEINSENSITIVE, "non-case", "Case-insensitive")
  };
 
  /** the range of attributes to process (only relational ones will be processed). */
  protected Range m_AttributeIndices = new Range("first-last");

  /** the new order for the labels. */
  protected int[][] m_NewOrder = null;

  /** the sort type. */
  protected int m_SortType = SORT_CASEINSENSITIVE;
 
  /** the comparator to use for sorting. */
  protected Comparator m_Comparator = new CaseSensitiveComparator();
 
  /**
   * Returns a string describing this filter.
   *
   * @return     a description of the filter suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "A simple filter for sorting the labels of nominal attributes.";
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return     an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector          result;
    Enumeration     en;
    String    desc;
    int      i;
    SelectedTag    tag;

    result = new Vector();

    en = super.listOptions();
    while (en.hasMoreElements())
      result.addElement(en.nextElement());

    result.addElement(new Option(
  "\tSpecify list of string attributes to convert to words.\n"
  + "\t(default: select all relational attributes)",
  "R", 1, "-R <index1,index2-index4,...>"));

    result.addElement(new Option(
  "\tInverts the matching sense of the selection.",
  "V", 0, "-V"));

    desc  = "";
    for (i = 0; i < TAGS_SORTTYPE.length; i++) {
      tag = new SelectedTag(TAGS_SORTTYPE[i].getID(), TAGS_SORTTYPE);
      desc  +=   "\t" + tag.getSelectedTag().getIDStr()
               + " = " + tag.getSelectedTag().getReadable()
               + "\n";
    }
    result.addElement(new Option(
  "\tDetermines the type of sorting:\n"
  + desc
  + "\t(default: " + new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE) + ")",
  "S", 1, "-S " + Tag.toOptionList(TAGS_SORTTYPE)));

    return result.elements();
  }

  /**
   * Parses the options for this object. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   *  Turns on output of debugging information.</pre>
   *
   * <pre> -R &lt;index1,index2-index4,...&gt;
   *  Specify list of string attributes to convert to words.
   *  (default: select all relational attributes)</pre>
   *
   * <pre> -V
   *  Inverts the matching sense of the selection.</pre>
   *
   * <pre> -S &lt;CASE|NON-CASE&gt;
   *  Determines the type of sorting:
   *  CASE = Case-sensitive
   *  NON-CASE = Case-insensitive
   *  (default: CASE)</pre>
   *
   <!-- options-end -->
   *
   * @param options  the options to use
   * @throws Exception  if setting of options fails
   */
  public void setOptions(String[] options) throws Exception {
    String  tmpStr;

    tmpStr = Utils.getOption('R', options);
    if (tmpStr.length() != 0)
      setAttributeIndices(tmpStr);
    else
      setAttributeIndices("first-last");

    setInvertSelection(Utils.getFlag('V', options));

    tmpStr = Utils.getOption('S', options);
    if (tmpStr.length() != 0)
      setSortType(new SelectedTag(tmpStr, TAGS_SORTTYPE));
    else
      setSortType(new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE));

    super.setOptions(options);
  }

  /**
   * Gets the current settings of the classifier.
   *
   * @return     an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    int      i;
    Vector<String>  result;
    String[]    options;

    result = new Vector<String>();

    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);

    result.add("-R");
    result.add(getAttributeIndices().getRanges());

    if (getInvertSelection())
      result.add("-V");

    result.add("-S");
    result.add("" + getSortType());
   
    return result.toArray(new String[result.size()]);   
  }

  /**
   * Returns the tip text for this property.
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String attributeIndicesTipText() {
    return
        "Specify range of attributes to act on; "
      + "this is a comma separated list of attribute indices, with "
      + "\"first\" and \"last\" valid values; Specify an inclusive "
      + "range with \"-\"; eg: \"first-3,5,6-10,last\".";
  }
 
  /**
   * Set the range of attributes to process.
   *
   * @param value   the new range.
   */
  public void setAttributeIndices(String value) {
    m_AttributeIndices = new Range(value);
  }

  /**
   * Gets the current selected attributes.
   *
   * @return     current selection.
   */
  public Range getAttributeIndices() {
    return m_AttributeIndices;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String invertSelectionTipText() {
    return
        "Set attribute selection mode. If false, only selected "
      + "attributes in the range will be worked on; if "
      + "true, only non-selected attributes will be processed.";
  }

  /**
   * Sets whether selected columns should be processed or skipped.
   *
   * @param value   the new invert setting
   */
  public void setInvertSelection(boolean value) {
    m_AttributeIndices.setInvert(value);
  }

  /**
   * Gets whether the supplied columns are to be processed or skipped.
   *
   * @return     true if the supplied columns will be kept
   */
  public boolean getInvertSelection() {
    return m_AttributeIndices.getInvert();
  }

  /**
   * Returns the tip text for this property.
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String sortTypeTipText() {
    return "The type of sorting to use.";
  }

  /**
   * Sets the sort type to be used.
   *
   * @param type   the type of sorting
   */
  public void setSortType(SelectedTag type) {
    if (type.getTags() == TAGS_SORTTYPE) {
      m_SortType = type.getSelectedTag().getID();
     
      if (m_SortType == SORT_CASESENSITIVE)
  m_Comparator = new CaseSensitiveComparator();
      else if (m_SortType == SORT_CASEINSENSITIVE)
  m_Comparator = new CaseInsensitiveComparator();
      else
  throw new IllegalStateException("Unhandled sort type '" + type + "'!");
    }
  }

  /**
   * Gets the sort type to be used.
   *
   * @return     the sort type
   */
  public SelectedTag getSortType() {
    return new SelectedTag(m_SortType, TAGS_SORTTYPE);
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);
   
    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);
   
    return result;
  }
 
  /**
   * Determines the output format based on the input format and returns
   * this.
   *
   * @param inputFormat     the input format to base the output format on
   * @return                the output format
   * @throws Exception      in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances    result;
    Attribute    att;
    Attribute    attSorted;
    FastVector    atts;
    FastVector    values;
    Vector<String>  sorted;
    int      i;
    int      n;
   
    m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);
   
    // determine sorted indices
    atts       = new FastVector();
    m_NewOrder = new int[inputFormat.numAttributes()][];
    for (i = 0; i < inputFormat.numAttributes(); i++) {
      att = inputFormat.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) {
  m_NewOrder[i] = new int[0];
  atts.addElement(inputFormat.attribute(i).copy());
  continue;
      }

      // sort labels
      sorted = new Vector<String>();
      for (n = 0; n < att.numValues(); n++)
  sorted.add(att.value(n));
      Collections.sort(sorted, m_Comparator);
     
      // determine new indices
      m_NewOrder[i] = new int[att.numValues()];
      values        = new FastVector();
      for (n = 0; n < att.numValues(); n++) {
  m_NewOrder[i][n] = sorted.indexOf(att.value(n));
  values.addElement(sorted.get(n));
      }
      attSorted = new Attribute(att.name(), values);
      attSorted.setWeight(att.weight());
      atts.addElement(attSorted);
    }
   
    // generate new header
    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());
   
    return result;
  }

  /**
   * processes the given instance (may change the provided instance) and
   * returns the modified version.
   *
   * @param instance    the instance to process
   * @return            the modified data
   * @throws Exception  in case the processing goes wrong
   */
  protected Instance process(Instance instance) throws Exception {
    Instance  result;
    Attribute  att;
    double[]  values;
    int    i;

    // adjust indices
    values = new double[instance.numAttributes()];
    for (i = 0; i < instance.numAttributes(); i++) {
      att = instance.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i))
  values[i] = instance.value(i);
      else
  values[i] = m_NewOrder[i][(int) instance.value(i)];
    }

    // create new instance
    result = new DenseInstance(instance.weight(), values);
   
    return result;
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5987 $");
  }

  /**
   * runs the filter with the given arguments.
   *
   * @param args      the commandline arguments
   */
  public static void main(String[] args) {
    runFilter(new SortLabels(), args);
  }
}
TOP

Related Classes of weka.filters.unsupervised.attribute.SortLabels$CaseInsensitiveComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.