Package weka.core

Examples of weka.core.Attribute

Typical usage (code from the main() method of this class):

...
// Create numeric attributes "length" and "weight"
Attribute length = new Attribute("length");
Attribute weight = new Attribute("weight");

// Create list to hold nominal values "first", "second", "third"
List my_nominal_values = new ArrayList(3);
my_nominal_values.add("first");
my_nominal_values.add("second");
my_nominal_values.add("third");

// Create nominal attribute "position"
Attribute position = new Attribute("position", my_nominal_values);
...

@author Eibe Frank (eibe@cs.waikato.ac.nz) @version $Revision: 6889 $


    // 2. distribution?
    if (getOutputDistribution()) {
      if (inputFormat.classAttribute().isNominal()) {
  for (i = 0; i < inputFormat.classAttribute().numValues(); i++) {
    atts.addElement(new Attribute("distribution_" + inputFormat.classAttribute().value(i)));
  }
      }
      else {
  atts.addElement(new Attribute("distribution"));
      }
    }

    // 3. error flag?
    if (getOutputErrorFlag()) {
      if (inputFormat.classAttribute().isNominal()) {
  values = new FastVector();
  values.addElement("no");
  values.addElement("yes");
  atts.addElement(new Attribute("error", values));
      }
      else {
  atts.addElement(new Attribute("error"));
      }
    }

    // generate new header
    result = new Instances(inputFormat.relationName(), atts, 0);
View Full Code Here


    // compute Value Distance Metric matrices for nominal features
    Map vdmMap = new HashMap();
    Enumeration attrEnum = getInputFormat().enumerateAttributes();
    while(attrEnum.hasMoreElements()) {
      Attribute attr = (Attribute) attrEnum.nextElement();
      if (!attr.equals(getInputFormat().classAttribute())) {
  if (attr.isNominal() || attr.isString()) {
    double[][] vdm = new double[attr.numValues()][attr.numValues()];
    vdmMap.put(attr, vdm);
    int[] featureValueCounts = new int[attr.numValues()];
    int[][] featureValueCountsByClass = new int[getInputFormat().classAttribute().numValues()][attr.numValues()];
    instanceEnum = getInputFormat().enumerateInstances();
    while(instanceEnum.hasMoreElements()) {
      Instance instance = (Instance) instanceEnum.nextElement();
      int value = (int) instance.value(attr);
      int classValue = (int) instance.classValue();
      featureValueCounts[value]++;
      featureValueCountsByClass[classValue][value]++;
    }
    for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) {
      for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) {
        double sum = 0;
        for (int classValueIndex = 0; classValueIndex < getInputFormat().numClasses(); classValueIndex++) {
    double c1i = (double) featureValueCountsByClass[classValueIndex][valueIndex1];
    double c2i = (double) featureValueCountsByClass[classValueIndex][valueIndex2];
    double c1 = (double) featureValueCounts[valueIndex1];
    double c2 = (double) featureValueCounts[valueIndex2];
    double term1 = c1i / c1;
    double term2 = c2i / c2;
    sum += Math.abs(term1 - term2);
        }
        vdm[valueIndex1][valueIndex2] = sum;
      }
    }
  }
      }
    }

    // use this random source for all required randomness
    Random rand = new Random(getRandomSeed());

    // find the set of extra indices to use if the percentage is not evenly divisible by 100
    List extraIndices = new LinkedList();
    double percentageRemainder = (getPercentage() / 100) - Math.floor(getPercentage() / 100.0);
    int extraIndicesCount = (int) (percentageRemainder * sample.numInstances());
    if (extraIndicesCount >= 1) {
      for (int i = 0; i < sample.numInstances(); i++) {
  extraIndices.add(i);
      }
    }
    Collections.shuffle(extraIndices, rand);
    extraIndices = extraIndices.subList(0, extraIndicesCount);
    Set extraIndexSet = new HashSet(extraIndices);

    // the main loop to handle computing nearest neighbors and generating SMOTE
    // examples from each instance in the original minority class data
    Instance[] nnArray = new Instance[nearestNeighbors];
    for (int i = 0; i < sample.numInstances(); i++) {
      Instance instanceI = sample.instance(i);
      // find k nearest neighbors for each instance
      List distanceToInstance = new LinkedList();
      for (int j = 0; j < sample.numInstances(); j++) {
  Instance instanceJ = sample.instance(j);
  if (i != j) {
    double distance = 0;
    attrEnum = getInputFormat().enumerateAttributes();
    while(attrEnum.hasMoreElements()) {
      Attribute attr = (Attribute) attrEnum.nextElement();
      if (!attr.equals(getInputFormat().classAttribute())) {
        double iVal = instanceI.value(attr);
        double jVal = instanceJ.value(attr);
        if (attr.isNumeric()) {
    distance += Math.pow(iVal - jVal, 2);
        } else {
    distance += ((double[][]) vdmMap.get(attr))[(int) iVal][(int) jVal];
        }
      }
    }
    distance = Math.pow(distance, .5);
    distanceToInstance.add(new Object[] {distance, instanceJ});
  }
      }

      // sort the neighbors according to distance
      Collections.sort(distanceToInstance, new Comparator() {
  public int compare(Object o1, Object o2) {
    double distance1 = (Double) ((Object[]) o1)[0];
    double distance2 = (Double) ((Object[]) o2)[0];
    return (int) Math.ceil(distance1 - distance2);
  }
      });

      // populate the actual nearest neighbor instance array
      Iterator entryIterator = distanceToInstance.iterator();
      int j = 0;
      while(entryIterator.hasNext() && j < nearestNeighbors) {
  nnArray[j] = (Instance) ((Object[])entryIterator.next())[1];
  j++;
      }

      // create synthetic examples
      int n = (int) Math.floor(getPercentage() / 100);
      while(n > 0 || extraIndexSet.remove(i)) {
  double[] values = new double[sample.numAttributes()];
  int nn = rand.nextInt(nearestNeighbors);
  attrEnum = getInputFormat().enumerateAttributes();
  while(attrEnum.hasMoreElements()) {
    Attribute attr = (Attribute) attrEnum.nextElement();
    if (!attr.equals(getInputFormat().classAttribute())) {
      if (attr.isNumeric()) {
        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
        double gap = rand.nextDouble();
        values[attr.index()] = (double) (instanceI.value(attr) + gap * dif);
      } else if (attr.isDate()) {
        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
        double gap = rand.nextDouble();
        values[attr.index()] = (long) (instanceI.value(attr) + gap * dif);
      } else {
        int[] valueCounts = new int[attr.numValues()];
        int iVal = (int) instanceI.value(attr);
        valueCounts[iVal]++;
        for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) {
    int val = (int) nnArray[nnEx].value(attr);
    valueCounts[val]++;
        }
        int maxIndex = 0;
        int max = Integer.MIN_VALUE;
        for (int index = 0; index < attr.numValues(); index++) {
    if (valueCounts[index] > max) {
      max = valueCounts[index];
      maxIndex = index;
    }
        }
        values[attr.index()] = maxIndex;
      }
    }
  }
  values[sample.classIndex()] = minIndex;
  Instance synthetic = new Instance(1.0, values);
View Full Code Here

      + Utils.doubleToString(m_CutPoints[i][j - 1], 6) + "-"
      + Utils.doubleToString(m_CutPoints[i][j], 6) + "]'");
        }
      }
    }
    attributes.addElement(new Attribute(getInputFormat().
                attribute(i).name(),
                attribValues));
  } else {
    if (m_CutPoints[i] == null) {
      FastVector attribValues = new FastVector(1);
      attribValues.addElement("'All'");
      attributes.addElement(new Attribute(getInputFormat().
            attribute(i).name(),
            attribValues));
    } else {
      if (i < getInputFormat().classIndex()) {
        classIndex += m_CutPoints[i].length - 1;
      }
      for(int j = 0; j < m_CutPoints[i].length; j++) {
        FastVector attribValues = new FastVector(2);
        attribValues.addElement("'(-inf-"
          + Utils.doubleToString(m_CutPoints[i][j], 6) + "]'");
        attribValues.addElement("'("
          + Utils.doubleToString(m_CutPoints[i][j], 6) + "-inf)'");
        attributes.addElement(new Attribute(getInputFormat().
              attribute(i).name() + "_" + (j+1),
              attribValues));
      }
    }
  }
View Full Code Here

    } else if (classColString.trim().toLowerCase().compareTo("first") == 0 ||
        classColString.equalsIgnoreCase("/first")) {
      // nothing to do
    } else {
      // try to look up class attribute as a label
      Attribute classAtt = format.attribute(classColString);
      if (classAtt != null) {
        existingClassCol = classAtt.index();
      } else {
        // parse it as a number
        existingClassCol = Integer.parseInt(classColString);
        if (existingClassCol < 0) {
          existingClassCol = -1; // no class
View Full Code Here

    double [] counts;

    double [][] avgClassValues = new double[getInputFormat().numAttributes()][0];
    m_Indices = new int[getInputFormat().numAttributes()][0];
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (att.isNominal()) {
  avgClassValues[j] = new double [att.numValues()];
  counts = new double [att.numValues()];
  for (int i = 0; i < getInputFormat().numInstances(); i++) {
    instance = getInputFormat().instance(i);
    if (!instance.classIsMissing() &&
        (!instance.isMissing(j))) {
      counts[(int)instance.value(j)] += instance.weight();
      avgClassValues[j][(int)instance.value(j)] +=
        instance.weight() * instance.classValue();
    }
  }
  sum = Utils.sum(avgClassValues[j]);
  totalCounts = Utils.sum(counts);
  if (Utils.gr(totalCounts, 0)) {
    for (int k = 0; k < att.numValues(); k++) {
      if (Utils.gr(counts[k], 0)) {
        avgClassValues[j][k] /= (double)counts[k];
      } else {
        avgClassValues[j][k] = sum / (double)totalCounts;
      }
View Full Code Here

    // Compute new attributes

    newClassIndex = getInputFormat().classIndex();
    newAtts = new FastVector();
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if ((!att.isNominal()) ||
    (j == getInputFormat().classIndex())) {
  newAtts.addElement(att.copy());
      } else {
  if ( (att.numValues() <= 2) && (!m_TransformAll) ) {
    if (m_Numeric) {
      newAtts.addElement(new Attribute(att.name()));
    } else {
      newAtts.addElement(att.copy());
    }
  } else {

    if (j < getInputFormat().classIndex()) {
      newClassIndex += att.numValues() - 1;
    }

    // Compute values for new attributes
    for (int k = 0; k < att.numValues(); k++) {
      attributeName =
        new StringBuffer(att.name() + "=");
      attributeName.append(att.value(k));
      if (m_Numeric) {
        newAtts.
    addElement(new Attribute(attributeName.toString()));
      } else {
        vals = new FastVector(2);
        vals.addElement("f"); vals.addElement("t");
        newAtts.
    addElement(new Attribute(attributeName.toString(), vals));
      }
    }
  }
      }
    }
View Full Code Here

    // Compute new attributes

    newClassIndex = getInputFormat().classIndex();
    newAtts = new FastVector();
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if ((!att.isNominal()) ||
    (j == getInputFormat().classIndex())) {
  newAtts.addElement(att.copy());
      } else {
  if (j < getInputFormat().classIndex())
    newClassIndex += att.numValues() - 2;
   
  // Compute values for new attributes
   
  for (int k = 1; k < att.numValues(); k++) {
    attributeName =
      new StringBuffer(att.name() + "=");
    for (int l = k; l < att.numValues(); l++) {
      if (l > k) {
        attributeName.append(',');
      }
      attributeName.append(att.value(m_Indices[j][l]));
    }
    if (m_Numeric) {
      newAtts.
        addElement(new Attribute(attributeName.toString()));
    } else {
      vals = new FastVector(2);
      vals.addElement("f"); vals.addElement("t");
      newAtts.
        addElement(new Attribute(attributeName.toString(), vals));
    }
  }
      }
    }
    outputFormat = new Instances(getInputFormat().relationName(),
View Full Code Here

    double [] vals = new double [outputFormatPeek().numAttributes()];
    int attSoFar = 0;

    for(int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if ((!att.isNominal()) || (j == getInputFormat().classIndex())) {
  vals[attSoFar] = instance.value(j);
  attSoFar++;
      } else {
  if ( (att.numValues() <= 2) && (!m_TransformAll) ) {
    vals[attSoFar] = instance.value(j);
    attSoFar++;
  } else {
    if (instance.isMissing(j)) {
      for (int k = 0; k < att.numValues(); k++) {
              vals[attSoFar + k] = instance.value(j);
      }
    } else {
      for (int k = 0; k < att.numValues(); k++) {
        if (k == (int)instance.value(j)) {
                vals[attSoFar + k] = 1;
        } else {
                vals[attSoFar + k] = 0;
        }
      }
    }
    attSoFar += att.numValues();
  }
      }
    }
    Instance inst = null;
    if (instance instanceof SparseInstance) {
View Full Code Here

    double [] vals = new double [outputFormatPeek().numAttributes()];
    int attSoFar = 0;

    for(int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if ((!att.isNominal()) || (j == getInputFormat().classIndex())) {
  vals[attSoFar] = instance.value(j);
  attSoFar++;
      } else {
  if (instance.isMissing(j)) {
    for (int k = 0; k < att.numValues() - 1; k++) {
            vals[attSoFar + k] = instance.value(j);
    }
  } else {
    int k = 0;
    while ((int)instance.value(j) != m_Indices[j][k]) {
            vals[attSoFar + k] = 1;
      k++;
    }
    while (k < att.numValues() - 1) {
            vals[attSoFar + k] = 0;
      k++;
    }
  }
  attSoFar += att.numValues() - 1;
      }
    }
    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
View Full Code Here

    // generate header
    FastVector atts = new FastVector();
    String prefix = getAlgorithm().getSelectedTag().getReadable();
    for (int i = 0; i < getNumComponents(); i++)
      atts.addElement(new Attribute(prefix + "_" + (i+1)));
    atts.addElement(new Attribute("Class"));
    Instances result = new Instances(prefix, atts, 0);
    result.setClassIndex(result.numAttributes() - 1);
   
    return result;
  }
View Full Code Here

TOP

Related Classes of weka.core.Attribute

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.