Package weka.core

Examples of weka.core.FastVector
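
All of the snippets below come from Weka source code written against the pre-3.7 API, in which FastVector (rather than java.util.List) holds attribute definitions, nominal values, and collected objects. As a quick orientation, here is a minimal, self-contained sketch of that typical pattern; the class name FastVectorExample and the attribute names are illustrative only, not part of Weka:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class FastVectorExample {

  public static void main(String[] args) {
    // Values of a nominal attribute are collected in a FastVector
    FastVector classValues = new FastVector(2);
    classValues.addElement("yes");
    classValues.addElement("no");

    // The attribute list of a dataset header is also a FastVector
    FastVector attInfo = new FastVector(2);
    attInfo.addElement(new Attribute("length"));              // numeric attribute
    attInfo.addElement(new Attribute("class", classValues));  // nominal attribute

    // Empty dataset built from that header, class attribute last
    Instances data = new Instances("demo", attInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // One instance: a numeric value plus the index of "yes" in classValues
    double[] vals = new double[data.numAttributes()];
    vals[0] = 4.2;
    vals[1] = 0;
    data.add(new Instance(1.0, vals));

    System.out.println(data);   // prints the dataset in ARFF form
  }
}

In Weka 3.7 and later, FastVector is deprecated in favour of java.util.ArrayList; the fragments on this page all use the older form shown above.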


   * @param params the CVParameters to use
   * @throws Exception if the setting of the CVParameters fails
   */
  public void setCVParameters(Object[] params) throws Exception {
     
      FastVector backup = m_CVParams;
      m_CVParams = new FastVector();
     
      for(int i=0; i<params.length; i++) {
          try{
            addCVParameter((String)params[i]);
          }
View Full Code Here


    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
      if (m_Predictions == null) {
        m_Predictions = new FastVector();
      }
      double [] dist = classifier.distributionForInstance(classMissing);
      pred = Utils.maxIndex(dist);
      if (dist[(int)pred] <= 0) {
        pred = Instance.missingValue();
View Full Code Here

  public double evaluateModelOnceAndRecordPrediction(double [] dist,
      Instance instance) throws Exception {
    double pred;
    if (m_ClassIsNominal) {
      if (m_Predictions == null) {
        m_Predictions = new FastVector();
      }
      pred = Utils.maxIndex(dist);
      if (dist[(int)pred] <= 0) {
        pred = Instance.missingValue();
      }
View Full Code Here

    m_ReducedHeaders = new Instances[ m_Classifiers.length ][];

    // Construction of the base classifiers
    for(int i = 0; i < m_Classifiers.length; i++) {
      m_ReducedHeaders[i] = new Instances[ m_Groups[i].length ];
      FastVector transformedAttributes = new FastVector( data.numAttributes() );

      // Construction of the dataset for each group of attributes
      for( int j = 0; j < m_Groups[ i ].length; j++ ) {
        FastVector fv = new FastVector( m_Groups[i][j].length + 1 );
        for( int k = 0; k < m_Groups[i][j].length; k++ ) {
          String newName = data.attribute( m_Groups[i][j][k] ).name()
            + "_" + k;
          fv.addElement( data.attribute( m_Groups[i][j][k] ).copy(newName) );
        }
        fv.addElement( data.classAttribute( ).copy() );
        Instances dataSubSet = new Instances( "rotated-" + i + "-" + j + "-",
                                              fv, 0);
        dataSubSet.setClassIndex( dataSubSet.numAttributes() - 1 );

        // Select instances for the dataset
View Full Code Here

 
    if(data == null)
      throw new Exception(" Unable to randomize the class orders.");
   
    m_Class = data.classAttribute();
    m_Ruleset = new FastVector();
    m_RulesetStats = new FastVector();
    m_Distributions = new FastVector();

    // Sort by classes frequency
    double[] orderedClasses = ((ClassOrder)m_Filter).getClassCounts();
    if(m_Debug){
      System.err.println("Sorted classes:");
View Full Code Here

           double defDL)
    throws Exception {
 
    Instances newData = data, growData, pruneData;   
    boolean stop = false;
    FastVector ruleset = new FastVector();   
 
    double dl = defDL, minDL = defDL;
    RuleStats rstats = null;
    double[] rst;
 
    // Check whether data have positive examples
    boolean defHasPositive = true; // No longer used
    boolean hasPositive = defHasPositive;
 
    /********************** Building stage ***********************/ 
    if(m_Debug)
      System.err.println("\n*** Building stage ***");
   
    while((!stop) && hasPositive){ // Generate new rules until
      // stopping criteria met
      RipperRule oneRule;
      if(m_UsePruning){   
        /* Split data into Grow and Prune*/

        // We should have stratified the data, but ripper seems
        // to have a bug that makes it not to do so.  In order
        // to simulate it more precisely, we do the same thing.
        //newData.randomize(m_Random);
        newData = RuleStats.stratify(newData, m_Folds, m_Random);
        Instances[] part = RuleStats.partition(newData, m_Folds);
        growData=part[0];
        pruneData=part[1];
        //growData=newData.trainCV(m_Folds, m_Folds-1);
        //pruneData=newData.testCV(m_Folds, m_Folds-1);

        oneRule = new RipperRule();
        oneRule.setConsequent(classIndex);  // Must set first

        if(m_Debug)
          System.err.println("\nGrowing a rule ...");
        oneRule.grow(growData);             // Build the rule
        if(m_Debug)
          System.err.println("One rule found before pruning:"+
                             oneRule.toString(m_Class));

        if(m_Debug)
          System.err.println("\nPruning the rule ...");
        oneRule.prune(pruneData, false);    // Prune the rule
        if(m_Debug)
          System.err.println("One rule found after pruning:"+
                             oneRule.toString(m_Class));
      }
      else{
        oneRule = new RipperRule();
        oneRule.setConsequent(classIndex);  // Must set first
        if(m_Debug)
          System.err.println("\nNo pruning: growing a rule ...");
        oneRule.grow(newData);              // Build the rule
        if(m_Debug)
          System.err.println("No pruning: one rule found:\n"+
                             oneRule.toString(m_Class));
      }
     
      // Compute the DL of this ruleset
      if(rstats == null){ // First rule
        rstats = new RuleStats();
        rstats.setNumAllConds(m_Total);
        rstats.setData(newData);
      }

      rstats.addAndUpdate(oneRule);
      int last = rstats.getRuleset().size()-1; // Index of last rule
      dl += rstats.relativeDL(last, expFPRate, m_CheckErr);

      if(Double.isNaN(dl) || Double.isInfinite(dl))
        throw new Exception("Should never happen: dl in "+
                            "building stage NaN or infinite!");
      if(m_Debug)
        System.err.println("Before optimization("+last+
                           "): the dl = "+dl+" | best: "+minDL);

      if(dl < minDL)
        minDL = dl;  // The best dl so far

      rst = rstats.getSimpleStats(last);
      if(m_Debug)
        System.err.println("The rule covers: "+rst[0]+
                           " | pos = " + rst[2] +
                           " | neg = " + rst[4]+
                           "\nThe rule doesn't cover: "+rst[1]+
                           " | pos = " + rst[5]);

      stop = checkStop(rst, minDL, dl);

      if(!stop){
        ruleset.addElement(oneRule);           // Accepted
        newData = rstats.getFiltered(last)[1]; // Data not covered
        hasPositive = Utils.gr(rst[5], 0.0);   // Positives remaining?
        if(m_Debug)
          System.err.println("One rule added: has positive? "
                             +hasPositive);
      }
      else{
        if(m_Debug)
          System.err.println("Quit rule");
        rstats.removeLast(); // Remove last to be re-used
      }
    }// while !stop 
 
    /******************** Optimization stage *******************/
    RuleStats finalRulesetStat = null;
    if(m_UsePruning){  
      for(int z=0; z < m_Optimizations; z++){
        if(m_Debug)
          System.err.println("\n*** Optimization: run #"
                             +z+" ***");

        newData = data;
        finalRulesetStat = new RuleStats();
        finalRulesetStat.setData(newData);
        finalRulesetStat.setNumAllConds(m_Total);
        int position=0;
        stop = false;
        boolean isResidual = false;
        hasPositive = defHasPositive;
        dl = minDL = defDL;

      oneRule:
        while(!stop && hasPositive){

          isResidual = (position>=ruleset.size()); // Cover residual positive examples
          // Re-do shuffling and stratification
          //newData.randomize(m_Random);
          newData = RuleStats.stratify(newData, m_Folds, m_Random);
          Instances[] part = RuleStats.partition(newData, m_Folds);
          growData=part[0];
          pruneData=part[1];
          //growData=newData.trainCV(m_Folds, m_Folds-1);
          //pruneData=newData.testCV(m_Folds, m_Folds-1);
          RipperRule finalRule;

          if(m_Debug)
            System.err.println("\nRule #"+position +
                               "| isResidual?" + isResidual+
                               "| data size: "+newData.sumOfWeights());

          if(isResidual){
            RipperRule newRule = new RipperRule();
            newRule.setConsequent(classIndex);
            if(m_Debug)
              System.err.println("\nGrowing and pruning"+
                                 " a new rule ...");
            newRule.grow(growData);
            newRule.prune(pruneData, false);
            finalRule = newRule;
            if(m_Debug)
              System.err.println("\nNew rule found: "+
                                 newRule.toString(m_Class));
          }
          else{
            RipperRule oldRule = (RipperRule)ruleset.elementAt(position);
            boolean covers = false;
            // Test coverage of the next old rule
            for(int i=0; i<newData.numInstances(); i++)
              if(oldRule.covers(newData.instance(i))){
                covers = true;
                break;
              }
     
            if(!covers){ // Null coverage, no variants can be generated
              finalRulesetStat.addAndUpdate(oldRule);
              position++;
              continue oneRule;
            }

            // 2 variants
            if(m_Debug)
              System.err.println("\nGrowing and pruning"+
                                 " Replace ...");
            RipperRule replace = new RipperRule();
            replace.setConsequent(classIndex);
            replace.grow(growData);

            // Remove the pruning data covered by the following
            // rules, then simply compute the error rate of the
            // current rule to prune it.  According to Ripper,
            // it's equivalent to computing the error of the
            // whole ruleset -- is it true?
            pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position);
            replace.prune(pruneData, true);

            if(m_Debug)
              System.err.println("\nGrowing and pruning"+
                                 " Revision ...");
            RipperRule revision = (RipperRule)oldRule.copy();

            // For revision, first rm the data covered by the old rule
            Instances newGrowData = new Instances(growData, 0);
            for(int b=0; b<growData.numInstances(); b++){
              Instance inst = growData.instance(b);
              if(revision.covers(inst))
                newGrowData.add(inst);
            }
            revision.grow(newGrowData);
            revision.prune(pruneData, true);

            double[][] prevRuleStats = new double[position][6];
            for(int c=0; c < position; c++)
              prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);

            // Now compare the relative DL of variants
            FastVector tempRules = (FastVector)ruleset.copyElements();
            tempRules.setElementAt(replace, position);

            RuleStats repStat = new RuleStats(data, tempRules);
            repStat.setNumAllConds(m_Total);
            repStat.countData(position, newData, prevRuleStats);
            //repStat.countData();
            rst = repStat.getSimpleStats(position);
            if(m_Debug)
              System.err.println("Replace rule covers: "+rst[0]+
                                 " | pos = " + rst[2] +
                                 " | neg = " + rst[4]+
                                 "\nThe rule doesn't cover: "+rst[1]+
                                 " | pos = " + rst[5]);

            double repDL = repStat.relativeDL(position, expFPRate,
                                              m_CheckErr);
            if(m_Debug)
              System.err.println("\nReplace: "+
                                 replace.toString(m_Class)
                                 +" |dl = "+repDL);

            if(Double.isNaN(repDL) || Double.isInfinite(repDL))
              throw new Exception("Should never happen: repDL"+
                                  "in optmz. stage NaN or "+
                                  "infinite!");

            tempRules.setElementAt(revision, position);
            RuleStats revStat = new RuleStats(data, tempRules);
            revStat.setNumAllConds(m_Total);
            revStat.countData(position, newData, prevRuleStats);
            //revStat.countData();
            double revDL = revStat.relativeDL(position, expFPRate,
View Full Code Here

 
    StringBuffer sb = new StringBuffer("JRIP rules:\n"+
               "===========\n\n");
    for(int j=0; j<m_RulesetStats.size(); j++){
      RuleStats rs = (RuleStats)m_RulesetStats.elementAt(j);
      FastVector rules = rs.getRuleset();
      for(int k=0; k<rules.size(); k++){
        double[] simStats = rs.getSimpleStats(k);
        sb.append(((RipperRule)rules.elementAt(k)).toString(m_Class)
                  + " ("+simStats[0]+"/"+simStats[4]+")\n");
      }         
    }
    if(m_Debug){
      System.err.println("Inside m_Ruleset");
View Full Code Here

 
      case STRING :
        //System.err.println("String --> nominal");
        attributeTypes[i - 1] = Attribute.NOMINAL;
        nominalIndexes[i - 1] = new Hashtable();
        nominalStrings[i - 1] = new FastVector();
        break;
      case TEXT:
        //System.err.println("Text --> string");
        attributeTypes[i - 1] = Attribute.STRING;
        nominalIndexes[i - 1] = new Hashtable();
        nominalStrings[i - 1] = new FastVector();
        break;
      case BOOL:
        //System.err.println("boolean --> nominal");
        attributeTypes[i - 1] = Attribute.NOMINAL;
        nominalIndexes[i - 1] = new Hashtable();
        nominalIndexes[i - 1].put("false", new Double(0));
        nominalIndexes[i - 1].put("true", new Double(1));
        nominalStrings[i - 1] = new FastVector();
        nominalStrings[i - 1].addElement("false");
        nominalStrings[i - 1].addElement("true");
        break;
      case DOUBLE:
        //System.err.println("BigDecimal --> numeric");
        attributeTypes[i - 1] = Attribute.NUMERIC;
        break;
      case BYTE:
        //System.err.println("byte --> numeric");
        attributeTypes[i - 1] = Attribute.NUMERIC;
        break;
      case SHORT:
        //System.err.println("short --> numeric");
        attributeTypes[i - 1] = Attribute.NUMERIC;
        break;
      case INTEGER:
        //System.err.println("int --> numeric");
        attributeTypes[i - 1] = Attribute.NUMERIC;
        break;
      case LONG:
        //System.err.println("long --> numeric");
        attributeTypes[i - 1] = Attribute.NUMERIC;
        break;
      case FLOAT:
        //System.err.println("float --> numeric");
        attributeTypes[i - 1] = Attribute.NUMERIC;
        break;
      case DATE:
        attributeTypes[i - 1] = Attribute.DATE;
        break;
      case TIME:
        attributeTypes[i - 1] = Attribute.DATE;
        break;
      default:
        //System.err.println("Unknown column type");
        attributeTypes[i - 1] = Attribute.STRING;
      }
    }

    // For sqlite
    // cache column names because the last while(rs.next()) { iteration for
    // the tuples below will close the md object: 
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
      columnNames.add(md.getColumnLabel(i + 1));
    }

    // Step through the tuples
    if (m_Debug)
      System.err.println("Creating instances...");
    FastVector instances = new FastVector();
    int rowCount = 0;
    while(rs.next()) {
      if (rowCount % 100 == 0) {
        if (m_Debug)  {
          System.err.print("read " + rowCount + " instances \r");
          System.err.flush();
        }
      }
      double[] vals = new double[numAttributes];
      for(int i = 1; i <= numAttributes; i++) {
  /*switch (md.getColumnType(i)) {
  case Types.CHAR:
  case Types.VARCHAR:
  case Types.LONGVARCHAR:
  case Types.BINARY:
  case Types.VARBINARY:
  case Types.LONGVARBINARY:*/
  switch (translateDBColumnType(md.getColumnTypeName(i))) {
  case STRING :
    String str = rs.getString(i);
   
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      Double index = (Double)nominalIndexes[i - 1].get(str);
      if (index == null) {
        index = new Double(nominalStrings[i - 1].size());
        nominalIndexes[i - 1].put(str, index);
        nominalStrings[i - 1].addElement(str);
      }
      vals[i - 1] = index.doubleValue();
    }
    break;
  case TEXT:
    String txt = rs.getString(i);
   
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      Double index = (Double)nominalIndexes[i - 1].get(txt);
      if (index == null) {
        index = new Double(nominalStrings[i - 1].size());
        nominalIndexes[i - 1].put(txt, index);
        nominalStrings[i - 1].addElement(txt);
      }
      vals[i - 1] = index.doubleValue();
    }
    break;
  case BOOL:
    boolean boo = rs.getBoolean(i);
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      vals[i - 1] = (boo ? 1.0 : 0.0);
    }
    break;
  case DOUBLE:
    //    BigDecimal bd = rs.getBigDecimal(i, 4);
    double dd = rs.getDouble(i);
    // Use the column precision instead of 4?
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      //      newInst.setValue(i - 1, bd.doubleValue());
      vals[i - 1] =  dd;
    }
    break;
  case BYTE:
    byte by = rs.getByte(i);
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      vals[i - 1] = (double)by;
    }
    break;
  case SHORT:
    short sh = rs.getShort(i);
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      vals[i - 1] = (double)sh;
    }
    break;
  case INTEGER:
    int in = rs.getInt(i);
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      vals[i - 1] = (double)in;
    }
    break;
  case LONG:
    long lo = rs.getLong(i);
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      vals[i - 1] = (double)lo;
    }
    break;
  case FLOAT:
    float fl = rs.getFloat(i);
    if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
      vals[i - 1] = (double)fl;
    }
    break;
  case DATE:
          Date date = rs.getDate(i);
          if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
            // TODO: Do a value check here.
            vals[i - 1] = (double)date.getTime();
          }
          break;
  case TIME:
          Time time = rs.getTime(i);
          if (rs.wasNull()) {
      vals[i - 1] = Instance.missingValue();
    } else {
            // TODO: Do a value check here.
            vals[i - 1] = (double) time.getTime();
          }
          break;
  default:
    vals[i - 1] = Instance.missingValue();
  }
      }
      Instance newInst;
      if (m_CreateSparseData) {
        newInst = new SparseInstance(1.0, vals);
      } else {
        newInst = new Instance(1.0, vals);
      }
      instances.addElement(newInst);
      rowCount++;
    }
    //disconnectFromDatabase();  (perhaps other queries might be made)
   
    // Create the header and add the instances to the dataset
    if (m_Debug)
      System.err.println("Creating header...");
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < numAttributes; i++) {
      /* Fix for databases that uppercase column names */
      // String attribName = attributeCaseFix(md.getColumnName(i + 1));
      String attribName = attributeCaseFix(columnNames.get(i));
      switch (attributeTypes[i]) {
      case Attribute.NOMINAL:
        attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
        break;
      case Attribute.NUMERIC:
        attribInfo.addElement(new Attribute(attribName));
        break;
      case Attribute.STRING:
        Attribute att = new Attribute(attribName, (FastVector) null);
        attribInfo.addElement(att);
        for (int n = 0; n < nominalStrings[i].size(); n++) {
          att.addStringValue((String) nominalStrings[i].elementAt(n));
        }
        break;
      case Attribute.DATE:
        attribInfo.addElement(new Attribute(attribName, (String)null));
        break;
      default:
        throw new Exception("Unknown attribute type");
      }
    }
View Full Code Here

    if(m_ClassAttribute.isNominal())
      m_NumClasses = m_ClassAttribute.numValues();
    else
      m_NumClasses = 1;
 
    m_Antds = new FastVector();
    m_DefDstr = new double[m_NumClasses];
    m_Cnsqt = new double[m_NumClasses];
    m_Targets = new FastVector();     
    m_Random = new Random(m_Seed);
   
    if(m_NumAntds != -1){
      grow(data);
    }
View Full Code Here

    protected void fitLogistic(Instances insts, int cl1, int cl2,
        int numFolds, Random random)
      throws Exception {

      // Create header of instances object
      FastVector atts = new FastVector(2);
      atts.addElement(new Attribute("pred"));
      FastVector attVals = new FastVector(2);
      attVals.addElement(insts.classAttribute().value(cl1));
      attVals.addElement(insts.classAttribute().value(cl2));
      atts.addElement(new Attribute("class", attVals));
      Instances data = new Instances("data", atts, insts.numInstances());
      data.setClassIndex(1);

      // Collect data for fitting the logistic model
View Full Code Here


