Examples of weka.core.Instances

weka.core.Instances
all the instances in the file (ARFF, CSV, XRFF, ...) DataSource source = new DataSource(filename); Instances instances = source.getDataSet(); // Make the last attribute be the class instances.setClassIndex(instances.numAttributes() - 1); // Print header and instances. System.out.println("\nDataset:\n"); System.out.println(instances); ...
All methods that change a set of instances are safe, i.e. a change to one set of instances does not affect any other set of instances. All methods that change a dataset's attribute information clone the dataset before it is changed. @author Eibe Frank (eibe@cs.waikato.ac.nz) @author Len Trigg (trigg@cs.waikato.ac.nz) @author FracPete (fracpete at waikato dot ac dot nz) @version $Revision: 6996 $

    
    // Do some initialization if this is the first set of the first run
    if (e.getRunNumber() == 1 && e.getSetNumber() == 1) {
//      m_oldText = m_visual.getText();
      // store the training header
      m_trainingSet = new Instances(e.getTrainingSet(), 0);
      m_state = BUILDING_MODEL;
      
      String msg = "[Classifier] " + statusMessagePrefix() 
        + " starting executor pool ("
        + getExecutionSlots() + " slots)...";

View Full Code Here

    }
    
    
    weka.classifiers.Classifier classifierToUse = m_Classifier;
    
    Instances testSet = e.getTestSet();
    if (testSet != null) {
      if (testSet.classIndex() < 0) {
  //        testSet.setClassIndex(testSet.numAttributes() - 1);
        // stop all processing
        stop();
        String errorMessage = statusMessagePrefix()
            + "ERROR: no class attribute set in test data!";
        if (m_log != null) {
          m_log.statusMessage(errorMessage);
          m_log.logMessage("[Classifier] " + errorMessage);
        } else {
          System.err.println("[Classifier] " + errorMessage);
        }
        return;
      }
    }


    // If we just have a test set connection or
    // there is just one run involving one set (and we are not
    // currently building a model), then use the
    // last saved model
    if (classifierToUse != null && m_state == IDLE && 
        (!m_listenees.containsKey("trainingSet") || 
        (e.getMaxRunNumber() == 1 && e.getMaxSetNumber() == 1))) {
      // if this is structure only then just return at this point
      if (e.getTestSet() != null && e.isStructureOnly()) {
        return;
      }
      
      if (classifierToUse instanceof EnvironmentHandler && m_env != null) {
        ((EnvironmentHandler)classifierToUse).setEnvironment(m_env);
      }
      
      if (classifierToUse instanceof weka.classifiers.misc.InputMappedClassifier) {
        // make sure that we have the correct training header (if InputMappedClassifier
        // is loading a model from a file).
        try {
          m_trainingSet = 
            ((weka.classifiers.misc.InputMappedClassifier)classifierToUse).
              getModelHeader(m_trainingSet); // this returns the argument if a model is not being loaded
        } catch (Exception e1) {
          // TODO Auto-generated catch block
          e1.printStackTrace();
        }
      }
      
      // check that we have a training set/header (if we don't,
      // then it means that no model has been loaded
      if (m_trainingSet == null) {
        stop();
        String errorMessage = statusMessagePrefix()
            + "ERROR: no trained/loaded classifier to use for prediction!";
        if (m_log != null) {
          m_log.statusMessage(errorMessage);
          m_log.logMessage("[Classifier] " + errorMessage);
        } else {
          System.err.println("[Classifier] " + errorMessage);
        }
        return;
      }
      
      testSet = e.getTestSet();
      if (e.getRunNumber() == 1 && e.getSetNumber() == 1) {
        m_currentBatchIdentifier = new Date();
      }
      
      if (testSet != null) {        
        if (!m_trainingSet.equalHeaders(testSet) && 
            !(classifierToUse instanceof weka.classifiers.misc.InputMappedClassifier)) {
          boolean wrapClassifier = false;
          if (!Utils.
              getDontShowDialog("weka.gui.beans.Classifier.AutoWrapInInputMappedClassifier")) {
            
            java.awt.GraphicsEnvironment ge = 
              java.awt.GraphicsEnvironment.getLocalGraphicsEnvironment();
            if (!ge.isHeadless()) {
              JCheckBox dontShow = new JCheckBox("Do not show this message again");
              Object[] stuff = new Object[2];
              stuff[0] = "Data used to train model and test set are not compatible.\n" +
              "Would you like to automatically wrap the classifier in\n" + 
              "an \"InputMappedClassifier\" before proceeding?.\n";
              stuff[1] = dontShow;


              int result = JOptionPane.showConfirmDialog(this, stuff, 
                  "KnowledgeFlow:Classifier", JOptionPane.YES_OPTION);


              if (result == JOptionPane.YES_OPTION) {
                wrapClassifier = true;
              }


              if (dontShow.isSelected()) {
                String response = (wrapClassifier) ? "yes" : "no";
                try {
                  Utils.
                  setDontShowDialogResponse("weka.gui.explorer.ClassifierPanel.AutoWrapInInputMappedClassifier", 
                      response);
                } catch (Exception e1) {
                  // TODO Auto-generated catch block
                  e1.printStackTrace();
                }
              }
            } else {
              // running headless, so just go ahead and wrap anyway
              wrapClassifier = true;
            }
          } else {
            // What did the user say - do they want to autowrap or not?
            String response;
            try {
              response = Utils.getDontShowDialogResponse("weka.gui.explorer.ClassifierPanel.AutoWrapInInputMappedClassifier");
              if (response != null && response.equalsIgnoreCase("yes")) {
                wrapClassifier = true;
              }
            } catch (Exception e1) {
              // TODO Auto-generated catch block
              e1.printStackTrace();
            }
          }
          
          if (wrapClassifier) {
            weka.classifiers.misc.InputMappedClassifier temp =
              new weka.classifiers.misc.InputMappedClassifier();


            temp.setClassifier(classifierToUse);
            temp.setModelHeader(new Instances(m_trainingSet, 0));
            classifierToUse = temp;
          }          
        }         
        
        if (m_trainingSet.equalHeaders(testSet) || 
            (classifierToUse instanceof weka.classifiers.misc.InputMappedClassifier)) {
          BatchClassifierEvent ce =
            new BatchClassifierEvent(this, classifierToUse,                                       
                new DataSetEvent(this, m_trainingSet),
                new DataSetEvent(this, e.getTestSet()),
           e.getRunNumber(), e.getMaxRunNumber(), 
           e.getSetNumber(), e.getMaxSetNumber());
          ce.setGroupIdentifier(m_currentBatchIdentifier.getTime());
          
          if (m_log != null && !e.isStructureOnly()) {
            m_log.statusMessage(statusMessagePrefix() + "Finished.");
          }
          notifyBatchClassifierListeners(ce);          
        } else {
          // if headers do not match check to see if it's
          // just the class that is different and that
          // all class values are missing
          if (testSet.numInstances() > 0) {
            if (testSet.classIndex() == m_trainingSet.classIndex() && 
                testSet.attributeStats(testSet.classIndex()).missingCount ==
                testSet.numInstances()) {
              // now check the other attributes against the training
              // structure
              boolean ok = true;
              for (int i = 0; i < testSet.numAttributes(); i++) {
                if (i != testSet.classIndex()) {
                  ok = testSet.attribute(i).equals(m_trainingSet.attribute(i));
                  if (!ok) {
                    break;
                  }
                }
              }

View Full Code Here

                                + FILE_EXTENSION);
          }
        }


        weka.classifiers.Classifier temp = null;
        Instances tempHeader = null;
        // KOML ?
        if ((KOML.isPresent()) &&
            (loadFrom.getAbsolutePath().toLowerCase().
             endsWith(KOML.FILE_EXTENSION + FILE_EXTENSION))) {
          Vector v = (Vector) KOML.read(loadFrom.getAbsolutePath());

View Full Code Here

            saveTo.getAbsolutePath().toLowerCase().
            endsWith(KOML.FILE_EXTENSION + FILE_EXTENSION)) {
          SerializedModelSaver.saveKOML(saveTo,
                                        m_Classifier,
                                        (m_trainingSet != null)
                                        ? new Instances(m_trainingSet, 0)
                                        : null);
          /*          Vector v = new Vector();
          v.add(m_Classifier);
          if (m_trainingSet != null) {
            v.add(new Instances(m_trainingSet, 0));
          }
          v.trimToSize();
          KOML.write(saveTo.getAbsolutePath(), v); */
        } /* XStream */ else if ((XStream.isPresent()) &&
                                 saveTo.getAbsolutePath().toLowerCase().
            endsWith(XStream.FILE_EXTENSION + FILE_EXTENSION)) {


          SerializedModelSaver.saveXStream(saveTo,
                                           m_Classifier,
                                           (m_trainingSet != null)
                                           ? new Instances(m_trainingSet, 0)
                                           : null);
          /*          Vector v = new Vector();
          v.add(m_Classifier);
          if (m_trainingSet != null) {
            v.add(new Instances(m_trainingSet, 0));
          }
          v.trimToSize();
          XStream.write(saveTo.getAbsolutePath(), v); */
        } else /* binary */ {
          ObjectOutputStream os = 
            new ObjectOutputStream(new BufferedOutputStream(
                                   new FileOutputStream(saveTo)));
          os.writeObject(m_Classifier);
          if (m_trainingSet != null) {
            Instances header = new Instances(m_trainingSet, 0);
            os.writeObject(header);
          }
          os.close();
        }
        if (m_log != null) {

View Full Code Here


    // can classifier handle the data?
    getCapabilities().testWithFail(data);


    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();
    
    m_bagger = new Bagging();
    RandomTree rTree = new RandomTree();

View Full Code Here

   * @param json  the JSON object to convert
   * @param onlyHeader  whether to retrieve only the header
   * @return    the generated Instances object, null if not possible
   */
  protected static Instances toInstances(JSONNode json, boolean onlyHeader) {
    Instances  result;
    JSONNode  header;
    JSONNode  attributes;
    JSONNode  data;
    ArrayList<Attribute>  atts;
    Attribute  att;
    Instance  inst;
    int    i;
    int    classIndex;
    boolean[]  classAtt;
    
    header = json.getChild(HEADER);
    if (header == null) {
      System.err.println("No '" + HEADER + "' section!");
      return null;
    }
    data = json.getChild(DATA);
    if (data == null) {
      System.err.println("No '" + DATA + "' section!");
      return null;
    }
    
    // attributes
    attributes = header.getChild(ATTRIBUTES);
    if (attributes == null) {
      System.err.println("No '" + ATTRIBUTES + "' array!");
      return null;
    }
    atts       = new ArrayList<Attribute>();
    classAtt   = new boolean[1];
    classIndex = -1;
    for (i = 0; i < attributes.getChildCount(); i++) {
      att = toAttribute((JSONNode) attributes.getChildAt(i), classAtt);
      if (att == null) {
  System.err.println("Could not convert attribute #" + (i+1) + "!");
  return null;
      }
      if (classAtt[0])
  classIndex = i;
      atts.add(att);
    }
    result = new Instances(
  header.getChild(RELATION).getValue("unknown").toString(), 
  atts, 
  (onlyHeader ? 0 : data.getChildCount()));
    result.setClassIndex(classIndex);
    
    // data
    if (!onlyHeader) {
      for (i = 0; i < data.getChildCount(); i++) {
  inst = toInstance((JSONNode) data.getChildAt(i), result);
  if (inst == null) {
    System.err.println("Could not convert instance #" + (i+1) + "!");
    return null;
  }
  result.add(inst);
      }
    }
    
    return result;
  }

View Full Code Here

      System.err.println("No dataset supplied!");
      System.exit(1);
    }


    // load dataset
    Instances data = DataSource.read(args[0]);
    
    // turn Instances into JSON object and output it
    JSONNode json = toJSON(data);
    StringBuffer buffer = new StringBuffer();
    json.toString(buffer);
    System.out.println(buffer.toString());
    
    // turn JSON object back into Instances and output it
    Instances inst = toInstances(json);
    System.out.println(inst);
  }

View Full Code Here

      m_ignoreBut.setEnabled(false);
      m_RunThread = new Thread() {
  public void run() {
    // Copy the current state of things
    m_Log.statusMessage("Setting up...");
    Instances inst = new Instances(m_Instances);
    inst.setClassIndex(-1);
    Instances userTest = null;
    ClustererAssignmentsPlotInstances plotInstances = ExplorerDefaults.getClustererAssignmentsPlotInstances();
    plotInstances.setClusterer((Clusterer) m_ClustererEditor.getValue());
    if (m_TestInstances != null) {
      userTest = new Instances(m_TestInstances);
    }
    
    boolean saveVis = m_StorePredictionsBut.isSelected();
    String grph = null;
    int[] ignoredAtts = null;


    int testMode = 0;
    int percent = 66;
    Clusterer clusterer = (Clusterer) m_ClustererEditor.getValue();
    Clusterer fullClusterer = null;
    StringBuffer outBuff = new StringBuffer();
    String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
    String cname = clusterer.getClass().getName();
    if (cname.startsWith("weka.clusterers.")) {
      name += cname.substring("weka.clusterers.".length());
    } else {
      name += cname;
    }
          String cmd = m_ClustererEditor.getValue().getClass().getName();
          if (m_ClustererEditor.getValue() instanceof OptionHandler)
            cmd += " " + Utils.joinOptions(((OptionHandler) m_ClustererEditor.getValue()).getOptions());
    try {
      m_Log.logMessage("Started " + cname);
      m_Log.logMessage("Command: " + cmd);
      if (m_Log instanceof TaskLogger) {
        ((TaskLogger)m_Log).taskStarted();
      }
      if (m_PercentBut.isSelected()) {
        testMode = 2;
        percent = Integer.parseInt(m_PercentText.getText());
        if ((percent <= 0) || (percent >= 100)) {
    throw new Exception("Percentage must be between 0 and 100");
        }
      } else if (m_TrainBut.isSelected()) {
        testMode = 3;
      } else if (m_TestSplitBut.isSelected()) {
        testMode = 4;
        // Check the test instance compatibility
        if (userTest == null) {
    throw new Exception("No user test set has been opened");
        }
        if (!inst.equalHeaders(userTest)) {
    throw new Exception("Train and test set are not compatible\n" + inst.equalHeadersMsg(userTest));
        }
      } else if (m_ClassesToClustersBut.isSelected()) {
        testMode = 5;
      } else {
        throw new Exception("Unknown test mode");
      }


      Instances trainInst = new Instances(inst);
      if (m_ClassesToClustersBut.isSelected()) {
        trainInst.setClassIndex(m_ClassCombo.getSelectedIndex());
        inst.setClassIndex(m_ClassCombo.getSelectedIndex());
        if (inst.classAttribute().isNumeric()) {
    throw new Exception("Class must be nominal for class based "
            +"evaluation!");
        }
      }
      if (!m_ignoreKeyList.isSelectionEmpty()) {
        trainInst = removeIgnoreCols(trainInst);
      }


      // Output some header information
      outBuff.append("=== Run information ===\n\n");
      outBuff.append("Scheme:       " + cname);
      if (clusterer instanceof OptionHandler) {
        String [] o = ((OptionHandler) clusterer).getOptions();
        outBuff.append(" " + Utils.joinOptions(o));
      }
      outBuff.append("\n");
      outBuff.append("Relation:     " + inst.relationName() + '\n');
      outBuff.append("Instances:    " + inst.numInstances() + '\n');
      outBuff.append("Attributes:   " + inst.numAttributes() + '\n');
      if (inst.numAttributes() < 100) {
        boolean [] selected = new boolean [inst.numAttributes()];
        for (int i = 0; i < inst.numAttributes(); i++) {
    selected[i] = true;
        }
        if (!m_ignoreKeyList.isSelectionEmpty()) {
    int [] indices = m_ignoreKeyList.getSelectedIndices();
    for (int i = 0; i < indices.length; i++) {
      selected[indices[i]] = false;
    }
        }
        if (m_ClassesToClustersBut.isSelected()) {
    selected[m_ClassCombo.getSelectedIndex()] = false;
        }
        for (int i = 0; i < inst.numAttributes(); i++) {
    if (selected[i]) {
      outBuff.append("              " + inst.attribute(i).name()
         + '\n');
    }
        }
        if (!m_ignoreKeyList.isSelectionEmpty() 
      || m_ClassesToClustersBut.isSelected()) {
    outBuff.append("Ignored:\n");
    for (int i = 0; i < inst.numAttributes(); i++) {
      if (!selected[i]) {
        outBuff.append("              " + inst.attribute(i).name()
           + '\n');
      }
    }
        }
      } else {
        outBuff.append("              [list of attributes omitted]\n");
      }


      if (!m_ignoreKeyList.isSelectionEmpty()) {
        ignoredAtts = m_ignoreKeyList.getSelectedIndices();
      }


      if (m_ClassesToClustersBut.isSelected()) {
        // add class to ignored list
        if (ignoredAtts == null) {
    ignoredAtts = new int[1];
    ignoredAtts[0] = m_ClassCombo.getSelectedIndex();
        } else {
    int[] newIgnoredAtts = new int[ignoredAtts.length+1];
    System.arraycopy(ignoredAtts, 0, newIgnoredAtts, 0, ignoredAtts.length);
    newIgnoredAtts[ignoredAtts.length] = m_ClassCombo.getSelectedIndex();
    ignoredAtts = newIgnoredAtts;
        }
      }




      outBuff.append("Test mode:    ");
      switch (testMode) {
        case 3: // Test on training
        outBuff.append("evaluate on training data\n");
        break;
        case 2: // Percent split
        outBuff.append("split " + percent
             + "% train, remainder test\n");
        break;
        case 4: // Test on user split
        outBuff.append("user supplied test set: "
           + userTest.numInstances() + " instances\n");
        break;
      case 5: // Classes to clusters evaluation on training
        outBuff.append("Classes to clusters evaluation on training data");
        
        break;
      }
      outBuff.append("\n");
      m_History.addResult(name, outBuff);
      m_History.setSingle(name);
      
      // Build the model and output it.
      m_Log.statusMessage("Building model on training data...");


      // remove the class attribute (if set) and build the clusterer
      clusterer.buildClusterer(removeClass(trainInst));
      
      if (testMode == 2) {
        outBuff.append("\n=== Clustering model (full training set) ===\n\n");
      
        outBuff.append(clusterer.toString() + '\n');
      }
      m_History.updateResult(name);
      if (clusterer instanceof Drawable) {
        try {
    grph = ((Drawable)clusterer).graph();
        } catch (Exception ex) {
        }
      }
      // copy full model for output
      SerializedObject so = new SerializedObject(clusterer);
      fullClusterer = (Clusterer) so.getObject();
      
      ClusterEvaluation eval = new ClusterEvaluation();
      eval.setClusterer(clusterer);
      switch (testMode) {
        case 3: case 5: // Test on training
        m_Log.statusMessage("Clustering training data...");
        eval.evaluateClusterer(trainInst);
        plotInstances.setInstances(inst);
        plotInstances.setClusterEvaluation(eval);
        outBuff.append("=== Model and evaluation on training set ===\n\n");
        break;


        case 2: // Percent split
        m_Log.statusMessage("Randomizing instances...");
        inst.randomize(new Random(1));
        trainInst.randomize(new Random(1));
        int trainSize = trainInst.numInstances() * percent / 100;
        int testSize = trainInst.numInstances() - trainSize;
        Instances train = new Instances(trainInst, 0, trainSize);
        Instances test = new Instances(trainInst, trainSize, testSize);
        Instances testVis = new Instances(inst, trainSize, testSize);
        m_Log.statusMessage("Building model on training split...");
        clusterer.buildClusterer(train);
        m_Log.statusMessage("Evaluating on test split...");
        eval.evaluateClusterer(test);
        plotInstances.setInstances(testVis);
        plotInstances.setClusterEvaluation(eval);
        outBuff.append("=== Model and evaluation on test split ===\n");
        break;
    
        case 4: // Test on user split
        m_Log.statusMessage("Evaluating on test data...");
        Instances userTestT = new Instances(userTest);
        if (!m_ignoreKeyList.isSelectionEmpty()) {
    userTestT = removeIgnoreCols(userTestT);
        }
        eval.evaluateClusterer(userTestT);
        plotInstances.setInstances(userTest);
        plotInstances.setClusterEvaluation(eval);
        outBuff.append("=== Model and evaluation on test set ===\n");
        break;


        default:
        throw new Exception("Test mode not implemented");
      }
      outBuff.append(eval.clusterResultsToString());
      outBuff.append("\n");
      m_History.updateResult(name);
      m_Log.logMessage("Finished " + cname);
      m_Log.statusMessage("OK");
    } catch (Exception ex) {
      ex.printStackTrace();
      m_Log.logMessage(ex.getMessage());
      JOptionPane.showMessageDialog(ClustererPanel.this,
            "Problem evaluating clusterer:\n"
            + ex.getMessage(),
            "Evaluate clusterer",
            JOptionPane.ERROR_MESSAGE);
      m_Log.statusMessage("Problem evaluating clusterer");
    } finally {
      if (plotInstances != null) {
        plotInstances.setUp();
        m_CurrentVis = new VisualizePanel();
        m_CurrentVis.setName(name+" ("+inst.relationName()+")");
        m_CurrentVis.setLog(m_Log);
        try {
    m_CurrentVis.addPlot(plotInstances.getPlotData(name));
        } catch (Exception ex) {
    System.err.println(ex);
        }
        plotInstances.cleanUp();


        FastVector vv = new FastVector();
        vv.addElement(fullClusterer);
        Instances trainHeader = new Instances(m_Instances, 0);
        vv.addElement(trainHeader);
        if (ignoredAtts != null) vv.addElement(ignoredAtts);
        if (saveVis) {
    vv.addElement(m_CurrentVis);
    if (grph != null) {

View Full Code Here

    }
  }


  private Instances removeClass(Instances inst) {
    Remove af = new Remove();
    Instances retI = null;
    
    try {
      if (inst.classIndex() < 0) {
  retI = inst;
      } else {

View Full Code Here

  m_ignoreKeyList.removeSelectionInterval(classIndex, classIndex);
      }
    }
    int [] selected = m_ignoreKeyList.getSelectedIndices();
    Remove af = new Remove();
    Instances retI = null;


    try {
      af.setAttributeIndicesArray(selected);
      af.setInvertSelection(false);
      af.setInputFormat(inst);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of weka.core.Instances

edu.brown.markov.FeatureClusterer

org.goai.classification.impl.WekaClassifier

weka.associations.Apriori

weka.associations.AssociatorEvaluation

weka.associations.FPGrowth

weka.classifiers.bayes.net.EditableBayesNet

weka.classifiers.bayes.net.estimate.MultiNomialBMAEstimator

weka.classifiers.bayes.net.search.local.LocalScoreSearchAlgorithm

weka.classifiers.evaluation.output.prediction.AbstractOutput

weka.classifiers.evaluation.ThresholdCurve

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.