Class weka.core.converters.ConverterUtils

Examples of weka.core.converters.ConverterUtils.DataSource


  protected Instances process(Instances instances) throws Exception {
    // initializing necessary?
    if (!m_Initialized) {
      // do we have a file to initialize with?
      if ((getInitFile() != null) && getInitFile().isFile()) {
  DataSource source = new DataSource(getInitFile().getAbsolutePath());
  Instances data = source.getDataSet();
  m_InitFileClassIndex.setUpper(data.numAttributes() - 1);
  data.setClassIndex(m_InitFileClassIndex.getIndex());
  initFilter(data);
      }
      else {
View Full Code Here


    throws Exception {

    String trainFileString = "";
    String graphFileName = "";
    AssociatorEvaluation eval;
    DataSource loader;

    // help?
    if (Utils.getFlag('h', options))
      throw new Exception("\nHelp requested.\n" + makeOptionString(associator));
   
    try {
      // general options
      trainFileString = Utils.getOption('t', options);
      if (trainFileString.length() == 0)
  throw new Exception("No training file given!");
      loader = new DataSource(trainFileString);

      graphFileName = Utils.getOption('g', options);

      // associator specific options
      if (associator instanceof OptionHandler) {
        ((OptionHandler) associator).setOptions(options);
      }
     
      // left-over options?
      Utils.checkForRemainingOptions(options);
    }
    catch (Exception e) {
      throw new Exception(
    "\nWeka exception: "
    + e.getMessage() + "\n"
    + makeOptionString(associator));
    }
   
    // load file and build associations
    eval = new AssociatorEvaluation();
    String results = eval.evaluate(associator, new Instances(loader.getDataSet()));

    // If associator is drawable output string describing graph
    if ((associator instanceof Drawable) && (graphFileName.length() != 0)) {
      BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
      writer.write(((Drawable) associator).graph());
View Full Code Here

    Instances testRaw = null;
    boolean hasClass = (test.classIndex() >= 0);
    int unclusteredInstances = 0;
    Vector<Double> clusterAssignments = new Vector<Double>();
    Filter filter = null;
    DataSource source = null;
    Instance inst;

    if (testFileName == null)
      testFileName = "";
   
    // load data
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(test);
    testRaw = source.getStructure(test.classIndex());
   
    // If class is set then do class based evaluation as well
    if (hasClass) {
      if (testRaw.classAttribute().isNumeric())
  throw new Exception("ClusterEvaluation: Class must be nominal!");

      filter = new Remove();
      ((Remove) filter).setAttributeIndices("" + (testRaw.classIndex() + 1));
      ((Remove) filter).setInvertSelection(false);
      filter.setInputFormat(testRaw);
    }
   
    i = 0;
    while (source.hasMoreElements(testRaw)) {
      // next instance
      inst = source.nextElement(testRaw);
      if (filter != null) {
  filter.input(inst);
  filter.batchFinished();
  inst = filter.output();
      }
View Full Code Here

    int numClasses = inst.classAttribute().numValues();
    int[][] counts = new int [m_numClusters][numClasses];
    int[] clusterTotals = new int[m_numClusters];
    double[] best = new double[m_numClusters+1];
    double[] current = new double[m_numClusters+1];
    DataSource source = null;
    Instances instances = null;
    Instance instance = null;
    int i;
    int numInstances;
       

    if (fileName == null)
      fileName = "";
   
    if (fileName.length() != 0) {
      source = new DataSource(fileName);
    }
    else
      source = new DataSource(inst);
    instances = source.getStructure(inst.classIndex());

    i = 0;
    while (source.hasMoreElements(instances)) {
      instance = source.nextElement(instances);
      if (m_clusterAssignments[i] >= 0) {
        counts[(int)m_clusterAssignments[i]][(int)instance.classValue()]++;
        clusterTotals[(int)m_clusterAssignments[i]]++;       
      }
      i++;
View Full Code Here

    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {
     
      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) ||
        Utils.getFlag("info", options);
     
      throw  new Exception("Help requested."
          + makeOptionString(clusterer, globalInfo));
    }
   
    try {
      // Get basic options (options that are the same for all clusterers)
      //printClusterAssignments = Utils.getFlag('p', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);
      graphFileName = Utils.getOption('g', options);

      // Check -p option
      try {
  attributeRangeString = Utils.getOption('p', options);
      }
      catch (Exception e) {
  throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
          "It now expects a parameter specifying a range of attributes " +
          "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
  printClusterAssignments = true;
  if (!attributeRangeString.equals("0"))
    attributesToOutput = new Range(attributeRangeString);
      }

      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw  new Exception("No training file and no object "
             + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw  new Exception("No training file and no test file given.");
        }
      }
      else {
  if ((objectInputFileName.length() != 0)
      && (printClusterAssignments == false)) {
    throw  new Exception("Can't use both train and model file "
             + "unless -p specified.");
  }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
  seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
  folds = Integer.parseInt(foldsString);
  doXval = true;
      }
    }
    catch (Exception e) {
      throw  new Exception('\n' + e.getMessage()
         + makeOptionString(clusterer, false));
    }

    try {
      if (trainFileName.length() != 0) {
  source = new DataSource(trainFileName);
  train  = source.getStructure();

  String classString = Utils.getOption('c',options);
  if (classString.length() != 0) {
    if (classString.compareTo("last") == 0)
      theClass = train.numAttributes();
    else if (classString.compareTo("first") == 0)
      theClass = 1;
    else
      theClass = Integer.parseInt(classString);

    if (theClass != -1) {
      if (doXval || testFileName.length() != 0)
        throw new Exception("Can only do class based evaluation on the "
      +"training data");

      if (objectInputFileName.length() != 0)
        throw new Exception("Can't load a clusterer and do class based "
      +"evaluation");

      if (objectOutputFileName.length() != 0)
        throw new Exception(
      "Can't do class based evaluation and save clusterer");
    }
  }
  else {
    // if the dataset defines a class attribute, use it
    if (train.classIndex() != -1) {
      theClass = train.classIndex() + 1;
      System.err.println(
    "Note: using class attribute from dataset, i.e., attribute #"
    + theClass);
    }
  }

  if (theClass != -1) {
    if (theClass < 1 || theClass > train.numAttributes())
      throw new Exception("Class is out of range!");

    if (!train.attribute(theClass - 1).isNominal())
      throw new Exception("Class must be nominal!");
   
    train.setClassIndex(theClass - 1);
  }
      }
    }
    catch (Exception e) {
      throw  new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
      Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
      ((OptionHandler)clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file
      //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
      java.io.ObjectInputStream ois =
        new java.io.ObjectInputStream(
        new java.io.BufferedInputStream(
        new java.io.FileInputStream(objectInputFileName)));
      clusterer = (Clusterer) ois.readObject();
      // try and get the training header
      try {
        trainHeader = (Instances) ois.readObject();
      } catch (Exception ex) {
        // don't complain if we can't read the training header
      }
    }
    else {
      // Build the clusterer if no object file provided
      if (theClass == -1) {
  if (updateable) {
    clusterer.buildClusterer(source.getStructure());
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      ((UpdateableClusterer) clusterer).updateClusterer(inst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    clusterer.buildClusterer(source.getDataSet());
  }
      }
      else {
  Remove removeClass = new Remove();
  removeClass.setAttributeIndices("" + theClass);
  removeClass.setInvertSelection(false);
  removeClass.setInputFormat(train);
  if (updateable) {
    Instances clusterTrain = Filter.useFilter(train, removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      removeClass.input(inst);
      removeClass.batchFinished();
      Instance clusterTrainInst = removeClass.output();
      ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
  }
  ClusterEvaluation ce = new ClusterEvaluation();
  ce.setClusterer(clusterer);
  ce.evaluateClusterer(train, trainFileName);
 
  return "\n\n=== Clustering stats for training data ===\n\n" +
    ce.clusterResultsToString();
      }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data) */
    if (printClusterAssignments) {
      return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append("\n\n=== Clustering stats for training data ===\n\n"
    + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
      // check header compatibility
      DataSource test = new DataSource(testFileName);
      Instances testStructure = test.getStructure();
      if (!trainHeader.equalHeaders(testStructure)) {
        throw new Exception("Training and testing data are not compatible\n" + trainHeader.equalHeadersMsg(testStructure));
      }

      text.append("\n\n=== Clustering stats for testing data ===\n\n"
View Full Code Here

    int cc = clusterer.numberOfClusters();
    double[] instanceStats = new double[cc];
    int unclusteredInstances = 0;

    if (fileName.length() != 0) {
      DataSource source = new DataSource(fileName);
      Instances structure = source.getStructure();
      Instance inst;
      while (source.hasMoreElements(structure)) {
  inst = source.nextElement(structure);
  try {
    cnum = clusterer.clusterInstance(inst);

    if (clusterer instanceof DensityBasedClusterer) {
      loglk += ((DensityBasedClusterer)clusterer).
View Full Code Here

    throws Exception {

    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    DataSource source = null;
    Instance inst;
    Instances structure;
   
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(trainFileName);
   
    structure = source.getStructure();
    while (source.hasMoreElements(structure)) {
      inst = source.nextElement(structure);
      try {
  cnum = clusterer.clusterInstance(inst);
 
  text.append(i + " " + cnum + " "
      + attributeValuesString(inst, attributesToOutput) + "\n");
View Full Code Here

  public static void filterFile(Filter filter, String [] options)
    throws Exception {

    boolean debug = false;
    Instances data = null;
    DataSource input = null;
    PrintWriter output = null;
    boolean helpRequest;
    String sourceCode = "";

    try {
       helpRequest = Utils.getFlag('h', options);

      if (Utils.getFlag('d', options)) {
  debug = true;
      }
      String infileName = Utils.getOption('i', options);
      String outfileName = Utils.getOption('o', options);
      String classIndex = Utils.getOption('c', options);
      if (filter instanceof Sourcable)
  sourceCode = Utils.getOption('z', options);
     
      if (filter instanceof OptionHandler) {
  ((OptionHandler)filter).setOptions(options);
      }

      Utils.checkForRemainingOptions(options);
      if (helpRequest) {
  throw new Exception("Help requested.\n");
      }
      if (infileName.length() != 0) {
  input = new DataSource(infileName);
      } else {
  input = new DataSource(System.in);
      }
      if (outfileName.length() != 0) {
  output = new PrintWriter(new FileOutputStream(outfileName));
      } else {
  output = new PrintWriter(System.out);
      }

      data = input.getStructure();
      if (classIndex.length() != 0) {
  if (classIndex.equals("first")) {
    data.setClassIndex(0);
  } else if (classIndex.equals("last")) {
    data.setClassIndex(data.numAttributes() - 1);
  } else {
    data.setClassIndex(Integer.parseInt(classIndex) - 1);
  }
      }
    } catch (Exception ex) {
      String filterOptions = "";
      // Output the error and also the valid options
      if (filter instanceof OptionHandler) {
  filterOptions += "\nFilter options:\n\n";
  Enumeration enu = ((OptionHandler)filter).listOptions();
  while (enu.hasMoreElements()) {
    Option option = (Option) enu.nextElement();
    filterOptions += option.synopsis() + '\n'
      + option.description() + "\n";
  }
      }

      String genericOptions = "\nGeneral options:\n\n"
  + "-h\n"
  + "\tGet help on available options.\n"
  + "\t(use -b -h for help on batch mode.)\n"
  + "-i <file>\n"
  + "\tThe name of the file containing input instances.\n"
  + "\tIf not supplied then instances will be read from stdin.\n"
  + "-o <file>\n"
  + "\tThe name of the file output instances will be written to.\n"
  + "\tIf not supplied then instances will be written to stdout.\n"
  + "-c <class index>\n"
  + "\tThe number of the attribute to use as the class.\n"
  + "\t\"first\" and \"last\" are also valid entries.\n"
  + "\tIf not supplied then no class is assigned.\n";

      if (filter instanceof Sourcable) {
  genericOptions +=
    "-z <class name>\n"
    + "\tOutputs the source code representing the trained filter.\n";
      }
     
      throw new Exception('\n' + ex.getMessage()
        + filterOptions+genericOptions);
    }
   
    if (debug) {
      System.err.println("Setting input format");
    }
    boolean printedHeader = false;
    if (filter.setInputFormat(data)) {
      if (debug) {
  System.err.println("Getting output format");
      }
      output.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }
   
    // Pass all the instances to the filter
    Instance inst;
    while (input.hasMoreElements(data)) {
      inst = input.nextElement(data);
      if (debug) {
  System.err.println("Input instance to filter");
      }
      if (filter.input(inst)) {
  if (debug) {
View Full Code Here

  public static void batchFilterFile(Filter filter, String [] options)
    throws Exception {

    Instances firstData = null;
    Instances secondData = null;
    DataSource firstInput = null;
    DataSource secondInput = null;
    PrintWriter firstOutput = null;
    PrintWriter secondOutput = null;
    boolean helpRequest;
    String sourceCode = "";

    try {
      helpRequest = Utils.getFlag('h', options);

      String fileName = Utils.getOption('i', options);
      if (fileName.length() != 0) {
  firstInput = new DataSource(fileName);
      } else {
  throw new Exception("No first input file given.\n");
      }

      fileName = Utils.getOption('r', options);
      if (fileName.length() != 0) {
  secondInput = new DataSource(fileName);
      } else {
  throw new Exception("No second input file given.\n");
      }

      fileName = Utils.getOption('o', options);
      if (fileName.length() != 0) {
  firstOutput = new PrintWriter(new FileOutputStream(fileName));
      } else {
  firstOutput = new PrintWriter(System.out);
      }
     
      fileName = Utils.getOption('s', options);
      if (fileName.length() != 0) {
  secondOutput = new PrintWriter(new FileOutputStream(fileName));
      } else {
  secondOutput = new PrintWriter(System.out);
      }
      String classIndex = Utils.getOption('c', options);
      if (filter instanceof Sourcable)
  sourceCode = Utils.getOption('z', options);

      if (filter instanceof OptionHandler) {
  ((OptionHandler)filter).setOptions(options);
      }
      Utils.checkForRemainingOptions(options);
     
      if (helpRequest) {
  throw new Exception("Help requested.\n");
      }
      firstData = firstInput.getStructure();
      secondData = secondInput.getStructure();
      if (!secondData.equalHeaders(firstData)) {
  throw new Exception("Input file formats differ.\n" + secondData.equalHeadersMsg(firstData) + "\n");
      }
      if (classIndex.length() != 0) {
  if (classIndex.equals("first")) {
    firstData.setClassIndex(0);
    secondData.setClassIndex(0);
  } else if (classIndex.equals("last")) {
    firstData.setClassIndex(firstData.numAttributes() - 1);
    secondData.setClassIndex(secondData.numAttributes() - 1);
  } else {
    firstData.setClassIndex(Integer.parseInt(classIndex) - 1);
    secondData.setClassIndex(Integer.parseInt(classIndex) - 1);
  }
      }
    } catch (Exception ex) {
      String filterOptions = "";
      // Output the error and also the valid options
      if (filter instanceof OptionHandler) {
  filterOptions += "\nFilter options:\n\n";
  Enumeration enu = ((OptionHandler)filter).listOptions();
  while (enu.hasMoreElements()) {
    Option option = (Option) enu.nextElement();
    filterOptions += option.synopsis() + '\n'
      + option.description() + "\n";
  }
      }

      String genericOptions = "\nGeneral options:\n\n"
  + "-h\n"
  + "\tGet help on available options.\n"
  + "-i <filename>\n"
  + "\tThe file containing first input instances.\n"
  + "-o <filename>\n"
  + "\tThe file first output instances will be written to.\n"
  + "-r <filename>\n"
  + "\tThe file containing second input instances.\n"
  + "-s <filename>\n"
  + "\tThe file second output instances will be written to.\n"
  + "-c <class index>\n"
  + "\tThe number of the attribute to use as the class.\n"
  + "\t\"first\" and \"last\" are also valid entries.\n"
  + "\tIf not supplied then no class is assigned.\n";

      if (filter instanceof Sourcable) {
  genericOptions +=
    "-z <class name>\n"
    + "\tOutputs the source code representing the trained filter.\n";
      }
     
      throw new Exception('\n' + ex.getMessage()
        + filterOptions+genericOptions);
    }
    boolean printedHeader = false;
    if (filter.setInputFormat(firstData)) {
      firstOutput.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }
   
    // Pass all the instances to the filter
    Instance inst;
    while (firstInput.hasMoreElements(firstData)) {
      inst = firstInput.nextElement(firstData);
      if (filter.input(inst)) {
  if (!printedHeader) {
    throw new Error("Filter didn't return true from setInputFormat() "
        + "earlier!");
  }
  firstOutput.println(filter.output().toString());
      }
    }
   
    // Say that input has finished, and print any pending output instances
    if (filter.batchFinished()) {
      if (!printedHeader) {
  firstOutput.println(filter.getOutputFormat().toString());
      }
      while (filter.numPendingOutput() > 0) {
  firstOutput.println(filter.output().toString());
      }
    }
   
    if (firstOutput != null) {
      firstOutput.close();
    }   
    printedHeader = false;
    if (filter.isOutputFormatDefined()) {
      secondOutput.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }
    // Pass all the second instances to the filter
    while (secondInput.hasMoreElements(secondData)) {
      inst = secondInput.nextElement(secondData);
      if (filter.input(inst)) {
  if (!printedHeader) {
    throw new Error("Filter didn't return true from"
        + " isOutputFormatDefined() earlier!");
  }
View Full Code Here

  public void run() {
    // Copy the current state of things
    m_Log.statusMessage("Setting up...");
    CostMatrix costMatrix = null;
    Instances inst = new Instances(m_Instances);
    DataSource source = null;
          Instances userTestStructure = null;
    ClassifierErrorsPlotInstances plotInstances = null;
   
    // for timing
    long trainTimeStart = 0, trainTimeElapsed = 0;

          try {
            if (m_TestLoader != null && m_TestLoader.getStructure() != null) {
              m_TestLoader.reset();
              source = new DataSource(m_TestLoader);
              userTestStructure = source.getStructure();
              userTestStructure.setClassIndex(m_TestClassIndex);
            }
          } catch (Exception ex) {
            ex.printStackTrace();
          }
    if (m_EvalWRTCostsBut.isSelected()) {
      costMatrix = new CostMatrix((CostMatrix) m_CostMatrixEditor
          .getValue());
    }
    boolean outputModel = m_OutputModelBut.isSelected();
    boolean outputConfusion = m_OutputConfusionBut.isSelected();
    boolean outputPerClass = m_OutputPerClassBut.isSelected();
    boolean outputSummary = true;
          boolean outputEntropy = m_OutputEntropyBut.isSelected();
    boolean saveVis = m_StorePredictionsBut.isSelected();
    boolean outputPredictionsText = (m_ClassificationOutputEditor.getValue().getClass() != Null.class);

    String grph = null;

    int testMode = 0;
    int numFolds = 10;
          double percent = 66;
    int classIndex = m_ClassCombo.getSelectedIndex();
    inst.setClassIndex(classIndex);
    Classifier classifier = (Classifier) m_ClassifierEditor.getValue();
    Classifier template = null;
    try {
      template = AbstractClassifier.makeCopy(classifier);
    } catch (Exception ex) {
      m_Log.logMessage("Problem copying classifier: " + ex.getMessage());
    }
    Classifier fullClassifier = null;
    StringBuffer outBuff = new StringBuffer();
    AbstractOutput classificationOutput = null;
    if (outputPredictionsText) {
      classificationOutput = (AbstractOutput) m_ClassificationOutputEditor.getValue();
      Instances header = new Instances(inst, 0);
      header.setClassIndex(classIndex);
      classificationOutput.setHeader(header);
      classificationOutput.setBuffer(outBuff);
    }
    String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
    String cname = "";
          String cmd = "";
    Evaluation eval = null;
    try {
      if (m_CVBut.isSelected()) {
        testMode = 1;
        numFolds = Integer.parseInt(m_CVText.getText());
        if (numFolds <= 1) {
    throw new Exception("Number of folds must be greater than 1");
        }
      } else if (m_PercentBut.isSelected()) {
        testMode = 2;
        percent = Double.parseDouble(m_PercentText.getText());
        if ((percent <= 0) || (percent >= 100)) {
    throw new Exception("Percentage must be between 0 and 100");
        }
      } else if (m_TrainBut.isSelected()) {
        testMode = 3;
      } else if (m_TestSplitBut.isSelected()) {
        testMode = 4;
        // Check the test instance compatibility
        if (source == null) {
          throw new Exception("No user test set has been specified");
        }
       
        if (!(classifier instanceof weka.classifiers.misc.InputMappedClassifier)) {
          if (!inst.equalHeaders(userTestStructure)) {
            boolean wrapClassifier = false;
            if (!Utils.
                getDontShowDialog("weka.gui.explorer.ClassifierPanel.AutoWrapInInputMappedClassifier")) {
              JCheckBox dontShow = new JCheckBox("Do not show this message again");
              Object[] stuff = new Object[2];
              stuff[0] = "Train and test set are not compatible.\n" +
              "Would you like to automatically wrap the classifier in\n" +
              "an \"InputMappedClassifier\" before proceeding?.\n";
              stuff[1] = dontShow;

              int result = JOptionPane.showConfirmDialog(ClassifierPanel.this, stuff,
                  "ClassifierPanel", JOptionPane.YES_OPTION);
             
              if (result == JOptionPane.YES_OPTION) {
                wrapClassifier = true;
              }
             
              if (dontShow.isSelected()) {
                String response = (wrapClassifier) ? "yes" : "no";
                Utils.
                  setDontShowDialogResponse("weka.gui.explorer.ClassifierPanel.AutoWrapInInputMappedClassifier",
                      response);
              }

            } else {
              // What did the user say - do they want to autowrap or not?
              String response =
                Utils.getDontShowDialogResponse("weka.gui.explorer.ClassifierPanel.AutoWrapInInputMappedClassifier");
              if (response != null && response.equalsIgnoreCase("yes")) {
                wrapClassifier = true;
              }
            }

            if (wrapClassifier) {
              weka.classifiers.misc.InputMappedClassifier temp =
                new weka.classifiers.misc.InputMappedClassifier();

              // pass on the known test structure so that we get the
              // correct mapping report from the toString() method
              // of InputMappedClassifier
              temp.setClassifier(classifier);
              temp.setTestStructure(userTestStructure);
              classifier = temp;
            } else {
              throw new Exception("Train and test set are not compatible\n" + inst.equalHeadersMsg(userTestStructure));
            }
          }
        }
             
      } else {
        throw new Exception("Unknown test mode");
      }

      cname = classifier.getClass().getName();
      if (cname.startsWith("weka.classifiers.")) {
        name += cname.substring("weka.classifiers.".length());
      } else {
        name += cname;
      }
      cmd = classifier.getClass().getName();
      if (classifier instanceof OptionHandler)
        cmd += " " + Utils.joinOptions(((OptionHandler) classifier).getOptions());
     
      // set up the structure of the plottable instances for
      // visualization
      plotInstances = ExplorerDefaults.getClassifierErrorsPlotInstances();
      plotInstances.setInstances(inst);
      plotInstances.setClassifier(classifier);
      plotInstances.setClassIndex(inst.classIndex());
      plotInstances.setSaveForVisualization(saveVis);

      // Output some header information
      m_Log.logMessage("Started " + cname);
      m_Log.logMessage("Command: " + cmd);
      if (m_Log instanceof TaskLogger) {
        ((TaskLogger)m_Log).taskStarted();
      }
      outBuff.append("=== Run information ===\n\n");
      outBuff.append("Scheme:       " + cname);
      if (classifier instanceof OptionHandler) {
        String [] o = ((OptionHandler) classifier).getOptions();
        outBuff.append(" " + Utils.joinOptions(o));
      }
      outBuff.append("\n");
      outBuff.append("Relation:     " + inst.relationName() + '\n');
      outBuff.append("Instances:    " + inst.numInstances() + '\n');
      outBuff.append("Attributes:   " + inst.numAttributes() + '\n');
      if (inst.numAttributes() < 100) {
        for (int i = 0; i < inst.numAttributes(); i++) {
    outBuff.append("              " + inst.attribute(i).name()
             + '\n');
        }
      } else {
        outBuff.append("              [list of attributes omitted]\n");
      }

      outBuff.append("Test mode:    ");
      switch (testMode) {
        case 3: // Test on training
    outBuff.append("evaluate on training data\n");
    break;
        case 1: // CV mode
    outBuff.append("" + numFolds + "-fold cross-validation\n");
    break;
        case 2: // Percent split
    outBuff.append("split " + percent
        + "% train, remainder test\n");
    break;
        case 4: // Test on user split
    if (source.isIncremental())
      outBuff.append("user supplied test set: "
          + " size unknown (reading incrementally)\n");
    else
      outBuff.append("user supplied test set: "
          + source.getDataSet().numInstances() + " instances\n");
    break;
      }
            if (costMatrix != null) {
               outBuff.append("Evaluation cost matrix:\n")
               .append(costMatrix.toString()).append("\n");
            }
      outBuff.append("\n");
      m_History.addResult(name, outBuff);
      m_History.setSingle(name);
     
      // Build the model and output it.
      if (outputModel || (testMode == 3) || (testMode == 4)) {
        m_Log.statusMessage("Building model on training data...");

        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(inst);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
      }

      if (outputModel) {
        outBuff.append("=== Classifier model (full training set) ===\n\n");
        outBuff.append(classifier.toString() + "\n");
        outBuff.append("\nTime taken to build model: " +
           Utils.doubleToString(trainTimeElapsed / 1000.0,2)
           + " seconds\n\n");
        m_History.updateResult(name);
        if (classifier instanceof Drawable) {
    grph = null;
    try {
      grph = ((Drawable)classifier).graph();
    } catch (Exception ex) {
    }
        }
        // copy full model for output
        SerializedObject so = new SerializedObject(classifier);
        fullClassifier = (Classifier) so.getObject();
      }
     
      switch (testMode) {
        case 3: // Test on training
        m_Log.statusMessage("Evaluating on training data...");
        eval = new Evaluation(inst, costMatrix);
       
        // make adjustments if the classifier is an InputMappedClassifier
        eval = setupEval(eval, classifier, inst, costMatrix,
            plotInstances, classificationOutput, false);
       
        //plotInstances.setEvaluation(eval);
              plotInstances.setUp();
       
        if (outputPredictionsText) {
    printPredictionsHeader(outBuff, classificationOutput, "training set");
        }

        for (int jj=0;jj<inst.numInstances();jj++) {
    plotInstances.process(inst.instance(jj), classifier, eval);
   
    if (outputPredictionsText) {
      classificationOutput.printClassification(classifier, inst.instance(jj), jj);
    }
    if ((jj % 100) == 0) {
      m_Log.statusMessage("Evaluating on training data. Processed "
              +jj+" instances...");
    }
        }
        if (outputPredictionsText)
    classificationOutput.printFooter();
        if (outputPredictionsText && classificationOutput.generatesOutput()) {
    outBuff.append("\n");
        }
        outBuff.append("=== Evaluation on training set ===\n");
        break;

        case 1: // CV mode
        m_Log.statusMessage("Randomizing instances...");
        int rnd = 1;
        try {
    rnd = Integer.parseInt(m_RandomSeedText.getText().trim());
    // System.err.println("Using random seed "+rnd);
        } catch (Exception ex) {
    m_Log.logMessage("Trouble parsing random seed value");
    rnd = 1;
        }
        Random random = new Random(rnd);
        inst.randomize(random);
        if (inst.attribute(classIndex).isNominal()) {
    m_Log.statusMessage("Stratifying instances...");
    inst.stratify(numFolds);
        }
        eval = new Evaluation(inst, costMatrix);
       
         // make adjustments if the classifier is an InputMappedClassifier
              eval = setupEval(eval, classifier, inst, costMatrix,
                  plotInstances, classificationOutput, false);
       
//        plotInstances.setEvaluation(eval);
              plotInstances.setUp();
     
        if (outputPredictionsText) {
    printPredictionsHeader(outBuff, classificationOutput, "test data");
        }

        // Make some splits and do a CV
        for (int fold = 0; fold < numFolds; fold++) {
    m_Log.statusMessage("Creating splits for fold "
            + (fold + 1) + "...");
    Instances train = inst.trainCV(numFolds, fold, random);
   
    // make adjustments if the classifier is an InputMappedClassifier
          eval = setupEval(eval, classifier, train, costMatrix,
              plotInstances, classificationOutput, true);
         
//    eval.setPriors(train);
    m_Log.statusMessage("Building model for fold "
            + (fold + 1) + "...");
    Classifier current = null;
    try {
      current = AbstractClassifier.makeCopy(template);
    } catch (Exception ex) {
      m_Log.logMessage("Problem copying classifier: " + ex.getMessage());
    }
    current.buildClassifier(train);
    Instances test = inst.testCV(numFolds, fold);
    m_Log.statusMessage("Evaluating model for fold "
            + (fold + 1) + "...");
    for (int jj=0;jj<test.numInstances();jj++) {
      plotInstances.process(test.instance(jj), current, eval);
      if (outputPredictionsText) {
        classificationOutput.printClassification(current, test.instance(jj), jj);
      }
    }
        }
        if (outputPredictionsText)
    classificationOutput.printFooter();
        if (outputPredictionsText) {
    outBuff.append("\n");
        }
        if (inst.attribute(classIndex).isNominal()) {
    outBuff.append("=== Stratified cross-validation ===\n");
        } else {
    outBuff.append("=== Cross-validation ===\n");
        }
        break;
   
        case 2: // Percent split
        if (!m_PreserveOrderBut.isSelected()) {
    m_Log.statusMessage("Randomizing instances...");
    try {
      rnd = Integer.parseInt(m_RandomSeedText.getText().trim());
    } catch (Exception ex) {
      m_Log.logMessage("Trouble parsing random seed value");
      rnd = 1;
    }
    inst.randomize(new Random(rnd));
        }
        int trainSize = (int) Math.round(inst.numInstances() * percent / 100);
        int testSize = inst.numInstances() - trainSize;
        Instances train = new Instances(inst, 0, trainSize);
        Instances test = new Instances(inst, trainSize, testSize);
        m_Log.statusMessage("Building model on training split ("+trainSize+" instances)...");
        Classifier current = null;
        try {
    current = AbstractClassifier.makeCopy(template);
        } catch (Exception ex) {
    m_Log.logMessage("Problem copying classifier: " + ex.getMessage());
        }
        current.buildClassifier(train);
        eval = new Evaluation(train, costMatrix);
       
        // make adjustments if the classifier is an InputMappedClassifier
              eval = setupEval(eval, classifier, train, costMatrix,
                  plotInstances, classificationOutput, false);
                     
//        plotInstances.setEvaluation(eval);
              plotInstances.setUp();
        m_Log.statusMessage("Evaluating on test split...");
      
        if (outputPredictionsText) {
    printPredictionsHeader(outBuff, classificationOutput, "test split");
        }
    
        for (int jj=0;jj<test.numInstances();jj++) {
    plotInstances.process(test.instance(jj), current, eval);
    if (outputPredictionsText) {
      classificationOutput.printClassification(current, test.instance(jj), jj);
    }
    if ((jj % 100) == 0) {
      m_Log.statusMessage("Evaluating on test split. Processed "
              +jj+" instances...");
    }
        }
        if (outputPredictionsText)
    classificationOutput.printFooter();
        if (outputPredictionsText) {
    outBuff.append("\n");
        }
        outBuff.append("=== Evaluation on test split ===\n");
        break;
   
        case 4: // Test on user split
        m_Log.statusMessage("Evaluating on test data...");
        eval = new Evaluation(inst, costMatrix);
        // make adjustments if the classifier is an InputMappedClassifier
              eval = setupEval(eval, classifier, inst, costMatrix,
                  plotInstances, classificationOutput, false);
             
//        plotInstances.setEvaluation(eval);
              plotInstances.setUp();
       
        if (outputPredictionsText) {
    printPredictionsHeader(outBuff, classificationOutput, "test set");
        }

        Instance instance;
        int jj = 0;
        while (source.hasMoreElements(userTestStructure)) {
    instance = source.nextElement(userTestStructure);
    plotInstances.process(instance, classifier, eval);
    if (outputPredictionsText) {
      classificationOutput.printClassification(classifier, instance, jj);
    }
    if ((++jj % 100) == 0) {
View Full Code Here

TOP

Related Classes of weka.core.converters.ConverterUtils.DataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.