Package weka.core.converters.ConverterUtils

Examples of weka.core.converters.ConverterUtils.DataSource


    throws Exception {

    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    DataSource source = null;
    Instance inst;
    Instances structure;
   
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(trainFileName);
   
    structure = source.getStructure();
    while (source.hasMoreElements(structure)) {
      inst = source.nextElement(structure);
      try {
  cnum = clusterer.clusterInstance(inst);
 
  text.append(i + " " + cnum + " "
      + attributeValuesString(inst, attributesToOutput) + "\n");
View Full Code Here


    boolean noOutput = false,
        printClassifications = false, trainStatistics = true,
        printMargins = false, printComplexityStatistics = false,
        printGraph = false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    DataSource trainSource = null, testSource = null;
    ObjectInputStream objectInputStream = null;
    BufferedInputStream xmlInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0,
        testTimeStart = 0, testTimeElapsed = 0;
    String xml = "";
    String[] optionsTmp = null;
    Classifier classifierBackup;
    Classifier classifierClassifications = null;
    boolean printDistribution = false;
    int actualClassIndex = -1// 0-based class index
    String splitPercentageString = "";
    int splitPercentage = -1;
    boolean preserveOrder = false;
    boolean trainSetPresent = false;
    boolean testSetPresent = false;
    String thresholdFile;
    String thresholdLabel;
    StringBuffer predsBuff = null; // predictions from cross-validation

    // help requested?
    if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {
      throw new Exception("\nHelp requested." + makeOptionString(classifier));
    }

    try {
      // do we get the input from XML instead of normal parameters?
      xml = Utils.getOption("xml", options);
      if (!xml.equals("")) {
        options = new XMLOptions(xml).toArray();
      }

      // is the input model only the XML-Options, i.e. w/o built model?
      optionsTmp = new String[options.length];
      for (int i = 0; i < options.length; i++) {
        optionsTmp[i] = options[i];
      }

      if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {
        // load options from serialized data ('-l' is automatically erased!)
        XMLClassifier xmlserial = new XMLClassifier();
        Classifier cl = (Classifier) xmlserial.read(Utils.getOption('l', options));
        // merge options
        optionsTmp = new String[options.length + cl.getOptions().length];
        System.arraycopy(cl.getOptions(), 0, optionsTmp, 0, cl.getOptions().length);
        System.arraycopy(options, 0, optionsTmp, cl.getOptions().length, options.length);
        options = optionsTmp;
      }

      noCrossValidation = Utils.getFlag("no-cv", options);
      // Get basic options (options the same for all schemes)
      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
        if (classIndexString.equals("first")) {
          classIndex = 1;
        } else if (classIndexString.equals("last")) {
          classIndex = -1;
        } else {
          classIndex = Integer.parseInt(classIndexString);
        }
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }
      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object " +
              "input file given.");
        }
        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test " +
              "file given.");
        }
      } else if ((objectInputFileName.length() != 0) &&
          ((!(classifier instanceof UpdateableClassifier)) ||
          (testFileName.length() == 0))) {
        throw new Exception("Classifier not incremental, or no " +
            "test file provided: can't " +
            "use both train and model file.");
      }
      try {
        if (trainFileName.length() != 0) {
          trainSetPresent = true;
          trainSource = new DataSource(trainFileName);
        }
        if (testFileName.length() != 0) {
          testSetPresent = true;
          testSource = new DataSource(testFileName);
        }
        if (objectInputFileName.length() != 0) {
          InputStream is = new FileInputStream(objectInputFileName);
          if (objectInputFileName.endsWith(".gz")) {
            is = new GZIPInputStream(is);
          }
          // load from KOML?
          if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent())) {
            objectInputStream = new ObjectInputStream(is);
            xmlInputStream = null;
          } else {
            objectInputStream = null;
            xmlInputStream = new BufferedInputStream(is);
          }
        }
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testSetPresent) {
        template = test = testSource.getStructure();
        if (classIndex != -1) {
          test.setClassIndex(classIndex - 1);
        } else {
          if ((test.classIndex() == -1) || (classIndexString.length() != 0)) {
            test.setClassIndex(test.numAttributes() - 1);
          }
        }
        actualClassIndex = test.classIndex();
      } else {
        // percentage split
        splitPercentageString = Utils.getOption("split-percentage", options);
        if (splitPercentageString.length() != 0) {
          if (foldsString.length() != 0) {
            throw new Exception(
                "Percentage split cannot be used in conjunction with " + "cross-validation ('-x').");
          }
          splitPercentage = Integer.parseInt(splitPercentageString);
          if ((splitPercentage <= 0) || (splitPercentage >= 100)) {
            throw new Exception("Percentage split value needs be >0 and <100.");
          }
        } else {
          splitPercentage = -1;
        }
        preserveOrder = Utils.getFlag("preserve-order", options);
        if (preserveOrder) {
          if (splitPercentage == -1) {
            throw new Exception("Percentage split ('-percentage-split') is missing.");
          }
        }
        // create new train/test sources
        if (splitPercentage > 0) {
          testSetPresent = true;
          Instances tmpInst = trainSource.getDataSet(actualClassIndex);
          if (!preserveOrder) {
            tmpInst.randomize(new Random(seed));
          }
          int trainSize = tmpInst.numInstances() * splitPercentage / 100;
          int testSize = tmpInst.numInstances() - trainSize;
          Instances trainInst = new Instances(tmpInst, 0, trainSize);
          Instances testInst = new Instances(tmpInst, trainSize, testSize);
          trainSource = new DataSource(trainInst);
          testSource = new DataSource(testInst);
          template = test = testSource.getStructure();
          if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
          } else {
            if ((test.classIndex() == -1) || (classIndexString.length() != 0)) {
              test.setClassIndex(test.numAttributes() - 1);
            }
          }
          actualClassIndex = test.classIndex();
        }
      }
      if (trainSetPresent) {
        template = train = trainSource.getStructure();
        if (classIndex != -1) {
          train.setClassIndex(classIndex - 1);
        } else {
          if ((train.classIndex() == -1) || (classIndexString.length() != 0)) {
            train.setClassIndex(train.numAttributes() - 1);
          }
        }
        actualClassIndex = train.classIndex();
        if ((testSetPresent) && !test.equalHeaders(train)) {
          throw new IllegalArgumentException("Train and test file not compatible!");
        }
      }
      if (template == null) {
        throw new Exception("No actual dataset provided to use as template");
      }
      costMatrix = handleCostOption(
          Utils.getOption('m', options), template.numClasses());

      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      printDistribution = Utils.getFlag("distribution", options);
      thresholdFile = Utils.getOption("threshold-file", options);
      thresholdLabel = Utils.getOption("threshold-label", options);

      // Check -p option
      try {
        attributeRangeString = Utils.getOption('p', options);
      } catch (Exception e) {
        throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
            "It now expects a parameter specifying a range of attributes " +
            "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
        printClassifications = true;
        if (!attributeRangeString.equals("0")) {
          attributesToOutput = new Range(attributeRangeString);
        }
      }

      if (!printClassifications && printDistribution) {
        throw new Exception("Cannot print distribution without '-p' option!");
      }

      // if no training file given, we don't have any priors
      if ((!trainSetPresent) && (printComplexityStatistics)) {
        throw new Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");
      }

      // If a model file is given, we can't process
      // scheme-specific options
      if (objectInputFileName.length() != 0) {
        Utils.checkForRemainingOptions(options);
      } else {

        // Set options for classifier
        if (classifier instanceof OptionHandler) {
          for (int i = 0; i < options.length; i++) {
            if (options[i].length() != 0) {
              if (schemeOptionsText == null) {
                schemeOptionsText = new StringBuffer();
              }
              if (options[i].indexOf(' ') != -1) {
                schemeOptionsText.append('"' + options[i] + "\" ");
              } else {
                schemeOptionsText.append(options[i] + " ");
              }
            }
          }
          ((OptionHandler) classifier).setOptions(options);
        }
      }
      Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
      throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier));
    }

    // Set up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);

    // disable use of priors if no training file given
    if (!trainSetPresent) {
      testingEvaluation.useNoPriors();
    }

    if (objectInputFileName.length() != 0) {
      // Load classifier from file
      if (objectInputStream != null) {
        classifier = (Classifier) objectInputStream.readObject();
        // try and read a header (if present)
        Instances savedStructure = null;
        try {
          savedStructure = (Instances) objectInputStream.readObject();
        } catch (Exception ex) {
          // don't make a fuss
        }
        if (savedStructure != null) {
          // test for compatibility with template
          if (!template.equalHeaders(savedStructure)) {
            throw new Exception("training and test set are not compatible");
          }
        }
        objectInputStream.close();
      } else {
        // whether KOML is available has already been checked (objectInputStream would be null otherwise)!
        classifier = (Classifier) KOML.read(xmlInputStream);
        xmlInputStream.close();
      }
    }

    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) &&
        (testSetPresent || noCrossValidation) &&
        (costMatrix == null) &&
        (trainSetPresent)) {
      // Build classifier incrementally
      trainingEvaluation.setPriors(train);
      testingEvaluation.setPriors(train);
      trainTimeStart = System.currentTimeMillis();
      if (objectInputFileName.length() == 0) {
        classifier.buildClassifier(train);
      }
      Instance trainInst;
      while (trainSource.hasMoreElements(train)) {
        trainInst = trainSource.nextElement(train);
        trainingEvaluation.updatePriors(trainInst);
        testingEvaluation.updatePriors(trainInst);
        ((UpdateableClassifier) classifier).updateClassifier(trainInst);
      }
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    } else if (objectInputFileName.length() == 0) {
      // Build classifier in one go
      tempTrain = trainSource.getDataSet(actualClassIndex);
      trainingEvaluation.setPriors(tempTrain);
      testingEvaluation.setPriors(tempTrain);
      trainTimeStart = System.currentTimeMillis();
      classifier.buildClassifier(tempTrain);
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

/*  FOR LARGE DATA SETS
    // backup of fully trained classifier for printing the classifications
    if (printClassifications) {
      classifierClassifications = Classifier.makeCopy(classifier);
    }
*/
    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      OutputStream os = new FileOutputStream(objectOutputFileName);
      // binary
      if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {
        if (objectOutputFileName.endsWith(".gz")) {
          os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        if (template != null) {
          objectOutputStream.writeObject(template);
        }
        objectOutputStream.flush();
        objectOutputStream.close();
      } // KOML/XML
      else {
        BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
        if (objectOutputFileName.endsWith(".xml")) {
          XMLSerialization xmlSerial = new XMLClassifier();
          xmlSerial.write(xmlOutputStream, classifier);
        } else // whether KOML is present has already been checked
        // if not present -> ".koml" is interpreted as binary - see above
        if (objectOutputFileName.endsWith(".koml")) {
          KOML.write(xmlOutputStream, classifier);
        }
        xmlOutputStream.close();
      }
    }

/* FOR LARGE DATA SETS
    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)) {
      return ((Drawable) classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)) {
      return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }

    // Output model
    if (!(noOutput || printMargins)) {
      if (classifier instanceof OptionHandler) {
        if (schemeOptionsText != null) {
          text.append("\nOptions: " + schemeOptionsText);
          text.append("\n");
        }
      }
      text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
      text.append("\n=== Evaluation Cost Matrix ===\n\n");
      text.append(costMatrix.toString());
    }
*/ // FOR LARGE DATA SETS
    // Output test instance predictions only
    if (printClassifications) {
      DataSource source = testSource;
      predsBuff = new StringBuffer();
      // no test set -> use train set
      if (source == null && noCrossValidation) {
        source = trainSource;
        predsBuff.append("\n=== Predictions on training data ===\n\n");
View Full Code Here

    boolean     result;
    Classifier  cls;
    Classifier  code;
    int         i;
    Instances   data;
    DataSource  source;
    boolean     numeric;
    boolean     different;
    double      predClassifier;
    double      predSource;
   
    result = true;
   
    // a few checks
    if (getClassifier() == null)
      throw new Exception("No classifier set!");
    if (getSourceCode() == null)
      throw new Exception("No source code set!");
    if (getDataset() == null)
      throw new Exception("No dataset set!");
    if (!getDataset().exists())
      throw new Exception(
          "Dataset '" + getDataset().getAbsolutePath() + "' does not exist!");
   
    // load data
    source = new DataSource(getDataset().getAbsolutePath());
    data   = source.getDataSet();
    if (getClassIndex() == -1)
      data.setClassIndex(data.numAttributes() - 1);
    else
      data.setClassIndex(getClassIndex());
    numeric = data.classAttribute().isNumeric();
View Full Code Here

    Instances testRaw = null;
    boolean hasClass = (test.classIndex() >= 0);
    int unclusteredInstances = 0;
    Vector<Double> clusterAssignments = new Vector<Double>();
    Filter filter = null;
    DataSource source = null;
    Instance inst;

    if (testFileName == null)
      testFileName = "";
   
    // load data
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(test);
    testRaw = source.getStructure(test.classIndex());
   
    // If class is set then do class based evaluation as well
    if (hasClass) {
      if (testRaw.classAttribute().isNumeric())
  throw new Exception("ClusterEvaluation: Class must be nominal!");

      filter = new Remove();
      ((Remove) filter).setAttributeIndices("" + (testRaw.classIndex() + 1));
      ((Remove) filter).setInvertSelection(false);
      filter.setInputFormat(testRaw);
    }
   
    i = 0;
    while (source.hasMoreElements(testRaw)) {
      // next instance
      inst = source.nextElement(testRaw);
      if (filter != null) {
  filter.input(inst);
  filter.batchFinished();
  inst = filter.output();
      }
View Full Code Here

    int numClasses = inst.classAttribute().numValues();
    int[][] counts = new int [m_numClusters][numClasses];
    int[] clusterTotals = new int[m_numClusters];
    double[] best = new double[m_numClusters+1];
    double[] current = new double[m_numClusters+1];
    DataSource source = null;
    Instances instances = null;
    Instance instance = null;
    int i;
    int numInstances;

    if (fileName == null)
      fileName = "";
   
    if (fileName.length() != 0)
      source = new DataSource(fileName);
    else
      source = new DataSource(inst);
    instances = source.getStructure(inst.classIndex());

    i = 0;
    while (source.hasMoreElements(instances)) {
      instance = source.nextElement(instances);
      counts[(int)m_clusterAssignments[i]][(int)instance.classValue()]++;
      clusterTotals[(int)m_clusterAssignments[i]]++;
      i++;
    }
    numInstances = i;
View Full Code Here

    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    try {
      if (Utils.getFlag('h', options)) {
        throw  new Exception("Help requested.");
      }

      // Get basic options (options the same for all clusterers)
      //printClusterAssignments = Utils.getFlag('p', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);
      graphFileName = Utils.getOption('g', options);

      // Check -p option
      try {
  attributeRangeString = Utils.getOption('p', options);
      }
      catch (Exception e) {
  throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
          "It now expects a parameter specifying a range of attributes " +
          "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
  printClusterAssignments = true;
  if (!attributeRangeString.equals("0"))
    attributesToOutput = new Range(attributeRangeString);
      }

      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw  new Exception("No training file and no object "
             + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw  new Exception("No training file and no test file given.");
        }
      }
      else {
  if ((objectInputFileName.length() != 0)
      && (printClusterAssignments == false)) {
    throw  new Exception("Can't use both train and model file "
             + "unless -p specified.");
  }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
  seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
  folds = Integer.parseInt(foldsString);
  doXval = true;
      }
    }
    catch (Exception e) {
      throw  new Exception('\n' + e.getMessage()
         + makeOptionString(clusterer));
    }

    try {
      if (trainFileName.length() != 0) {
  source = new DataSource(trainFileName);
  train  = source.getStructure();

  String classString = Utils.getOption('c',options);
  if (classString.length() != 0) {
    if (classString.compareTo("last") == 0)
      theClass = train.numAttributes();
    else if (classString.compareTo("first") == 0)
      theClass = 1;
    else
      theClass = Integer.parseInt(classString);

    if (theClass != -1) {
      if (doXval || testFileName.length() != 0)
        throw new Exception("Can only do class based evaluation on the "
      +"training data");

      if (objectInputFileName.length() != 0)
        throw new Exception("Can't load a clusterer and do class based "
      +"evaluation");

      if (objectOutputFileName.length() != 0)
        throw new Exception(
      "Can't do class based evaluation and save clusterer");
    }
  }
  else {
    // if the dataset defines a class attribute, use it
    if (train.classIndex() != -1) {
      theClass = train.classIndex() + 1;
      System.err.println(
    "Note: using class attribute from dataset, i.e., attribute #"
    + theClass);
    }
  }

  if (theClass != -1) {
    if (theClass < 1 || theClass > train.numAttributes())
      throw new Exception("Class is out of range!");

    if (!train.attribute(theClass - 1).isNominal())
      throw new Exception("Class must be nominal!");
   
    train.setClassIndex(theClass - 1);
  }
      }
    }
    catch (Exception e) {
      throw  new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
      Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
      ((OptionHandler)clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file
      //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
      java.io.ObjectInputStream ois =
        new java.io.ObjectInputStream(
        new java.io.BufferedInputStream(
        new java.io.FileInputStream(objectInputFileName)));
      clusterer = (Clusterer) ois.readObject();
      // try and get the training header
      try {
        trainHeader = (Instances) ois.readObject();
      } catch (Exception ex) {
        // don't moan if we can't
      }
    }
    else {
      // Build the clusterer if no object file provided
      if (theClass == -1) {
  if (updateable) {
    clusterer.buildClusterer(source.getStructure());
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      ((UpdateableClusterer) clusterer).updateClusterer(inst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    clusterer.buildClusterer(source.getDataSet());
  }
      }
      else {
  Remove removeClass = new Remove();
  removeClass.setAttributeIndices("" + theClass);
  removeClass.setInvertSelection(false);
  removeClass.setInputFormat(train);
  if (updateable) {
    Instances clusterTrain = Filter.useFilter(train, removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      removeClass.input(inst);
      removeClass.batchFinished();
      Instance clusterTrainInst = removeClass.output();
      ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
  }
  ClusterEvaluation ce = new ClusterEvaluation();
  ce.setClusterer(clusterer);
  ce.evaluateClusterer(train, trainFileName);
 
  return "\n\n=== Clustering stats for training data ===\n\n" +
    ce.clusterResultsToString();
      }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data) */
    if (printClusterAssignments) {
      return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append("\n\n=== Clustering stats for training data ===\n\n"
    + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
      // check header compatibility
      DataSource test = new DataSource(testFileName);
      Instances testStructure = test.getStructure();
      if (!trainHeader.equalHeaders(testStructure)) {
        throw new Exception("Training and testing data are not compatible");
      }

      text.append("\n\n=== Clustering stats for testing data ===\n\n"
View Full Code Here

      if (selected != -1) {
  ViewerDialog dialog = new ViewerDialog(null);
  String filename = m_List.getSelectedValue().toString();
  int result;
  try {
    DataSource source = new DataSource(filename);
    result = dialog.showDialog(source.getDataSet());
    // nasty workaround for Windows regarding locked files:
    // if file Reader in Loader is not closed explicitly, we cannot
    // overwrite the file.
    source = null;
    System.gc();
View Full Code Here

  public static void filterFile(Filter filter, String [] options)
    throws Exception {

    boolean debug = false;
    Instances data = null;
    DataSource input = null;
    PrintWriter output = null;
    boolean helpRequest;
    String sourceCode = "";

    try {
       helpRequest = Utils.getFlag('h', options);

      if (Utils.getFlag('d', options)) {
  debug = true;
      }
      String infileName = Utils.getOption('i', options);
      String outfileName = Utils.getOption('o', options);
      String classIndex = Utils.getOption('c', options);
      if (filter instanceof Sourcable)
  sourceCode = Utils.getOption('z', options);
     
      if (filter instanceof OptionHandler) {
  ((OptionHandler)filter).setOptions(options);
      }

      Utils.checkForRemainingOptions(options);
      if (helpRequest) {
  throw new Exception("Help requested.\n");
      }
      if (infileName.length() != 0) {
  input = new DataSource(infileName);
      } else {
  input = new DataSource(System.in);
      }
      if (outfileName.length() != 0) {
  output = new PrintWriter(new FileOutputStream(outfileName));
      } else {
  output = new PrintWriter(System.out);
      }

      data = input.getStructure();
      if (classIndex.length() != 0) {
  if (classIndex.equals("first")) {
    data.setClassIndex(0);
  } else if (classIndex.equals("last")) {
    data.setClassIndex(data.numAttributes() - 1);
  } else {
    data.setClassIndex(Integer.parseInt(classIndex) - 1);
  }
      }
    } catch (Exception ex) {
      String filterOptions = "";
      // Output the error and also the valid options
      if (filter instanceof OptionHandler) {
  filterOptions += "\nFilter options:\n\n";
  Enumeration enu = ((OptionHandler)filter).listOptions();
  while (enu.hasMoreElements()) {
    Option option = (Option) enu.nextElement();
    filterOptions += option.synopsis() + '\n'
      + option.description() + "\n";
  }
      }

      String genericOptions = "\nGeneral options:\n\n"
  + "-h\n"
  + "\tGet help on available options.\n"
  + "\t(use -b -h for help on batch mode.)\n"
  + "-i <file>\n"
  + "\tThe name of the file containing input instances.\n"
  + "\tIf not supplied then instances will be read from stdin.\n"
  + "-o <file>\n"
  + "\tThe name of the file output instances will be written to.\n"
  + "\tIf not supplied then instances will be written to stdout.\n"
  + "-c <class index>\n"
  + "\tThe number of the attribute to use as the class.\n"
  + "\t\"first\" and \"last\" are also valid entries.\n"
  + "\tIf not supplied then no class is assigned.\n";

      if (filter instanceof Sourcable) {
  genericOptions +=
    "-z <class name>\n"
    + "\tOutputs the source code representing the trained filter.\n";
      }
     
      throw new Exception('\n' + ex.getMessage()
        + filterOptions+genericOptions);
    }
   
    if (debug) {
      System.err.println("Setting input format");
    }
    boolean printedHeader = false;
    if (filter.setInputFormat(data)) {
      if (debug) {
  System.err.println("Getting output format");
      }
      output.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }
   
    // Pass all the instances to the filter
    Instance inst;
    while (input.hasMoreElements(data)) {
      inst = input.nextElement(data);
      if (debug) {
  System.err.println("Input instance to filter");
      }
      if (filter.input(inst)) {
  if (debug) {
View Full Code Here

  /**
   * Runs a filter over two input files in batch mode: the first file is
   * used to set the filter's input format and is pushed through the filter
   * up to batchFinished(); the second file is then passed through the same
   * filter.
   *
   * Command-line options read here: -h (help), -i (first input file),
   * -r (second input file), -o (first output file, stdout if omitted),
   * -s (second output file, stdout if omitted), -c (class index: "first",
   * "last" or a 1-based attribute number) and, only when the filter is
   * Sourcable, -z (class name for generated source code).
   *
   * @param filter the filter to apply; if it is an OptionHandler the
   *               options are also handed to its setOptions()
   * @param options the command-line options
   * @throws Exception if an input file option is missing, the two input
   *                   headers differ, help was requested, or option handling
   *                   fails; the message is extended with the list of
   *                   filter-specific and general options
   */
  public static void batchFilterFile(Filter filter, String [] options)
    throws Exception {

    Instances firstData = null;
    Instances secondData = null;
    DataSource firstInput = null;
    DataSource secondInput = null;
    PrintWriter firstOutput = null;
    PrintWriter secondOutput = null;
    boolean helpRequest;
    // read from -z below; not used further within this excerpt
    String sourceCode = "";

    try {
      helpRequest = Utils.getFlag('h', options);

      // both input files are mandatory
      String fileName = Utils.getOption('i', options);
      if (fileName.length() != 0) {
  firstInput = new DataSource(fileName);
      } else {
  throw new Exception("No first input file given.\n");
      }

      fileName = Utils.getOption('r', options);
      if (fileName.length() != 0) {
  secondInput = new DataSource(fileName);
      } else {
  throw new Exception("No second input file given.\n");
      }

      // output files are optional; fall back to stdout
      fileName = Utils.getOption('o', options);
      if (fileName.length() != 0) {
  firstOutput = new PrintWriter(new FileOutputStream(fileName));
      } else {
  firstOutput = new PrintWriter(System.out);
      }
     
      fileName = Utils.getOption('s', options);
      if (fileName.length() != 0) {
  secondOutput = new PrintWriter(new FileOutputStream(fileName));
      } else {
  secondOutput = new PrintWriter(System.out);
      }
      String classIndex = Utils.getOption('c', options);
      if (filter instanceof Sourcable)
  sourceCode = Utils.getOption('z', options);

      if (filter instanceof OptionHandler) {
  ((OptionHandler)filter).setOptions(options);
      }
      // presumably rejects any options left unconsumed - confirm against Utils
      Utils.checkForRemainingOptions(options);
     
      // help is reported via the catch block below, which appends the
      // option listing to the message
      if (helpRequest) {
  throw new Exception("Help requested.\n");
      }
      // only the headers are loaded here; instances are read one by one later
      firstData = firstInput.getStructure();
      secondData = secondInput.getStructure();
      if (!secondData.equalHeaders(firstData)) {
  throw new Exception("Input file formats differ.\n");
      }
      if (classIndex.length() != 0) {
  if (classIndex.equals("first")) {
    firstData.setClassIndex(0);
    secondData.setClassIndex(0);
  } else if (classIndex.equals("last")) {
    firstData.setClassIndex(firstData.numAttributes() - 1);
    secondData.setClassIndex(secondData.numAttributes() - 1);
  } else {
    // numeric -c values are 1-based on the command line, 0-based internally
    firstData.setClassIndex(Integer.parseInt(classIndex) - 1);
    secondData.setClassIndex(Integer.parseInt(classIndex) - 1);
  }
      }
    } catch (Exception ex) {
      String filterOptions = "";
      // Output the error and also the valid options
      if (filter instanceof OptionHandler) {
  filterOptions += "\nFilter options:\n\n";
  Enumeration enu = ((OptionHandler)filter).listOptions();
  while (enu.hasMoreElements()) {
    Option option = (Option) enu.nextElement();
    filterOptions += option.synopsis() + '\n'
      + option.description() + "\n";
  }
      }

      String genericOptions = "\nGeneral options:\n\n"
  + "-h\n"
  + "\tGet help on available options.\n"
  + "-i <filename>\n"
  + "\tThe file containing first input instances.\n"
  + "-o <filename>\n"
  + "\tThe file first output instances will be written to.\n"
  + "-r <filename>\n"
  + "\tThe file containing second input instances.\n"
  + "-s <filename>\n"
  + "\tThe file second output instances will be written to.\n"
  + "-c <class index>\n"
  + "\tThe number of the attribute to use as the class.\n"
  + "\t\"first\" and \"last\" are also valid entries.\n"
  + "\tIf not supplied then no class is assigned.\n";

      if (filter instanceof Sourcable) {
  genericOptions +=
    "-z <class name>\n"
    + "\tOutputs the source code representing the trained filter.\n";
      }
     
      // rethrown as a new Exception carrying only the original message text
      // plus the usage listing (the original exception is not chained)
      throw new Exception('\n' + ex.getMessage()
        + filterOptions+genericOptions);
    }
    // tracks whether the output header has already been written, so that
    // instances are never emitted before their header
    boolean printedHeader = false;
    if (filter.setInputFormat(firstData)) {
      firstOutput.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }
   
    // Pass all the instances to the filter
    Instance inst;
    while (firstInput.hasMoreElements(firstData)) {
      inst = firstInput.nextElement(firstData);
      if (filter.input(inst)) {
  if (!printedHeader) {
    throw new Error("Filter didn't return true from setInputFormat() "
        + "earlier!");
  }
  firstOutput.println(filter.output().toString());
      }
    }
   
    // Say that input has finished, and print any pending output instances
    if (filter.batchFinished()) {
      if (!printedHeader) {
  firstOutput.println(filter.getOutputFormat().toString());
      }
      while (filter.numPendingOutput() > 0) {
  firstOutput.println(filter.output().toString());
      }
    }
   
    // NOTE(review): when -o was not given, firstOutput wraps System.out, so
    // this close() also closes System.out; if -s was omitted as well,
    // secondOutput wraps the same stream - confirm later output is not lost.
    // The null check is redundant: firstOutput was already dereferenced above.
    if (firstOutput != null) {
      firstOutput.close();
    }   
    // second pass: the header flag is reset because output now goes to
    // secondOutput
    printedHeader = false;
    if (filter.isOutputFormatDefined()) {
      secondOutput.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }
    // Pass all the second instances to the filter
    while (secondInput.hasMoreElements(secondData)) {
      inst = secondInput.nextElement(secondData);
      if (filter.input(inst)) {
  if (!printedHeader) {
    throw new Error("Filter didn't return true from"
        + " isOutputFormatDefined() earlier!");
  }
View Full Code Here

    Instances   data;
    Instance  filteredInstance;
    Instances  filteredInstances;
    Instance  filteredInstanceSource;
    Instances  filteredInstancesSource;
    DataSource  source;
    Filter  filter;
    Filter  filterSource;
    int    i;
   
    result = true;
   
    // a few checks
    if (getFilter() == null)
      throw new Exception("No filter set!");
    if (getSourceCode() == null)
      throw new Exception("No source code set!");
    if (getDataset() == null)
      throw new Exception("No dataset set!");
    if (!getDataset().exists())
      throw new Exception(
          "Dataset '" + getDataset().getAbsolutePath() + "' does not exist!");
   
    // load data
    source = new DataSource(getDataset().getAbsolutePath());
    data   = source.getDataSet();
    if (getClassIndex() == -2)
      data.setClassIndex(data.numAttributes() - 1);
    else
      data.setClassIndex(getClassIndex());
   
View Full Code Here

TOP

Related Classes of weka.core.converters.ConverterUtils.DataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.