Class weka.core.converters.ConverterUtils (package weka.core.converters)

Examples of weka.core.converters.ConverterUtils.DataSource


     * @attribute assessorID string
     * @attribute assesseeID string
     * @attribute feedbackValue string
     */
   
    DataSource source;
    try
    {
      source = new DataSource(arffFileName);
      Instances instances = source.getDataSet();
      feedbacks = new ArrayList<Feedback>();
      logger.debug("Number of instances in arff file is " + instances.numInstances());
     
      Enumeration enu = instances.enumerateInstances();
      //get all the feedback lines
View Full Code Here


   */
  @Override
  public void evaluateAttributesFromFile(String corpusName,
      String featureSetName, String splitName, String file)
      throws Exception {
    DataSource ds = new DataSource(file);
    Instances inst = ds.getDataSet();
    String label = FileUtil.parseLabelFromFileName(inst.relationName());
    Integer run = FileUtil.parseRunFromFileName(inst.relationName());
    Integer fold = FileUtil.parseFoldFromFileName(inst.relationName());
    evaluateAttributes(corpusName, featureSetName, splitName, inst, label,
        run, fold);
View Full Code Here

    boolean noOutput = false,
    printClassifications = false, trainStatistics = true,
    printMargins = false, printComplexityStatistics = false,
    printGraph = false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    DataSource trainSource = null, testSource = null;
    ObjectInputStream objectInputStream = null;
    BufferedInputStream xmlInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0,
    testTimeStart = 0, testTimeElapsed = 0;
    String xml = "";
    String[] optionsTmp = null;
    Classifier classifierBackup;
    Classifier classifierClassifications = null;
    boolean printDistribution = false;
    int actualClassIndex = -1// 0-based class index
    String splitPercentageString = "";
    double splitPercentage = -1;
    boolean preserveOrder = false;
    boolean trainSetPresent = false;
    boolean testSetPresent = false;
    String thresholdFile;
    String thresholdLabel;
    StringBuffer predsBuff = null; // predictions from cross-validation

    // help requested?
    if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {
     
      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) ||
        Utils.getFlag("info", options);
     
      throw new Exception("\nHelp requested."
          + makeOptionString(classifier, globalInfo));
    }
   
    try {
      // do we get the input from XML instead of normal parameters?
      xml = Utils.getOption("xml", options);
      if (!xml.equals(""))
  options = new XMLOptions(xml).toArray();

      // is the input model only the XML-Options, i.e. w/o built model?
      optionsTmp = new String[options.length];
      for (int i = 0; i < options.length; i++)
  optionsTmp[i] = options[i];

      String tmpO = Utils.getOption('l', optionsTmp);
      //if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {
      if (tmpO.endsWith(".xml")) {
  // try to load file as PMML first
  boolean success = false;
  try {
    PMMLModel pmmlModel = PMMLFactory.getPMMLModel(tmpO);
    if (pmmlModel instanceof PMMLClassifier) {
      classifier = ((PMMLClassifier)pmmlModel);
      success = true;
    }
  } catch (IllegalArgumentException ex) {
    success = false;
  }
  if (!success) {
    // load options from serialized data  ('-l' is automatically erased!)
    XMLClassifier xmlserial = new XMLClassifier();
    Classifier cl = (Classifier) xmlserial.read(Utils.getOption('l', options));
   
    // merge options
    optionsTmp = new String[options.length + cl.getOptions().length];
    System.arraycopy(cl.getOptions(), 0, optionsTmp, 0, cl.getOptions().length);
    System.arraycopy(options, 0, optionsTmp, cl.getOptions().length, options.length);
    options = optionsTmp;
  }
      }

      noCrossValidation = Utils.getFlag("no-cv", options);
      // Get basic options (options the same for all schemes)
      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
  if (classIndexString.equals("first"))
    classIndex = 1;
  else if (classIndexString.equals("last"))
    classIndex = -1;
  else
    classIndex = Integer.parseInt(classIndexString);
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
  folds = Integer.parseInt(foldsString);
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
  seed = Integer.parseInt(seedString);
      }
      if (trainFileName.length() == 0) {
  if (objectInputFileName.length() == 0) {
    throw new Exception("No training file and no object "+
    "input file given.");
  }
  if (testFileName.length() == 0) {
    throw new Exception("No training file and no test "+
    "file given.");
  }
      } else if ((objectInputFileName.length() != 0) &&
    ((!(classifier instanceof UpdateableClassifier)) ||
        (testFileName.length() == 0))) {
  throw new Exception("Classifier not incremental, or no " +
      "test file provided: can't "+
  "use both train and model file.");
      }
      try {
  if (trainFileName.length() != 0) {
    trainSetPresent = true;
    trainSource = new DataSource(trainFileName);
  }
  if (testFileName.length() != 0) {
    testSetPresent = true;
    testSource = new DataSource(testFileName);
  }
  if (objectInputFileName.length() != 0) {
    if (objectInputFileName.endsWith(".xml")) {
      // if this is the case then it means that a PMML classifier was
      // successfully loaded earlier in the code
      objectInputStream = null;
      xmlInputStream = null;
    } else {
      InputStream is = new FileInputStream(objectInputFileName);
      if (objectInputFileName.endsWith(".gz")) {
        is = new GZIPInputStream(is);
      }
      // load from KOML?
      if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent()) ) {
        objectInputStream = new ObjectInputStream(is);
        xmlInputStream    = null;
      }
      else {
        objectInputStream = null;
        xmlInputStream    = new BufferedInputStream(is);
      }
    }
  }
      } catch (Exception e) {
  throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testSetPresent) {
  template = test = testSource.getStructure();
  if (classIndex != -1) {
    test.setClassIndex(classIndex - 1);
  } else {
    if ( (test.classIndex() == -1) || (classIndexString.length() != 0) )
      test.setClassIndex(test.numAttributes() - 1);
  }
  actualClassIndex = test.classIndex();
      }
      else {
  // percentage split
  splitPercentageString = Utils.getOption("split-percentage", options);
  if (splitPercentageString.length() != 0) {
    if (foldsString.length() != 0)
      throw new Exception(
    "Percentage split cannot be used in conjunction with "
    + "cross-validation ('-x').");
    splitPercentage = Double.parseDouble(splitPercentageString);
    if ((splitPercentage <= 0) || (splitPercentage >= 100))
      throw new Exception("Percentage split value needs be >0 and <100.");
  }
  else {
    splitPercentage = -1;
  }
  preserveOrder = Utils.getFlag("preserve-order", options);
  if (preserveOrder) {
    if (splitPercentage == -1)
      throw new Exception("Percentage split ('-percentage-split') is missing.");
  }
  // create new train/test sources
  if (splitPercentage > 0) {
    testSetPresent = true;
    Instances tmpInst = trainSource.getDataSet(actualClassIndex);
    if (!preserveOrder)
      tmpInst.randomize(new Random(seed));
    int trainSize =
            (int) Math.round(tmpInst.numInstances() * splitPercentage / 100);
    int testSize  = tmpInst.numInstances() - trainSize;
    Instances trainInst = new Instances(tmpInst, 0, trainSize);
    Instances testInst  = new Instances(tmpInst, trainSize, testSize);
    trainSource = new DataSource(trainInst);
    testSource  = new DataSource(testInst);
    template = test = testSource.getStructure();
    if (classIndex != -1) {
      test.setClassIndex(classIndex - 1);
    } else {
      if ( (test.classIndex() == -1) || (classIndexString.length() != 0) )
        test.setClassIndex(test.numAttributes() - 1);
    }
    actualClassIndex = test.classIndex();
  }
      }
      if (trainSetPresent) {
  template = train = trainSource.getStructure();
  if (classIndex != -1) {
    train.setClassIndex(classIndex - 1);
  } else {
    if ( (train.classIndex() == -1) || (classIndexString.length() != 0) )
      train.setClassIndex(train.numAttributes() - 1);
  }
  actualClassIndex = train.classIndex();
  if ((testSetPresent) && !test.equalHeaders(train)) {
    throw new IllegalArgumentException("Train and test file not compatible!");
  }
      }
      if (template == null) {
  throw new Exception("No actual dataset provided to use as template");
      }
      costMatrix = handleCostOption(
    Utils.getOption('m', options), template.numClasses());

      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      printDistribution = Utils.getFlag("distribution", options);
      thresholdFile = Utils.getOption("threshold-file", options);
      thresholdLabel = Utils.getOption("threshold-label", options);

      // Check -p option
      try {
  attributeRangeString = Utils.getOption('p', options);
      }
      catch (Exception e) {
  throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
      "It now expects a parameter specifying a range of attributes " +
  "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
  printClassifications = true;
  noOutput = true;
  if (!attributeRangeString.equals("0"))
    attributesToOutput = new Range(attributeRangeString);
      }

      if (!printClassifications && printDistribution)
  throw new Exception("Cannot print distribution without '-p' option!");

      // if no training file given, we don't have any priors
      if ( (!trainSetPresent) && (printComplexityStatistics) )
  throw new Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");

      // If a model file is given, we can't process
      // scheme-specific options
      if (objectInputFileName.length() != 0) {
  Utils.checkForRemainingOptions(options);
      } else {

  // Set options for classifier
  if (classifier instanceof OptionHandler) {
    for (int i = 0; i < options.length; i++) {
      if (options[i].length() != 0) {
        if (schemeOptionsText == null) {
    schemeOptionsText = new StringBuffer();
        }
        if (options[i].indexOf(' ') != -1) {
    schemeOptionsText.append('"' + options[i] + "\" ");
        } else {
    schemeOptionsText.append(options[i] + " ");
        }
      }
    }
    ((OptionHandler)classifier).setOptions(options);
  }
      }
      Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
      throw new Exception("\nWeka exception: " + e.getMessage()
    + makeOptionString(classifier, false));
    }

    // Setup up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);

    // disable use of priors if no training file given
    if (!trainSetPresent)
      testingEvaluation.useNoPriors();

    if (objectInputFileName.length() != 0) {
      // Load classifier from file
      if (objectInputStream != null) {
  classifier = (Classifier) objectInputStream.readObject();
        // try and read a header (if present)
        Instances savedStructure = null;
        try {
          savedStructure = (Instances) objectInputStream.readObject();
        } catch (Exception ex) {
          // don't make a fuss
        }
        if (savedStructure != null) {
          // test for compatibility with template
          if (!template.equalHeaders(savedStructure)) {
            throw new Exception("training and test set are not compatible");
          }
        }
  objectInputStream.close();
      }
      else if (xmlInputStream != null) {
  // whether KOML is available has already been checked (objectInputStream would null otherwise)!
  classifier = (Classifier) KOML.read(xmlInputStream);
  xmlInputStream.close();
      }
    }

    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) &&
  (testSetPresent || noCrossValidation) &&
  (costMatrix == null) &&
  (trainSetPresent)) {
      // Build classifier incrementally
      trainingEvaluation.setPriors(train);
      testingEvaluation.setPriors(train);
      trainTimeStart = System.currentTimeMillis();
      if (objectInputFileName.length() == 0) {
  classifier.buildClassifier(train);
      }
      Instance trainInst;
      while (trainSource.hasMoreElements(train)) {
  trainInst = trainSource.nextElement(train);
  trainingEvaluation.updatePriors(trainInst);
  testingEvaluation.updatePriors(trainInst);
  ((UpdateableClassifier)classifier).updateClassifier(trainInst);
      }
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    } else if (objectInputFileName.length() == 0) {
      // Build classifier in one go
      tempTrain = trainSource.getDataSet(actualClassIndex);
      trainingEvaluation.setPriors(tempTrain);
      testingEvaluation.setPriors(tempTrain);
      trainTimeStart = System.currentTimeMillis();
      classifier.buildClassifier(tempTrain);
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // backup of fully trained classifier for printing the classifications
    if (printClassifications)
      classifierClassifications = Classifier.makeCopy(classifier);

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      OutputStream os = new FileOutputStream(objectOutputFileName);
      // binary
      if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {
  if (objectOutputFileName.endsWith(".gz")) {
    os = new GZIPOutputStream(os);
  }
  ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
  objectOutputStream.writeObject(classifier);
        if (template != null) {
          objectOutputStream.writeObject(template);
        }
  objectOutputStream.flush();
  objectOutputStream.close();
      }
      // KOML/XML
      else {
  BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
  if (objectOutputFileName.endsWith(".xml")) {
    XMLSerialization xmlSerial = new XMLClassifier();
    xmlSerial.write(xmlOutputStream, classifier);
  }
  else
    // whether KOML is present has already been checked
    // if not present -> ".koml" is interpreted as binary - see above
    if (objectOutputFileName.endsWith(".koml")) {
      KOML.write(xmlOutputStream, classifier);
    }
  xmlOutputStream.close();
      }
    }

    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)){
      return ((Drawable)classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)){
      return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }

    // Output model
    if (!(noOutput || printMargins)) {
      if (classifier instanceof OptionHandler) {
  if (schemeOptionsText != null) {
    text.append("\nOptions: "+schemeOptionsText);
    text.append("\n");
  }
      }
      text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
      text.append("\n=== Evaluation Cost Matrix ===\n\n");
      text.append(costMatrix.toString());
    }

    // Output test instance predictions only
    if (printClassifications) {
      DataSource source = testSource;
      predsBuff = new StringBuffer();
      // no test set -> use train set
      if (source == null && noCrossValidation) {
  source = trainSource;
        predsBuff.append("\n=== Predictions on training data ===\n\n");
View Full Code Here

    try {
      Instances i;
      // read from stdin and print statistics
      if (args.length == 0) {
  DataSource source = new DataSource(System.in);
  i = source.getDataSet();
  System.out.println(i.toSummaryString());
      }
      // read file and print statistics
      else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) {
  DataSource source = new DataSource(args[0]);
  i = source.getDataSet();
  System.out.println(i.toSummaryString());
      }
      // read two files, merge them and print result to stdout
      else if ((args.length == 3) && (args[0].toLowerCase().equals("merge"))) {
  DataSource source1 = new DataSource(args[1]);
  DataSource source2 = new DataSource(args[2]);
  i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
  System.out.println(i);
      }
      // read two files, append them and print result to stdout
      else if ((args.length == 3) && (args[0].toLowerCase().equals("append"))) {
  DataSource source1 = new DataSource(args[1]);
  DataSource source2 = new DataSource(args[2]);
  if (!source1.getStructure().equalHeaders(source2.getStructure()))
    throw new Exception("The two datasets have different headers!");
  Instances structure = source1.getStructure();
  System.out.println(source1.getStructure());
  while (source1.hasMoreElements(structure))
    System.out.println(source1.nextElement(structure));
  structure = source2.getStructure();
  while (source2.hasMoreElements(structure))
    System.out.println(source2.nextElement(structure));
      }
      // read two files and compare their headers
      else if ((args.length == 3) && (args[0].toLowerCase().equals("headers"))) {
  DataSource source1 = new DataSource(args[1]);
  DataSource source2 = new DataSource(args[2]);
  if (source1.getStructure().equalHeaders(source2.getStructure()))
    System.out.println("Headers match");
  else
    System.out.println("Headers don't match");
      }
      // read file and seed value, randomize data and print result to stdout
      else if ((args.length == 3) && (args[0].toLowerCase().equals("randomize"))) {
  DataSource source = new DataSource(args[2]);
  i = source.getDataSet();
  i.randomize(new Random(Integer.parseInt(args[1])));
  System.out.println(i);
      }
      // wrong parameters
      else {
View Full Code Here

    Instances testRaw = null;
    boolean hasClass = (test.classIndex() >= 0);
    int unclusteredInstances = 0;
    Vector<Double> clusterAssignments = new Vector<Double>();
    Filter filter = null;
    DataSource source = null;
    Instance inst;

    if (testFileName == null)
      testFileName = "";
   
    // load data
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(test);
    testRaw = source.getStructure(test.classIndex());
   
    // If class is set then do class based evaluation as well
    if (hasClass) {
      if (testRaw.classAttribute().isNumeric())
  throw new Exception("ClusterEvaluation: Class must be nominal!");

      filter = new Remove();
      ((Remove) filter).setAttributeIndices("" + (testRaw.classIndex() + 1));
      ((Remove) filter).setInvertSelection(false);
      filter.setInputFormat(testRaw);
    }
   
    i = 0;
    while (source.hasMoreElements(testRaw)) {
      // next instance
      inst = source.nextElement(testRaw);
      if (filter != null) {
  filter.input(inst);
  filter.batchFinished();
  inst = filter.output();
      }
View Full Code Here

    int numClasses = inst.classAttribute().numValues();
    int[][] counts = new int [m_numClusters][numClasses];
    int[] clusterTotals = new int[m_numClusters];
    double[] best = new double[m_numClusters+1];
    double[] current = new double[m_numClusters+1];
    DataSource source = null;
    Instances instances = null;
    Instance instance = null;
    int i;
    int numInstances;

    if (fileName == null)
      fileName = "";
   
    if (fileName.length() != 0)
      source = new DataSource(fileName);
    else
      source = new DataSource(inst);
    instances = source.getStructure(inst.classIndex());

    i = 0;
    while (source.hasMoreElements(instances)) {
      instance = source.nextElement(instances);
      if (m_clusterAssignments[i] >= 0) {
        counts[(int)m_clusterAssignments[i]][(int)instance.classValue()]++;
        clusterTotals[(int)m_clusterAssignments[i]]++;
      }
      i++;
View Full Code Here

    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {
     
      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) ||
        Utils.getFlag("info", options);
     
      throw  new Exception("Help requested."
          + makeOptionString(clusterer, globalInfo));
    }
   
    try {
      // Get basic options (options the same for all clusterers
      //printClusterAssignments = Utils.getFlag('p', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);
      graphFileName = Utils.getOption('g', options);

      // Check -p option
      try {
  attributeRangeString = Utils.getOption('p', options);
      }
      catch (Exception e) {
  throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
          "It now expects a parameter specifying a range of attributes " +
          "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
  printClusterAssignments = true;
  if (!attributeRangeString.equals("0"))
    attributesToOutput = new Range(attributeRangeString);
      }

      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw  new Exception("No training file and no object "
             + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw  new Exception("No training file and no test file given.");
        }
      }
      else {
  if ((objectInputFileName.length() != 0)
      && (printClusterAssignments == false)) {
    throw  new Exception("Can't use both train and model file "
             + "unless -p specified.");
  }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
  seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
  folds = Integer.parseInt(foldsString);
  doXval = true;
      }
    }
    catch (Exception e) {
      throw  new Exception('\n' + e.getMessage()
         + makeOptionString(clusterer, false));
    }

    try {
      if (trainFileName.length() != 0) {
  source = new DataSource(trainFileName);
  train  = source.getStructure();

  String classString = Utils.getOption('c',options);
  if (classString.length() != 0) {
    if (classString.compareTo("last") == 0)
      theClass = train.numAttributes();
    else if (classString.compareTo("first") == 0)
      theClass = 1;
    else
      theClass = Integer.parseInt(classString);

    if (theClass != -1) {
      if (doXval || testFileName.length() != 0)
        throw new Exception("Can only do class based evaluation on the "
      +"training data");

      if (objectInputFileName.length() != 0)
        throw new Exception("Can't load a clusterer and do class based "
      +"evaluation");

      if (objectOutputFileName.length() != 0)
        throw new Exception(
      "Can't do class based evaluation and save clusterer");
    }
  }
  else {
    // if the dataset defines a class attribute, use it
    if (train.classIndex() != -1) {
      theClass = train.classIndex() + 1;
      System.err.println(
    "Note: using class attribute from dataset, i.e., attribute #"
    + theClass);
    }
  }

  if (theClass != -1) {
    if (theClass < 1 || theClass > train.numAttributes())
      throw new Exception("Class is out of range!");

    if (!train.attribute(theClass - 1).isNominal())
      throw new Exception("Class must be nominal!");
   
    train.setClassIndex(theClass - 1);
  }
      }
    }
    catch (Exception e) {
      throw  new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
      Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
      ((OptionHandler)clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file
      //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
      java.io.ObjectInputStream ois =
        new java.io.ObjectInputStream(
        new java.io.BufferedInputStream(
        new java.io.FileInputStream(objectInputFileName)));
      clusterer = (Clusterer) ois.readObject();
      // try and get the training header
      try {
        trainHeader = (Instances) ois.readObject();
      } catch (Exception ex) {
        // don't moan if we cant
      }
    }
    else {
      // Build the clusterer if no object file provided
      if (theClass == -1) {
  if (updateable) {
    clusterer.buildClusterer(source.getStructure());
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      ((UpdateableClusterer) clusterer).updateClusterer(inst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    clusterer.buildClusterer(source.getDataSet());
  }
      }
      else {
  Remove removeClass = new Remove();
  removeClass.setAttributeIndices("" + theClass);
  removeClass.setInvertSelection(false);
  removeClass.setInputFormat(train);
  if (updateable) {
    Instances clusterTrain = Filter.useFilter(train, removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      removeClass.input(inst);
      removeClass.batchFinished();
      Instance clusterTrainInst = removeClass.output();
      ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
  }
  ClusterEvaluation ce = new ClusterEvaluation();
  ce.setClusterer(clusterer);
  ce.evaluateClusterer(train, trainFileName);
 
  return "\n\n=== Clustering stats for training data ===\n\n" +
    ce.clusterResultsToString();
      }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data */
    if (printClusterAssignments) {
      return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append("\n\n=== Clustering stats for training data ===\n\n"
    + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
      // check header compatibility
      DataSource test = new DataSource(testFileName);
      Instances testStructure = test.getStructure();
      if (!trainHeader.equalHeaders(testStructure)) {
        throw new Exception("Training and testing data are not compatible");
      }

      text.append("\n\n=== Clustering stats for testing data ===\n\n"
View Full Code Here

    int cc = clusterer.numberOfClusters();
    double[] instanceStats = new double[cc];
    int unclusteredInstances = 0;

    if (fileName.length() != 0) {
      DataSource source = new DataSource(fileName);
      Instances structure = source.getStructure();
      Instance inst;
      while (source.hasMoreElements(structure)) {
  inst = source.nextElement(structure);
  try {
    cnum = clusterer.clusterInstance(inst);

    if (clusterer instanceof DensityBasedClusterer) {
      loglk += ((DensityBasedClusterer)clusterer).
View Full Code Here

    throws Exception {

    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    DataSource source = null;
    Instance inst;
    Instances structure;
   
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(trainFileName);
   
    structure = source.getStructure();
    while (source.hasMoreElements(structure)) {
      inst = source.nextElement(structure);
      try {
  cnum = clusterer.clusterInstance(inst);
 
  text.append(i + " " + cnum + " "
      + attributeValuesString(inst, attributesToOutput) + "\n");
View Full Code Here

  protected Instances process(Instances instances) throws Exception {
    // initializing necessary?
    if (!m_Initialized) {
      // do we have a file to initialize with?
      if ((getInitFile() != null) && getInitFile().isFile()) {
  DataSource source = new DataSource(getInitFile().getAbsolutePath());
  Instances data = source.getDataSet();
  m_InitFileClassIndex.setUpper(data.numAttributes() - 1);
  data.setClassIndex(m_InitFileClassIndex.getIndex());
  initFilter(data);
      }
      else {
View Full Code Here

TOP

Related Classes of weka.core.converters.ConverterUtils.DataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.