Class weka.core.converters.ConverterUtils (package weka.core.converters)

Examples of weka.core.converters.ConverterUtils.DataSource


     * @attribute assessorID string
     * @attribute assesseeID string
     * @attribute feedbackValue string
     */
   
    DataSource source;
    try
    {
      source = new DataSource(arffFileName);
      Instances instances = source.getDataSet();
      feedbacks = new ArrayList<Feedback>();
      logger.debug("Number of instances in arff file is " + instances.numInstances());
     
      Enumeration enu = instances.enumerateInstances();
      //get all the feedback lines
View Full Code Here


   */
  @Override
  public void evaluateAttributesFromFile(String corpusName,
      String featureSetName, String splitName, String file)
      throws Exception {
    DataSource ds = new DataSource(file);
    Instances inst = ds.getDataSet();
    String label = FileUtil.parseLabelFromFileName(inst.relationName());
    Integer run = FileUtil.parseRunFromFileName(inst.relationName());
    Integer fold = FileUtil.parseFoldFromFileName(inst.relationName());
    evaluateAttributes(corpusName, featureSetName, splitName, inst, label,
        run, fold);
View Full Code Here

    boolean noOutput = false,
    printClassifications = false, trainStatistics = true,
    printMargins = false, printComplexityStatistics = false,
    printGraph = false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    DataSource trainSource = null, testSource = null;
    ObjectInputStream objectInputStream = null;
    BufferedInputStream xmlInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0,
    testTimeStart = 0, testTimeElapsed = 0;
    String xml = "";
    String[] optionsTmp = null;
    Classifier classifierBackup;
    Classifier classifierClassifications = null;
    boolean printDistribution = false;
    int actualClassIndex = -1// 0-based class index
    String splitPercentageString = "";
    double splitPercentage = -1;
    boolean preserveOrder = false;
    boolean trainSetPresent = false;
    boolean testSetPresent = false;
    String thresholdFile;
    String thresholdLabel;
    StringBuffer predsBuff = null; // predictions from cross-validation

    // help requested?
    if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {
     
      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) ||
        Utils.getFlag("info", options);
     
      throw new Exception("\nHelp requested."
          + makeOptionString(classifier, globalInfo));
    }
   
    try {
      // do we get the input from XML instead of normal parameters?
      xml = Utils.getOption("xml", options);
      if (!xml.equals(""))
  options = new XMLOptions(xml).toArray();

      // is the input model only the XML-Options, i.e. w/o built model?
      optionsTmp = new String[options.length];
      for (int i = 0; i < options.length; i++)
  optionsTmp[i] = options[i];

      String tmpO = Utils.getOption('l', optionsTmp);
      //if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {
      if (tmpO.endsWith(".xml")) {
  // try to load file as PMML first
  boolean success = false;
  try {
    PMMLModel pmmlModel = PMMLFactory.getPMMLModel(tmpO);
    if (pmmlModel instanceof PMMLClassifier) {
      classifier = ((PMMLClassifier)pmmlModel);
      success = true;
    }
  } catch (IllegalArgumentException ex) {
    success = false;
  }
  if (!success) {
    // load options from serialized data  ('-l' is automatically erased!)
    XMLClassifier xmlserial = new XMLClassifier();
    Classifier cl = (Classifier) xmlserial.read(Utils.getOption('l', options));
   
    // merge options
    optionsTmp = new String[options.length + cl.getOptions().length];
    System.arraycopy(cl.getOptions(), 0, optionsTmp, 0, cl.getOptions().length);
    System.arraycopy(options, 0, optionsTmp, cl.getOptions().length, options.length);
    options = optionsTmp;
  }
      }

      noCrossValidation = Utils.getFlag("no-cv", options);
      // Get basic options (options the same for all schemes)
      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
  if (classIndexString.equals("first"))
    classIndex = 1;
  else if (classIndexString.equals("last"))
    classIndex = -1;
  else
    classIndex = Integer.parseInt(classIndexString);
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
  folds = Integer.parseInt(foldsString);
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
  seed = Integer.parseInt(seedString);
      }
      if (trainFileName.length() == 0) {
  if (objectInputFileName.length() == 0) {
    throw new Exception("No training file and no object "+
    "input file given.");
  }
  if (testFileName.length() == 0) {
    throw new Exception("No training file and no test "+
    "file given.");
  }
      } else if ((objectInputFileName.length() != 0) &&
    ((!(classifier instanceof UpdateableClassifier)) ||
        (testFileName.length() == 0))) {
  throw new Exception("Classifier not incremental, or no " +
      "test file provided: can't "+
  "use both train and model file.");
      }
      try {
  if (trainFileName.length() != 0) {
    trainSetPresent = true;
    trainSource = new DataSource(trainFileName);
  }
  if (testFileName.length() != 0) {
    testSetPresent = true;
    testSource = new DataSource(testFileName);
  }
  if (objectInputFileName.length() != 0) {
    if (objectInputFileName.endsWith(".xml")) {
      // if this is the case then it means that a PMML classifier was
      // successfully loaded earlier in the code
      objectInputStream = null;
      xmlInputStream = null;
    } else {
      InputStream is = new FileInputStream(objectInputFileName);
      if (objectInputFileName.endsWith(".gz")) {
        is = new GZIPInputStream(is);
      }
      // load from KOML?
      if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent()) ) {
        objectInputStream = new ObjectInputStream(is);
        xmlInputStream    = null;
      }
      else {
        objectInputStream = null;
        xmlInputStream    = new BufferedInputStream(is);
      }
    }
  }
      } catch (Exception e) {
  throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testSetPresent) {
  template = test = testSource.getStructure();
  if (classIndex != -1) {
    test.setClassIndex(classIndex - 1);
  } else {
    if ( (test.classIndex() == -1) || (classIndexString.length() != 0) )
      test.setClassIndex(test.numAttributes() - 1);
  }
  actualClassIndex = test.classIndex();
      }
      else {
  // percentage split
  splitPercentageString = Utils.getOption("split-percentage", options);
  if (splitPercentageString.length() != 0) {
    if (foldsString.length() != 0)
      throw new Exception(
    "Percentage split cannot be used in conjunction with "
    + "cross-validation ('-x').");
    splitPercentage = Double.parseDouble(splitPercentageString);
    if ((splitPercentage <= 0) || (splitPercentage >= 100))
      throw new Exception("Percentage split value needs be >0 and <100.");
  }
  else {
    splitPercentage = -1;
  }
  preserveOrder = Utils.getFlag("preserve-order", options);
  if (preserveOrder) {
    if (splitPercentage == -1)
      throw new Exception("Percentage split ('-percentage-split') is missing.");
  }
  // create new train/test sources
  if (splitPercentage > 0) {
    testSetPresent = true;
    Instances tmpInst = trainSource.getDataSet(actualClassIndex);
    if (!preserveOrder)
      tmpInst.randomize(new Random(seed));
    int trainSize =
            (int) Math.round(tmpInst.numInstances() * splitPercentage / 100);
    int testSize  = tmpInst.numInstances() - trainSize;
    Instances trainInst = new Instances(tmpInst, 0, trainSize);
    Instances testInst  = new Instances(tmpInst, trainSize, testSize);
    trainSource = new DataSource(trainInst);
    testSource  = new DataSource(testInst);
    template = test = testSource.getStructure();
    if (classIndex != -1) {
      test.setClassIndex(classIndex - 1);
    } else {
      if ( (test.classIndex() == -1) || (classIndexString.length() != 0) )
        test.setClassIndex(test.numAttributes() - 1);
    }
    actualClassIndex = test.classIndex();
  }
      }
      if (trainSetPresent) {
  template = train = trainSource.getStructure();
  if (classIndex != -1) {
    train.setClassIndex(classIndex - 1);
  } else {
    if ( (train.classIndex() == -1) || (classIndexString.length() != 0) )
      train.setClassIndex(train.numAttributes() - 1);
  }
  actualClassIndex = train.classIndex();
  if ((testSetPresent) && !test.equalHeaders(train)) {
    throw new IllegalArgumentException("Train and test file not compatible!");
  }
      }
      if (template == null) {
  throw new Exception("No actual dataset provided to use as template");
      }
      costMatrix = handleCostOption(
    Utils.getOption('m', options), template.numClasses());

      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      printDistribution = Utils.getFlag("distribution", options);
      thresholdFile = Utils.getOption("threshold-file", options);
      thresholdLabel = Utils.getOption("threshold-label", options);

      // Check -p option
      try {
  attributeRangeString = Utils.getOption('p', options);
      }
      catch (Exception e) {
  throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
      "It now expects a parameter specifying a range of attributes " +
  "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
  printClassifications = true;
  noOutput = true;
  if (!attributeRangeString.equals("0"))
    attributesToOutput = new Range(attributeRangeString);
      }

      if (!printClassifications && printDistribution)
  throw new Exception("Cannot print distribution without '-p' option!");

      // if no training file given, we don't have any priors
      if ( (!trainSetPresent) && (printComplexityStatistics) )
  throw new Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");

      // If a model file is given, we can't process
      // scheme-specific options
      if (objectInputFileName.length() != 0) {
  Utils.checkForRemainingOptions(options);
      } else {

  // Set options for classifier
  if (classifier instanceof OptionHandler) {
    for (int i = 0; i < options.length; i++) {
      if (options[i].length() != 0) {
        if (schemeOptionsText == null) {
    schemeOptionsText = new StringBuffer();
        }
        if (options[i].indexOf(' ') != -1) {
    schemeOptionsText.append('"' + options[i] + "\" ");
        } else {
    schemeOptionsText.append(options[i] + " ");
        }
      }
    }
    ((OptionHandler)classifier).setOptions(options);
  }
      }
      Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
      throw new Exception("\nWeka exception: " + e.getMessage()
    + makeOptionString(classifier, false));
    }

    // Setup up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);

    // disable use of priors if no training file given
    if (!trainSetPresent)
      testingEvaluation.useNoPriors();

    if (objectInputFileName.length() != 0) {
      // Load classifier from file
      if (objectInputStream != null) {
  classifier = (Classifier) objectInputStream.readObject();
        // try and read a header (if present)
        Instances savedStructure = null;
        try {
          savedStructure = (Instances) objectInputStream.readObject();
        } catch (Exception ex) {
          // don't make a fuss
        }
        if (savedStructure != null) {
          // test for compatibility with template
          if (!template.equalHeaders(savedStructure)) {
            throw new Exception("training and test set are not compatible");
          }
        }
  objectInputStream.close();
      }
      else if (xmlInputStream != null) {
  // whether KOML is available has already been checked (objectInputStream would null otherwise)!
  classifier = (Classifier) KOML.read(xmlInputStream);
  xmlInputStream.close();
      }
    }

    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) &&
  (testSetPresent || noCrossValidation) &&
  (costMatrix == null) &&
  (trainSetPresent)) {
      // Build classifier incrementally
      trainingEvaluation.setPriors(train);
      testingEvaluation.setPriors(train);
      trainTimeStart = System.currentTimeMillis();
      if (objectInputFileName.length() == 0) {
  classifier.buildClassifier(train);
      }
      Instance trainInst;
      while (trainSource.hasMoreElements(train)) {
  trainInst = trainSource.nextElement(train);
  trainingEvaluation.updatePriors(trainInst);
  testingEvaluation.updatePriors(trainInst);
  ((UpdateableClassifier)classifier).updateClassifier(trainInst);
      }
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    } else if (objectInputFileName.length() == 0) {
      // Build classifier in one go
      tempTrain = trainSource.getDataSet(actualClassIndex);
      trainingEvaluation.setPriors(tempTrain);
      testingEvaluation.setPriors(tempTrain);
      trainTimeStart = System.currentTimeMillis();
      classifier.buildClassifier(tempTrain);
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // backup of fully trained classifier for printing the classifications
    if (printClassifications)
      classifierClassifications = Classifier.makeCopy(classifier);

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      OutputStream os = new FileOutputStream(objectOutputFileName);
      // binary
      if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {
  if (objectOutputFileName.endsWith(".gz")) {
    os = new GZIPOutputStream(os);
  }
  ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
  objectOutputStream.writeObject(classifier);
        if (template != null) {
          objectOutputStream.writeObject(template);
        }
  objectOutputStream.flush();
  objectOutputStream.close();
      }
      // KOML/XML
      else {
  BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
  if (objectOutputFileName.endsWith(".xml")) {
    XMLSerialization xmlSerial = new XMLClassifier();
    xmlSerial.write(xmlOutputStream, classifier);
  }
  else
    // whether KOML is present has already been checked
    // if not present -> ".koml" is interpreted as binary - see above
    if (objectOutputFileName.endsWith(".koml")) {
      KOML.write(xmlOutputStream, classifier);
    }
  xmlOutputStream.close();
      }
    }

    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)){
      return ((Drawable)classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)){
      return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }

    // Output model
    if (!(noOutput || printMargins)) {
      if (classifier instanceof OptionHandler) {
  if (schemeOptionsText != null) {
    text.append("\nOptions: "+schemeOptionsText);
    text.append("\n");
  }
      }
      text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
      text.append("\n=== Evaluation Cost Matrix ===\n\n");
      text.append(costMatrix.toString());
    }

    // Output test instance predictions only
    if (printClassifications) {
      DataSource source = testSource;
      predsBuff = new StringBuffer();
      // no test set -> use train set
      if (source == null && noCrossValidation) {
  source = trainSource;
        predsBuff.append("\n=== Predictions on training data ===\n\n");
View Full Code Here

    try {
      Instances i;
      // read from stdin and print statistics
      if (args.length == 0) {
  DataSource source = new DataSource(System.in);
  i = source.getDataSet();
  System.out.println(i.toSummaryString());
      }
      // read file and print statistics
      else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) {
  DataSource source = new DataSource(args[0]);
  i = source.getDataSet();
  System.out.println(i.toSummaryString());
      }
      // read two files, merge them and print result to stdout
      else if ((args.length == 3) && (args[0].toLowerCase().equals("merge"))) {
  DataSource source1 = new DataSource(args[1]);
  DataSource source2 = new DataSource(args[2]);
  i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
  System.out.println(i);
      }
      // read two files, append them and print result to stdout
      else if ((args.length == 3) && (args[0].toLowerCase().equals("append"))) {
  DataSource source1 = new DataSource(args[1]);
  DataSource source2 = new DataSource(args[2]);
  if (!source1.getStructure().equalHeaders(source2.getStructure()))
    throw new Exception("The two datasets have different headers!");
  Instances structure = source1.getStructure();
  System.out.println(source1.getStructure());
  while (source1.hasMoreElements(structure))
    System.out.println(source1.nextElement(structure));
  structure = source2.getStructure();
  while (source2.hasMoreElements(structure))
    System.out.println(source2.nextElement(structure));
      }
      // read two files and compare their headers
      else if ((args.length == 3) && (args[0].toLowerCase().equals("headers"))) {
  DataSource source1 = new DataSource(args[1]);
  DataSource source2 = new DataSource(args[2]);
  if (source1.getStructure().equalHeaders(source2.getStructure()))
    System.out.println("Headers match");
  else
    System.out.println("Headers don't match");
      }
      // read file and seed value, randomize data and print result to stdout
      else if ((args.length == 3) && (args[0].toLowerCase().equals("randomize"))) {
  DataSource source = new DataSource(args[2]);
  i = source.getDataSet();
  i.randomize(new Random(Integer.parseInt(args[1])));
  System.out.println(i);
      }
      // wrong parameters
      else {
View Full Code Here

    Instances testRaw = null;
    boolean hasClass = (test.classIndex() >= 0);
    int unclusteredInstances = 0;
    Vector<Double> clusterAssignments = new Vector<Double>();
    Filter filter = null;
    DataSource source = null;
    Instance inst;

    if (testFileName == null)
      testFileName = "";
   
    // load data
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(test);
    testRaw = source.getStructure(test.classIndex());
   
    // If class is set then do class based evaluation as well
    if (hasClass) {
      if (testRaw.classAttribute().isNumeric())
  throw new Exception("ClusterEvaluation: Class must be nominal!");

      filter = new Remove();
      ((Remove) filter).setAttributeIndices("" + (testRaw.classIndex() + 1));
      ((Remove) filter).setInvertSelection(false);
      filter.setInputFormat(testRaw);
    }
   
    i = 0;
    while (source.hasMoreElements(testRaw)) {
      // next instance
      inst = source.nextElement(testRaw);
      if (filter != null) {
  filter.input(inst);
  filter.batchFinished();
  inst = filter.output();
      }
View Full Code Here

    int numClasses = inst.classAttribute().numValues();
    int[][] counts = new int [m_numClusters][numClasses];
    int[] clusterTotals = new int[m_numClusters];
    double[] best = new double[m_numClusters+1];
    double[] current = new double[m_numClusters+1];
    DataSource source = null;
    Instances instances = null;
    Instance instance = null;
    int i;
    int numInstances;

    if (fileName == null)
      fileName = "";
   
    if (fileName.length() != 0)
      source = new DataSource(fileName);
    else
      source = new DataSource(inst);
    instances = source.getStructure(inst.classIndex());

    i = 0;
    while (source.hasMoreElements(instances)) {
      instance = source.nextElement(instances);
      if (m_clusterAssignments[i] >= 0) {
        counts[(int)m_clusterAssignments[i]][(int)instance.classValue()]++;
        clusterTotals[(int)m_clusterAssignments[i]]++;
      }
      i++;
View Full Code Here

    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {
     
      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) ||
        Utils.getFlag("info", options);
     
      throw  new Exception("Help requested."
          + makeOptionString(clusterer, globalInfo));
    }
   
    try {
      // Get basic options (options the same for all clusterers
      //printClusterAssignments = Utils.getFlag('p', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);
      graphFileName = Utils.getOption('g', options);

      // Check -p option
      try {
  attributeRangeString = Utils.getOption('p', options);
      }
      catch (Exception e) {
  throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
          "It now expects a parameter specifying a range of attributes " +
          "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
  printClusterAssignments = true;
  if (!attributeRangeString.equals("0"))
    attributesToOutput = new Range(attributeRangeString);
      }

      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw  new Exception("No training file and no object "
             + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw  new Exception("No training file and no test file given.");
        }
      }
      else {
  if ((objectInputFileName.length() != 0)
      && (printClusterAssignments == false)) {
    throw  new Exception("Can't use both train and model file "
             + "unless -p specified.");
  }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
  seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
  folds = Integer.parseInt(foldsString);
  doXval = true;
      }
    }
    catch (Exception e) {
      throw  new Exception('\n' + e.getMessage()
         + makeOptionString(clusterer, false));
    }

    try {
      if (trainFileName.length() != 0) {
  source = new DataSource(trainFileName);
  train  = source.getStructure();

  String classString = Utils.getOption('c',options);
  if (classString.length() != 0) {
    if (classString.compareTo("last") == 0)
      theClass = train.numAttributes();
    else if (classString.compareTo("first") == 0)
      theClass = 1;
    else
      theClass = Integer.parseInt(classString);

    if (theClass != -1) {
      if (doXval || testFileName.length() != 0)
        throw new Exception("Can only do class based evaluation on the "
      +"training data");

      if (objectInputFileName.length() != 0)
        throw new Exception("Can't load a clusterer and do class based "
      +"evaluation");

      if (objectOutputFileName.length() != 0)
        throw new Exception(
      "Can't do class based evaluation and save clusterer");
    }
  }
  else {
    // if the dataset defines a class attribute, use it
    if (train.classIndex() != -1) {
      theClass = train.classIndex() + 1;
      System.err.println(
    "Note: using class attribute from dataset, i.e., attribute #"
    + theClass);
    }
  }

  if (theClass != -1) {
    if (theClass < 1 || theClass > train.numAttributes())
      throw new Exception("Class is out of range!");

    if (!train.attribute(theClass - 1).isNominal())
      throw new Exception("Class must be nominal!");
   
    train.setClassIndex(theClass - 1);
  }
      }
    }
    catch (Exception e) {
      throw  new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
      Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
      ((OptionHandler)clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file
      //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
      java.io.ObjectInputStream ois =
        new java.io.ObjectInputStream(
        new java.io.BufferedInputStream(
        new java.io.FileInputStream(objectInputFileName)));
      clusterer = (Clusterer) ois.readObject();
      // try and get the training header
      try {
        trainHeader = (Instances) ois.readObject();
      } catch (Exception ex) {
        // don't moan if we cant
      }
    }
    else {
      // Build the clusterer if no object file provided
      if (theClass == -1) {
  if (updateable) {
    clusterer.buildClusterer(source.getStructure());
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      ((UpdateableClusterer) clusterer).updateClusterer(inst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    clusterer.buildClusterer(source.getDataSet());
  }
      }
      else {
  Remove removeClass = new Remove();
  removeClass.setAttributeIndices("" + theClass);
  removeClass.setInvertSelection(false);
  removeClass.setInputFormat(train);
  if (updateable) {
    Instances clusterTrain = Filter.useFilter(train, removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
    while (source.hasMoreElements(train)) {
      inst = source.nextElement(train);
      removeClass.input(inst);
      removeClass.batchFinished();
      Instance clusterTrainInst = removeClass.output();
      ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
    }
    ((UpdateableClusterer) clusterer).updateFinished();
  }
  else {
    Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
    clusterer.buildClusterer(clusterTrain);
          trainHeader = clusterTrain;
  }
  ClusterEvaluation ce = new ClusterEvaluation();
  ce.setClusterer(clusterer);
  ce.evaluateClusterer(train, trainFileName);
 
  return "\n\n=== Clustering stats for training data ===\n\n" +
    ce.clusterResultsToString();
      }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data */
    if (printClusterAssignments) {
      return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append("\n\n=== Clustering stats for training data ===\n\n"
    + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
      // check header compatibility
      DataSource test = new DataSource(testFileName);
      Instances testStructure = test.getStructure();
      if (!trainHeader.equalHeaders(testStructure)) {
        throw new Exception("Training and testing data are not compatible");
      }

      text.append("\n\n=== Clustering stats for testing data ===\n\n"
View Full Code Here

    int cc = clusterer.numberOfClusters();
    double[] instanceStats = new double[cc];
    int unclusteredInstances = 0;

    if (fileName.length() != 0) {
      DataSource source = new DataSource(fileName);
      Instances structure = source.getStructure();
      Instance inst;
      while (source.hasMoreElements(structure)) {
  inst = source.nextElement(structure);
  try {
    cnum = clusterer.clusterInstance(inst);

    if (clusterer instanceof DensityBasedClusterer) {
      loglk += ((DensityBasedClusterer)clusterer).
View Full Code Here

    throws Exception {

    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    DataSource source = null;
    Instance inst;
    Instances structure;
   
    if (testFileName.length() != 0)
      source = new DataSource(testFileName);
    else
      source = new DataSource(trainFileName);
   
    structure = source.getStructure();
    while (source.hasMoreElements(structure)) {
      inst = source.nextElement(structure);
      try {
  cnum = clusterer.clusterInstance(inst);
 
  text.append(i + " " + cnum + " "
      + attributeValuesString(inst, attributesToOutput) + "\n");
View Full Code Here

  protected Instances process(Instances instances) throws Exception {
    // initializing necessary?
    if (!m_Initialized) {
      // do we have a file to initialize with?
      if ((getInitFile() != null) && getInitFile().isFile()) {
  DataSource source = new DataSource(getInitFile().getAbsolutePath());
  Instances data = source.getDataSet();
  m_InitFileClassIndex.setUpper(data.numAttributes() - 1);
  data.setClassIndex(m_InitFileClassIndex.getIndex());
  initFilter(data);
      }
      else {
View Full Code Here

TOP

Related Classes of weka.core.converters.ConverterUtils.DataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.