Package weka.core

Examples of weka.core.Instances


  public synchronized void acceptDataSet(DataSetEvent e) {
    // ignore structure only events
    if (e.isStructureOnly()) {
      return;
    }
    m_visualizeDataSet = new Instances(e.getDataSet());
    if (m_visualizeDataSet.classIndex() < 0) {
      m_visualizeDataSet.setClassIndex(m_visualizeDataSet.numAttributes()-1);
    }
    if (!m_design) {
      try {
View Full Code Here


  System.err.println("Usage: DataVisualizer <dataset>");
  System.exit(1);
      }
      java.io.Reader r = new java.io.BufferedReader(
       new java.io.FileReader(args[0]));
      Instances inst = new Instances(r);
      final javax.swing.JFrame jf = new javax.swing.JFrame();
      jf.getContentPane().setLayout(new java.awt.BorderLayout());
      final DataVisualizer as = new DataVisualizer();
      as.setInstances(inst);
     
View Full Code Here

        if (getRetrieval() == BATCH) {
            throw new IOException("Cannot mix getting instances in both incremental and batch modes");
        }
        setRetrieval(NONE);
        m_datasetPseudoInc = getDataSet();
        m_structure = new Instances(m_datasetPseudoInc,0);
        setRetrieval(NONE);
        return m_structure;
    }
    if (m_structure == null) {
      if(m_checkForTable) {
        if(!m_DataBaseConnection.tableExists(endOfQuery(true)))
          throw new IOException(
              "Table does not exist according to metadata from JDBC driver. "
              + "If you are convinced the table exists, set 'checkForTable' "
              + "to 'False' in your DatabaseUtils.props file and try again.");
      }
        //finds out which SQL statement to use for the DBMS to limit the number of resulting rows to one
        int choice = 0;
        boolean rightChoice = false;
        while (!rightChoice){
            try{
                if (m_DataBaseConnection.execute(limitQuery(m_query,0,choice)) == false) {
                    throw new IOException("Query didn't produce results");
                }
                m_choice = choice;
                rightChoice = true;
            }
            catch (SQLException ex) {
                choice++;
                if(choice == 3){
                    System.out.println("Incremental loading not supported for that DBMS. Pseudoincremental mode is used if you use incremental loading.\nAll rows are loaded into memory once and retrieved incrementally from memory instead of from the database.");
                    m_pseudoIncremental = true;
                    break pseudo;
                }
            }
        }
        String end = endOfQuery(false);
        ResultSet rs = m_DataBaseConnection.getResultSet();
        ResultSetMetaData md = rs.getMetaData();
//        rs.close();
        int numAttributes = md.getColumnCount();
        int [] attributeTypes = new int [numAttributes];
        m_nominalIndexes = Utils.cast(new Hashtable [numAttributes]);
        m_nominalStrings = Utils.cast(new ArrayList [numAttributes]);
        for (int i = 1; i <= numAttributes; i++) {
            switch (m_DataBaseConnection.translateDBColumnType(md.getColumnTypeName(i))) {
                case DatabaseConnection.STRING :
                    //System.err.println("String --> nominal");
                    ResultSet rs1;
                    String columnName = md.getColumnLabel(i);
                    if(m_DataBaseConnection.getUpperCase())
                        columnName = columnName.toUpperCase();
                    m_nominalIndexes[i - 1] = new Hashtable<String,Double>();
                    m_nominalStrings[i - 1] = new ArrayList<String>();
                    String query = "SELECT COUNT(DISTINCT( "+columnName+" )) FROM " + end;
                    if (m_DataBaseConnection.execute(query) == true){
                        rs1 = m_DataBaseConnection.getResultSet();
                        rs1.next();
                        int count = rs1.getInt(1);
                        rs1.close();
                        //                        if(count > m_nominalToStringLimit || m_DataBaseConnection.execute("SELECT DISTINCT ( "+columnName+" ) FROM "+ end) == false){
                        if(count > m_nominalToStringLimit ||
                           m_DataBaseConnection.execute("SELECT DISTINCT ( "
                                                        + columnName
                                                        + " ) FROM "
                                                        + end
                                                        + " ORDER BY "
                                                        + columnName) == false){
                            attributeTypes[i - 1] = Attribute.STRING;
                            break;
                        }
                        rs1 = m_DataBaseConnection.getResultSet();
                    }
                    else{
                        //System.err.println("Count for nominal values cannot be calculated. Attribute "+columnName+" treated as String.");
                        attributeTypes[i - 1] = Attribute.STRING;
                        break;
                    }
                    attributeTypes[i - 1] = Attribute.NOMINAL;
                    stringToNominal(rs1,i);
                    rs1.close();
                    break;
                case DatabaseConnection.TEXT:
                    //System.err.println("boolean --> string");
                    columnName = md.getColumnLabel(i);
                    if(m_DataBaseConnection.getUpperCase())
                      columnName = columnName.toUpperCase();
                    m_nominalIndexes[i - 1] = new Hashtable<String,Double>();
                    m_nominalStrings[i - 1] = new ArrayList<String>();
                    query = "SELECT COUNT(DISTINCT( "+columnName+" )) FROM " + end;
                    if (m_DataBaseConnection.execute(query) == true){
                      rs1 = m_DataBaseConnection.getResultSet();
                      stringToNominal(rs1,i);
                      rs1.close();
                    }
                    attributeTypes[i - 1] = Attribute.STRING;
                    break;
                case DatabaseConnection.BOOL:
                    //System.err.println("boolean --> nominal");
                    attributeTypes[i - 1] = Attribute.NOMINAL;
                    m_nominalIndexes[i - 1] = new Hashtable<String,Double>();
                    m_nominalIndexes[i - 1].put("false", new Double(0));
                    m_nominalIndexes[i - 1].put("true", new Double(1));
                    m_nominalStrings[i - 1] = new ArrayList<String>();
                    m_nominalStrings[i - 1].add("false");
                    m_nominalStrings[i - 1].add("true");
                    break;
                case DatabaseConnection.DOUBLE:
                    //System.err.println("BigDecimal --> numeric");
                    attributeTypes[i - 1] = Attribute.NUMERIC;
                    break;
                case DatabaseConnection.BYTE:
                    //System.err.println("byte --> numeric");
                    attributeTypes[i - 1] = Attribute.NUMERIC;
                    break;
                case DatabaseConnection.SHORT:
                    //System.err.println("short --> numeric");
                    attributeTypes[i - 1] = Attribute.NUMERIC;
                    break;
                case DatabaseConnection.INTEGER:
                    //System.err.println("int --> numeric");
                    attributeTypes[i - 1] = Attribute.NUMERIC;
                    break;
                case DatabaseConnection.LONG:
                    //System.err.println("long --> numeric");
                    attributeTypes[i - 1] = Attribute.NUMERIC;
                    break;
                case DatabaseConnection.FLOAT:
                    //System.err.println("float --> numeric");
                    attributeTypes[i - 1] = Attribute.NUMERIC;
                    break;
                case DatabaseConnection.DATE:
                    attributeTypes[i - 1] = Attribute.DATE;
                    break;
                case DatabaseConnection.TIME:
                  attributeTypes[i - 1] = Attribute.DATE;
                  break;
                default:
                    //System.err.println("Unknown column type");
                    attributeTypes[i - 1] = Attribute.STRING;
            }
        }
        ArrayList<Attribute> attribInfo = new ArrayList<Attribute>();
        for (int i = 0; i < numAttributes; i++) {
            /* Fix for databases that uppercase column names */
            //String attribName = attributeCaseFix(md.getColumnName(i + 1));
            String attribName = md.getColumnLabel(i + 1);
            switch (attributeTypes[i]) {
                case Attribute.NOMINAL:
                    attribInfo.add(new Attribute(attribName, m_nominalStrings[i]));
                    break;
                case Attribute.NUMERIC:
                    attribInfo.add(new Attribute(attribName));
                    break;
                case Attribute.STRING:
                    Attribute att = new Attribute(attribName, (ArrayList<String>)null);
                    for (int n = 0; n < m_nominalStrings[i].size(); n++) {
                      att.addStringValue((String) m_nominalStrings[i].get(n));
                    }
                    attribInfo.add(att);
                    break;
                case Attribute.DATE:
                    attribInfo.add(new Attribute(attribName, (String)null));
                    break;
                default:
                    throw new IOException("Unknown attribute type");
            }
        }
        m_structure = new Instances(endOfQuery(true), attribInfo,0);
        //get rid of m_idColumn
        if(m_DataBaseConnection.getUpperCase())
              m_idColumn = m_idColumn.toUpperCase();
        //System.out.println(m_structure.attribute(0).name().equals(idColumn));
        if(m_structure.attribute(0).name().equals(m_idColumn)){
            m_oldStructure = new Instances(m_structure,0);
            m_oldStructure.deleteAttributeAt(0);
            //System.out.println(m_structure);
        }
        else
            m_oldStructure = new Instances(m_structure,0);
       
        if (m_DataBaseConnection.getResultSet() != null) {
          rs.close();
        }
    }
    else{
        if(m_oldStructure == null)
            m_oldStructure = new Instances(m_structure,0);
    }
    m_DataBaseConnection.disconnectFromDatabase();
    }
    catch(Exception ex) {
        ex.printStackTrace();
View Full Code Here

    }
    setRetrieval(BATCH);
    connectToDatabase();
   
   
    Instances result = null;
    try{
    if (m_DataBaseConnection.execute(m_query) == false)
      throw new Exception("Query didn't produce results");
    ResultSet rs = m_DataBaseConnection.getResultSet();
    ResultSetMetaData md = rs.getMetaData();
    // Determine structure of the instances
    int numAttributes = md.getColumnCount();
    int [] attributeTypes = new int [numAttributes];
    m_nominalIndexes = Utils.cast(new Hashtable [numAttributes]);
    m_nominalStrings = Utils.cast(new ArrayList [numAttributes]);
    for (int i = 1; i <= numAttributes; i++) {
      switch (m_DataBaseConnection.translateDBColumnType(md.getColumnTypeName(i))) {
 
      case DatabaseConnection.STRING :
        ResultSet rs1;
        String columnName = md.getColumnLabel(i);
        if(m_DataBaseConnection.getUpperCase())
            columnName = columnName.toUpperCase();
        String end = endOfQuery(false);
        m_nominalIndexes[i - 1] = new Hashtable<String,Double>();
        m_nominalStrings[i - 1] = new ArrayList<String>();
        if(m_DataBaseConnection.execute("SELECT DISTINCT ( "
                                        + columnName+" ) FROM "
                                        + end
                                        + " ORDER BY "
                                        + columnName) == false){
            throw new Exception("Nominal values cannot be retrieved");
        }
        rs1 = m_DataBaseConnection.getResultSet();
        attributeTypes[i - 1] = Attribute.NOMINAL;
        stringToNominal(rs1,i);
//        rs1.close(); 
  break;
      case DatabaseConnection.TEXT:
        columnName = md.getColumnLabel(i);
        if(m_DataBaseConnection.getUpperCase())
            columnName = columnName.toUpperCase();
        end = endOfQuery(false);
        m_nominalIndexes[i - 1] = new Hashtable<String,Double>();
        m_nominalStrings[i - 1] = new ArrayList<String>();
        if(m_DataBaseConnection.execute("SELECT DISTINCT ( "+columnName+" ) FROM "+ end) == false){
            throw new Exception("Nominal values cannot be retrieved");
        }
        rs1 = m_DataBaseConnection.getResultSet();
        attributeTypes[i - 1] = Attribute.STRING;
        stringToNominal(rs1,i);
        rs1.close();
  break;
      case DatabaseConnection.BOOL:
  //System.err.println("boolean --> nominal");
  attributeTypes[i - 1] = Attribute.NOMINAL;
  m_nominalIndexes[i - 1] = new Hashtable<String,Double>();
  m_nominalIndexes[i - 1].put("false", new Double(0));
  m_nominalIndexes[i - 1].put("true", new Double(1));
  m_nominalStrings[i - 1] = new ArrayList<String>();
  m_nominalStrings[i - 1].add("false");
  m_nominalStrings[i - 1].add("true");
  break;
      case DatabaseConnection.DOUBLE:
  //System.err.println("BigDecimal --> numeric");
  attributeTypes[i - 1] = Attribute.NUMERIC;
  break;
      case DatabaseConnection.BYTE:
  //System.err.println("byte --> numeric");
  attributeTypes[i - 1] = Attribute.NUMERIC;
  break;
      case DatabaseConnection.SHORT:
  //System.err.println("short --> numeric");
  attributeTypes[i - 1] = Attribute.NUMERIC;
  break;
      case DatabaseConnection.INTEGER:
  //System.err.println("int --> numeric");
  attributeTypes[i - 1] = Attribute.NUMERIC;
  break;
      case DatabaseConnection.LONG:
  //System.err.println("long --> numeric");
  attributeTypes[i - 1] = Attribute.NUMERIC;
  break;
      case DatabaseConnection.FLOAT:
  //System.err.println("float --> numeric");
  attributeTypes[i - 1] = Attribute.NUMERIC;
  break;
      case DatabaseConnection.DATE:
  attributeTypes[i - 1] = Attribute.DATE;
  break;
      case DatabaseConnection.TIME:
  attributeTypes[i - 1] = Attribute.DATE;
  break;
      default:
  //System.err.println("Unknown column type");
  attributeTypes[i - 1] = Attribute.STRING;
      }
    }
   
    // For sqlite
    // cache column names because the last while(rs.next()) { iteration for
    // the tuples below will close the md object: 
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
      columnNames.add(md.getColumnLabel(i + 1));
    }

    // Step through the tuples
    //System.err.println("Creating instances...");
    ArrayList<Instance> instances = new ArrayList<Instance>();
    while(rs.next()) {
      double[] vals = new double[numAttributes];
      for(int i = 1; i <= numAttributes; i++) {
  switch (m_DataBaseConnection.translateDBColumnType(md.getColumnTypeName(i))) {
  case DatabaseConnection.STRING :
    String str = rs.getString(i);
   
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
            } else {
                Double index = (Double)m_nominalIndexes[i - 1].get(str);
                if (index == null) {
                    index = new Double(m_structure.attribute(i-1).addStringValue(str));
                }
                vals[i - 1] = index.doubleValue();
            }
    break;
  case DatabaseConnection.TEXT:
    str = rs.getString(i);

    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    }
    else {
      Double index = (Double)m_nominalIndexes[i - 1].get(str);
      if (index == null) {
        index = new Double(m_structure.attribute(i-1).addStringValue(str));
      }
      vals[i - 1] = index.doubleValue();
    }
    break;
  case DatabaseConnection.BOOL:
    boolean boo = rs.getBoolean(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] = (boo ? 1.0 : 0.0);
    }
    break;
  case DatabaseConnection.DOUBLE:
    double dd = rs.getDouble(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] =  dd;
    }
    break;
  case DatabaseConnection.BYTE:
    byte by = rs.getByte(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] = (double)by;
    }
    break;
  case DatabaseConnection.SHORT:
    short sh = rs.getShort(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] = (double)sh;
    }
    break;
  case DatabaseConnection.INTEGER:
    int in = rs.getInt(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] = (double)in;
    }
    break;
  case DatabaseConnection.LONG:
    long lo = rs.getLong(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] = (double)lo;
    }
    break;
  case DatabaseConnection.FLOAT:
    float fl = rs.getFloat(i);
    if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
      vals[i - 1] = (double)fl;
    }
    break;
  case DatabaseConnection.DATE:
          Date date = rs.getDate(i);
          if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
            // TODO: Do a value check here.
            vals[i - 1] = (double)date.getTime();
          }
          break;
  case DatabaseConnection.TIME:
          Time time = rs.getTime(i);
          if (rs.wasNull()) {
      vals[i - 1] = Utils.missingValue();
    } else {
            // TODO: Do a value check here.
            vals[i - 1] = (double) time.getTime();
          }
          break;
  default:
    vals[i - 1] = Utils.missingValue();
  }
      }
      Instance newInst;
      newInst = new DenseInstance(1.0, vals);
      instances.add(newInst);
    }  
   
    // Create the header and add the instances to the dataset
    //System.err.println("Creating header...");
    ArrayList<Attribute> attribInfo = new ArrayList<Attribute>();
    for (int i = 0; i < numAttributes; i++) {
      /* Fix for databases that uppercase column names */
      //String attribName = attributeCaseFix(md.getColumnName(i + 1));
//      String attribName = md.getColumnName(i + 1);
      String attribName = columnNames.get(i);
      switch (attributeTypes[i]) {
      case Attribute.NOMINAL:
  attribInfo.add(new Attribute(attribName, m_nominalStrings[i]));
  break;
      case Attribute.NUMERIC:
  attribInfo.add(new Attribute(attribName));
  break;
      case Attribute.STRING:
  Attribute att = new Attribute(attribName, (ArrayList<String>) null);
  attribInfo.add(att);
  for (int n = 0; n < m_nominalStrings[i].size(); n++) {
    att.addStringValue((String) m_nominalStrings[i].get(n));
  }
  break;
      case Attribute.DATE:
  attribInfo.add(new Attribute(attribName, (String)null));
  break;
      default:
  throw new IOException("Unknown attribute type");
      }
    }
    result = new Instances(endOfQuery(true), attribInfo,
             instances.size());
    for (int i = 0; i < instances.size(); i++) {
      result.add((Instance)instances.get(i));
    }
   
    rs.close();
   
    m_DataBaseConnection.disconnectFromDatabase();
    //get rid of m_idColumn
    if(m_DataBaseConnection.getUpperCase())
        m_idColumn = m_idColumn.toUpperCase();
    if(result.attribute(0).name().equals(m_idColumn)){
        result.deleteAttributeAt(0);
    }
    m_structure = new Instances(result,0);
    }
    catch(Exception ex) {
  printException(ex);
        StringBuffer text = new StringBuffer();
        if(m_query.equals("Select * from Results0")){
View Full Code Here

        atf.setOptions(options);
        atf.setSource(atf.getUrl(), atf.getUser(), atf.getPassword());
        if(!atf.m_inc)
            System.out.println(atf.getDataSet());
        else{
            Instances structure = atf.getStructure();
            System.out.println(structure);
            Instance temp;
            do {
            temp = atf.getNextInstance(structure);
            if (temp != null) {
View Full Code Here

    // nothing to do here
  }
   
  public static void main(String[] args) {
    try {
      Instances train = new Instances(new java.io.BufferedReader(new java.io.FileReader(args[0])));
      train.setClassIndex(train.numAttributes() - 1);
      weka.classifiers.evaluation.ThresholdCurve tc =
        new weka.classifiers.evaluation.ThresholdCurve();
      weka.classifiers.evaluation.EvaluationUtils eu =
        new weka.classifiers.evaluation.EvaluationUtils();
      //weka.classifiers.Classifier classifier = new weka.classifiers.functions.Logistic();
      weka.classifiers.Classifier classifier = new weka.classifiers.bayes.NaiveBayes();
      FastVector predictions = new FastVector();
      eu.setSeed(1);
      predictions.appendElements(eu.getCVPredictions(classifier, train, 10));
      Instances result = tc.getCurve(predictions, 0);
      PlotData2D pd = new PlotData2D(result);
      pd.m_alwaysDisplayPointsOfThisSize = 10;

      boolean[] connectPoints = new boolean[result.numInstances()];
      for (int i = 1; i < connectPoints.length; i++) {
        connectPoints[i] = true;
      }
      pd.setConnectPoints(connectPoints);
      final javax.swing.JFrame jf =
View Full Code Here

      } catch (Exception ex) {
  throw new IOException("Unable to determine structure as arff (Reason: " + ex.toString() + ").");
      }
    }

    return new Instances(m_structure, 0);
  }
View Full Code Here

    // Read all instances
    Instance inst;
    while ((inst = m_ArffReader.readInstance(m_structure)) != null)
      m_structure.add(inst);
   
    Instances readIn = new Instances(m_structure);

    // close the stream
    m_sourceReader.close();
   
    return readIn;
View Full Code Here

      updateInfoGivenIndex(maxMinIndex);
    }
   
    private void updateCostBenefit() {
      double value = (double)m_thresholdSlider.getValue() / 100.0;
      Instances plotInstances = m_masterPlot.getPlotInstances();
      int indexOfSampleSize =
        m_masterPlot.getPlotInstances().attribute(ThresholdCurve.SAMPLE_SIZE_NAME).index();
      int indexOfPercOfTarget =
        m_masterPlot.getPlotInstances().attribute(ThresholdCurve.RECALL_NAME).index();
      int indexOfThreshold =
View Full Code Here

      m_classificationAccV.
        setText(Utils.doubleToString((tp + tn) / (totalPos + totalNeg) * 100.0, 4) + "%");     
    }
   
    private void updateInfoGivenIndex(int index) {
      Instances plotInstances = m_masterPlot.getPlotInstances();
      int indexOfSampleSize =
        m_masterPlot.getPlotInstances().attribute(ThresholdCurve.SAMPLE_SIZE_NAME).index();
      int indexOfPercOfTarget =
        m_masterPlot.getPlotInstances().attribute(ThresholdCurve.RECALL_NAME).index();
      int indexOfThreshold =
        m_masterPlot.getPlotInstances().attribute(ThresholdCurve.THRESHOLD_NAME).index();
     
      // update labels
      m_percPopLab.setText(Utils.
          doubleToString(100.0 * plotInstances.instance(index).value(indexOfSampleSize), 4));
      m_percOfTargetLab.setText(Utils.doubleToString(
          100.0 * plotInstances.instance(index).value(indexOfPercOfTarget), 4));
      m_thresholdLab.setText(Utils.doubleToString(plotInstances.instance(index).value(indexOfThreshold), 4));
      /*if (m_percPop.isSelected()) {
        m_percPopLab.setText(Utils.doubleToString(100.0 * value, 4));
      } else if (m_percOfTarget.isSelected()) {
        m_percOfTargetLab.setText(Utils.doubleToString(100.0 * value, 4));
      } else {
        m_thresholdLab.setText(Utils.doubleToString(value, 4));
      }*/
     
      // Update the highlighted point on the graphs */
      if (m_previousShapeIndex >= 0) {
        m_shapeSizes[m_previousShapeIndex] = 1;
      }
    
      m_shapeSizes[index] = 10;
      m_previousShapeIndex = index;
     
      // Update the confusion matrix
//      double totalInstances =
      int tp = plotInstances.attribute(ThresholdCurve.TRUE_POS_NAME).index();
      int fp = plotInstances.attribute(ThresholdCurve.FALSE_POS_NAME).index();
      int tn = plotInstances.attribute(ThresholdCurve.TRUE_NEG_NAME).index();
      int fn = plotInstances.attribute(ThresholdCurve.FALSE_NEG_NAME).index();
      Instance temp = plotInstances.instance(index);
      double totalInstances = temp.value(tp) + temp.value(fp) + temp.value(tn) + temp.value(fn);
      // get the value out of the total pop field (if possible)
      double requestedPopSize = totalInstances;
      try {
        requestedPopSize = Double.parseDouble(m_totalPopField.getText());
View Full Code Here

TOP

Related Classes of weka.core.Instances

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.