Package weka.core

Examples of weka.core.FastVector$FastVectorEnumeration
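FastVector is Weka's legacy growable-array class (deprecated in newer Weka releases in favour of java.util.ArrayList), and its elements() method returns a java.util.Enumeration backed by the inner FastVectorEnumeration class. A minimal, self-contained sketch of that pattern, which recurs throughout the excerpts below:

    // Minimal sketch: build a FastVector and walk it via the Enumeration
    // returned by elements() (backed by FastVector.FastVectorEnumeration).
    import java.util.Enumeration;
    import weka.core.FastVector;

    public class FastVectorEnumerationExample {
      public static void main(String[] args) {
        FastVector values = new FastVector(2);   // initial capacity of 2
        values.addElement("False");
        values.addElement("True");

        Enumeration en = values.elements();
        while (en.hasMoreElements()) {
          System.out.println((String) en.nextElement());
        }
      }
    }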


    m_outputs = new NeuralEnd[0];
    m_inputs = new NeuralEnd[0];
    m_numAttributes = 0;
    m_numClasses = 0;
    m_neuralNodes = new NeuralConnection[0];
    m_selected = new FastVector(4);
    m_graphers = new FastVector(2);
    m_nextId = 0;
    m_stopIt = true;
    m_stopped = true;
    m_accepted = false;
    m_numeric = false;


    m_inputs = new NeuralEnd[0];
    m_numAttributes = 0;
    m_numClasses = 0;
    m_neuralNodes = new NeuralConnection[0];
   
    m_selected = new FastVector(4);
    m_graphers = new FastVector(2);
    m_nextId = 0;
    m_stopIt = true;
    m_stopped = true;
    m_accepted = false;   
    m_instances = new Instances(i);

  /**
   * Builds the classifier.
   */
  private void buildClassifier() throws Exception {

    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
      if (i == documentAtt) {
        atts.addElement(new Attribute("Term_frequency")); // 2
        atts.addElement(new Attribute("IDF")); //
        atts.addElement(new Attribute("TFxIDF")); //
        atts.addElement(new Attribute("First_occurrence")); //
        atts.addElement(new Attribute("Last_occurrence")); //
        atts.addElement(new Attribute("Spread")); //
        atts.addElement(new Attribute("Domain_keyphraseness")); //
        atts.addElement(new Attribute("Length")); //
        atts.addElement(new Attribute("Generality")); //
        atts.addElement(new Attribute("Node_degree")); //
        atts.addElement(new Attribute("Semantic_relatedness")); //
        atts.addElement(new Attribute("Wikipedia_keyphraseness")); //
        atts.addElement(new Attribute("Inverse_Wikip_frequency")); //
        atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13

      } else if (i == keyphrasesAtt) {
        if (nominalClassValue) {
          FastVector vals = new FastVector(2);
          vals.addElement("False");
          vals.addElement("True");
          atts.addElement(new Attribute("Keyphrase?", vals));
        } else {
          atts.addElement(new Attribute("Keyphrase?"));
        }
      }

    if (debugMode) {
      System.err.println("--- Converting pending instances");
    }

    // Create output format for filter
    FastVector atts = new FastVector();
    for (int i = 1; i < getInputFormat().numAttributes(); i++) {
      if (i == documentAtt) {
        atts.addElement(new Attribute("Candidate_name",
            (FastVector) null)); // 0
        atts.addElement(new Attribute("Candidate_original",
            (FastVector) null)); // 1
        atts.addElement(new Attribute("Term_frequency")); // 2
        atts.addElement(new Attribute("IDF")); // 3
        atts.addElement(new Attribute("TFxIDF")); // 4
        atts.addElement(new Attribute("First_occurrence")); // 5
        atts.addElement(new Attribute("Last_occurrence")); // 6
        atts.addElement(new Attribute("Spread")); // 7
        atts.addElement(new Attribute("Domain_keyphraseness")); // 8
        atts.addElement(new Attribute("Length")); // 9
        atts.addElement(new Attribute("Generality")); // 10
        atts.addElement(new Attribute("Node_degree")); // 11
        atts.addElement(new Attribute("Semantic_relatedness")); // 12
        atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 13
        atts.addElement(new Attribute("Inverse_Wikip_frequency")); // 14
        atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 15

        atts.addElement(new Attribute("Probability")); // 16
        atts.addElement(new Attribute("Rank")); // 17

      } else if (i == keyphrasesAtt) {
        if (nominalClassValue) {
          FastVector vals = new FastVector(2);
          vals.addElement("False");
          vals.addElement("True");
          atts.addElement(new Attribute("Keyphrase?", vals));
        } else {
          atts.addElement(new Attribute("Keyphrase?"));
        }
      } else {
        atts.addElement(getInputFormat().attribute(i));
      }
    }

    Instances outFormat = new Instances("mauidata", atts, 0);
    setOutputFormat(outFormat);

    // Convert pending input instances into output data
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
      Instance current = getInputFormat().instance(i);
      FastVector vector = convertInstance(current, true);
      Enumeration en = vector.elements();
      while (en.hasMoreElements()) {
        Instance inst = (Instance) en.nextElement();
        push(inst);
      }
    }

  /**
   * Converts an instance.
   */
  private FastVector convertInstance(Instance instance, boolean training)
  throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
      System.err.println("-- Converting instance for document "
          + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
      String keyphrases = instance.stringValue(keyphrasesAtt);
      hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
      candidateList = allCandidates.get(instance);
    } else {
      candidateList = getCandidates(documentText);
    }

    System.err.println(candidateList.size() + " candidates ");

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

      if (candidate.getFrequency() < minOccurFrequency) {
        continue;
      }

      String name = candidate.getName();
      String orig = candidate.getBestFullForm();
      if (!vocabularyName.equals("none")) {
        orig = candidate.getTitle();
      }

      double[] vals = computeFeatureValues(candidate, training,
          hashKeyphrases, candidateList);

      Instance inst = new Instance(instance.weight(), vals);

      inst.setDataset(classifierData);

      // Get probability of a phrase being key phrase
      double[] probs = classifier.distributionForInstance(inst);

      double prob = probs[0];
      if (nominalClassValue) {
        prob = probs[1];
      }

      // Compute attribute values for final instance
      double[] newInst = new double[instance.numAttributes()
                                    + numFeatures + 2];

      int pos = 0;
      for (int i = 1; i < instance.numAttributes(); i++) {

        if (i == documentAtt) {

          // output of values for a given phrase:

          // Add phrase
          int index = outputFormatPeek().attribute(pos).addStringValue(name);
          newInst[pos++] = index;

          // Add original version
          if (orig != null) {
            index = outputFormatPeek().attribute(pos).addStringValue(orig);
          } else {
            index = outputFormatPeek().attribute(pos).addStringValue(name);
          }

          newInst[pos++] = index;

          // Add features
          newInst[pos++] = inst.value(tfIndex);
          newInst[pos++] = inst.value(idfIndex);
          newInst[pos++] = inst.value(tfidfIndex);
          newInst[pos++] = inst.value(firstOccurIndex);
          newInst[pos++] = inst.value(lastOccurIndex);
          newInst[pos++] = inst.value(spreadOccurIndex);
          newInst[pos++] = inst.value(domainKeyphIndex);
          newInst[pos++] = inst.value(lengthIndex);
          newInst[pos++] = inst.value(generalityIndex);
          newInst[pos++] = inst.value(nodeDegreeIndex);
          newInst[pos++] = inst.value(semRelIndex);
          newInst[pos++] = inst.value(wikipKeyphrIndex);
          newInst[pos++] = inst.value(invWikipFreqIndex);
          newInst[pos++] = inst.value(totalWikipKeyphrIndex);

          // Add probability
          probsAttIndex = pos;
          newInst[pos++] = prob;

          // Set rank to missing (computed below)
          newInst[pos++] = Instance.missingValue();

        } else if (i == keyphrasesAtt) {
          newInst[pos++] = inst.classValue();
        } else {
          newInst[pos++] = instance.value(i);
        }
      }

      Instance ins = new Instance(instance.weight(), newInst);
      ins.setDataset(outputFormatPeek());
      vector.addElement(ins);

      if (inst.classValue() == 0) {
        countNeg++;
      } else {
        countPos++;
      }
    }

    System.err.println(countPos + " positive; " + countNeg
        + " negative instances");

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
      vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
      newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
      vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
      newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
      vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
      newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
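The excerpt stops before the ranking step described by the comment above. Purely as an illustration (not the actual Maui code), a hypothetical pass over the probability-sorted vector could assign 1-based ranks and demote candidates that are substrings of a better-ranked candidate; vector and probsAttIndex are the variables from the excerpt, the rank position probsAttIndex + 1 follows from the order the values were written above, and the substring test is a simplification:

      // Hypothetical sketch only -- not the Maui implementation.
      int rankAttIndex = probsAttIndex + 1;             // rank was written right after the probability
      for (int r = 0; r < vector.size(); r++) {
        Instance cand = (Instance) vector.elementAt(r);
        String candName = cand.stringValue(0);          // "Candidate_name" string attribute
        boolean coveredByBetterRanked = false;
        for (int j = 0; j < r; j++) {
          String betterName = ((Instance) vector.elementAt(j)).stringValue(0);
          if (betterName.indexOf(candName) >= 0) {      // simplistic subphrase check
            coveredByBetterRanked = true;
            break;
          }
        }
        if (coveredByBetterRanked) {
          cand.setValue(probsAttIndex, -1);             // demote: probability -1
        }
        cand.setValue(rankAttIndex, r + 1);             // 1-based rank
      }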

      bufferInput(instance);
      return false;

    } else {

      FastVector vector = convertInstance(instance, false);
      Enumeration<Instance> en = vector.elements();
      while (en.hasMoreElements()) {
        Instance inst = en.nextElement();
        push(inst);
      }
      return true;

    if (fileNames.size() == 0) {
      throw new Exception("Couldn't find any data in "
          + inputDirectoryName);
    }

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("document", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    // Build model
    mauiFilter = new MauiFilter();
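Continuing from the data header built above, a hypothetical example of adding one document (the file name, text and keyphrase strings are placeholders, not taken from the original code):

    // Hypothetical example row for the "keyphrase_training_data" header above.
    double[] vals = new double[data.numAttributes()];
    vals[0] = data.attribute(0).addStringValue("doc1.txt");                        // filename
    vals[1] = data.attribute(1).addStringValue("Full text of the document...");    // document
    vals[2] = data.attribute(2).addStringValue("keyphrase one\nkeyphrase two");    // keyphrases
    data.add(new Instance(1.0, vals));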

    protected void fitLogistic(Instances insts, int cl1, int cl2,
           int numFolds, Random random)
      throws Exception {

      // Create header of instances object
      FastVector atts = new FastVector(2);
      atts.addElement(new Attribute("pred"));
      FastVector attVals = new FastVector(2);
      attVals.addElement(insts.classAttribute().value(cl1));
      attVals.addElement(insts.classAttribute().value(cl2));
      atts.addElement(new Attribute("class", attVals));
      Instances data = new Instances("data", atts, insts.numInstances());
      data.setClassIndex(1);

      // Collect data for fitting the logistic model

  /**
   * @see   #hasImmediateOutputFormat()
   * @see   #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances  result;
    FastVector  atts;
    int    i;
    FastVector  values;
    int    classindex;

    classindex = -1;

    // copy old attributes
    atts = new FastVector();
    for (i = 0; i < inputFormat.numAttributes(); i++) {
      // remove class?
      if ((i == inputFormat.classIndex()) && getRemoveOldClass())
        continue;
      // record class index
      if (i == inputFormat.classIndex())
        classindex = i;
      atts.addElement(inputFormat.attribute(i).copy());
    }

    // add new attributes
    // 1. classification?
    if (getOutputClassification()) {
      // if old class got removed, use this one
      if (classindex == -1)
        classindex = atts.size();
      atts.addElement(inputFormat.classAttribute().copy("classification"));
    }

    // 2. distribution?
    if (getOutputDistribution()) {
      if (inputFormat.classAttribute().isNominal()) {
        for (i = 0; i < inputFormat.classAttribute().numValues(); i++) {
          atts.addElement(new Attribute("distribution_" + inputFormat.classAttribute().value(i)));
        }
      }
      else {
        atts.addElement(new Attribute("distribution"));
      }
    }

    // 3. error flag?
    if (getOutputErrorFlag()) {
      if (inputFormat.classAttribute().isNominal()) {
        values = new FastVector();
        values.addElement("no");
        values.addElement("yes");
        atts.addElement(new Attribute("error", values));
      }
      else {
        atts.addElement(new Attribute("error"));
      }

    if (m_SelectedAttributes == null) {
      setOutputFormat(null);
      return;
    }

    FastVector attributes = new FastVector(m_SelectedAttributes.length);

    int i;
    if (m_ASEvaluator instanceof AttributeTransformer) {
      informat = ((AttributeTransformer)m_ASEvaluator).transformedHeader();
    } else {
      informat = getInputFormat();
    }

    for (i = 0; i < m_SelectedAttributes.length; i++) {
      attributes.addElement(informat.attribute(m_SelectedAttributes[i]).copy());
    }

    Instances outputFormat =
      new Instances(getInputFormat().relationName(), attributes, 0);
