Package cc.mallet.types

Examples of cc.mallet.types.Instance


        if (lalph == null) lalph = new LabelAlphabet();
        Pipe noop = new Noop(alph, lalph);
        InstanceList newInstances = new InstanceList(noop);
        for (int j = 0; j < oldInstances.size(); j++) {
          int label = clustering.getLabel(j);
          Instance instance = oldInstances.get(j);
          if (clustering.size(label) >= minClusterSize.value)
            newInstances.add(noop.pipe(new Instance(instance.getData(), lalph.lookupLabel(new Integer(label)), instance.getName(), instance.getSource())));
        }
        clusterings.set(i, createSmallerClustering(newInstances));
      }
      if (outputPrefixFile.value != null) {
        try {
          ObjectOutputStream oos =
            new ObjectOutputStream(new FileOutputStream(outputPrefixFile.value));
          oos.writeObject(clusterings);
          oos.close();
        } catch (Exception e) {
          logger.warning("Exception writing clustering to file " + outputPrefixFile.value                        + " " + e);
          e.printStackTrace();
        }
      }
    }
   
   
    // Split into training/testing
    if (trainingProportion.value > 0) {
      if (clusterings.size() > 1)
        throw new IllegalArgumentException("Expect one clustering to do train/test split, not " + clusterings.size());
      Clustering clustering = clusterings.get(0);
      int targetTrainSize = (int)(trainingProportion.value * clustering.getNumInstances());
      TIntHashSet clustersSampled = new TIntHashSet();
      Randoms random = new Randoms(123);
      LabelAlphabet lalph = new LabelAlphabet();
      InstanceList trainingInstances = new InstanceList(new Noop(null, lalph));
      while (trainingInstances.size() < targetTrainSize) {
        int cluster = random.nextInt(clustering.getNumClusters());
        if (!clustersSampled.contains(cluster)) {
          clustersSampled.add(cluster);
          InstanceList instances = clustering.getCluster(cluster);
          for (int i = 0; i < instances.size(); i++) {
            Instance inst = instances.get(i);
            trainingInstances.add(new Instance(inst.getData(), lalph.lookupLabel(new Integer(cluster)), inst.getName(), inst.getSource()));
          }
        }
      }
      trainingInstances.shuffle(random);
      Clustering trainingClustering = createSmallerClustering(trainingInstances);
     
      InstanceList testingInstances = new InstanceList(null, lalph);
      for (int i = 0; i < clustering.getNumClusters(); i++) {
        if (!clustersSampled.contains(i)) {
          InstanceList instances = clustering.getCluster(i);
          for (int j = 0; j < instances.size(); j++) {
            Instance inst = instances.get(j);
            testingInstances.add(new Instance(inst.getData(), lalph.lookupLabel(new Integer(i)), inst.getName(), inst.getSource()));
          }         
        }
      }
      testingInstances.shuffle(random);
      Clustering testingClustering = createSmallerClustering(testingInstances);
View Full Code Here


          fi++;


          File record = records.get(k);
          labels.add(label);
          instances.add(new Instance(new Record(fieldAlph, valueAlph, parseFile(record)),
                        new Integer(label), record.toString(),
                        record.toString()));
        }
      }
      clusterings[i] =
View Full Code Here

      uri = "csvline:"+reader.getLineNumber();
    } else {
      uri = uriStr;
    }
    assert (data != null);
    Instance carrier = new Instance (data, target, uri, null);
    try {
      this.currentLine = reader.readLine();
    } catch (IOException e) {
      throw new IllegalStateException ();
    }
View Full Code Here

  public Instance next ()
  {
    URI uri = null;
    try { uri = new URI ("array:" + index++); }
    catch (Exception e) { e.printStackTrace(); throw new IllegalStateException(); }
    return new Instance (subIterator.next(), target, uri, null);
  }
View Full Code Here

      if (m.find ()){
        targetName = m.group (1);
      }
    }

    return new Instance (nextFile, targetName, nextFile.toURI(), null);
  }
View Full Code Here

  public Instance next ()
  {
    URI uri = null;
    try { uri = new URI ("array:" + index++); }
    catch (Exception e) { e.printStackTrace(); throw new IllegalStateException(); }
    return new Instance (subIterator.next(), targetIterator.next(), uri, null);
  }
View Full Code Here

  public Instance next ()
  {
    URI uri = null;
    try { uri = new URI ("array:" + index); }
    catch (Exception e) { e.printStackTrace(); throw new IllegalStateException(); }
    return new Instance (data[index++], null, uri, null);
  }
View Full Code Here

    //xxx Producing small numbers? int randomSize = r.nextPoisson (featureVectorSizePoissonLambda);
    int randomSize = (int)featureVectorSizePoissonLambda;
    TokenSequence ts = classCentroid[currentClassIndex].randomTokenSequence (r, randomSize);
    //logger.fine ("FeatureVector "+currentClassIndex+" "+currentInstanceIndex); fv.print();
    currentInstanceIndex--;
    return new Instance (ts, classNames[currentClassIndex], uri, null);
  }
View Full Code Here

    }
   
    public Double call() throws Exception {
      double value = 0;
      for (int ii = start; ii < end; ii++) {
        Instance inst = trainingSet.get(ii);
        Sequence input = (Sequence) inst.getData();
        // logZ     
        value -= new SumLatticePR(crf, ii, input, null, modelCopy, cachedDots[ii], true, null, null, false).getTotalWeight();
      }
      return value;
    }
View Full Code Here

          clusteredErrorLabelVectors[i][j] = new ArrayList();
        }

      for (int i = 0; i < theTrainingData.size(); i++) {
        logger.info ("instance="+i);
        Instance instance = theTrainingData.get(i);
        Sequence input = (Sequence) instance.getData();
        Sequence trueOutput = (Sequence) instance.getTarget();
        assert (input.size() == trueOutput.size());
        SumLattice lattice =
          crf.sumLatticeFactory.newSumLattice (crf, input, (Sequence)null, (Transducer.Incrementor)null, 
              (LabelAlphabet)theTrainingData.getTargetAlphabet());
        int prevLabelIndex = 0;          // This will put extra error instances in this cluster
View Full Code Here

TOP

Related Classes of cc.mallet.types.Instance

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.