Package cc.mallet.types

Examples of cc.mallet.types.TokenSequence


    this (true, conjunctions);
  }
 
  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    int tsSize = ts.size();
    PropertyList[] oldfs = null;
    PropertyList[] newfs = null;
    try {
      oldfs = new PropertyList[ts.size()];
    }
    catch (Exception e) {
      System.err.println("Exception allocating oldfs: " + e);
    }
    try {
      newfs = new PropertyList[ts.size()];
    }
    catch (Exception e) {
      System.err.println("Exception allocating newfs: " + e);
    }
   
    for (int i = 0; i < tsSize; i++)
      oldfs[i] = ts.get(i).getFeatures ();
    if (includeOriginalSingletons)
      for (int i = 0; i < tsSize; i++)
        newfs[i] = ts.get(i).getFeatures ();

    for (int i = 0; i < tsSize; i++) {
      for (int j = 0; j < conjunctions.length; j++) {       
        // allow conjunction offsets of length n - awc
        PropertyList.Iterator[] iters = getOffsetIters (conjunctions, j, tsSize, i, oldfs);
        if (iters == null)
          continue;
        int[] iterIndices = new int[iters.length];
        for (int ii=0; ii < iterIndices.length; ii++)
          iterIndices[ii] = -1;
        newfs[i] = makeConjunctions (iters, 0, conjunctions, j, tsSize, newfs[i], i, oldfs, iterIndices);
      }
    }
    // Put the new PropertyLists in place
    for (int i = 0; i < ts.size(); i++)
      ts.get(i).setFeatures (newfs[i]);
    return carrier;
  }   
View Full Code Here


    noMtLst.addThruPipe (new ArrayIterator (doc1));

    Instance mtInst = mtLst.get (0);
    Instance noMtInst = noMtLst.get (0);

    TokenSequence mtTs = (TokenSequence) mtInst.getData ();
    TokenSequence noMtTs = (TokenSequence) noMtInst.getData ();

    assertEquals (6, mtTs.size ());
    assertEquals (6, noMtTs.size ());

    assertEquals (1.0, mtTs.get (3).getFeatureValue ("time"), 1e-15);
    assertEquals (1.0, noMtTs.get (3).getFeatureValue ("time"), 1e-15);
    assertEquals (1.0, mtTs.get (4).getFeatureValue ("time"), 1e-15);
    assertEquals (0.0, noMtTs.get (4).getFeatureValue ("time"), 1e-15);
  }
View Full Code Here

    this (true, conjunctions);
  }
 
  /**
   * Replaces each token's feature list with features built from the configured
   * offset conjunctions.
   *
   * <p>For every position {@code i} and every conjunction {@code conjunctions[j]}
   * (an array of 1, 2 or 3 relative offsets), the properties found at positions
   * {@code i + offset} are combined. The new key is the member keys joined with
   * {@code "&"}, each followed by {@code "@" + offset} when its offset is non-zero.
   * The new value is the product of the member numeric values. When
   * {@code propertyKey} is non-null, only properties whose key equals it take part.
   *
   * <p>A conjunction is silently skipped at a position if any of its offsets falls
   * outside the sequence or lands on a token whose original feature list is
   * {@code null}.
   *
   * @param carrier instance whose data is a {@code TokenSequence}; its tokens are
   *                modified in place
   * @return the same {@code carrier} instance
   * @throws IllegalArgumentException if a single-offset conjunction with offset 0 is
   *         applicable while {@code includeOriginalSingletons} is set
   * @throws UnsupportedOperationException if an applicable conjunction has a length
   *         other than 1, 2 or 3 (a length of 0 also reaches this branch, although
   *         the message speaks of "4 or more")
   */
  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    int tsSize = ts.size();
    // oldfs: snapshot of the original features; newfs: the lists being built.
    PropertyList[] oldfs = new PropertyList[ts.size()];
    PropertyList[] newfs = new PropertyList[ts.size()];
    for (int i = 0; i < tsSize; i++)
      oldfs[i] = ts.get(i).getFeatures ();
    // Optionally seed each new list with the token's original features.
    if (includeOriginalSingletons)
      for (int i = 0; i < tsSize; i++)
        newfs[i] = ts.get(i).getFeatures ();

    for (int i = 0; i < ts.size(); i++) {
      //System.out.println ("OffsetPropertyConjunctions: ts index="+i+", conjunction =");
      conjunctionList: for (int j = 0; j < conjunctions.length; j++) {
        // Make sure that the offsets in the conjunction are all available at this position
        // (inside the sequence bounds and with a non-null original feature list);
        // otherwise skip straight to the next conjunction.
        for (int k = 0; k < conjunctions[j].length; k++) {
          if (conjunctions[j][k] + i < 0
              || conjunctions[j][k] + i > tsSize-1
              || oldfs[i+conjunctions[j][k]] == null)
            continue conjunctionList;
          //System.out.print (" "+conjunctions[j][k]);
        }
        //System.out.print ("\n");

        // Add the features for this conjunction
        if (conjunctions[j].length == 1) {
          // Single offset: copy the properties found at i+offset, tagging the key
          // with "@offset" unless the offset is 0.
          int offset = conjunctions[j][0];
          // With offset 0 this would duplicate the originals already seeded into newfs.
          if (offset == 0 && includeOriginalSingletons)
            throw new IllegalArgumentException ("Original singletons already there.");
          PropertyList.Iterator iter = oldfs[i+offset].iterator();
          while (iter.hasNext()) {
            iter.next();
            // Honor the optional restriction to a single property key.
            if (propertyKey != null && !propertyKey.equals(iter.getKey()))
              continue;
            String key = iter.getKey() + (offset==0 ? "" : "@"+offset);
            newfs[i] = PropertyList.add (key, iter.getNumericValue(), newfs[i]);
          }

        } else if (conjunctions[j].length == 2) {
          // Two offsets: pair every property at i+offset0 with every property at i+offset1.
          //System.out.println ("token="+ts.getToken(i).getText()+" conjunctionIndex="+j);
          int offset0 = conjunctions[j][0];
          int offset1 = conjunctions[j][1];
          PropertyList.Iterator iter0 = oldfs[i+offset0].iterator();
          // iter0i / iter1i count iteration positions; they advance before the
          // propertyKey filter, so filtered-out properties are counted too.
          int iter0i = -1;
          while (iter0.hasNext()) {
            iter0i++;
            iter0.next();
            if (propertyKey != null && !propertyKey.equals(iter0.getKey()))
              continue;
            PropertyList.Iterator iter1 = oldfs[i+offset1].iterator();
            int iter1i = -1;
            while (iter1.hasNext()) {
              iter1i++;
              iter1.next();
              if (propertyKey != null && !propertyKey.equals(iter1.getKey()))
                continue;
              // Avoid redundant doubling of feature space; include only upper triangle
              // (when both offsets are equal, keep only pairs with iter1i > iter0i,
              // which also excludes pairing a property with itself).
              //System.out.println ("off0="+offset0+" off1="+offset1+" iter0i="+iter0i+" iter1i="+iter1i);
              if (offset0 == offset1 && iter1i <= iter0i) continue;
              //System.out.println (">off0="+offset0+" off1="+offset1+" iter0i="+iter0i+" iter1i="+iter1i);
              String key = iter0.getKey() + (offset0==0 ? "" : "@"+offset0)
                           +"&"+iter1.getKey() + (offset1==0 ? "" : "@"+offset1);
              newfs[i] = PropertyList.add (key, iter0.getNumericValue() * iter1.getNumericValue(), newfs[i]);
            }
          }

        } else if (conjunctions[j].length == 3) {
          // Three offsets: same scheme as the pair case with one more nesting level.
          int offset0 = conjunctions[j][0];
          int offset1 = conjunctions[j][1];
          int offset2 = conjunctions[j][2];
          PropertyList.Iterator iter0 = oldfs[i+offset0].iterator();
          int iter0i = -1;
          while (iter0.hasNext()) {
            iter0i++;
            iter0.next();
            if (propertyKey != null && !propertyKey.equals(iter0.getKey()))
              continue;
            PropertyList.Iterator iter1 = oldfs[i+offset1].iterator();
            int iter1i = -1;
            while (iter1.hasNext()) {
              iter1i++;
              iter1.next();
              if (propertyKey != null && !propertyKey.equals(iter1.getKey()))
                continue;
              // Avoid redundant doubling of feature space; include only upper triangle
              if (offset0 == offset1 && iter1i <= iter0i) continue;
              PropertyList.Iterator iter2 = oldfs[i+offset2].iterator();
              int iter2i = -1;
              while (iter2.hasNext()) {
                iter2i++;
                iter2.next();
                if (propertyKey != null && !propertyKey.equals(iter2.getKey()))
                  continue;
                // Avoid redundant doubling of feature space; include only upper triangle
                // NOTE(review): only adjacent offsets are compared (0 vs 1, 1 vs 2);
                // offset0 is never compared with offset2 here.
                if (offset1 == offset2 && iter2i <= iter1i) continue;
                String key = iter0.getKey() + (offset0==0 ? "" : "@"+offset0)
                             +"&"+iter1.getKey() + (offset1==0 ? "" : "@"+offset1)
                             +"&"+iter2.getKey() + (offset2==0 ? "" : "@"+offset2);
                newfs[i] = PropertyList.add (key, iter0.getNumericValue() * iter1.getNumericValue()
                                             * iter2.getNumericValue(), newfs[i]);
              }
            }
          }
        } else {
          // Reached for any length other than 1, 2 or 3, including 0.
          throw new UnsupportedOperationException ("Conjunctions of length 4 or more not yet implemented.");
        }
      }
    }

    // Put the new PropertyLists in place
    for (int i = 0; i < ts.size(); i++)
      ts.get(i).setFeatures (newfs[i]);
    return carrier;
  }
View Full Code Here

    Pipe mtPipe = (Pipe) TestSerializable.cloneViaSerialization (origPipe);
    InstanceList mtLst = new InstanceList (mtPipe);
    mtLst.addThruPipe (new ArrayIterator (doc1));
    Instance mtInst = mtLst.get (0);
    TokenSequence mtTs = (TokenSequence) mtInst.getData ();
    assertEquals (6, mtTs.size ());
    assertEquals (1.0, mtTs.get (3).getFeatureValue ("time"), 1e-15);
    assertEquals (1.0, mtTs.get (4).getFeatureValue ("time"), 1e-15);
  }
View Full Code Here

    return isNonNegated;
  }

  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    int tsSize = ts.size();
    for (int t = 0; t < tsSize; t++) {
      // Check whether the conjunction is true at time step t
      boolean passes = true;
      for (int fnum = 0; fnum < featurePatterns.length; fnum++) {
        int pos = t + offsets[fnum];
        if (!(pos >= 0 && pos < tsSize)) {
          passes = false;
          break;
        }
        boolean featurePresent = hasMatchingFeature (ts.get(pos), featurePatterns [fnum]);
        if (featurePresent != isNonNegated [fnum]) {
          passes = false;
          break;
        }
      }
      if (passes) {
        if (tagAllTimesteps) {
          for (int fnum = 0; fnum < featurePatterns.length; fnum++) {
            int pos = t + offsets[fnum];
            ts.get(pos).setFeatureValue (thisFeatureName, 1.0);
          }
        } else {
          ts.get(t).setFeatureValue (thisFeatureName, 1.0);
        }
      }
    }

    return carrier;
View Full Code Here

    this (namePrefix, null);
  }
 
  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    int tsSize = ts.size();
    for (int i = tsSize-1; i >= 0; i--) {
      Token t = ts.get (i);
      String text = t.getText();
      if (featureRegex != null && !featureRegex.matcher(text).matches())
        continue;
      for (int j = 0; j < i; j++) {
        if (ts.get(j).getText().equals(text)) {
          PropertyList.Iterator iter = ts.get(j).getFeatures().iterator();
          while (iter.hasNext()) {
            iter.next();
            String key = iter.getKey();
            if (filterRegex == null || (filterRegex.matcher(key).matches() ^ !includeFiltered))
              t.setFeatureValue (namePrefix+key, iter.getNumericValue());
View Full Code Here

    URI uri = null;
    try { uri = new URI ("random:" + classNames[currentClassIndex] + "/" + currentInstanceIndex); }
    catch (Exception e) {e.printStackTrace(); throw new IllegalStateException (); }
    //xxx Producing small numbers? int randomSize = r.nextPoisson (featureVectorSizePoissonLambda);
    int randomSize = (int)featureVectorSizePoissonLambda;
    TokenSequence ts = classCentroid[currentClassIndex].randomTokenSequence (r, randomSize);
    //logger.fine ("FeatureVector "+currentClassIndex+" "+currentInstanceIndex); fv.print();
    currentInstanceIndex--;
    return new Instance (ts, classNames[currentClassIndex], uri, null);
  }
View Full Code Here

    this (_realValued, false, "=");
  }

 
  public Instance pipe (Instance carrier) {
    TokenSequence ts = (TokenSequence) carrier.getData ();
    for (int i=0; i < ts.size(); i++) {
      Token t = ts.get (i);
      String[] values = t.getText().split("\\s+");
      for (int j=0; j < values.length; j++) {
        if (specifyFeatureNames) {
          String[] nameAndValue = values[j].split(nameValueSeparator);           
          if (nameAndValue.length != 2) { // no feature name. use token as feature.
View Full Code Here

    this (Pattern.compile (regex), dataGroup, targetGroup);
  }

  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    TokenSequence targetTokenSeq = new TokenSequence (ts.size());
    for (int i = 0; i < ts.size(); i++) {
      Token t = ts.get(i);
      Matcher matcher = regex.matcher (t.getText());
      if (matcher.matches()) {
        targetTokenSeq.add (matcher.group(targetGroup));
        t.setText (matcher.group (dataGroup));
      } else {
        logger.warning ("Skipping token: No match of "+regex.pattern()
                        +" at token #"+i+" with text "+t.getText());
      }
View Full Code Here

    this (false);
  }

  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    // xxx This doesn't seem so efficient.  Perhaps have TokenSequence
    // use a LinkedList, and remove Tokens from it? -?
    // But a LinkedList implementation of TokenSequence would be quite inefficient -AKM
    TokenSequence ret = new TokenSequence ();
    Token prevToken = null;
    for (int i = 0; i < ts.size(); i++) {
      Token t = ts.get(i);
      String s = t.getText();
      if (CharSequenceLexer.LEX_ALPHA.matcher(s).matches()) {
        ret.add (t);
        prevToken = t;
      else if (markDeletions && prevToken != null)
        prevToken.setProperty (FeatureSequenceWithBigrams.deletionMark, t.getText());
    }
    carrier.setData(ret);
View Full Code Here

TOP

Related Classes of cc.mallet.types.TokenSequence

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.