Examples of org.apache.uima.ruta.textruler.core.TextRulerAnnotation

Package org.apache.uima.ruta.textruler.core

Examples of org.apache.uima.ruta.textruler.core.TextRulerAnnotation

org.apache.uima.ruta.textruler.core.TextRulerAnnotation
This class encapsulates the information about an Annotation that is important to us, since an AnnotationFS keeps its corresponding CAS alive or gets killed when the CAS gets reset.


  public List<WhiskRuleItem> getTermsWithinBounds(List<WhiskRuleItem> allTerms, int startPos,
          int endPos) {
    List<WhiskRuleItem> result = new ArrayList<WhiskRuleItem>();
    for (WhiskRuleItem term : allTerms) {
      TextRulerAnnotation a = term.getWordConstraint().getTokenAnnotation();
      if (a.getBegin() >= startPos && a.getEnd() <= endPos)
        result.add(term);
      if (a.getEnd() > endPos)
        break;
    }
    return result;
  }

View Full Code Here


  protected LP2Rule createInitialRuleForPositiveExample(TextRulerExample example) {
    TextRulerTarget target = example.getTarget();
    LP2Rule rule = new LP2Rule(this, example.getTarget());
    CAS docCas = example.getDocumentCAS();
    TextRulerAnnotation exampleAnnotation = example.getAnnotation();
    TypeSystem ts = docCas.getTypeSystem();
    Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
    int thePosition = target.type == MLTargetType.SINGLE_LEFT_BOUNDARY ? exampleAnnotation
            .getBegin() : exampleAnnotation.getEnd();


    List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
            thePosition, windowSize,
            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
    List<AnnotationFS> rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas,
            thePosition, windowSize,
            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);


    // the left context has to be reversed since we get the arrayList from
    // the slot's point of view!
    for (int i = leftContext.size() - 1; i >= 0; i--) {
      TextRulerAnnotation annot = new TextRulerAnnotation(leftContext.get(i), example.getDocument());
      LP2RuleItem item = new LP2RuleItem();
      item.setWordConstraint(annot);
      if (item.getWordConstraint().isRegExpConstraint())
        item.addOtherConstraint(new MLLP2OtherConstraint(annot, annot));
      rule.addPreFillerItem(item);
    }


    for (AnnotationFS afs : rightContext) {
      TextRulerAnnotation annot = new TextRulerAnnotation(afs, example.getDocument());
      LP2RuleItem item = new LP2RuleItem();
      item.setWordConstraint(annot);
      if (item.getWordConstraint().isRegExpConstraint())
        item.addOtherConstraint(new MLLP2OtherConstraint(annot, annot));

View Full Code Here

    this.annotation = a;
  }


  public KEPRuleItem(AnnotationFS afs) {
    super();
    this.annotation = new TextRulerAnnotation(afs);
    this.type = this.annotation.getType();
  }

View Full Code Here

  public List<List<KEPRuleItemCondition>> getConditions() {
    return this.conditions;
  }


  public void setAnnotation(AnnotationFS afs) {
    this.annotation = new TextRulerAnnotation(afs);
    this.type = this.annotation.getType();
  }

View Full Code Here

        // tags:
        // this is not very nice code and not dynamic feature capable, but
        // for testpurposes
        // in order to test WHISK with PosTag Terms...
        if (posTagRootTypeName != null && posTagRootTypeName.length() > 0) {
          TextRulerAnnotation tokenAnnotation = term.getWordConstraint().getTokenAnnotation();
          CAS cas = example.getDocumentCAS();
          TypeSystem ts = cas.getTypeSystem();
          Type posTagsRootType = ts.getType(posTagRootTypeName);
          if (ts != null) {
            // POS-Tags created by our test hmm tagger.
            List<AnnotationFS> posTagAnnotations = TextRulerToolkit.getAnnotationsWithinBounds(cas,
                    tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null, posTagsRootType);
            if (posTagAnnotations.size() > 0) {
              AnnotationFS posTag = posTagAnnotations.get(0);
              if (posTag.getBegin() == tokenAnnotation.getBegin()
                      && posTag.getEnd() == tokenAnnotation.getEnd()) {
                TextRulerAnnotation posTagAnnotation = new TextRulerAnnotation(posTag, doc,
                        consideredFeatures);


                // 1. most specific term with all constraints we
                // have:
                WhiskRule proposedRule3 = proposedRule.copy();

View Full Code Here

  }


  protected WhiskRule anchor(WhiskRule rule, TextRulerExampleDocument doc,
          TextRulerExample example, int slotIndex) {
    List<WhiskRule> result = new ArrayList<WhiskRule>();
    TextRulerAnnotation slotAnnotation = example.getAnnotations()[slotIndex];
    List<List<WhiskRuleItem>> window = getTermsWithinBounds(slotAnnotation.getBegin(),
            slotAnnotation.getEnd(), example);


    for (List<WhiskRuleItem> inside : window) {


      if (rule == null || inside.isEmpty()) {
        return null;

View Full Code Here

            nextBegin = a.getBegin();
          } else if (nextBegin != a.getBegin()) {
            break;
          }
          if (a.getBegin() <= nextBegin && a.getBegin() >= end) {
            WhiskRuleItem term = new WhiskRuleItem(new TextRulerAnnotation(a,
                    example.getDocument(), consideredFeatures));
            result.add(term);
          }
        }
      }

View Full Code Here

            nextEnd = a.getEnd();
          } else if (nextEnd != a.getEnd()) {
            break;
          }
          if (a.getEnd() >= nextEnd && a.getEnd() <= begin) {
            WhiskRuleItem term = new WhiskRuleItem(new TextRulerAnnotation(a,
                    example.getDocument(), consideredFeatures));
            result.add(term);
          }
        }
      }

View Full Code Here

      }
    }


    for (AnnotationFS annotation : startAs) {
      List<WhiskRuleItem> startList = new ArrayList<WhiskRuleItem>();
      WhiskRuleItem term = new WhiskRuleItem(new TextRulerAnnotation(annotation,
              example.getDocument(), consideredFeatures));
      startList.add(term);
      result.add(startList);
    }

View Full Code Here


  protected List<LP2Rule> createStartRulesForExample(final TextRulerExample example) {
    TextRulerTarget target = example.getTarget();
    List<LP2Rule> result = new ArrayList<LP2Rule>();
    CAS docCas = example.getDocumentCAS();
    TextRulerAnnotation exampleAnnotation = example.getAnnotation();
    TypeSystem ts = docCas.getTypeSystem();
    Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);


    boolean isLeftBoundary = (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY || target.type == MLTargetType.SINGLE_LEFT_CORRECTION);
    int thePosition = isLeftBoundary ? exampleAnnotation.getBegin() : exampleAnnotation.getEnd();
    List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
            thePosition, windowSize,
            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);


    List<AnnotationFS> rightContext;
    if (target.type == MLTargetType.SINGLE_LEFT_CORRECTION
            || target.type == MLTargetType.SINGLE_RIGHT_CORRECTION) {
      rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition,
              windowSize + 1, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
              tokensRootType);
      rightContext.remove(0);
    } else {
      rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition, windowSize,
              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
    }


    int totalCount = leftContext.size() + rightContext.size();


    // LEFT CONTEXT (PRE FILLER PATTERN)
    // result.add(createStartRuleForConstraint(example, 0, true, null));


    for (int index = 0; index < totalCount; index++) {
      boolean isPre = index < leftContext.size();
      int prePostIndex = isPre ? index : index - leftContext.size();
      AnnotationFS tokenAFS = isPre ? leftContext.get(leftContext.size() - 1 - prePostIndex)
              : rightContext.get(prePostIndex);
      TextRulerAnnotation tokenAnnotation = new TextRulerAnnotation(tokenAFS, example.getDocument());
      LP2RuleItem wordItem = new LP2RuleItem();


      // one rule with only the word constraint:
      wordItem.setWordConstraint(tokenAnnotation);
      result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
              wordItem));


      if (wordItem.getWordConstraint().isRegExpConstraint()) {
        LP2RuleItem basicItem = new LP2RuleItem();
        // basicItem.setOtherConstraint("basicTM", new
        // MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
        basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
        result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
                basicItem));
      }


      // // POS-Tags created by our test hmm tagger.
      // Type posTagsRootType = ts.getType("org.apache.uima.ml.ML.postag");
      // if (posTagsRootType != null)
      // {
      // List<AnnotationFS> posTagAnnotations =
      // TextRulerToolkit.getAnnotationsWithinBounds(example.getDocumentCAS(),
      // tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null,
      // posTagsRootType);
      // if (posTagAnnotations.size()>0)
      // {
      // if (TextRulerToolkit.DEBUG && posTagAnnotations.size()>1)
      // {
      // TextRulerToolkit.logIfDebug("HOW CAN ONE TOKEN HAVE MORE THAN ONE POS TAG ?? "+tokenAnnotation.getBegin()+":"+tokenAnnotation.getEnd()+"="+tokenAnnotation.getCoveredText());
      // for (AnnotationFS afs : posTagAnnotations)
      // {
      // System.out.print(afs.getType().getShortName()+":"+afs.getCoveredText()+" "+afs.getBegin()+":"+afs.getEnd()+"\n");
      // }
      // TextRulerToolkit.logIfDebug("");
      // }
      //
      // TextRulerAnnotation posTagAnnotation = new
      // TextRulerAnnotation(posTagAnnotations.get(0),
      // example.getDocument());
      // LP2RuleItem basicItem = new LP2RuleItem();
      // basicItem.setOtherConstraint("postag", new
      // MLLP2OtherConstraint(posTagAnnotation, posTagAnnotation));
      // result.add(createStartRuleForConstraint(example.getTarget(),
      // prePostIndex+1, isPre, basicItem));
      // }
      // }


      // new dynamic system: grab everything we get from the annotation
      // index that lies over this token:
      // (annotations WITHIN (with smaller bounds than the token itself)
      // are ignored for now! we could
      // add using them with the CONTAINS constraint. but our
      // MLLP2OtherConstraint is not yet capable of this!


      List<AnnotationFS> featureAnnotations = TextRulerToolkit.getOtherAnnotationsOverToken(docCas,
              tokenAFS, filterSetWithSlotNames);
      if (TextRulerToolkit.DEBUG && featureAnnotations.size() > 1) {
        TextRulerToolkit.log("FOUND MORE THAN ONE EXTRA TOKEN FEATURE ANNOTATION !");
        for (AnnotationFS featA : featureAnnotations)
          TextRulerToolkit.log(featA.toString());
        TextRulerToolkit.log("--------------------------------");
      }
      for (AnnotationFS featA : featureAnnotations) {
        TextRulerAnnotation featureAnnot = new TextRulerAnnotation(featA, example.getDocument());
        LP2RuleItem basicItem = new LP2RuleItem();
        basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, featureAnnot));
        result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
                basicItem));
      }

View Full Code Here

0 1 2 3

TOP

Related Classes of org.apache.uima.ruta.textruler.core.TextRulerAnnotation

org.apache.uima.cas.Feature

org.apache.uima.ruta.textruler.learner.kep.KEPRuleItem

org.apache.uima.ruta.textruler.learner.lp2.BasicLP2

org.apache.uima.ruta.textruler.learner.lp2.NaiveLP2

org.apache.uima.ruta.textruler.learner.lp2.OptimizedLP2

org.apache.uima.ruta.textruler.learner.rapier.Rapier

org.apache.uima.ruta.textruler.learner.whisk.generic.Whisk

org.apache.uima.ruta.textruler.learner.whisk.token.Whisk

org.apache.uima.ruta.textruler.learner.wien.Wien

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.