Package org.apache.uima.ruta.textruler.core

Examples of org.apache.uima.ruta.textruler.core.TextRulerAnnotation


  private void fillSlotRulesWithMostSpecificRules() {
    slotRules.clear();
    for (TextRulerExample example : examples) {
      RapierRule rule = new RapierRule(this, example.getTarget());
      TextRulerAnnotation slotAnnotation = example.getAnnotation();
      CAS docCas = example.getDocumentCAS();
      TypeSystem ts = docCas.getTypeSystem();
      Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);

      // first, get all words/tokens:
      List<AnnotationFS> before = TextRulerToolkit.getAnnotationsBeforePosition(
              example.getDocumentCAS(), slotAnnotation.getBegin(), -1,
              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
      List<AnnotationFS> after = TextRulerToolkit.getAnnotationsAfterPosition(
              example.getDocumentCAS(), slotAnnotation.getEnd(), -1,
              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
      List<AnnotationFS> inside = TextRulerToolkit.getAnnotationsWithinBounds(
              example.getDocumentCAS(), slotAnnotation.getBegin(), slotAnnotation.getEnd(),
              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);

      // the before annotations have to be reversed:
      for (int i = before.size() - 1; i >= 0; i--) {
        AnnotationFS afs = before.get(i);
        RapierRuleItem ruleItem = new RapierRuleItem();
        ruleItem.addWordConstraint(new TextRulerWordConstraint(new TextRulerAnnotation(afs, example
                .getDocument())));
        addAvailablePosTagConstraintToItem(ruleItem, afs, example);
        rule.addPreFillerItem(ruleItem);
      }

      for (AnnotationFS afs : inside) {
        RapierRuleItem ruleItem = new RapierRuleItem();
        ruleItem.addWordConstraint(new TextRulerWordConstraint(new TextRulerAnnotation(afs, example
                .getDocument())));
        addAvailablePosTagConstraintToItem(ruleItem, afs, example);
        rule.addFillerItem(ruleItem);
      }
      for (AnnotationFS afs : after) {
        RapierRuleItem ruleItem = new RapierRuleItem();
        ruleItem.addWordConstraint(new TextRulerWordConstraint(new TextRulerAnnotation(afs, example
                .getDocument())));
        addAvailablePosTagConstraintToItem(ruleItem, afs, example);
        rule.addPostFillerItem(ruleItem);
      }
View Full Code Here


  protected LP2Rule createInitialRuleForPositiveExample(TextRulerExample example) {
    TextRulerTarget target = example.getTarget();
    LP2Rule rule = new LP2Rule(this, example.getTarget());
    CAS docCas = example.getDocumentCAS();
    TextRulerAnnotation exampleAnnotation = example.getAnnotation();
    TypeSystem ts = docCas.getTypeSystem();
    Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
    int thePosition = target.type == MLTargetType.SINGLE_LEFT_BOUNDARY ? exampleAnnotation
            .getBegin() : exampleAnnotation.getEnd();

    List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
            thePosition, windowSize,
            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
    List<AnnotationFS> rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas,
            thePosition, windowSize,
            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);

    // the left context has to be reversed since we get the arrayList from
    // the slot's point of view!
    for (int i = leftContext.size() - 1; i >= 0; i--) {
      TextRulerAnnotation annot = new TextRulerAnnotation(leftContext.get(i), example.getDocument());
      LP2RuleItem item = new LP2RuleItem();
      item.setWordConstraint(annot);
      if (item.getWordConstraint().isRegExpConstraint())
        item.addOtherConstraint(new MLLP2OtherConstraint(annot, annot));
      rule.addPreFillerItem(item);
    }

    for (AnnotationFS afs : rightContext) {
      TextRulerAnnotation annot = new TextRulerAnnotation(afs, example.getDocument());
      LP2RuleItem item = new LP2RuleItem();
      item.setWordConstraint(annot);
      if (item.getWordConstraint().isRegExpConstraint())
        item.addOtherConstraint(new MLLP2OtherConstraint(annot, annot));
View Full Code Here

      // tags:
      // this is not very nice code and not dynamic feature capable, but
      // for testpurposes
      // in order to test WHISK with PosTag Terms...
      if (posTagRootTypeName != null && posTagRootTypeName.length() > 0) {
        TextRulerAnnotation tokenAnnotation = term.getWordConstraint().getTokenAnnotation();
        CAS cas = example.getDocumentCAS();
        TypeSystem ts = cas.getTypeSystem();
        Type posTagsRootType = ts.getType(posTagRootTypeName);
        if (ts != null) {
          // POS-Tags created by our test hmm tagger.
          List<AnnotationFS> posTagAnnotations = TextRulerToolkit.getAnnotationsWithinBounds(cas,
                  tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null, posTagsRootType);
          if (posTagAnnotations.size() > 0) {
            AnnotationFS posTag = posTagAnnotations.get(0);
            if (posTag.getBegin() == tokenAnnotation.getBegin()
                    && posTag.getEnd() == tokenAnnotation.getEnd()) {
              TextRulerAnnotation posTagAnnotation = new TextRulerAnnotation(posTag, doc);

              // 1. most specific term with all constraints we
              // have:
              WhiskRule proposedRule3 = proposedRule.copy();
              WhiskRuleItem t3 = proposedRule3.searchItemWithTermNumber(term
View Full Code Here

      return newRule;
  }

  protected WhiskRule anchor(WhiskRule rule, TextRulerExampleDocument doc,
          TextRulerExample example, List<WhiskRuleItem> allTerms, int slotIndex) {
    TextRulerAnnotation slotAnnotation = example.getAnnotations()[slotIndex];
    List<WhiskRuleItem> inside = getTermsWithinBounds(allTerms, slotAnnotation.getBegin(),
            slotAnnotation.getEnd());

    if (rule == null || inside.isEmpty()) {
      return null;
    }
    // create base 1 and base 2:
View Full Code Here

            filterSet), tokensRootType);

    List<WhiskRuleItem> result = new ArrayList<WhiskRuleItem>();
    int i = 0;
    for (AnnotationFS afs : all) {
      WhiskRuleItem term = new WhiskRuleItem(new TextRulerAnnotation(afs, example.getDocument()));
      term.setTermNumberInExample(i);
      i++;
      result.add(term);
    }
    return result;
View Full Code Here

  public List<WhiskRuleItem> getTermsWithinBounds(List<WhiskRuleItem> allTerms, int startPos,
          int endPos) {
    List<WhiskRuleItem> result = new ArrayList<WhiskRuleItem>();
    for (WhiskRuleItem term : allTerms) {
      TextRulerAnnotation a = term.getWordConstraint().getTokenAnnotation();
      if (a.getBegin() >= startPos && a.getEnd() <= endPos)
        result.add(term);
      if (a.getEnd() > endPos)
        break;
    }
    return result;
  }
View Full Code Here

  protected List<LP2Rule> createStartRulesForExample(final TextRulerExample example) {
    TextRulerTarget target = example.getTarget();
    List<LP2Rule> result = new ArrayList<LP2Rule>();
    CAS docCas = example.getDocumentCAS();
    TextRulerAnnotation exampleAnnotation = example.getAnnotation();
    TypeSystem ts = docCas.getTypeSystem();
    Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);

    boolean isLeftBoundary = (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY || target.type == MLTargetType.SINGLE_LEFT_CORRECTION);
    int thePosition = isLeftBoundary ? exampleAnnotation.getBegin() : exampleAnnotation.getEnd();
    List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
            thePosition, windowSize,
            TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);

    List<AnnotationFS> rightContext;
    if (target.type == MLTargetType.SINGLE_LEFT_CORRECTION
            || target.type == MLTargetType.SINGLE_RIGHT_CORRECTION) {
      rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition,
              windowSize + 1, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
              tokensRootType);
      rightContext.remove(0);
    } else {
      rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition, windowSize,
              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
    }

    int totalCount = leftContext.size() + rightContext.size();

    // LEFT CONTEXT (PRE FILLER PATTERN)
    // result.add(createStartRuleForConstraint(example, 0, true, null));

    for (int index = 0; index < totalCount; index++) {
      boolean isPre = index < leftContext.size();
      int prePostIndex = isPre ? index : index - leftContext.size();
      AnnotationFS tokenAFS = isPre ? leftContext.get(leftContext.size() - 1 - prePostIndex)
              : rightContext.get(prePostIndex);
      TextRulerAnnotation tokenAnnotation = new TextRulerAnnotation(tokenAFS, example.getDocument());
      LP2RuleItem wordItem = new LP2RuleItem();

      // one rule with only the word constraint:
      wordItem.setWordConstraint(tokenAnnotation);
      result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
              wordItem));

      if (wordItem.getWordConstraint().isRegExpConstraint()) {
        LP2RuleItem basicItem = new LP2RuleItem();
        // basicItem.setOtherConstraint("basicTM", new
        // MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
        basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
        result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
                basicItem));
      }

      // // POS-Tags created by our test hmm tagger.
      // Type posTagsRootType = ts.getType("org.apache.uima.ml.ML.postag");
      // if (posTagsRootType != null)
      // {
      // List<AnnotationFS> posTagAnnotations =
      // TextRulerToolkit.getAnnotationsWithinBounds(example.getDocumentCAS(),
      // tokenAnnotation.getBegin(), tokenAnnotation.getEnd(), null,
      // posTagsRootType);
      // if (posTagAnnotations.size()>0)
      // {
      // if (TextRulerToolkit.DEBUG && posTagAnnotations.size()>1)
      // {
      // TextRulerToolkit.logIfDebug("HOW CAN ONE TOKEN HAVE MORE THAN ONE POS TAG ?? "+tokenAnnotation.getBegin()+":"+tokenAnnotation.getEnd()+"="+tokenAnnotation.getCoveredText());
      // for (AnnotationFS afs : posTagAnnotations)
      // {
      // System.out.print(afs.getType().getShortName()+":"+afs.getCoveredText()+" "+afs.getBegin()+":"+afs.getEnd()+"\n");
      // }
      // TextRulerToolkit.logIfDebug("");
      // }
      //
      // TextRulerAnnotation posTagAnnotation = new
      // TextRulerAnnotation(posTagAnnotations.get(0),
      // example.getDocument());
      // LP2RuleItem basicItem = new LP2RuleItem();
      // basicItem.setOtherConstraint("postag", new
      // MLLP2OtherConstraint(posTagAnnotation, posTagAnnotation));
      // result.add(createStartRuleForConstraint(example.getTarget(),
      // prePostIndex+1, isPre, basicItem));
      // }
      // }

      // new dynamic system: grab everything we get from the annotation
      // index that lies over this token:
      // (annotations WITHIN (with smaller bounds than the token itself)
      // are ignored for now! we could
      // add using them with the CONTAINS constraint. but our
      // MLLP2OtherConstraint is not yet capable of this!

      List<AnnotationFS> featureAnnotations = TextRulerToolkit.getNonTMAnnoationsOverToken(docCas,
              tokenAFS, filterSetWithSlotNames);
      if (TextRulerToolkit.DEBUG && featureAnnotations.size() > 1) {
        TextRulerToolkit.log("FOUND MORE THAN ONE EXTRA TOKEN FEATURE ANNOTATION !");
        for (AnnotationFS featA : featureAnnotations)
          TextRulerToolkit.log(featA.toString());
        TextRulerToolkit.log("--------------------------------");
      }
      for (AnnotationFS featA : featureAnnotations) {
        TextRulerAnnotation featureAnnot = new TextRulerAnnotation(featA, example.getDocument());
        LP2RuleItem basicItem = new LP2RuleItem();
        basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, featureAnnot));
        result.add(createStartRuleForConstraint(example.getTarget(), prePostIndex + 1, isPre,
                basicItem));
      }
View Full Code Here

      TextRulerAnnotation[] lastExampleAnnotations = lastExample.getAnnotations();
      List<AnnotationFS> tailTokens = TextRulerToolkit.getAnnotationsAfterPosition(cas,
              lastExampleAnnotations[lastExampleAnnotations.length - 1].getEnd(), 0,
              TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType);
      for (AnnotationFS afs : headTokens)
        head.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
      for (AnnotationFS afs : tailTokens)
        tail.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
      PatternPair p = new PatternPair();
      p.l.addAll(head);
      p.r.addAll(tail);
      headTailCache.put(key, p);
    }
View Full Code Here

      List<TextRulerExample> examples = doc.getPositiveExamples();
      for (int i = 0; i < examples.size() - 1; i++) {
        // get separator between i'th and (i+1)'th example:
        TextRulerAnnotation[] exampleAnnotations1 = examples.get(i).getAnnotations();
        TextRulerAnnotation[] exampleAnnotations2 = examples.get(i + 1).getAnnotations();
        TextRulerAnnotation lastOf1 = exampleAnnotations1[exampleAnnotations1.length - 1];
        TextRulerAnnotation firstOf2 = exampleAnnotations2[0];
        List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, lastOf1
                .getEnd(), firstOf2.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(
                slotNames, filterSet), tokenType);
        TextRulerRulePattern thePattern = new TextRulerRulePattern();
        for (AnnotationFS afs : theTokens)
          thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
        if (thePattern.size() > 0)
          result.add(thePattern);

      }
      interTupelSeparatorsCache.put(key, result);
View Full Code Here

    boolean isLastSlot = slotIndex >= slotNames.length - 1;
    for (int ei = 0; ei < examples.size(); ei++) {
      boolean isLastExample = ei == examples.size() - 1;
      TextRulerExample e = examples.get(ei);
      // get stuff between slot slotIndex and slotIndex+1
      TextRulerAnnotation slotAnnotation = e.getAnnotations()[slotIndex];
      TextRulerAnnotation nextSlotAnnotation;

      if (!isLastSlot)
        nextSlotAnnotation = e.getAnnotations()[slotIndex + 1];
      else {
        if (!isLastExample) // the next slot annotation is the first
          // example annotation of the next template:
          nextSlotAnnotation = examples.get(ei + 1).getAnnotations()[0];
        else
          nextSlotAnnotation = null;
      }

      List<AnnotationFS> theTokens;
      if (nextSlotAnnotation == null)
        theTokens = TextRulerToolkit.getAnnotationsAfterPosition(cas, slotAnnotation.getEnd(), 0,
                TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType);
      else
        theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, slotAnnotation.getEnd(),
                nextSlotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(
                        slotNames, filterSet), tokenType);
      TextRulerRulePattern thePattern = new TextRulerRulePattern();
      for (AnnotationFS afs : theTokens)
        thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
      if (thePattern.size() > 0)
        result.add(thePattern);
    }
    return result;
  }
View Full Code Here

TOP

Related Classes of org.apache.uima.ruta.textruler.core.TextRulerAnnotation

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.