Package org.apache.uima.ruta.textruler.core

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern


  protected constraint3ReturnType testConstraint3(TextRulerExampleDocument doc,
          TextRulerRulePattern h, TextRulerRulePattern t, TextRulerRulePattern l1) {
    final boolean logReasons = false;

    TextRulerRulePattern head = new TextRulerRulePattern();
    TextRulerRulePattern tail = new TextRulerRulePattern();

    getPageHeadAndTailPortion(doc, head, tail);

    // 1: l1 must be a proper suffix of the portion between the end of h and
    // the first slot filler:
    // (head / h) / l1 = l1

    int hPos = head.find(h);

    // TODO: precalculate this outside this method?
    TextRulerRulePattern restForL1 = head.subPattern(hPos + h.size(), -1).copy();
    for (TextRulerRuleItem it : restForL1)
      ((WienRuleItem) it).getWordConstraint().setGeneralizeLinkMarkUp(true);
    int l1Pos = restForL1.find(l1);
    if (l1Pos < 0 || l1Pos != restForL1.size() - l1.size()) {
      TextRulerToolkit.logIf(logReasons, "REASON 1\n\tl1         \t" + l1 + "\n\trestforl1\t"
              + restForL1);
      return constraint3ReturnType.C3_L1CandidateSuffixError;
    }

    // 2: t must not occur in the subpattern after h and before l1
    if (l1Pos > 0) {
      TextRulerRulePattern patternBetweenHandL1 = restForL1.subPattern(0, l1Pos);
      if (patternBetweenHandL1.size() >= t.size()) {
        if (patternBetweenHandL1.find(t) >= 0) {
          TextRulerToolkit.logIf(logReasons, "REASON 2");
          return constraint3ReturnType.C3_TailCandidateH_L1Error;
        }
      }
    }

    // 2a: addons, not specified in WIEN paper !!
    TextRulerRulePattern lastSlotRightPattern = patternPairs.get(slotNames.length - 1).r;
    if (t.find(lastSlotRightPattern) == 0) // the right boundary of the last
    // slot may not be part of the
    // tail pattern!
    {
      TextRulerToolkit.logIf(logReasons, "REASON 3: " + lastSlotRightPattern + "\tTail: " + t);
      return constraint3ReturnType.C3_TailCandidateRK_PrefixError;
    }

    int tPos = tail.find(t);
    if (tPos < 0) {
      TextRulerToolkit.logIf(logReasons, "REASON 4");
      return constraint3ReturnType.C3_TailCandidateNotFoundError;
    } // this is an own constraint definition: if a document does not have
    // the tail in it,
    // what should we do then? Is this an error, or is this okay since the
    // document may not have any tail after the data?

    // 3: l1 must not precede t in the page's tail:
    int l1tPos = tail.find(l1);
    if (l1tPos >= 0) // l1 occurs in the page's tail:
    {
      if (l1tPos < tPos) {
        TextRulerToolkit.logIf(logReasons, "REASON 5");
        return constraint3ReturnType.C3_TailCandidateSucceedsL1InTailError;
      }
    }

    List<TextRulerRulePattern> interTupleSeparators = getInterTupleSepatators(doc);

    for (TextRulerRulePattern itSep : interTupleSeparators) {
      // 4: l1 must be a proper suffix of each of the inter-tuple
      // separators:
      TextRulerRulePattern itSepCopy = itSep.copy();
      for (TextRulerRuleItem it : itSepCopy)
        ((WienRuleItem) it).getWordConstraint().setGeneralizeLinkMarkUp(true);
      int l1itsPos = itSepCopy.find(l1);
      if (l1itsPos < 0 || l1itsPos != itSepCopy.size() - l1.size()) {
        TextRulerToolkit.logIf(logReasons, "REASON 6: \n\tl1\t" + l1 + "\n\titSep\t" + itSep);
        return constraint3ReturnType.C3_L1CandidateInterTupleSeparatorSuffixError;
      }

      // 5: t must never precede l1 in any inter-tuple separator:
View Full Code Here


      return "<no results yet>";
    String result = getTMFileHeaderString() + "DECLARE wien_tail;\n" + "DECLARE wien_rulemark;\n"
            + "DECLARE wien_content;\n" + "BOOLEAN wien_redo;\n\n"
            + "// tail/head/content area stuff:\n";

    TextRulerRulePattern hCopy = hPattern.copy();

    ((WienRuleItem) hCopy.get(0)).addCondition("-PARTOF(wien_content)");
    result += hCopy + " ALL*?{->MARK(wien_content)};\n";

    TextRulerRulePattern tCopy = tPattern.copy();
    ((WienRuleItem) tCopy.get(0)).addCondition("PARTOF(wien_content)");

    result += tCopy + "{->MARK(wien_tail";
    if (tPattern.size() > 1)
      result += ", 1, " + tPattern.size();
    result += ")};\n\n";
View Full Code Here

TOP

Related Classes of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.