Package org.apache.uima.ruta.textruler.core

Examples of org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector


              + RutaEngine.SCRIPT_FILE_EXTENSION));
      FileUtils.saveString2File(allRulesContent, new File(getTempRulesFileName()));

      CAS testCAS = getTestCAS();
      for (TextRulerExampleDocument doc : sortedDocs) {
        TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
        doc.resetAndFillTestCAS(testCAS, target);
        CAS docCAS = doc.getCAS();
        ae.process(testCAS);
        compareOriginalDocumentWithTestCAS(doc, testCAS, target, c, true); // test whole ruleset and
        // collect negative
        // examples

        // now we have some covered positive examples that are good, and
        // maybe some negative examples
        // for that we might create Correction Rules... in order to do
        // that we have to create
        // ShiftExamples and map negative examples (incorrect inserted
        // boundaries) with a specific
        // distance to an original positive example...

        // TODO should that be done in both directions (left and right)?
        // What happens if we find two potential examples, one left and
        // one right? --> for now: use the nearer one. If both are at
        // exactly the same distance, use the one where the wrong tag
        // would be IN the slot filler!
        List<TextRulerExample> correctTags = doc.getPositiveExamples();
        List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
                c.getCoveredNegativeExamples());
        List<TextRulerShiftExample> newExamples = new ArrayList<TextRulerShiftExample>();
        for (TextRulerExample wrongTag : wrongTags) {
          // test, if there's a corresponding positive example
          // somewhere around (within maxDistance)
          List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
View Full Code Here


      // TextRulerToolkit.log("RULE: "+rule.getRuleString());
      // testRuleOnTrainingsSet(rule, exampleDocuments.getDocuments());

      // this rule has to at least cover its seed example!!
      TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
      c.addCoveredPositive(example);
      rule.setCoveringStatistics(c);
      slotRules.add(rule);
    }
    initialRuleBaseSize = slotRules.size();
  }
View Full Code Here

      result.add(bestRule);
      rules.remove(0);
      Set<TextRulerExample> coveredExamples = bestRule.getCoveringStatistics()
              .getCoveredPositiveExamples();
      for (TrabalRule rule : rules) {
        TextRulerStatisticsCollector collector = rule.getCoveringStatistics();
        Iterator<TextRulerExample> iterator = coveredExamples.iterator();
        while (iterator.hasNext()) {
          TextRulerExample example = iterator.next();
          if (collector.getCoveredPositiveExamples().contains(example)) {
            collector.getCoveredPositiveExamples().remove(example);
            // if (rule.getErrorType() == ErrorType.ANNOTATION
            // || rule.getErrorType() == ErrorType.DELETING) {
            // collector.incCoveredPositives(-1);
            // } else {
            collector.incCoveredPositives(-1);
            // }
          }
          if (collector.getCoveredPositivesCount() <= 0) {
            break;
          }
        }
        if (rule.getErrorRate() > maxErrorRate
                || rule.getCoveringStatistics().getCoveredPositivesCount() <= 0) {
View Full Code Here

    if (rules.isEmpty())
      return rules;

    List<TextRulerStatisticsCollector> sums = new ArrayList<TextRulerStatisticsCollector>();
    for (TrabalRule each : rules) {
      sums.add(new TextRulerStatisticsCollector());
    }
    List<TextRulerExampleDocument> goldDocs;
    List<TextRulerExampleDocument> additionalDocs;
    goldDocs = documents.getDocuments();
    additionalDocs = additionalDocuments.getDocuments();
    CAS theTestCAS = getTestCAS();
    int counter = 0;
    for (TrabalRule rule : rules) {
      counter++;
      String ruleString = rule.getRuleString();
      String ruleInfo = getRuleInfo(rule);
      System.out.println("testing: " + ruleString);
      if (inducedRules.containsKey(ruleString)) {
        rule.setCoveringStatistics(inducedRules.get(ruleString));
        System.out.println("skipped with " + inducedRules.get(ruleString));
      } else {
        for (int i = 0; i < goldDocs.size(); i++) {
          TextRulerExampleDocument goldDoc = goldDocs.get(i);
          TextRulerExampleDocument additionalDoc = additionalDocs.get(i);
          sendStatusUpdateToDelegate("Testing " + ruleSet + ruleInfo + " on document " + (i + 1)
                  + " of " + goldDocs.size() + " : rule " + counter + " of " + rules.size(),
                  TextRulerLearnerState.ML_RUNNING, false);
          TextRulerStatisticsCollector sumC = new TextRulerStatisticsCollector();
          prepareTestCas(theTestCAS, goldDoc, additionalDoc);
          testRuleOnDocument(rule, goldDoc, additionalDoc, sumC, theTestCAS);
          sums.get(counter - 1).add(sumC);
          int n = sumC.getCoveredNegativesCount();
          int p = sumC.getCoveredPositivesCount();
          int pnorm = p;
          if (pnorm == 0) {
            pnorm = 1;
          }
          if (n / pnorm > maxErrorRate) {
            System.out.println("stopped:" + sumC);
            break;
          }

          if (shouldAbort())
            return rules;
        }
        TextRulerStatisticsCollector c = sums.get(counter - 1);
        rule.setCoveringStatistics(sums.get(counter - 1));
        inducedRules.put(ruleString, c);
      }
    }
    for (int ruleIndex = 0; ruleIndex < rules.size(); ruleIndex++) {
View Full Code Here

    if (shouldAbort())
      return;
    String rStr = getAnnotationRulesString(target.getSingleSlotTypeName());
    for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) {
      CAS processedCAS = applyScriptOnDocument(rStr, doc, target);
      TextRulerStatisticsCollector scriptStatistics = new TextRulerStatisticsCollector();
      compareOriginalDocumentWithTestCAS(doc, processedCAS, target, scriptStatistics,
              collectNegativeCoveredInstancesWhenTesting());
      for (KEPRule cRule : correctionRules.get(target.getSingleSlotTypeName())) {
        if (shouldAbort())
          break;
        if (cRule.getCoveringStatistics() == null) {
          cRule.setCoveringStatistics(new TextRulerStatisticsCollector());
        }
        processedCAS = applyScriptOnDocument(rStr, doc, target);
        TextRulerStatisticsCollector correctedStats = new TextRulerStatisticsCollector();
        testRuleOnDocument(cRule, doc, correctedStats, processedCAS);
        for (TextRulerExample ex : scriptStatistics.getCoveredNegativeExamples()) {
          if (!correctedStats.getCoveredNegativeExamples().contains(ex)) {
            cRule.getCoveringStatistics().addCoveredNegative(ex);
          }
        }
        for (TextRulerExample ex : scriptStatistics.getCoveredPositiveExamples()) {
          if (!correctedStats.getCoveredPositiveExamples().contains(ex)) {
            cRule.getCoveringStatistics().addCoveredPositive(ex);
          }
        }
        cRule.getCoveringStatistics().reflectCountsFromCoveredExamples();
      }
View Full Code Here

          if (containsWordConstraint)
            break;
        }
      if (!containsWordConstraint) {
        // and calculate intersection of coverings:
        TextRulerStatisticsCollector newCovering;
        if (currentCovering != null)
          newCovering = getCoveringIntersection(currentCovering,
                  candidateRule.getCoveringStatistics());
        else
          newCovering = candidateRule.getCoveringStatistics();

        // prune all rules that go below our minCoveredPositives
        // threshold!
        if (newCovering.getCoveredPositivesCount() >= minCoveredPositives) {
          // add rule to configuration tuple
          currentRuleTuple.add(candidateRule);

          if (!recursiveCreateAllRuleCombinations(startRules, ctxStartRules, index + 1,
                  currentRuleTuple, newCovering, debugRuleCollector))
View Full Code Here

  }

  protected TextRulerStatisticsCollector getCoveringIntersection(
          final TextRulerStatisticsCollector c1, final TextRulerStatisticsCollector c2) {
    // calculate intersections of coverings:
    TextRulerStatisticsCollector resultC = new TextRulerStatisticsCollector(c1);

    resultC.getCoveredPositiveExamples().retainAll(c2.getCoveredPositiveExamples());
    resultC.getCoveredNegativeExamples().retainAll(c2.getCoveredNegativeExamples());
    resultC.reflectCountsFromCoveredExamples();

    return resultC;
  }
View Full Code Here

      // TextRulerToolkit.log("RULE: "+rule.getRuleString());
      // testRuleOnTrainingsSet(rule, exampleDocuments.getDocuments());

      // this rule has to at least cover its seed example!!
      TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
      c.addCoveredPositive(example);
      rule.setCoveringStatistics(c);
      slotRules.add(rule);
    }
    initialRuleBaseSize = slotRules.size();
  }
View Full Code Here

              + RutaEngine.SCRIPT_FILE_EXTENSION));
      FileUtils.saveString2File(allRulesContent, new File(getTempRulesFileName()));

      CAS testCAS = getTestCAS();
      for (TextRulerExampleDocument doc : sortedDocs) {
        TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
        doc.resetAndFillTestCAS(testCAS, target);
        CAS docCAS = doc.getCAS();
        ae.process(testCAS);
        compareOriginalDocumentWithTestCAS(doc, testCAS, target, c, true); // test whole ruleset and
        // collect negative
        // examples

        // now we have some covered positive examples that are good, and
        // maybe some negative examples
        // for that we might create Correction Rules... in order to do
        // that we have to create
        // ShiftExamples and map negative examples (incorrect inserted
        // boundaries) with a specific
        // distance to an original positive example...

        // TODO should that be done in both directions (left and right)?
        // What happens if we find two potential examples, one left and
        // one right? --> for now: use the nearer one. If both are at
        // exactly the same distance, use the one where the wrong tag
        // would be IN the slot filler!
        List<TextRulerExample> correctTags = doc.getPositiveExamples();
        List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
                c.getCoveredNegativeExamples());
        List<TextRulerShiftExample> newExamples = new ArrayList<TextRulerShiftExample>();
        for (TextRulerExample wrongTag : wrongTags) {
          // test, if there's a corresponding positive example
          // somewhere around (within maxDistance)
          List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
View Full Code Here

          if (containsWordConstraint)
            break;
        }
      if (!containsWordConstraint) {
        // and calculate intersection of coverings:
        TextRulerStatisticsCollector newCovering;
        if (currentCovering != null)
          newCovering = getCoveringIntersection(currentCovering,
                  candidateRule.getCoveringStatistics());
        else
          newCovering = candidateRule.getCoveringStatistics();

        // prune all rules that go below our minCoveredPositives
        // threshold!
        if (newCovering.getCoveredPositivesCount() >= minCoveredPositives) {
          // add rule to configuration tuple
          currentRuleTuple.add(candidateRule);

          if (!recursiveCreateAllRuleCombinations(startRules, ctxStartRules, index + 1,
                  currentRuleTuple, newCovering, debugRuleCollector))
View Full Code Here

TOP

Related Classes of org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.