Package org.apache.uima.ruta.textruler.core

Examples of org.apache.uima.ruta.textruler.core.TextRulerRuleList


    exampleDocuments.createExamplesForTarget(target);
    examples = exampleDocuments.getAllPositiveExamples();

    if (shouldAbort())
      return null;
    TextRulerRuleList bestRulesPool = new TextRulerRuleList();
    TextRulerRuleList contextRulesPool = new TextRulerRuleList();
    String slotName = target.getSingleSlotRawTypeName();
    bestRulesPoolMap.put(slotName, bestRulesPool);
    contextRulesPoolMap.put(slotName, contextRulesPool);

    coveredExamples = new HashSet<TextRulerExample>();
    int roundNumber = 0;
    for (TextRulerExample e : examples)
      if (!coveredExamples.contains(e)) {
        if (shouldAbort())
          break;
        roundNumber++;
        currentBestRules = new LP2CurrentBestRulesQueue(maxCurrentBestRulesCount);
        currentContextualRules = new LP2CurrentBestRulesQueue(maxCurrentContextualRulesCount);
        // TextRulerToolkit.log("Example: "+e.getAnnotation().getBegin()+" : "+e.getAnnotation().getEnd());

        induceRulesFromExample(e, roundNumber);

        // TextRulerToolkit.log("Best Rules from this Seed: "+currentBestRules.size());
        // if (TextRulerToolkit.DEBUG && currentBestRules.size()>1)
        // {
        // for (TextRulerRule r : currentBestRules)
        // {
        // TextRulerToolkit.log("\tp="+r.getCoveringStatistics().getCoveredPositivesCount()+"; n="+r.getCoveringStatistics().getCoveredNegativesCount()+";  "+r.getRuleString());
        // for (TextRulerExample ex :
        // r.getCoveringStatistics().getCoveredPositiveExamples())
        // {
        // TextRulerToolkit.log("\t\te="+ex.getAnnotation().getBegin());
        //
        // }
        // }
        // }
        for (LP2Rule bestRule : currentBestRules) {
          addToFinalBestRulesPool(bestRule);
        }
        for (LP2Rule ctxRule : currentContextualRules) {
          addToFinalContextRulesPool(ctxRule);
        }
        sendStatusUpdateToDelegate("New Rules added.", TextRulerLearnerState.ML_RUNNING, true);
      }
    if (TextRulerToolkit.DEBUG) {
      bestRulesPool.saveToRulesFile(getIntermediateRulesFileName(), getFileHeaderString(true));
      // for (TextRulerRule r : bestRulesPool)
      // {
      // TextRulerToolkit.log("p="+r.getCoveringStatistics().getCoveredPositivesCount()+"; n="+r.getCoveringStatistics().getCoveredNegativesCount()+";  "+r.getRuleString());
      // }
    }

    TextRulerRuleList result = bestRulesPool;
    if (contextualRules != null)
      for (TextRulerRule r : contextRulesPool)
        contextualRules.add(r);
    return result;
  }
View Full Code Here


      return;
    slotMaximumTokenCountMap.put(slotName, histogram.size() - 1); // -1 since the
    // zero-histogram point
    // also needs a place!

    TextRulerRuleList ctxRules = new TextRulerRuleList();
    TextRulerRuleList bestRules = learnTaggingRules(new TextRulerTarget(slotName,
            MLTargetType.SINGLE_LEFT_BOUNDARY, this), ctxRules); // learn
    // left
    // boundary
    // best
    // rules
    if (bestRules != null) {
      leftBoundaryBestRulesMap.put(slotName, bestRules.getRulesString(""));
      leftBoundaryContextualRulesMap.put(slotName, ctxRules.getRulesString("\t"));
      bestRules.clear(); // free som memory/references
    }
    if (shouldAbort())
      return;
    ctxRules.clear();
    bestRules = learnTaggingRules(new TextRulerTarget(slotName, MLTargetType.SINGLE_RIGHT_BOUNDARY,
            this), ctxRules); // learn
    // right
    // boundary best
    // rules
    if (bestRules != null) {
      rightBoundaryBestRulesMap.put(slotName, bestRules.getRulesString(""));
      rightBoundaryContextualRulesMap.put(slotName, ctxRules.getRulesString("\t"));
    }

    // TODO add correction rule learn stuff
    // testTaggingRulesAndCreateCorrectionRulesExamples(null, STANDARD_MAX_CONTEXTUAL_RULES_COUNT)

    // correct left start
    TextRulerTarget lsTarget = new TextRulerTarget(slotName, MLTargetType.SINGLE_LEFT_CORRECTION,
            this);
    lsTarget.setMaxShiftDistance(shiftSize);
    TextRulerRuleList correctLeftRules = learnTaggingRules(lsTarget, null);

    // resultString = "CAP{REGEXP(\"PM\")} ALL{->MARKONCE(stimeEND)};";
    // try {
    // FileUtils.saveString2File(resultString, file);
    // } catch (IOException e) {
View Full Code Here

      String leftBoundaryBestRulesString = leftBoundaryBestRulesMap.get(eachSlot);
      String rightBoundaryBestRulesString = rightBoundaryBestRulesMap.get(eachSlot);
      String leftBoundaryContextualRulesString = leftBoundaryContextualRulesMap.get(eachSlot);
      String rightBoundaryContextualRulesString = rightBoundaryContextualRulesMap.get(eachSlot);
      TextRulerRuleList bestRulesPool = bestRulesPoolMap.get(eachSlot);
      TextRulerRuleList contextRulesPool = contextRulesPoolMap.get(eachSlot);

      sb.append("\n// Slot: " + TextRulerToolkit.getTypeShortName(eachSlot) + "\n");
      sb.append("// LEFT BOUNDARY RULES:\n");
      if (leftBoundaryBestRulesString != null) {
        sb.append(leftBoundaryBestRulesString);
        sb.append("\n// RIGHT BOUNDARY RULES:\n");
        if (rightBoundaryBestRulesString != null)
          sb.append(rightBoundaryBestRulesString);
        else if (bestRulesPool != null)
          sb.append(bestRulesPool.getRulesString(""));

        sb.append("\nBLOCK(contextualRules_" + TextRulerToolkit.getTypeShortName(eachSlot)
                + ") Document{} {\n"
                + "\tDocument{->ASSIGN(redoContextualRules, false)}; // reset flag\n");
        sb.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
        sb.append(leftBoundaryContextualRulesString);

        sb.append("\n\t// RIGHT BOUNDARY CONTEXTUAL RULES:\n");
        if (rightBoundaryBestRulesString != null)
          sb.append(rightBoundaryContextualRulesString);
        else if (contextRulesPool != null)
          sb.append(contextRulesPool.getRulesString("\t"));

        sb.append("\n\t//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_"
                + TextRulerToolkit.getTypeShortName(eachSlot) + ")};\n}\n");
      } else if (bestRulesPool != null) {
        sb.append(bestRulesPool.getRulesString(""));
        sb.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
        if (contextRulesPool != null)
          sb.append(contextRulesPool.getRulesString(""));
      }
    }

    for (String eachSlot : slotNames) {
      String leftBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(eachSlot,
View Full Code Here

      examples = exampleDocuments.getAllPositiveExamples();

      if (shouldAbort())
        return;

      slotRules = new TextRulerRuleList();
      ruleList = new RapierRulePriorityQueue(ruleListSize);

      TextRulerToolkit.log("--- RAPIER START for Slot " + currentSlotName);

      sendStatusUpdateToDelegate("Creating initial rule base...",
View Full Code Here

    // this is the inner loop of the WHISK pseudo-code:
    // For each inst in Training
    // for each tag

    cachedTestedRuleStatistics.clear();
    ruleList = new TextRulerRuleList();
    coveredExamples = new HashSet<TextRulerExample>();

    sendStatusUpdateToDelegate("Creating examples...", TextRulerLearnerState.ML_RUNNING, false);
    TextRulerTarget target = new TextRulerTarget(slotNames[0], this); // only
    // single-slot-target
View Full Code Here

    // this is the inner loop of the WHISK pseudo-code:
    // For each inst in Training
    // for each tag

    cachedTestedRuleStatistics.clear();
    ruleList = new TextRulerRuleList();
    coveredExamples = new HashSet<TextRulerExample>();

    sendStatusUpdateToDelegate("Creating examples...", TextRulerLearnerState.ML_RUNNING, false);
    for (int i = 0; i < slotNames.length; i++) {
      TextRulerTarget target = new TextRulerTarget(slotNames[i], this);
View Full Code Here

  @Override
  protected TextRulerRuleList learnTaggingRules(TextRulerTarget target,
          TextRulerRuleList contextualRules) {
    cachedTestedStartRuleStatisticsHitCounter = 0;
    cachedTestedStartRuleStatistics.clear();
    TextRulerRuleList result = super.learnTaggingRules(target, contextualRules);
    TextRulerToolkit
            .log("[OptimizedLP2.learnTaggingRules] saved rule testings due to start rule results cache: "
                    + cachedTestedStartRuleStatisticsHitCounter);
    TextRulerToolkit.log("[OptimizedLP2.learnTaggingRules] cacheSize at end of induction: "
            + cachedTestedStartRuleStatistics.size());
View Full Code Here

      examples = exampleDocuments.getAllPositiveExamples();

      if (shouldAbort())
        return;

      slotRules = new TextRulerRuleList();
      ruleList = new RapierRulePriorityQueue(ruleListSize);

      TextRulerToolkit.log("--- RAPIER START for Slot " + currentSlotName);

      sendStatusUpdateToDelegate("Creating initial rule base...",
View Full Code Here

    // this is the inner loop of the WHISK pseudo-code:
    // For each inst in Training
    // for each tag

    cachedTestedRuleStatistics.clear();
    ruleList = new TextRulerRuleList();
    coveredExamples = new HashSet<TextRulerExample>();

    sendStatusUpdateToDelegate("Creating examples...", TextRulerLearnerState.ML_RUNNING, false);
    for (int i = 0; i < slotNames.length; i++) {
      TextRulerTarget target = new TextRulerTarget(slotNames[i], this);
View Full Code Here

    // this is the inner loop of the WHISK pseudo-code:
    // For each inst in Training
    // for each tag

    cachedTestedRuleStatistics.clear();
    ruleList = new TextRulerRuleList();
    coveredExamples = new HashSet<TextRulerExample>();

    sendStatusUpdateToDelegate("Creating examples...", TextRulerLearnerState.ML_RUNNING, false);
    TextRulerTarget target = new TextRulerTarget(slotNames[0], this); // only
    // single-slot-target
View Full Code Here

TOP

Related Classes of org.apache.uima.ruta.textruler.core.TextRulerRuleList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.