Examples of org.languagetool.AnalyzedSentence

org.languagetool.AnalyzedSentence
A sentence that has been tokenized and analyzed. @author Daniel Naber

          // enable indentation use
          goodSentence = goodSentence.replaceAll("[\\n\\t]+", "");
          goodSentence = cleanXML(goodSentence);


          assertTrue(goodSentence.trim().length() > 0);
          final AnalyzedSentence sent = disambiguateUntil(rules, id,
              languageTool.getRawAnalyzedSentence(goodSentence));
          final AnalyzedSentence sentToReplace = disambiguateUntil(rules, id,
              languageTool.getRawAnalyzedSentence(goodSentence));
          //note: we're testing only if string representations are equal
          //it's because getRawAnalyzedSentence does not set all properties
          //in AnalyzedSentence, and during equal test they are set for the
          //left-hand side
          assertEquals("The untouched example (" + goodSentence + ") for " + lang.getName() +
              " rule " + id +"["+ rule.getSubId() +"] was touched!",
              sent.toString(), rule.replace(sentToReplace).toString());
        }
      }
      final List<DisambiguatedExample> examples = rule.getExamples();
      if (examples != null) {
        for (final DisambiguatedExample example : examples) {


          final String outputForms = example.getDisambiguated();
          assertTrue("No input form found for: " + id, outputForms != null);
          assertTrue(outputForms.trim().length() > 0);
          final int expectedMatchStart = example.getExample().indexOf("<marker>");
          final int expectedMatchEnd = example.getExample().indexOf("</marker>") - "<marker>".length();
          if (expectedMatchStart == -1 || expectedMatchEnd == -1) {
            fail(lang
                + ": No position markup ('<marker>...</marker>') in disambiguated example in rule " + rule);
          }
          final String inputForms = example.getAmbiguous();
          assertTrue("No input form found for: " + id, inputForms != null);
          assertTrue(inputForms.trim().length() > 0);
          assertTrue("Input and output forms for rule " + id + "are the same!",
              !outputForms.equals(inputForms));
          final AnalyzedSentence cleanInput = languageTool
              .getRawAnalyzedSentence(cleanXML(example.getExample()));
          final AnalyzedSentence sent = disambiguateUntil(rules, id,
              languageTool
              .getRawAnalyzedSentence(cleanXML(example.getExample())));
          final AnalyzedSentence disambiguatedSent = rule
              .replace(disambiguateUntil(rules, id, languageTool
                  .getRawAnalyzedSentence(cleanXML(example.getExample()))));
          assertTrue(
              "Disambiguated sentence is equal to the non-disambiguated sentence for rule: "
                  + id, !cleanInput.equals(disambiguatedSent));
          assertTrue(
              "Disambiguated sentence is equal to the input sentence for rule: "
                  + id + ". The sentence was: " + sent, !sent.equals(disambiguatedSent));
          String reading = "";
          String annotations = "";
          for (final AnalyzedTokenReadings readings : sent.getTokens()) {
            if (readings.isSentenceStart() && !inputForms.contains("<S>")) {
              continue;
            }
            if (readings.getStartPos() == expectedMatchStart) {
              final AnalyzedTokenReadings[] r = { readings };
              reading = new AnalyzedSentence(r).toShortString(",");
              annotations = readings.getHistoricalAnnotations();
              assertTrue(
                  "Wrong marker position in the example for the rule " + id,
                  readings.getStartPos() == expectedMatchStart
                  && readings.getStartPos() + readings.getToken().length() == expectedMatchEnd);
              break;
            }
          }
          assertEquals("The input form for the rule " + id + " in the example: "
              + example.toString() + " is different than expected (expected "
              + inputForms + " but got " + sortForms(reading) + "). The token has been changed by the disambiguator: " + annotations,
              inputForms, sortForms(reading));
          for (final AnalyzedTokenReadings readings : disambiguatedSent.getTokens()) {
            if (readings.isSentenceStart() && !outputForms.contains("<S>")) {
              continue;
            }
            if (readings.getStartPos() == expectedMatchStart) {
              final AnalyzedTokenReadings[] r = { readings };
              reading = new AnalyzedSentence(r).toShortString(",");
              assertTrue(readings.getStartPos() == expectedMatchStart
                  && readings.getStartPos() + readings.getToken().length() == expectedMatchEnd);
              break;
            }
          }

View Full Code Here


  // useful for testing the rule cascade
  private static AnalyzedSentence disambiguateUntil(
      final List<DisambiguationPatternRule> rules, final String ruleID,
      final AnalyzedSentence sentence) throws IOException {
    AnalyzedSentence disambiguated = sentence;
    for (final DisambiguationPatternRule rule : rules) {
      if (ruleID.equals(rule.getId())) {
        break;
      }
      disambiguated = rule.replace(disambiguated);

View Full Code Here

    // tag text
    final List<String> sentences = langTool.sentenceTokenize(textArea.getText());
    final StringBuilder sb = new StringBuilder();
    try {
      for (String sent : sentences) {
        final AnalyzedSentence analyzedText = langTool.getAnalyzedSentence(sent);
        final String analyzedTextString = StringTools.escapeHTML(analyzedText.toString(",")).
                replace("&lt;S&gt;", "&lt;S&gt;<br>").
                replace("[", "<font color='" + TAG_COLOR + "'>[").
                replace("]", "]</font><br>");
        sb.append(analyzedTextString).append('\n');
      }

View Full Code Here


class TestFrenchDisambiguator implements Disambiguator {


  @Override
  public AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
    AnalyzedSentence sentence = input;
    String filePath = "/disambiguator.xml";
    try (InputStream inputStream = getClass().getResourceAsStream(filePath)) {
      final DisambiguationRuleLoader ruleLoader = new DisambiguationRuleLoader();
      List<DisambiguationPatternRule> disambiguationRules = ruleLoader.getRules(inputStream);
      for (final DisambiguationPatternRule patternRule : disambiguationRules) {

View Full Code Here

   *
   * @param contents Text to tag.
   * @param lt LanguageTool instance
   */
  public static void tagText(final String contents, final JLanguageTool lt) throws IOException {
    AnalyzedSentence analyzedText;
    final List<String> sentences = lt.sentenceTokenize(contents);
    for (final String sentence : sentences) {
      analyzedText = lt.getAnalyzedSentence(sentence);
      System.out.println(analyzedText.toString());
    }
  }

View Full Code Here


    if (tokenIter == null || !tokenIter.hasNext()) {
      // there are no remaining tokens from the current sentence... are there more sentences?
      if (input.incrementToken()) {
        // a new sentence is available: process it.
        final AnalyzedSentence sentence = languageTool.getAnalyzedSentence(termAtt.toString());


        final List<AnalyzedTokenReadings> tokenBuffer = Arrays.asList(sentence.getTokens());
        tokenIter = tokenBuffer.iterator();
        /*
         * it should not be possible to have a sentence with 0 words, check just in case. returning
         * EOS isn't the best either, but it's the behavior of the original code.
         */

View Full Code Here

    assertEquals(Arrays.asList("нервах", "нерви"), matches[0].getSuggestedReplacements());
    
    assertEquals(1, rule.match(langTool.getAnalyzedSentence("в п'ятьом людям")).length);
    assertEquals(1, rule.match(langTool.getAnalyzedSentence("в понад п'ятьом людям")).length);


    AnalyzedSentence analyzedSentence = langTool.getAnalyzedSentence("завдяки їх вдалим трюкам");
    RuleMatch[] match = rule.match(analyzedSentence);
    assertEquals(1, match.length);
    List<String> suggestedReplacements = match[0].getSuggestedReplacements();
    assertTrue("Did not find «їхній»: " + suggestedReplacements, suggestedReplacements.contains("їхнім"));
  }

View Full Code Here

            }
          }
        }
      }
    }
    return new AnalyzedSentence(output);
  }

View Full Code Here

      final String sentence = doc.get(FIELD_NAME);
      final List<RuleMatch> ruleMatches = languageTool.check(sentence);
      if (ruleMatches.size() > 0) {
        final String source = doc.get(SOURCE_FIELD_NAME);
        final String title = doc.get(Indexer.TITLE_FIELD_NAME);
        final AnalyzedSentence analyzedSentence = languageTool.getAnalyzedSentence(sentence);
        final MatchingSentence matchingSentence = new MatchingSentence(sentence, source, title, analyzedSentence, ruleMatches);
        matchingSentences.add(matchingSentence);
      }
    }
    return matchingSentences;

View Full Code Here

   * @since 1.0.1
   */
  public static List<RuleMatch> checkBitext(final String src, final String trg,
                                            final JLanguageTool srcLt, final JLanguageTool trgLt,
                                            final List<BitextRule> bRules) throws IOException {
    final AnalyzedSentence srcText = srcLt.getAnalyzedSentence(src);
    final AnalyzedSentence trgText = trgLt.getAnalyzedSentence(trg);
    final List<RuleMatch> ruleMatches = trgLt.checkAnalyzedSentence(JLanguageTool.ParagraphHandling.NORMAL,
            trgLt.getAllRules(), 0, 0, 1, trg, trgText, null);
    for (BitextRule bRule : bRules) {
      final RuleMatch[] curMatch = bRule.match(srcText, trgText);
      if (curMatch != null) {

View Full Code Here

0 1 2 3

TOP

Related Classes of org.languagetool.AnalyzedSentence

org.languagetool.chunking.EnglishChunkerTest

org.languagetool.commandline.CommandLineTools

org.languagetool.dev.index.LanguageToolFilter

org.languagetool.dev.index.Searcher

org.languagetool.dev.POSTagLanguageModel

org.languagetool.dev.SuggestionExtractorTool

org.languagetool.gui.Main

org.languagetool.rules.bitext.BitextPatternRuleTest

org.languagetool.rules.bitext.FalseFriendsAsBitextLoaderTest

org.languagetool.rules.de.CaseRuleTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.