Package org.languagetool

Examples of org.languagetool.AnalyzedTokenReadings


  }

  private AnalyzedTokenReadings setAndAnnotate(final AnalyzedTokenReadings oldReading, final AnalyzedToken newReading) {
    final String old = oldReading.toString();
    final String prevAnot = oldReading.getHistoricalAnnotations();
    final AnalyzedTokenReadings newAtr = new AnalyzedTokenReadings(oldReading.getReadings(),
            oldReading.getStartPos());
    newAtr.setWhitespaceBefore(oldReading.isWhitespaceBefore());
    newAtr.addReading(newReading);
    newAtr.setHistoricalAnnotations(
            annotateToken(prevAnot, old, newAtr.toString()));
    return newAtr;
  }
View Full Code Here


    final List<String> l = Arrays.asList("inflectedform3");
    final List<AnalyzedTokenReadings> analyzedTokenReadings = tagger.tag(l);
    assertNotNull(analyzedTokenReadings);
    assertEquals(1, analyzedTokenReadings.size());

    final AnalyzedTokenReadings analyzedTokenReading = analyzedTokenReadings.get(0);
    assertEquals("inflectedform3", analyzedTokenReading.getToken());
    assertNotNull(analyzedTokenReading.getReadings());
    assertEquals(4, analyzedTokenReading.getReadingsLength());

    AnalyzedToken analyzedToken;

    analyzedToken = analyzedTokenReading.getReadings().get(0);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3a", analyzedToken.getLemma());
    assertEquals("POS3a", analyzedToken.getPOSTag());

    analyzedToken = analyzedTokenReading.getReadings().get(1);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3b", analyzedToken.getLemma());
    assertEquals("POS3b", analyzedToken.getPOSTag());

    analyzedToken = analyzedTokenReading.getReadings().get(2);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3c", analyzedToken.getLemma());
    assertEquals("POS3c", analyzedToken.getPOSTag());

    analyzedToken = analyzedTokenReading.getReadings().get(3);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3d", analyzedToken.getLemma());
    assertEquals("POS3d", analyzedToken.getPOSTag());
  }
View Full Code Here

  public void testMultiplePOS() throws Exception {
    final List<String> l = Arrays.asList("inflectedform2");
    final List<AnalyzedTokenReadings> analyzedTokenReadings = tagger.tag(l);
    assertNotNull(analyzedTokenReadings);
    assertEquals(1, analyzedTokenReadings.size());
    final AnalyzedTokenReadings analyzedTokenReading = analyzedTokenReadings.get(0);
    assertEquals("inflectedform2", analyzedTokenReading.getToken());
    assertNotNull(analyzedTokenReading.getReadings());
    assertEquals(3,analyzedTokenReading.getReadingsLength());
    AnalyzedToken analyzedToken;

    analyzedToken = analyzedTokenReading.getReadings().get(0);
    assertEquals("POS1a", analyzedToken.getPOSTag());
    assertEquals("inflectedform2", analyzedToken.getToken());
    assertEquals("lemma2", analyzedToken.getLemma());

    analyzedToken = analyzedTokenReading.getReadings().get(1);
    assertEquals("POS1b", analyzedToken.getPOSTag());
    assertEquals("inflectedform2", analyzedToken.getToken());
    assertEquals("lemma2", analyzedToken.getLemma());

    analyzedToken = analyzedTokenReading.getReadings().get(2);
    assertEquals("POS1c", analyzedToken.getPOSTag());
    assertEquals("inflectedform2", analyzedToken.getToken());
    assertEquals("lemma2", analyzedToken.getLemma());
  }
View Full Code Here

    final List<String> l = Arrays.asList("inflectedform2", "inflectedform3");
    final List<AnalyzedTokenReadings> analyzedTokenReadings = tagger.tag(l);
    assertNotNull(analyzedTokenReadings);
    assertEquals(2, analyzedTokenReadings.size());

    AnalyzedTokenReadings analyzedTokenReading;

    analyzedTokenReading = analyzedTokenReadings.get(0);
    assertEquals("inflectedform2", analyzedTokenReading.getToken());
    assertNotNull(analyzedTokenReading.getReadings());
    assertEquals(3,analyzedTokenReading.getReadingsLength());
    // analyzedTokenReading.getReadings are tested by #testMultiplePOS()

    analyzedTokenReading = analyzedTokenReadings.get(1);
    assertEquals("inflectedform3", analyzedTokenReading.getToken());
    assertNotNull(analyzedTokenReading.getReadings());
    assertEquals(4, analyzedTokenReading.getReadingsLength());
    // analyzedTokenReading.getReadings are tested by #testMultipleLemma()
  }
View Full Code Here

        }
      }
      if (l.isEmpty()) {
        l.add(new AnalyzedToken(word, null, null));
      }
      tokenReadings.add(new AnalyzedTokenReadings(l, pos));
      pos += word.length();
    }

    return tokenReadings;
  }
View Full Code Here

    return tokenReadings;
  }

  @Override
  public AnalyzedTokenReadings createNullToken(String token, int startPos) {
    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null),
            startPos);
  }
View Full Code Here

  public RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
   
    if (tokens.length > 3) {
      final AnalyzedTokenReadings analyzedToken = tokens[1];
      final String token = analyzedToken.getToken();
      // avoid "..." etc. to be matched:
      boolean isWord = true;
      if (token.length() == 1) {
        final char c = token.charAt(0);
        if (!Character.isLetter(c)) {
          isWord = false;
        }
      }
     
      if (isWord && lastToken.equals(token)
          && !isException(token) && !isException(tokens[2].getToken()) && !isException(tokens[3].getToken())) {
        final String shortMsg;
        if (isAdverb(analyzedToken)) {
          shortMsg = messages.getString("desc_repetition_beginning_adv");
        } else if (beforeLastToken.equals(token)) {
          shortMsg = messages.getString("desc_repetition_beginning_word");
        } else {
          shortMsg = "";
        }
         
        if (!shortMsg.equals("")) {
          final String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
          final int startPos = analyzedToken.getStartPos();
          final int endPos = startPos + token.length();
          final RuleMatch ruleMatch = new RuleMatch(this, startPos, endPos, msg, shortMsg);
          ruleMatches.add(ruleMatch);
        }
      }
View Full Code Here

  public MatchState(Match match, Synthesizer synthesizer) {
    this.match = match;
    this.synthesizer = synthesizer;
    final String lemma = match.getLemma();
    if (!StringUtils.isEmpty(lemma)) {
      formattedToken = new AnalyzedTokenReadings(new AnalyzedToken(lemma, match.getPosTag(), lemma), 0);
    }
  }
View Full Code Here

            annotateChange(whTokens[position], prevValue, prevAnot);
          }
        }
      } else if (!StringTools.isEmpty(disambiguatedPOS)) { // negative filtering
        Pattern p = Pattern.compile(disambiguatedPOS);
        AnalyzedTokenReadings tmp = new AnalyzedTokenReadings(whTokens[fromPos].getReadings(),
            whTokens[fromPos].getStartPos());
        for (AnalyzedToken analyzedToken : tmp) {
          if (analyzedToken.getPOSTag() != null) {
            final Matcher mPos = p.matcher(analyzedToken.getPOSTag());
            if (mPos.matches()) {
              final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos);
              final String prevValue = whTokens[position].toString();
              final String prevAnot = whTokens[position].getHistoricalAnnotations();
              whTokens[position].removeReading(analyzedToken);
              annotateChange(whTokens[position], prevValue, prevAnot);
            }
          }
        }
      }
      break;
    case ADD:
      if (newTokenReadings != null) {
        if (newTokenReadings.length == matchingTokensWithCorrection
            - startPositionCorrection + endPositionCorrection) {
          for (int i = 0; i < newTokenReadings.length; i++) {
            final String token;
            final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos + i);
            if ("".equals(newTokenReadings[i].getToken())) { // empty token
              token = whTokens[position].getToken();
            } else {
              token = newTokenReadings[i].getToken();
            }
            final String lemma;
            if (newTokenReadings[i].getLemma() == null) { // empty lemma
              lemma = token;
            } else {
              lemma = newTokenReadings[i].getLemma();
            }
            final AnalyzedToken newTok = new AnalyzedToken(token,
                newTokenReadings[i].getPOSTag(), lemma);
            final String prevValue = whTokens[position].toString();
            final String prevAnot = whTokens[position].getHistoricalAnnotations();
            whTokens[position].addReading(newTok);
            annotateChange(whTokens[position], prevValue, prevAnot);
          }
        }
      }
      break;
    case FILTERALL:
      for (int i = 0; i < matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection; i++) {
        final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos + i);
        Element myEl;
        if (elementsMatched.get(i + startPositionCorrection)) {
          myEl = rule.getPatternElements().get(i + startPositionCorrection);
        } else {
          int k = 1;
          while (i + startPositionCorrection + k < rule.getPatternElements().size() + endPositionCorrection &&
              !elementsMatched.get(i + startPositionCorrection + k)) {
            k++;
          }
         //FIXME: this is left to see whether this fails anywhere
         assert(i + k + startPositionCorrection < rule.getPatternElements().size());
         myEl = rule.getPatternElements().get(i + k + startPositionCorrection);
        }
        final Match tmpMatchToken = new Match(myEl.getPOStag(), null,
            true,
            myEl.getPOStag(),
            null, Match.CaseConversion.NONE, false, false,
            Match.IncludeRange.NONE);

        MatchState matchState = tmpMatchToken.createState(rule.getLanguage().getSynthesizer(), whTokens[position]);
        final String prevValue = whTokens[position].toString();
        final String prevAnot = whTokens[position].getHistoricalAnnotations();
        whTokens[position] = matchState.filterReadings();
        annotateChange(whTokens[position], prevValue, prevAnot);
      }
      break;
    case IMMUNIZE:
      for (int i = 0; i < matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection; i++) {
        whTokens[sentence.getOriginalPosition(firstMatchToken + correctedStPos + i)].immunize();
      }
      break;
    case IGNORE_SPELLING:
      for (int i = 0; i < matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection; i++) {
        whTokens[sentence.getOriginalPosition(firstMatchToken + correctedStPos + i)].ignoreSpelling();
      }
      break;
    case FILTER:
      if (matchElement == null) { // same as REPLACE if using <match>
        final Match tmpMatchToken = new Match(disambiguatedPOS, null,
            true, disambiguatedPOS, null,
            Match.CaseConversion.NONE, false, false,
            Match.IncludeRange.NONE);
        boolean newPOSmatches = false;

        // only apply filter rule when it matches previous tags:
        for (int i = 0; i < whTokens[fromPos].getReadingsLength(); i++) {
          if (!whTokens[fromPos].getAnalyzedToken(i).hasNoTag() &&
              whTokens[fromPos].getAnalyzedToken(i).getPOSTag().matches(disambiguatedPOS)) {
            newPOSmatches = true;
            break;
          }
        }
        if (newPOSmatches) {
          final MatchState matchState = tmpMatchToken.createState(rule.getLanguage().getSynthesizer(), whTokens[fromPos]);
          final String prevValue = whTokens[fromPos].toString();
          final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
          whTokens[fromPos] = matchState.filterReadings();
          annotateChange(whTokens[fromPos], prevValue, prevAnot);
        }
        break;
      }
      //fallthrough
    case REPLACE:
    default:
        if (newTokenReadings != null && newTokenReadings.length > 0) {
          if (newTokenReadings.length == matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection) {
            for (int i = 0; i < newTokenReadings.length; i++) {
              final String token;
              final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos + i);
              if ("".equals(newTokenReadings[i].getToken())) { // empty token
                token = whTokens[position].getToken();
              } else {
                token = newTokenReadings[i].getToken();
              }
              final String lemma;
              if (newTokenReadings[i].getLemma() == null) { // empty lemma
                lemma = token;
              } else {
                lemma = newTokenReadings[i].getLemma();
              }
              final AnalyzedToken analyzedToken = new AnalyzedToken(token, newTokenReadings[i].getPOSTag(), lemma);
              final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
                  analyzedToken,
                  whTokens[fromPos].getStartPos());
              whTokens[position] = replaceTokens(
                  whTokens[position], toReplace);
            }
          }
        } else if (matchElement == null) {
          String lemma = "";
          for (AnalyzedToken analyzedToken : whTokens[fromPos]) {
            if (analyzedToken.getPOSTag() != null
                && analyzedToken.getPOSTag().equals(disambiguatedPOS) && analyzedToken.getLemma() != null) {
              lemma = analyzedToken.getLemma();
            }
          }
          if (StringTools.isEmpty(lemma)) {
            lemma = whTokens[fromPos].getAnalyzedToken(0).getLemma();
          }

          final AnalyzedToken analyzedToken = new AnalyzedToken(whTokens[fromPos].getToken(), disambiguatedPOS, lemma);
          final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
              analyzedToken, whTokens[fromPos].getStartPos());
          whTokens[fromPos] = replaceTokens(whTokens[fromPos], toReplace);
        } else {
          // using the match element
          final MatchState matchElementState = matchElement.createState(rule.getLanguage().getSynthesizer(), whTokens[fromPos]);
View Full Code Here

      }
    }
    if (l.isEmpty()) {
      return formattedToken;
    }
    final AnalyzedTokenReadings anTkRead = new AnalyzedTokenReadings(
        l.toArray(new AnalyzedToken[l.size()]),
        formattedToken.getStartPos());
    anTkRead.setWhitespaceBefore(formattedToken.isWhitespaceBefore());
    if (!formattedToken.getChunkTags().isEmpty()) {
      anTkRead.setChunkTags(formattedToken.getChunkTags());
    }
    if (formattedToken.isImmunized()) {
     anTkRead.immunize();
    }
    return anTkRead;
  }
View Full Code Here

TOP

Related Classes of org.languagetool.AnalyzedTokenReadings

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.