Package org.languagetool

Examples of org.languagetool.AnalyzedToken


        formattedToken = new AnalyzedTokenReadings(new AnalyzedToken(
            matchedToken.getToken(), posTag, formattedToken.getToken()),
            matchedToken.getStartPos());
        formattedToken.setWhitespaceBefore(matchedToken.isWhitespaceBefore());
        */
        matchedToken.leaveReading(new AnalyzedToken(
            matchedToken.getToken(), posTag, formattedToken.getToken()));
        formattedToken = matchedToken;
      }
      String token = formattedToken.getToken();
      if (pRegexMatch != null && regexReplace != null) {
      /* only replace if it is something to replace*/
        token = pRegexMatch.matcher(token).replaceAll(regexReplace);
      }
      token = convertCase(token, token);
      if (posTag != null) {
        final int numRead = formattedToken.getReadingsLength();
        if (postagRegexp) {
          String targetPosTag = posTag;
          for (int i = 0; i < numRead; i++) {
            final String tst = formattedToken.getAnalyzedToken(i).getPOSTag();
            if (tst != null && pPosRegexMatch.matcher(tst).matches()) {
              targetPosTag = formattedToken.getAnalyzedToken(i).getPOSTag();
              if (posTagReplace != null) {
                targetPosTag = pPosRegexMatch.matcher(targetPosTag).replaceAll(
                    posTagReplace);
              }
              l.add(new AnalyzedToken(token, targetPosTag, formattedToken
                      .getAnalyzedToken(i).getLemma()));
              l.get(l.size() - 1).setWhitespaceBefore(formattedToken.isWhitespaceBefore());
            }
          }
          if (l.isEmpty()) {
            for (final AnalyzedToken anaTok : getNewToken(numRead, token)) {
              l.add(anaTok);
            }
          }
        } else {
          for (final AnalyzedToken anaTok : getNewToken(numRead, token)) {
            l.add(anaTok);
          }         
        }
        if (formattedToken.isSentEnd()) {
          l.add(new AnalyzedToken(formattedToken.getToken(),
            JLanguageTool.SENTENCE_END_TAGNAME,
            formattedToken.getAnalyzedToken(0).getLemma()));
        }
        if (formattedToken.isParaEnd()) {
          l.add(new AnalyzedToken(formattedToken.getToken(),
              JLanguageTool.PARAGRAPH_END_TAGNAME,
              formattedToken.getAnalyzedToken(0).getLemma()));
          }       
      }
    }
View Full Code Here


          lemma = formattedToken.getAnalyzedToken(j).getLemma();
        }
        if (StringTools.isEmpty(lemma)) {
          lemma = formattedToken.getAnalyzedToken(0).getLemma();
        }
        list.add(new AnalyzedToken(token, posTag, lemma));
        list.get(list.size() - 1).
          setWhitespaceBefore(formattedToken.isWhitespaceBefore());
      }
    }
    return list.toArray(new AnalyzedToken[list.size()]);
View Full Code Here

import org.languagetool.AnalyzedToken;

public class DutchSynthesizerTest extends TestCase {

  private final AnalyzedToken dummyToken(String tokenStr) {
    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
  }
View Full Code Here

      final int firstMatchToken, final int prevSkipNext) throws IOException {   
    boolean thisMatched = false;
    final int numberOfReadings = tokens[tokenNo].getReadingsLength();
    setupAndGroup(firstMatchToken, elem, tokens);
    for (int l = 0; l < numberOfReadings; l++) {
      final AnalyzedToken matchToken = tokens[tokenNo].getAnalyzedToken(l);
      prevMatched = prevMatched || prevSkipNext > 0 && prevElement != null
      && prevElement.isMatchedByScopeNextException(matchToken);
      if (prevMatched) {
        return false;
      }
View Full Code Here

  public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      AnalyzedToken at = asAnalyzedToken(word);
      l.add(at);
      tokenReadings.add(new AnalyzedTokenReadings(l, pos));
      pos += at.getToken().length();
    }
    return tokenReadings;
  }
View Full Code Here

    return tokenReadings;
  }

  @Override
  public final AnalyzedTokenReadings createNullToken(final String token, final int startPos) {
    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
  }
View Full Code Here

    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
  }

  @Override
  public AnalyzedToken createToken(String token, String posTag) {
    return new AnalyzedToken(token, posTag, null);
  }
View Full Code Here

    return new AnalyzedToken(token, posTag, null);
  }

  private AnalyzedToken asAnalyzedToken(final String word) {
    if (!word.contains("/")) {
      return new AnalyzedToken(" ", null, null);
    }
    String[] parts = word.split("/");
    return new AnalyzedToken(parts[0], parts[1], null);
  }
View Full Code Here

  private String synth(String word, String posTag, boolean regEx) throws IOException {
    return Arrays.toString(synthesizer.synthesize(dummyToken(word), posTag, regEx));
  }

  private AnalyzedToken dummyToken(String tokenStr) {
    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
  }
View Full Code Here

      } else {
        // word not known, try to decompose it and use the last part for POS tagging:
        if (!StringTools.isEmpty(word.trim())) {
          final List<String> compoundParts = compoundTokenizer.tokenize(word);
          if (compoundParts.size() <= 1) {
            l.add(new AnalyzedToken(word, null, null));
          } else {
            // last part governs a word's POS:
            String lastPart = compoundParts.get(compoundParts.size()-1);
            if (StringTools.startsWithUppercase(word)) {
              lastPart = StringTools.uppercaseFirstChar(lastPart);
            }
            taggerTokens = lexiconLookup(lastPart, morfologik);
            if (taggerTokens != null) {
              tagWord(taggerTokens, word, l, compoundParts);
            } else {
              l.add(new AnalyzedToken(word, null, null));
            }
          }
        } else {
          l.add(new AnalyzedToken(word, null, null));
        }
      }

      tokenReadings.add(new AnalyzedTokenReadings(l.toArray(new AnalyzedToken[l.size()]), pos));
      pos += word.length();
View Full Code Here

TOP

Related Classes of org.languagetool.AnalyzedToken

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.