Examples of org.languagetool.AnalyzedToken

org.languagetool.AnalyzedToken
A word (or punctuation, or whitespace) and its part-of-speech tag. @author Daniel Naber

    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
  }


  @Override
  public AnalyzedToken createToken(String token, String posTag) {
    return new AnalyzedToken(token, posTag, null);
  }

View Full Code Here

    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      // a real tagger would need to assign a POS tag
      // in the next line instead of null:
      l.add(new AnalyzedToken(word, null, null));      
      tokenReadings.add(new AnalyzedTokenReadings(l, 0));
    }
    return tokenReadings;
  }

View Full Code Here

    return tokenReadings;
  }


  @Override
  public AnalyzedTokenReadings createNullToken(String token, int startPos) {
    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
  }

View Full Code Here

    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
  }


  @Override
  public AnalyzedToken createToken(String token, String posTag) {
    return new AnalyzedToken(token, posTag, null);
  }

View Full Code Here

              if (newTokenReadings[i].getLemma() == null) { //empty lemma
                lemma = token;
              } else {
                lemma = newTokenReadings[i].getLemma();
              }
              final AnalyzedToken newTok = new AnalyzedToken(token, newTokenReadings[i].getPOSTag(), lemma);


              final String prevValue = whTokens[position].toString();
              final String prevAnot = whTokens[position].getHistoricalAnnotations();
              whTokens[position].addReading(newTok);
              annotateChange(whTokens[position], prevValue, prevAnot);
            }
          }
        }
        break;
      case FILTERALL:
        for (int i = 0; i < matchingTokens - startPositionCorrection
                + endPositionCorrection; i++) {
          final int position = text.getOriginalPosition(firstMatchToken
                  + correctedStPos + i);
          final Element myEl = patternElements.get(i+startPositionCorrection);
          final Match tmpMatchToken = new Match(myEl.getPOStag(), null,
                  true, myEl.getPOStag(), //myEl.isPOStagRegularExpression()
                  null, Match.CaseConversion.NONE, false, false,
                  Match.IncludeRange.NONE);
          tmpMatchToken.setToken(whTokens[position]);
          final String prevValue = whTokens[position].toString();
          final String prevAnot = whTokens[position]
                  .getHistoricalAnnotations();
          whTokens[position] = tmpMatchToken.filterReadings();
          annotateChange(whTokens[position], prevValue, prevAnot);
        }
        break;
      case IMMUNIZE:
        for (int i = 0; i < matchingTokens - startPositionCorrection + endPositionCorrection; i++) {
          whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos + i)].immunize();
        }
        break;
      case FILTER:
        if (matchElement == null) { // same as REPLACE if using <match>
          final Match tmpMatchToken = new Match(disambiguatedPOS, null, true,
                  disambiguatedPOS, null, Match.CaseConversion.NONE,
                  false, false, Match.IncludeRange.NONE);
          tmpMatchToken.setToken(whTokens[fromPos]);
          final String prevValue = whTokens[fromPos].toString();
          final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
          whTokens[fromPos] = tmpMatchToken.filterReadings();
          annotateChange(whTokens[fromPos], prevValue, prevAnot);
          filtered = true;
        }
        // FALLTHROUGH
      case REPLACE:
      default:
        if (!filtered) {
          if (newTokenReadings != null && newTokenReadings.length > 0) {
            if (newTokenReadings.length == matchingTokens - startPositionCorrection + endPositionCorrection) {
              String lemma;
              String token;
              for (int i = 0; i < newTokenReadings.length; i++) {
                final int position = text.getOriginalPosition(firstMatchToken + correctedStPos
                        + i);
                if ("".equals(newTokenReadings[i].getToken())) { //empty token
                  token = whTokens[position].getToken();
                } else {
                  token = newTokenReadings[i].getToken();
                }
                if (newTokenReadings[i].getLemma() == null) { //empty lemma
                  lemma = token;
                } else {
                  lemma = newTokenReadings[i].getLemma();
                }
                final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
                        new AnalyzedToken(token, newTokenReadings[i].getPOSTag(), lemma),
                        whTokens[fromPos].getStartPos());
                whTokens[position] = replaceTokens(whTokens[position], toReplace);
              }
            }
          } else if (matchElement == null) {
            String lemma = "";
            for (int l = 0; l < numRead; l++) {
              if (whTokens[fromPos].getAnalyzedToken(l).getPOSTag() != null
                      && (whTokens[fromPos].getAnalyzedToken(l).getPOSTag().equals(
                      disambiguatedPOS) && (whTokens[fromPos].getAnalyzedToken(l)
                      .getLemma() != null))) {
                lemma = whTokens[fromPos].getAnalyzedToken(l).getLemma();
              }
            }
            if (StringTools.isEmpty(lemma)) {
              lemma = whTokens[fromPos].getAnalyzedToken(0).getLemma();
            }


            final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
                    new AnalyzedToken(whTokens[fromPos].getToken(), disambiguatedPOS,
                            lemma), whTokens[fromPos].getStartPos());
            whTokens[fromPos] = replaceTokens(whTokens[fromPos], toReplace);
          } else {
            // using the match element
            matchElement.setToken(whTokens[fromPos]);

View Full Code Here

      if (isLast) {
          sb.append("/");
      }
      sb.append(mFull.get(tokens));
      sb.append(">");
      final AnalyzedToken tokenStart = new AnalyzedToken(tok, sb.toString(), tokens);
      return setAndAnnotate(token, tokenStart);      
  }

View Full Code Here

    }
  }


  private void addNewWord(final String word, final String lemma,
      final String pos) {
    final AnalyzedToken newWd = new AnalyzedToken(word, pos, lemma);
    if (newWdList == null) {
      newWdList = new ArrayList<>();
    }
    newWdList.add(newWd);
  }

View Full Code Here

      for (AnalyzedToken taggerToken : taggerTokens ) {
        final String posTag = taggerToken.getPOSTag();
        if (posTag != null) {
          final Matcher m = ADJ_PART_FS.matcher(posTag);
          if (m.matches()) {
            additionalTaggedTokens.add(new AnalyzedToken(word, "RG", lowerWord));
            return additionalTaggedTokens;
          }
        }
      }
    }
    //Any well-formed verb with prefixes is tagged as a verb copying the original tags   
    Matcher matcher=PREFIXES_FOR_VERBS.matcher(word);
    if (matcher.matches()) {
      final String possibleVerb = matcher.group(2).toLowerCase();
      List<AnalyzedToken> taggerTokens;
      taggerTokens = asAnalyzedTokenList(possibleVerb, dictLookup.lookup(possibleVerb));
      for (AnalyzedToken taggerToken : taggerTokens ) {
        final String posTag = taggerToken.getPOSTag();
        if (posTag != null) {
          final Matcher m = VERB.matcher(posTag);
          if (m.matches()) {
            String lemma=matcher.group(1).toLowerCase().concat(taggerToken.getLemma());
            additionalTaggedTokens.add(new AnalyzedToken(word, posTag, lemma));
          }
        }
      }
      return additionalTaggedTokens;
    }
    // Any well-formed noun with prefix ex- is tagged as a noun copying the original tags
    if (word.startsWith("ex")) {
      final String lowerWord = word.toLowerCase(conversionLocale);
      final String possibleNoun = lowerWord.replaceAll("^ex(.+)$", "$1");
      List<AnalyzedToken> taggerTokens;
      taggerTokens = asAnalyzedTokenList(possibleNoun,dictLookup.lookup(possibleNoun));
      for (AnalyzedToken taggerToken : taggerTokens) {
        final String posTag = taggerToken.getPOSTag();
        if (posTag != null) {
          final Matcher m = NOUN.matcher(posTag);
          if (m.matches()) {
            String lemma = "ex".concat(taggerToken.getLemma());
            additionalTaggedTokens.add(new AnalyzedToken(word, posTag, lemma));
          }
        }
      }
      return additionalTaggedTokens;
    }

View Full Code Here

  private String synthNonRegex(String word, String pos) throws IOException {
    return Arrays.toString(synth.synthesize(dummyToken(word), pos, false));
  }


  private AnalyzedToken dummyToken(String tokenStr) {
    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
  }

View Full Code Here

      final List<WordData> taggerTokens = morfologik.lookup(lowerCaseWord);
      if (taggerTokens != null) {
        for (WordData wd : taggerTokens) {
          final String[] tagsArr = wd.getStem().toString().split("\\+");
          for (final String currTag : tagsArr) {
            l.add(new AnalyzedToken(word, 
                wd.getTag().toString(), currTag));
          }
        }      
      }
      if (manualTagger != null) { // add user tags, if any
        final String[] manualTags = manualTagger.lookup(lowerCaseWord);
        if (manualTags != null) {
          for (int i = 0; i < manualTags.length/2; i=i+2) {
            l.add(new AnalyzedToken(word, manualTags[i+1], manualTags[i]));
      }
        }
      }


      if (l.isEmpty()) {
        l.add(new AnalyzedToken(word, null, null));
      }      
      tokenReadings.add(new AnalyzedTokenReadings(l, pos));
      pos += word.length();
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.languagetool.AnalyzedToken

org.apache.commons.lang.builder.EqualsBuilder

org.apache.commons.lang.builder.HashCodeBuilder

org.languagetool.chunking.EnglishChunkerTest

org.languagetool.chunking.EnglishChunkFilterTest

org.languagetool.dev.index.LanguageToolFilter

org.languagetool.dev.index.PatternRuleQueryBuilder

org.languagetool.rules.AbstractCompoundRule

org.languagetool.rules.ca.AccentuationCheckRule

org.languagetool.rules.ca.SimpleReplaceVerbsRule

org.languagetool.rules.ConfusionProbabilityRuleTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.