Package org.languagetool

Examples of org.languagetool.AnalyzedTokenReadings


      final List<AnalyzedToken> l = new ArrayList<>();

      AnalyzedToken at = asAnalyzedToken(word);
      l.add(at);
      tokenReadings.add(new AnalyzedTokenReadings(l, pos));
      pos += at.getToken().length();
    }

    return tokenReadings;
  }
View Full Code Here


    return tokenReadings;
  }

  @Override
  public final AnalyzedTokenReadings createNullToken(final String token, final int startPos) {
    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
  }
View Full Code Here

      return false;
    }
   
    if (!precSpace && follSpace) {
      // exception for English inches, e.g., 20"
      final AnalyzedTokenReadings prevToken = tokens[i - 1];
      if ("\"".equals(tokenStr)
          && NUMBER.matcher(prevToken.getToken()).matches()) {
        return false;
      }
      // Exception for English plural Saxon genitive
      // current disambiguation scheme is a bit too greedy
      // for adjectives
      if ("'".equals(tokenStr) && tokens[i].hasPosTag("POS")) {
        return false;
      }
      // puttin' on the Ritz
      if ("'".equals(tokenStr) && prevToken.hasPosTag("VBG")
          && prevToken.getToken().endsWith("in")) {
        return false;
      }
    }
    if (precSpace && !follSpace) {
      // hold 'em!
View Full Code Here

      }
    }

    // It must clear attributes, as it is creating new tokens.
    clearAttributes();
    final AnalyzedTokenReadings tr = tokenIter.next();
    AnalyzedToken at = tr.getAnalyzedToken(0);

    // add POS tag for sentence start.
    if (tr.isSentStart()) {
      // TODO: would be needed so negated tokens can match on something (see testNegatedMatchAtSentenceStart())
      // but breaks other cases:
      //termAtt.append("SENT_START");
      typeAtt.setType("pos");
      if (toLowerCase) {
        termAtt.append(POS_PREFIX.toLowerCase() + tr.getAnalyzedToken(0).getPOSTag().toLowerCase());
      } else {
        termAtt.append(POS_PREFIX + tr.getAnalyzedToken(0).getPOSTag());
      }
      return true;
    }

    // by pass the white spaces.
    if (tr.isWhitespace()) {
      return this.incrementToken();
    }

    offsetAtt.setOffset(tr.getStartPos(), tr.getStartPos() + at.getToken().length());

    for (int i = 0; i < tr.getReadingsLength(); i++) {
      at = tr.getAnalyzedToken(i);
      if (at.getPOSTag() != null) {
        if (toLowerCase) {
          posStack.push(POS_PREFIX.toLowerCase() + at.getPOSTag().toLowerCase());
        } else {
          posStack.push(POS_PREFIX + at.getPOSTag());
        }
      }
    }

    current = captureState();
    if (toLowerCase) {
      termAtt.append(tr.getAnalyzedToken(0).getToken().toLowerCase());
    } else {
      termAtt.append(tr.getAnalyzedToken(0).getToken());
    }

    return true;

  }
View Full Code Here

          break;
        }
      }
      if (unified) {
        if (tokCnt == 0 || tokSequence.isEmpty()) {
          tokSequence.add(new AnalyzedTokenReadings(aToken, 0));
        } else {
          tokSequence.get(0).addReading(aToken);
        }
      }
    }
View Full Code Here

        anyFeatUnified = anyFeatUnified || allFeatsUnified;
      }
      unifiedNext &= anyFeatUnified;
      if (unifiedNext) {
        if (tokSequence.size() == readingsCounter) {
          tokSequence.add(new AnalyzedTokenReadings(aToken, 0));
        } else {
           if (readingsCounter<tokSequence.size()) {
                 tokSequence.get(readingsCounter).addReading(aToken);
               } else {
                   unifiedNext = false;
View Full Code Here

  public final AnalyzedTokenReadings[] getUnifiedTokens() {
    if (tokSequence.isEmpty()) {
      return null;
    }
    if (!firstUnified) {
      final AnalyzedTokenReadings tmpATR;
      int first = 0;
      tmpFeaturesFound.add(true); // Bentley's search idea
      while (!tmpFeaturesFound.get(first)) {
        first++;
      }
      tmpFeaturesFound.remove(tmpFeaturesFound.size() - 1);
      if (first >= tmpFeaturesFound.size()) {
        return null;
      }
      // FIXME: why this happens??
      final int numRead = tokSequence.get(0).getReadingsLength();
      if (first < numRead) {
        tmpATR = new AnalyzedTokenReadings(tokSequence.get(0).getAnalyzedToken(
            first), 0);
        for (int i = first + 1; i <= Math.min(numRead - 1, tokCnt); i++) {
          if (tmpFeaturesFound.get(i)) {
            tmpATR.addReading(tokSequence.get(0).getAnalyzedToken(i));
          }
        }
        tokSequence.set(0, tmpATR);
      }
      firstUnified = true;
View Full Code Here

  public RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
   
    if (tokens.length>3) {
      final AnalyzedTokenReadings analyzedToken = tokens[1];
      final String token = analyzedToken.getToken();
      // avoid "..." etc. to be matched:
      boolean isWord = true;
      if (token.length() == 1) {
        final char c = token.charAt(0);
        if (!Character.isLetter(c)) {
          isWord = false;
        }
      }
     
      if (isWord && lastToken.equals(token)
          && !isException(token) && !isException(tokens[2].getToken()) && !isException(tokens[3].getToken())) {
        final String shortMsg;
        if (isAdverb(analyzedToken)) {
          shortMsg = messages.getString("desc_repetition_beginning_adv");
        } else if (beforeLastToken.equals(token)) {
          shortMsg = messages.getString("desc_repetition_beginning_word");
        } else {
          shortMsg = "";
        }
         
        if (!shortMsg.equals("")) {
          final String msg = shortMsg + " " + messages.getString("desc_repetition_beginning_thesaurus");
          final int startPos = analyzedToken.getStartPos();
          final int endPos = startPos + token.length();
          final RuleMatch ruleMatch = new RuleMatch(this, startPos, endPos, msg, shortMsg);
          ruleMatches.add(ruleMatch);
        }
      }
View Full Code Here

    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();

    RuleMatch prevRuleMatch = null;
    final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<>(MAX_TERMS);
    for (int i = 0; i < tokens.length + MAX_TERMS-1; i++) {
      final AnalyzedTokenReadings token;
      // we need to extend the token list so we find matches at the end of the original list:
      if (i >= tokens.length) {
        token = new AnalyzedTokenReadings(new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
      } else {
        token = tokens[i];
      }
      if (i == 0) {
        addToQueue(token, prevTokens);
        continue;
      }

      final StringBuilder sb = new StringBuilder();
      int j = 0;
      AnalyzedTokenReadings firstMatchToken = null;
      final List<String> stringsToCheck = new ArrayList<>();
      final List<String> origStringsToCheck = new ArrayList<>();    // original upper/lowercase spelling
      final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<>();
      for (AnalyzedTokenReadings atr : prevTokens) {
        if (j == 0) {
          firstMatchToken = atr;
        }
        sb.append(' ');
        sb.append(atr.getToken());
        if (j >= 1) {
          final String stringToCheck = normalize(sb.toString());
          stringsToCheck.add(stringToCheck);
          origStringsToCheck.add(sb.toString().trim());
          if (!stringToToken.containsKey(stringToCheck))
            stringToToken.put(stringToCheck, atr);
        }
        j++;
      }
      // iterate backwards over all potentially incorrect strings to make
      // sure we match longer strings first:
      for (int k = stringsToCheck.size()-1; k >= 0; k--) {
        final String stringToCheck = stringsToCheck.get(k);
        final String origStringToCheck = origStringsToCheck.get(k);
        if (incorrectCompounds.contains(stringToCheck)) {
          final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
          String msg = null;
          final List<String> replacement = new ArrayList<>();
          if (!noDashSuggestion.contains(stringToCheck)) {
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          }
          if (!hasAllUppercaseParts(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) {
            replacement.add(mergeCompound(origStringToCheck));
            msg = withoutHyphenMessage;
          }
          final String[] parts = stringToCheck.split(" ");
          if (parts.length > 0 && parts[0].length() == 1) {
            replacement.clear();
            replacement.add(origStringToCheck.replace(' ', '-'));
            msg = withHyphenMessage;
          } else if (replacement.isEmpty() || replacement.size() == 2) {     // isEmpty shouldn't happen
            msg = withOrWithoutHyphenMessage;
          }
          final RuleMatch ruleMatch = new RuleMatch(this, firstMatchToken.getStartPos(),
              atr.getStartPos() + atr.getToken().length(), msg, shortDesc);
          // avoid duplicate matches:
          if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
            prevRuleMatch = ruleMatch;
            break;
          }
View Full Code Here

     
      if (l.isEmpty()) {
        l.add(new AnalyzedToken(word, null, null));
      }
     
      tokenReadings.add(new AnalyzedTokenReadings(l, pos));
      pos += word.length();
    }

    return tokenReadings;
View Full Code Here

TOP

Related Classes of org.languagetool.AnalyzedTokenReadings

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.