Package org.languagetool.tagging.de

Examples of org.languagetool.tagging.de.AnalyzedGermanTokenReadings


        continue;
      }
      if (i > 0 && (tokens[i-1].getToken().equals("Herr") || tokens[i-1].getToken().equals("Herrn") || tokens[i-1].getToken().equals("Frau")) ) {   // "Frau Stieg" could be a name, ignore
        continue;
      }
      final AnalyzedGermanTokenReadings analyzedToken = (AnalyzedGermanTokenReadings)tokens[i];
      final String token = analyzedToken.getToken();
      List<AnalyzedGermanToken> readings = analyzedToken.getGermanReadings();
      AnalyzedGermanTokenReadings analyzedGermanToken2;
     
      boolean isBaseform = false;
      if (analyzedToken.getReadingsLength() >= 1 && analyzedToken.hasLemma(token)) {
        isBaseform = true;
      }
      if ((readings == null || analyzedToken.getAnalyzedToken(0).getPOSTag() == null || analyzedToken.hasReadingOfType(GermanToken.POSType.VERB))
          && isBaseform) {
        // no match, e.g. for "Groß": try if there's a match for the lowercased word:
        analyzedGermanToken2 = tagger.lookup(token.toLowerCase());
        if (analyzedGermanToken2 != null) {
          readings = analyzedGermanToken2.getGermanReadings();
        }
        boolean nextTokenIsPersonalPronoun = false;
        if (i < tokens.length - 1) {
          // avoid false alarm for "Das haben wir getan." etc:
          nextTokenIsPersonalPronoun = tokens[i + 1].hasPartialPosTag("PRO:PER") || tokens[i + 1].getToken().equals("Sie");
        }
        potentiallyAddLowercaseMatch(ruleMatches, tokens[i], prevTokenIsDas, token, nextTokenIsPersonalPronoun);
      }
      prevTokenIsDas = nounIndicators.contains(tokens[i].getToken().toLowerCase());
      if (readings == null) {
        continue;
      }
      final boolean hasNounReading = analyzedToken.hasReadingOfType(GermanToken.POSType.NOMEN);
      if (hasNounReading) {  // it's the spell checker's task to check that nouns are uppercase
        continue;
      }
      // TODO: this lookup should only happen once:
      analyzedGermanToken2 = tagger.lookup(token.toLowerCase());
      if (analyzedToken.getAnalyzedToken(0).getPOSTag() == null && analyzedGermanToken2 == null) {
        continue;
      }
      if (analyzedToken.getAnalyzedToken(0).getPOSTag() == null && analyzedGermanToken2 != null
          && analyzedGermanToken2.getAnalyzedToken(0).getPOSTag() == null) {
        // unknown word, probably a name etc
        continue;
      }
      potentiallyAddUppercaseMatch(ruleMatches, tokens, i, analyzedToken, token);
    }
View Full Code Here


      final String posToken = tokens[i].getAnalyzedToken(0).getPOSTag();
      if (posToken != null && posToken.equals(JLanguageTool.SENTENCE_START_TAGNAME))
        continue;
      //AnalyzedGermanToken analyzedToken = new AnalyzedGermanToken(tokens[i]);
     
      final AnalyzedGermanTokenReadings analyzedToken = (AnalyzedGermanTokenReadings)tokens[i];
      final boolean relevantPronoun = isRelevantPronoun(tokens, i);
    
      boolean ignore = couldBeRelativeClause(tokens, i);
      if (i > 0) {
        final String prevToken = tokens[i-1].getToken().toLowerCase();
        if ((tokens[i].getToken().equals("eine") || tokens[i].getToken().equals("einen"))
            && (prevToken.equals("der") || prevToken.equals("die") || prevToken.equals("das") || prevToken.equals("des") || prevToken.equals("dieses"))) {
          // TODO: "der eine Polizist" -> nicht ignorieren, sondern "der polizist" checken; "auf der einen Seite"
          ignore = true;
        }
      }
     
      // avoid false alarm on "nichts Gutes" and "alles Gute"
      if (analyzedToken.getToken().equals("nichts") || analyzedToken.getToken().equals("alles")
          || analyzedToken.getToken().equals("dies")) {
        ignore = true;
      }

      if ((analyzedToken.hasReadingOfType(POSType.DETERMINER) || relevantPronoun) && !ignore) {
        int tokenPos = i + 1;
        if (tokenPos >= tokens.length)
          break;
        AnalyzedGermanTokenReadings nextToken = (AnalyzedGermanTokenReadings)tokens[tokenPos];
        nextToken = maybeAddAdjectiveReadings(nextToken, tokens, tokenPos);
        if (isNonPredicativeAdjective(nextToken)) {
          tokenPos = i + 2;
          if (tokenPos >= tokens.length)
            break;
          final AnalyzedGermanTokenReadings nextNextToken = (AnalyzedGermanTokenReadings)tokens[tokenPos];
          if (nextNextToken.hasReadingOfType(POSType.NOMEN)) {
            // TODO: add a case (checkAdjNounAgreement) for special cases like "deren",
            // e.g. "deren komisches Geschenke" isn't yet detected as incorrect
            final RuleMatch ruleMatch = checkDetAdjNounAgreement((AnalyzedGermanTokenReadings)tokens[i],
                nextToken, (AnalyzedGermanTokenReadings)tokens[i+2]);
            if (ruleMatch != null) {
View Full Code Here

    }
    return false;
  }

  private boolean isRelevantPronoun(AnalyzedTokenReadings[] tokens, int pos) {
    final AnalyzedGermanTokenReadings analyzedToken = (AnalyzedGermanTokenReadings)tokens[pos];
    boolean relevantPronoun = analyzedToken.hasReadingOfType(POSType.PRONOMEN);
    // avoid false alarms:
    final String token = tokens[pos].getToken();
    if (pos > 0 && tokens[pos-1].getToken().equalsIgnoreCase("vor") && tokens[pos].getToken().equalsIgnoreCase("allem")) {
      relevantPronoun = false;
    } else if (PRONOUNS_TO_BE_IGNORED.contains(token.toLowerCase())) {
View Full Code Here

      AnalyzedTokenReadings[] tokens, int tokenPos) {
    final String nextTerm = nextToken.getToken();
    // Just a heuristic: nouns and proper nouns that end with "er" are considered
    // city names:
    if (nextTerm.endsWith("er") && tokens.length > tokenPos+1 && !ER_TO_BE_IGNORED.contains(nextTerm)) {
      final AnalyzedGermanTokenReadings nextNextToken = (AnalyzedGermanTokenReadings)tokens[tokenPos+1];
      try {
        final AnalyzedGermanTokenReadings nextATR = tagger.lookup(nextTerm.substring(0, nextTerm.length()-2));
        final AnalyzedGermanTokenReadings nextNextATR = tagger.lookup(nextNextToken.getToken());
        //System.err.println("nextATR: " + nextATR);
        //System.err.println("nextNextATR: " + nextNextATR);
        // "Münchner": special case as cutting off last two characters doesn't produce city name:
        if ("Münchner".equals(nextTerm) ||
            (nextATR != null &&
            // tagging in Morphy for cities is not coherent:
            (nextATR.hasReadingOfType(POSType.PROPER_NOUN) || nextATR.hasReadingOfType(POSType.NOMEN) &&
            nextNextATR != null && nextNextATR.hasReadingOfType(POSType.NOMEN)))) {
          final AnalyzedGermanToken[] adjReadings = new AnalyzedGermanToken[ADJ_READINGS.length];
          for (int j = 0; j < ADJ_READINGS.length; j++) {
            adjReadings[j] = new AnalyzedGermanToken(nextTerm, ADJ_READINGS[j], null);
          }
          nextToken = new AnalyzedGermanTokenReadings(adjReadings, nextToken.getStartPos());
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
View Full Code Here

TOP

Related Classes of org.languagetool.tagging.de.AnalyzedGermanTokenReadings

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.