Package morfologik.stemming

Examples of morfologik.stemming.IStemmer.lookup()


    }

    for (final String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerCaseWord = word.toLowerCase(RO_LOCALE);
      final List<WordData> taggerTokens = morfologik.lookup(lowerCaseWord);
      if (taggerTokens != null) {
        for (WordData wd : taggerTokens) {
          final String[] tagsArr = wd.getStem().toString().split("\\+");
          for (final String currTag : tagsArr) {
            l.add(new AnalyzedToken(word,
View Full Code Here


    final IStemmer dictLookup = new DictionaryLookup(getDictionary());

    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerWord = word.toLowerCase(conversionLocale);
      taggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(word));
      lowerTaggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(lowerWord));
      final boolean isLowercase = word.equals(lowerWord);
      final boolean isMixedCase = StringTools.isMixedCase(word);

      //normal case
View Full Code Here

    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerWord = word.toLowerCase(conversionLocale);
      taggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(word));
      lowerTaggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(lowerWord));
      final boolean isLowercase = word.equals(lowerWord);
      final boolean isMixedCase = StringTools.isMixedCase(word);

      //normal case
      addTokens(taggerTokens, l);
View Full Code Here

      //tag lowercase word with startuppercase word tags
      if (tagLowercaseWithUppercase) {
        if (lowerTaggerTokens.isEmpty() && taggerTokens.isEmpty()) {
          if (isLowercase) {
            upperTaggerTokens = asAnalyzedTokenList(word,
                dictLookup.lookup(StringTools.uppercaseFirstChar(word)));
            if (!upperTaggerTokens.isEmpty()) {
              addTokens(upperTaggerTokens, l);
            }
          }
        }
View Full Code Here

      // This loop happens when we need to retry probing the dictionary
      // which happens rarely when trying to remove suffixes -mañ, -se, etc.
      for (;;) {
        final List<AnalyzedToken> l = new ArrayList<>();
        final String lowerWord = probeWord.toLowerCase(conversionLocale);
        taggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(probeWord));
        lowerTaggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(lowerWord));
        final boolean isLowercase = probeWord.equals(lowerWord);

        // Normal case.
        addTokens(taggerTokens, l);
View Full Code Here

      // which happens rarely when trying to remove suffixes -mañ, -se, etc.
      for (;;) {
        final List<AnalyzedToken> l = new ArrayList<>();
        final String lowerWord = probeWord.toLowerCase(conversionLocale);
        taggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(probeWord));
        lowerTaggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(lowerWord));
        final boolean isLowercase = probeWord.equals(lowerWord);

        // Normal case.
        addTokens(taggerTokens, l);
View Full Code Here

        // Uppercase.
        if (lowerTaggerTokens.isEmpty() && taggerTokens.isEmpty()) {
          if (isLowercase) {
            upperTaggerTokens = asAnalyzedTokenList(word,
                dictLookup.lookup(StringTools.uppercaseFirstChar(probeWord)));
            if (!upperTaggerTokens.isEmpty()) {
              addTokens(upperTaggerTokens, l);
            }
          }
          if (l.isEmpty()) {
View Full Code Here

      final AvsAnRule rule = new AvsAnRule(null);
      return new String[] { rule.suggestAorAn(token.getToken()) };
    }

    final IStemmer synthesizer = createStemmer();
    final List<WordData> wordData = synthesizer.lookup(token.getLemma() + "|" + posTag);
    final List<String> wordForms = new ArrayList<>();
    for (WordData wd : wordData) {
      wordForms.add(wd.getStem().toString());
    }
    return wordForms.toArray(new String[wordForms.size()]);
View Full Code Here

      // normal case, manual tagger
      addTokens(manualTaggerTokens, l);
      // normal case, tagger dictionary
      if (manualTaggerTokens.isEmpty()) {
        addTokens(asAnalyzedTokenList(word, dictLookup.lookup(word)), l);
      }
      // tag non-lowercase words (alluppercase or startuppercase but not mixedcase)
      // with lowercase word tags
      if (!isLowercase && !isMixedCase) {
        // manual tagger
View Full Code Here

      if (!isLowercase && !isMixedCase) {
        // manual tagger
        addTokens(manualLowerTaggerTokens, l);
        // tagger dictionary
        if (manualLowerTaggerTokens.isEmpty()) {
          addTokens(asAnalyzedTokenList(word, dictLookup.lookup(lowerWord)), l);
        }
      }
      // additional tagging with prefixes
      if (l.isEmpty() && !isMixedCase) {
        addTokens(additionalTags(word), l);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.