Package morfologik.stemming

Examples of morfologik.stemming.DictionaryLookup


      final AvsAnRule rule = new AvsAnRule(null);
      return new String[] { rule.suggestAorAn(token.getToken()) };
    } else {
      if (synthesizer == null) {
        final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(RESOURCE_FILENAME);
        synthesizer = new DictionaryLookup(Dictionary.read(url));
      }
      final List<WordData> wordData = synthesizer.lookup(token.getLemma() + "|" + posTag);
      final List<String> wordForms = new ArrayList<>();
      for (WordData wd : wordData) {
        wordForms.add(wd.getStem().toString());
View Full Code Here


    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    // caching Lametyzator instance - lazy init
    if (morfologik == null) {     
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(DICT_FILENAME);
      morfologik = new DictionaryLookup(Dictionary.read(url));     
    }
    if (manualTagger == null) {
      manualTagger = new ManualTagger(JLanguageTool.getDataBroker().getFromResourceDirAsStream(USER_DICT_FILENAME));
    }
    if (compoundTokenizer == null) {
View Full Code Here

  }

  protected void initSynthesizer() throws IOException {
    if (synthesizer == null) {
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(resourceFileName);
      synthesizer = new DictionaryLookup(Dictionary.read(url));
    }
  }
View Full Code Here

        }
        return ret;       
    }
   
    private IStemmer loadDictionary() throws IOException {
        IStemmer dictLookup = new DictionaryLookup(Dictionary.read(dictFile));
        return dictLookup;
    }
View Full Code Here

    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    // caching IStemmer instance - lazy init
    if (dictLookup == null) {
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getFileName());
      dictLookup = new DictionaryLookup(Dictionary.read(url));
    }

    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerWord = word.toLowerCase(conversionLocale);
View Full Code Here

    return (String) method.invoke(null, argObjects);
  }

  public static void testDictionary(BaseTagger tagger, Language language) throws IOException {
    final Dictionary dictionary = Dictionary.read(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(tagger.getFileName()));
    final DictionaryLookup lookup = new DictionaryLookup(dictionary);
    for (WordData wordData : lookup) {
      if (wordData.getTag() == null || wordData.getTag().length() == 0) {
        System.err.println("**** Warning: " + language + ": the word " + wordData.getWord() + "/" + wordData.getStem() + " lacks a POS tag in the dictionary.");
      }
    }
View Full Code Here

  public boolean existsWord(String word) throws IOException {
    // caching Lametyzator instance - lazy init
    if (dictLookup == null) {
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(DICT_FILENAME);
      dictLookup = new DictionaryLookup(Dictionary.read(url));
    }
    final String lowerWord = word.toLowerCase(conversionLocale);
    List<WordData> posTagsFromDict = dictLookup.lookup(lowerWord);
    if (posTagsFromDict.isEmpty()) {
      posTagsFromDict = dictLookup.lookup(word);
View Full Code Here

    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    // caching Lametyzator instance - lazy init
    if (morfologik == null) {     
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(binaryDictPath);
      morfologik = new DictionaryLookup(Dictionary.read(url));
    }
    if (manualTagger == null && plaintextDictPath != null) {
      manualTagger = new ManualTagger(JLanguageTool.getDataBroker().getFromResourceDirAsStream(plaintextDictPath));
    }
View Full Code Here

    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    // caching IStemmer instance - lazy init
    if (dictLookup == null) {
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getFileName());
      dictLookup = new DictionaryLookup(Dictionary.read(url));
    }

    Matcher matcher;
    for (String word : sentenceTokens) {
      String probeWord = word;
View Full Code Here

    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    // caching Lametyzator instance - lazy init
    if (morfologik == null) {     
      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getFileName());
      morfologik = new DictionaryLookup(Dictionary.read(url));
    }

    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerWord = word.toLowerCase(plLocale);
View Full Code Here

TOP

Related Classes of morfologik.stemming.DictionaryLookup

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.