Package org.languagetool.dev.eval

Source Code of org.languagetool.dev.eval.LanguageToolEvaluator

/* LanguageTool, a natural language style checker
* Copyright (C) 2014 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
* USA
*/
package org.languagetool.dev.eval;

import org.languagetool.JLanguageTool;
import org.languagetool.language.BritishEnglish;
import org.languagetool.language.English;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.languagemodel.LuceneLanguageModel;
import org.languagetool.markup.AnnotatedText;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.en.EnglishConfusionProbabilityRule;

import java.io.File;
import java.io.IOException;
import java.util.List;

/**
* Wrapper around LanguageTool for easier use from the evaluation scripts.
* @since 2.7
*/
class LanguageToolEvaluator implements Evaluator {

  private final JLanguageTool langTool;
  private final LanguageModel languageModel;

  LanguageToolEvaluator(File indexTopDir) throws IOException {
    langTool = new JLanguageTool(new BritishEnglish());
    langTool.activateDefaultPatternRules();
    disableRules();
    if (indexTopDir != null) {
      if (indexTopDir.isDirectory()) {
        languageModel = new LuceneLanguageModel(indexTopDir);
        System.out.println("Using Lucene language model from " + languageModel);
        EnglishConfusionProbabilityRule probabilityRule =
                new EnglishConfusionProbabilityRule(JLanguageTool.getMessageBundle(), languageModel, new English());
        //new EnglishConfusionProbabilityRule(JLanguageTool.getMessageBundle(), languageModel, new File("/tmp/languagetool_network.net"));
        langTool.addRule(probabilityRule);
      } else {
        throw new RuntimeException("Does not exist or not a directory: " + indexTopDir);
      }
    } else {
      languageModel = null;
    }
  }
 
  @Override
  public void close() {
    if (languageModel != null) {
      languageModel.close();
    }
  }

  private void disableRules() {
    // The Pedler corpus has some real errors that have no error markup, so we disable
    // some rules that typically match those:
    langTool.disableRule("COMMA_PARENTHESIS_WHITESPACE");
    langTool.disableRule("SENT_START_CONJUNCTIVE_LINKING_ADVERB_COMMA");
    langTool.disableRule("EN_QUOTES");
    langTool.disableRule("I_LOWERCASE");
    //langTool.disableRule("MORFOLOGIK_RULE_EN_GB");  // disabling spell rule improves precision 0.77 -> 0.88 (as of 2014-07-18)
    // turn off style rules:
    langTool.disableRule("LITTLE_BIT");
    langTool.disableRule("ALL_OF_THE");
    langTool.disableRule("SOME_OF_THE");
    // British English vs. American English - not clear whether the corpus contains only BE:
    langTool.disableRule("EN_GB_SIMPLE_REPLACE");
    langTool.disableRule("APARTMENT-FLAT");
  }

  @Override
  public List<RuleMatch> check(AnnotatedText annotatedText) throws IOException {
    return langTool.check(annotatedText);
  }
}
TOP

Related Classes of org.languagetool.dev.eval.LanguageToolEvaluator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.