/* LanguageTool, a natural language style checker
* Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.patterns;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.lang.StringUtils;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.rules.Category;
import org.languagetool.rules.IncorrectExample;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Loads {@link PatternRule}s from a false friends XML file and extends the
 * message of each loaded rule with the suggestions collected while parsing.
 *
 * @author Daniel Naber
 */
public class FalseFriendRuleLoader extends DefaultHandler {

  public FalseFriendRuleLoader() {
  }

  /**
   * Opens the given file and delegates to
   * {@link #getRules(InputStream, Language, Language)}; the stream is closed
   * before this method returns.
   *
   * @param file XML file with false friend rules
   * @param language language of the text to be checked
   * @param motherTongue language used to select translations and messages
   * @return the rules produced by the handler for this language pair
   * @throws IOException if the file cannot be read, or wrapping any parser
   *         configuration or SAX error raised while parsing
   * @since 2.3
   */
  public final List<PatternRule> getRules(final File file, final Language language, final Language motherTongue) throws IOException {
    try (InputStream in = new FileInputStream(file)) {
      return getRules(in, language, motherTongue);
    } catch (ParserConfigurationException | SAXException e) {
      throw new IOException("Could not load false friend rules from " + file, e);
    }
  }

  /**
   * Parses the stream with a {@link FalseFriendRuleHandler} and returns the
   * rules it created. For every rule whose ID has an entry in the handler's
   * suggestion map, the localized "false_friend_suggestion" text (filled
   * with the suggestions) is appended to the rule's message.
   *
   * @param stream XML input with false friend rules; not closed here
   * @param textLanguage language of the text to be checked
   * @param motherTongue language whose locale selects the message bundle
   * @return the rules produced by the handler
   */
  public final List<PatternRule> getRules(final InputStream stream,
      final Language textLanguage, final Language motherTongue)
      throws ParserConfigurationException, SAXException, IOException {
    final FalseFriendRuleHandler ruleHandler =
        new FalseFriendRuleHandler(textLanguage, motherTongue);
    final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
    // do not fetch external DTDs while parsing
    parser.getXMLReader().setFeature(
        "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    parser.parse(stream, ruleHandler);

    final List<PatternRule> loadedRules = ruleHandler.getRules();
    final ResourceBundle bundle = ResourceBundle.getBundle(
        JLanguageTool.MESSAGE_BUNDLE, motherTongue.getLocale());
    for (final PatternRule rule : loadedRules) {
      final List<String> ruleSuggestions = ruleHandler.getSuggestionMap().get(rule.getId());
      if (ruleSuggestions == null) {
        // nothing was collected for this rule ID, keep its message as it is
        continue;
      }
      final MessageFormat suggestionFormat =
          new MessageFormat(bundle.getString("false_friend_suggestion"));
      final Object[] formatArgs = { formatSuggestions(ruleSuggestions) };
      rule.setMessage(rule.getMessage() + " " + suggestionFormat.format(formatArgs));
    }
    return loadedRules;
  }

  /**
   * Wraps every entry in {@code <suggestion>} tags and joins the results
   * with {@code ", "}.
   */
  private String formatSuggestions(final List<String> suggestions) {
    final StringBuilder result = new StringBuilder();
    String separator = "";
    for (final String suggestion : suggestions) {
      result.append(separator)
          .append("<suggestion>")
          .append(suggestion)
          .append("</suggestion>");
      separator = ", ";
    }
    return result.toString();
  }

}
/**
 * SAX handler that builds false friend rules from the XML events of a false
 * friends file.
 *
 * <p>A rule is created at the end of a rule element only if the language of
 * its pattern matches the text language and at least one translation in the
 * mother tongue was seen for it. Translations that go the opposite way
 * (pattern in the mother tongue, translation in the text language) are
 * collected as suggestions and stored per rule group in
 * {@link #getSuggestionMap()}.
 */
class FalseFriendRuleHandler extends XMLRuleHandler {

  /** Definitions of values in XML files. */
  private static final String TRANSLATION = "translation";

  private final ResourceBundle messages;
  private final MessageFormat formatter;
  private final Language textLanguage;
  private final Language motherTongue;

  private boolean defaultOff;
  /** Language of the current pattern, or null if that language is not supported. */
  private Language language;
  /** Set once a translation element in the mother tongue has been seen. */
  private Language translationLanguage;
  /** Language of the translation element being read; null if unsupported or outside one. */
  private Language currentTranslationLanguage;
  /** Mother-tongue translations of the current rule. */
  private List<StringBuilder> translations = new ArrayList<>();
  /** Text of the translation element being read. */
  private StringBuilder translation = new StringBuilder();
  /** Suggestions collected since the last end of a rule group. */
  private final List<String> suggestions = new ArrayList<>();
  // rule ID -> list of translations:
  private final Map<String, List<String>> suggestionMap = new HashMap<>();
  private boolean inTranslation;

  /**
   * @param textLanguage language of the text to be checked
   * @param motherTongue language used for translations; its locale selects
   *        the message bundle and the formatter locale
   */
  public FalseFriendRuleHandler(final Language textLanguage, final Language motherTongue) {
    messages = ResourceBundle.getBundle(
        JLanguageTool.MESSAGE_BUNDLE, motherTongue.getLocale());
    formatter = new MessageFormat("");
    formatter.setLocale(motherTongue.getLocale());
    this.textLanguage = textLanguage;
    this.motherTongue = motherTongue;
  }

  /**
   * @return the internal (mutable) map from rule ID to the suggestions
   *         collected for it
   */
  public Map<String, List<String>> getSuggestionMap() {
    return suggestionMap;
  }

  // ===========================================================
  // SAX DocumentHandler methods
  // ===========================================================

  @Override
  public void startElement(final String namespaceURI, final String lName,
      final String qName, final Attributes attrs) throws SAXException {
    if (qName.equals(RULE)) {
      translations = new ArrayList<>();
      id = attrs.getValue("id");
      // a rule group that is off by default keeps all of its rules off;
      // otherwise the rule's own attribute decides
      if (!(inRuleGroup && defaultOff)) {
        defaultOff = "off".equals(attrs.getValue("default"));
      }
      if (inRuleGroup && id == null) {
        id = ruleGroupId;
      }
      correctExamples = new ArrayList<>();
      incorrectExamples = new ArrayList<>();
    } else if (qName.equals(PATTERN)) {
      inPattern = true;
      final String languageStr = attrs.getValue("lang");
      // Reset to null for unsupported languages so that the language of a
      // previously parsed pattern is never applied to this rule.
      language = Language.isLanguageSupported(languageStr)
          ? Language.getLanguageForShortName(languageStr)
          : null;
    } else if (qName.equals(TOKEN)) {
      setToken(attrs);
    } else if (qName.equals(TRANSLATION)) {
      inTranslation = true;
      final String languageStr = attrs.getValue("lang");
      if (Language.isLanguageSupported(languageStr)) {
        final Language tmpLang = Language.getLanguageForShortName(languageStr);
        currentTranslationLanguage = tmpLang;
        if (tmpLang.equalsConsiderVariantsIfSpecified(motherTongue)) {
          translationLanguage = tmpLang;
        }
      }
    } else if (qName.equals(EXAMPLE)) {
      // constant-first comparison: an example without a type attribute is
      // ignored instead of causing a NullPointerException
      final String exampleType = attrs.getValue(TYPE);
      if ("correct".equals(exampleType)) {
        inCorrectExample = true;
        correctExample = new StringBuilder();
      } else if ("incorrect".equals(exampleType)) {
        inIncorrectExample = true;
        incorrectExample = new StringBuilder();
      }
    } else if (qName.equals(MESSAGE)) {
      inMessage = true;
      message = new StringBuilder();
    } else if (qName.equals(RULEGROUP)) {
      ruleGroupId = attrs.getValue("id");
      inRuleGroup = true;
      defaultOff = "off".equals(attrs.getValue(DEFAULT));
    }
  }

  @Override
  public void endElement(final String namespaceURI, final String sName,
      final String qName) throws SAXException {
    if (qName.equals(RULE)) {
      if (matchesRequestedLanguages()) {
        rules.add(buildFalseFriendRule());
      }
      if (elementList != null) {
        elementList.clear();
      }
    } else if (qName.equals(TOKEN)) {
      finalizeTokens();
    } else if (qName.equals(PATTERN)) {
      inPattern = false;
    } else if (qName.equals(TRANSLATION)) {
      // currentTranslationLanguage can be null if the language is not supported
      if (currentTranslationLanguage != null) {
        if (currentTranslationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)) {
          translations.add(translation);
        }
        // language can be null if the pattern's language is not supported
        if (language != null
            && currentTranslationLanguage.equalsConsiderVariantsIfSpecified(textLanguage)
            && language.equalsConsiderVariantsIfSpecified(motherTongue)) {
          suggestions.add(translation.toString());
        }
      }
      translation = new StringBuilder();
      inTranslation = false;
      currentTranslationLanguage = null;
    } else if (qName.equals(EXAMPLE)) {
      if (inCorrectExample) {
        correctExamples.add(correctExample.toString());
      } else if (inIncorrectExample) {
        incorrectExamples
            .add(new IncorrectExample(incorrectExample.toString()));
      }
      inCorrectExample = false;
      inIncorrectExample = false;
      correctExample = new StringBuilder();
      incorrectExample = new StringBuilder();
    } else if (qName.equals(MESSAGE)) {
      inMessage = false;
    } else if (qName.equals(RULEGROUP)) {
      if (!suggestions.isEmpty()) {
        // stored under the ID that is current when the group ends
        final List<String> l = new ArrayList<>(suggestions);
        suggestionMap.put(id, l);
        suggestions.clear();
      }
      inRuleGroup = false;
    }
  }

  /**
   * Tells whether the rule that just ended should be turned into a pattern
   * rule: its pattern language must be supported and match the text
   * language, a mother-tongue translation must exist, and the pattern
   * language must not be the mother tongue itself.
   */
  private boolean matchesRequestedLanguages() {
    return language != null
        && language.equalsConsiderVariantsIfSpecified(textLanguage)
        && translationLanguage != null
        && translationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)
        // NOTE(review): identity comparison kept from the original code;
        // presumably Language instances are shared - confirm
        && language != motherTongue
        && !translations.isEmpty();
  }

  /**
   * Creates the rule for the tokens collected in {@code elementList}, using
   * the localized texts of the message bundle, and copies examples, category
   * and the default-off state onto it.
   */
  private PatternRule buildFalseFriendRule() {
    formatter.applyPattern(messages.getString("false_friend_hint"));
    final String tokensAsString = StringUtils.join(elementList, " ").replace('|', '/');
    final Object[] messageArguments = { tokensAsString,
        messages.getString(textLanguage.getShortName()),
        formatTranslations(translations),
        messages.getString(motherTongue.getShortName()) };
    final String hint = formatter.format(messageArguments);
    final PatternRule rule = new FalseFriendPatternRule(id, language, elementList,
        messages.getString("false_friend_desc") + " " + tokensAsString,
        hint, messages.getString("false_friend"));
    rule.setCorrectExamples(correctExamples);
    rule.setIncorrectExamples(incorrectExamples);
    rule.setCategory(new Category(messages.getString("category_false_friend")));
    if (defaultOff) {
      rule.setDefaultOff();
    }
    return rule;
  }

  /**
   * Puts every entry in double quotes and joins the results with
   * {@code ", "}.
   */
  private String formatTranslations(final List<StringBuilder> items) {
    final StringBuilder sb = new StringBuilder();
    String separator = "";
    for (final StringBuilder item : items) {
      sb.append(separator).append('"').append(item.toString()).append('"');
      separator = ", ";
    }
    return sb.toString();
  }

  /**
   * Appends the character data to the buffer of the element currently being
   * read: a token inside a pattern, a correct or incorrect example, or a
   * translation. Other text is ignored.
   */
  @Override
  public void characters(final char[] buf, final int offset, final int len) {
    final String s = new String(buf, offset, len);
    if (inToken && inPattern) {
      elements.append(s);
    } else if (inCorrectExample) {
      correctExample.append(s);
    } else if (inIncorrectExample) {
      incorrectExample.append(s);
    } else if (inTranslation) {
      translation.append(s);
    }
  }

}