// Package games.stendhal.common.parser
//
// Source code of games.stendhal.common.parser.WordList$Verb

/* $Id: WordList.java,v 1.3 2011/05/02 19:21:03 martinfuchs Exp $ */
/***************************************************************************
*                   (C) Copyright 2003-2010 - Stendhal                    *
***************************************************************************
***************************************************************************
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
***************************************************************************/
package games.stendhal.common.parser;

import games.stendhal.common.grammar.Grammar;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.AbstractList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;

import marauroa.common.Log4J;
import marauroa.common.io.UnicodeSupportingInputStreamReader;

import org.apache.log4j.Logger;

/**
* WordList stores a list of words recognized by the ConversationParser. Words
* are categorised by type (noun, verb, adjective, preposition) and optionally
* sub-types (animals, food, fluids, ...).
*
* @author Martin Fuchs
*/

final public class WordList {

  private static final Logger logger = Logger.getLogger(WordList.class);

  // Type strings used when names and verbs are registered dynamically:
  // the base type string with the "dynamic" suffix appended.
  public static final String SUBJECT_NAME_DYNAMIC = ExpressionType.SUBJECT_NAME
      + ExpressionType.SUFFIX_DYNAMIC;

  public static final String VERB_DYNAMIC = ExpressionType.VERB
      + ExpressionType.SUFFIX_DYNAMIC;

  // Name of the resource file the word list is loaded from.
  public static final String WORDS_FILENAME = "words.txt";

  // All known words, keyed by the trimmed word (see trimWord()).
  private Map<String, WordEntry> words = new TreeMap<String, WordEntry>();

  // Registered multi-word names, keyed by the lower-cased first word of the name.
  private Map<String, Set<CompoundName>> compoundNames = new HashMap<String, Set<CompoundName>>();

  // Reference counts of registered subject names, keyed by the trimmed name
  // (see registerSubjectName() and unregisterSubjectName()).
  private Map<String, Integer> subjectRefCount = new HashMap<String, Integer>();

  /** The global word list instance; assigned by initInstance() and returned by getInstance(). */
  static private WordList instance;

  // Initialise logging and the word list when the class is loaded; the
  // word list is read from the resource file "words.txt" in the class path.
  static {
    Log4J.init();

    initInstance();
  }

  /**
   * Initialises the WordList instance.
   */
  private static void initInstance() {
    // read word list from "words.txt"
    instance = new WordList();

    instance.readFromResources();
  }

  /**
   * Reads the word list from the resource file "words.txt".
   */
  private void readFromResources() {

    final InputStream str = WordList.class.getResourceAsStream(WORDS_FILENAME);

        if (str != null) {
        try {
          final BufferedReader reader = new BufferedReader(new UnicodeSupportingInputStreamReader(str, "UTF-8"));

          try {
            read(reader, null);
          } catch (final IOException e) {
            logger.error("error while reading resource file '"+WORDS_FILENAME+"'", e);
          } finally {
            try {
              reader.close();
            } catch (IOException e) {
              logger.error("error while closing reader stream for '"+WORDS_FILENAME+"'", e);
            }
          }
      } finally {
          try {
            str.close();
          } catch (IOException e) {
            logger.warn("exception on closing resource stream", e);
          }
        }
        } else {
            logger.error("unable to locate resource file '"+WORDS_FILENAME+"'");           
        }
  }

  /**
   * Returns a reference to the global word list instance.
   *
   * @return WordList
   */
  public static WordList getInstance() {
    return instance;
  }

  /**
   * Reads word list from reader object.
   *
   * @param reader
   * @param comments
   * @throws IOException
   */
  public void read(final BufferedReader reader, final List<String> comments) throws IOException {
    while (true) {
      final String line = reader.readLine();
      if (line == null) {
        break;
      }

      if (line.startsWith("#")) {
        if (comments != null) {
          comments.add(line);
        }
      } else {
        final StringTokenizer tk = new StringTokenizer(line);

        if (!tk.hasMoreTokens()) {
          continue;
        }

        String key = tk.nextToken();

        key = trimWord(key);
        final WordEntry entry = new WordEntry();
        entry.setNormalized(key);

        readEntryLine(key, tk, entry);
        addEntry(key, entry);
      }
    }

    // calculate the hash value from all word entries
//    calculateHash();
  }

  /**
   * Reads one line of the word list and adds the new entry.
   *
   * @param key
   *
   * @param tk
   * @param entry
   */
  private void readEntryLine(final String key, final StringTokenizer tk,
      final WordEntry entry) {
    if (tk.hasMoreTokens()) {
      entry.setType(new ExpressionType(tk.nextToken()));

      if (tk.hasMoreTokens()) {
        String s = tk.nextToken();

        if (s.charAt(0) == '=') {
          entry.setNormalized(trimWord(s.substring(1)));
          if (tk.hasMoreTokens()) {
            s = tk.nextToken();
          } else {
            s = null;
          }
        }

        if (s != null) {
          if (entry.isNumeral()) {
            entry.setValue(Integer.valueOf(s));
          } else {
            entry.setPlurSing(s);
          }
        }
      }

      final String normalized = entry.getNormalized();

      if (Character.isLowerCase(entry.getTypeString().charAt(0))) {
        // Type identifiers are always upper case, so a word in
        // lower case must be a plural.
        entry.setType(new ExpressionType(ExpressionType.OBJECT));
        entry.setPlurSing(trimWord(entry.getTypeString()));
      } else if ((entry.getPlurSing() == null)
          && entry.isObject()) {
        // complete missing plural expressions using the
        // Grammar.plural() function
        final String plural = Grammar.plural(normalized);

        // only store single word plurals
        if (plural.indexOf(' ') == -1) {
          entry.setPlurSing(plural);
        }
      } else if (entry.getPlurSing() != null) {
        // check plural strings using the Grammar.plural() function
        if (!entry.isPronoun() && !entry.isObsessional() &&
          !normalized.equals("is")) {
          String plural = Grammar.plural(key);

          if ((plural.indexOf(' ') == -1)
              && !plural.equals(entry.getPlurSing())) {
            // retry with normalized in case it differs from key
            plural = Grammar.plural(normalized);

            if ((plural.indexOf(' ') == -1)
                && !plural.equals(entry.getPlurSing())) {
              logger.warn(String.format(
                  "suspicious plural: %s -> %s (%s?)", key,
                  entry.getPlurSing(), plural));
            }
          }
        }
      }

      while (tk.hasMoreTokens()) {
        logger.warn("superfluous trailing word in words.txt: "
            + tk.nextToken());
      }
    }
  }

  /**
   * Add an entry to the word list.
   *
   * @param key
   * @param entry
   */
  private void addEntry(final String key, final WordEntry entry) {
    words.put(trimWord(key), entry);

    // store plural and associate with singular form
    if ((entry.getPlurSing() != null)
        && !entry.getPlurSing().equals(entry.getNormalized())) {
      final WordEntry pluralEntry = new WordEntry();

      pluralEntry.setNormalized(entry.getPlurSing());
      pluralEntry.setType(new ExpressionType(entry.getTypeString()
          + ExpressionType.SUFFIX_PLURAL));
      pluralEntry.setPlurSing(entry.getNormalized());
      pluralEntry.setValue(entry.getValue());

      final WordEntry prev = words.put(entry.getPlurSing(), pluralEntry);

      if (prev != null) {
        logger.debug(String.format("ambiguous plural: %s/%s -> %s",
            pluralEntry.getPlurSing(), prev.getPlurSing(),
            entry.getPlurSing()));

        pluralEntry.setPlurSing(null);
        prev.setPlurSing(null);
      }
    }
  }

  /**
   * Print all words of a given (main-)type.
   *
   * @param writer
   * @param type
   */
  public void printWordType(final PrintWriter writer, final String type) {
    for (Map.Entry<String, WordEntry> it : words.entrySet()) {
      final WordEntry entry = it.getValue();
      boolean matches;

      if (type == null) {
        // match all entries with empty type specifier
        matches = entry.getType() == null;
      } else {
        // all real (no plural) entries with matching type string
        matches = entry.getTypeString().startsWith(type)
            && !entry.isPlural();
      }

      if (matches) {
        entry.print(writer, it.getKey());

        writer.println();
      }
    }
  }

  /**
   * Transform the given word to lower case and trim special characters at
   * beginning and end to use this normalized form as key in the word list.
   *
   * @param word
   * @return the trimmed word
   */
  public static String trimWord(final String word) {
    String tempword = word.toLowerCase();

    // Currently we only need to trim "'" characters.
    while (tempword.length() > 0) {
      final char c = tempword.charAt(0);

      if (c == '\'') {
        tempword = tempword.substring(1);
      } else {
        break;
      }
    }

    while (tempword.length() > 0) {
      final char c = tempword.charAt(tempword.length() - 1);

      if (c == '\'') {
        tempword = tempword.substring(0, tempword.length() - 1);
      } else {
        break;
      }
    }

    return tempword;
  }

  /**
   * Find an entry for a given word.
   *
   * @param str
   * @return WordEntry
   */
  public WordEntry find(final String str) {
    final WordEntry entry = words.get(trimWord(str));

    return entry;
  }

  /**
   * Lookup the plural form of the given word from the word list.
   *
   * @param word
   * @return plural string
   */
  public String plural(final String word) {
    final WordEntry entry = words.get(trimWord(word));

    if (entry != null) {
      if ((entry.getType() != null) && !entry.getType().isPlural()) {
        // return the associated singular from the word list
        return entry.getPlurSing();
      } else {
        // The word is already in singular form.
        return entry.getNormalized();
      }
    } else {
      // fall back: call Grammar.plural()
      return Grammar.plural(word);
    }
  }

  /**
   * Lookup the singular form of the given word from the word list.
   *
   * @param word
   * @return singular string
   */
  public String singular(final String word) {
    final WordEntry entry = words.get(trimWord(word));

    if (entry != null) {
      if (entry.isPlural()) {
        // return the associated singular from the word list
        return entry.getPlurSing();
      } else {
        // The word is already in singular form.
        return entry.getNormalized();
      }
    } else {
      // fall back: call Grammar.singular()
      return Grammar.singular(word);
    }
  }

  /**
   * Return type for normalizeVerb().
   */
  static class Verb extends Grammar.Verb {
    public Verb(Grammar.Verb verb, WordEntry entry) {
      super(verb);

      assert entry != null;
      this.entry = entry;
    }

    public WordEntry entry; // is never null
  }

  /**
   * Try to normalise the given word as verb.
   *
   * @param word
   *
   * @return Verb object with additional information
   */
  Verb normalizeVerb(final String word) {
    final String trimmedWord = trimWord(word);

    final Grammar.Verb verb = Grammar.normalizeRegularVerb(trimmedWord);

    if (verb != null) {
      WordEntry entry = words.get(verb.word);

      // try and re-append "e" if it was removed by
      // normalizeRegularVerb()
      if ((entry == null) && trimmedWord.endsWith("e")
          && !verb.word.endsWith("e")) {
        entry = words.get(verb.word + "e");
      }

      if (entry != null) {
        return new Verb(verb, entry);
      }
    }

    return null;
  }

  /**
   * Try to find a matching word for a derived adjective.
   *
   * @param word
   * @return WordEntry
   */
  WordEntry normalizeAdjective(final String word) {
    final String trimmedWord = trimWord(word);

    final String normalized = Grammar.normalizeDerivedAdjective(trimmedWord);

    if (normalized != null) {
      final WordEntry entry = words.get(normalized);

      return entry;
    } else {
      return null;
    }
  }

  /**
   * Register a subject name to be recognized by the conversation parser.
   *
   * @param name
   */
  public void registerSubjectName(final String name) {
    registerSubjectName(name, ExpressionType.SUBJECT_NAME);
  }

  /**
   * Register a subject name to be recognized by the conversation parser.
   *
   * @param name
   * @param typeString
   */
  public void registerSubjectName(final String name, final String typeString) {
    final String key = trimWord(name);

    Integer usageCount = subjectRefCount.get(key);
    if ((usageCount != null) && (usageCount > 0)) {
      // For already known names, we only have to increment the
      // usage counter.
      subjectRefCount.put(key, ++usageCount);
      return;
    }

    // register the new subject name
    if (usageCount == null) {
      registerName(name, typeString);
      subjectRefCount.put(key, 1);
    }
  }

  /**
   * De-register a subject name.
   *
   * @param name
   */
  public void unregisterSubjectName(final String name) {
    final String key = trimWord(name);
    final WordEntry entry = words.get(key);

    if (entry != null && entry.isName() && entry.isDynamic()) {
      Integer usageCount = subjectRefCount.get(key);

      if (usageCount != null) {
        // decrement the usage counter
        subjectRefCount.put(key, --usageCount);

        if (usageCount == 0) {
          subjectRefCount.remove(key);
          unregisterName(name);
        }
      }
    }
  }

  /**
   * Register an item or creature name to be recognized by the conversation
   * parser.
   *
   * @param name
   * @param typeString
   */
  public void registerName(final String name, final String typeString) {
    // parse item name without merging Expression entries
    final ConversationContext ctx = new ConversationContext();
    ctx.setMergeExpressions(false);
    final Sentence parsed = ConversationParser.parse(name, ctx);

    Expression lastExpr = null;
    boolean prepositionSeen = false;

    for (final Expression expr : parsed) {
      if ((expr.getType() == null) || expr.getType().isEmpty()) {
        // register the unknown word as new entry
        final WordEntry entry = words.get(expr.getNormalized());

        // set the type to the given one with added "DYN" suffix
        final ExpressionType type = new ExpressionType(typeString
            + ExpressionType.SUFFIX_DYNAMIC);
        entry.setType(type);
        expr.setType(type);
      } else if (expr.isQuestion()) {
        logger.warn("name already registered with incompatible expression type while registering name '"
            + name + "': " + expr.getNormalizedWithTypeString()
            + " expected type: " + typeString);
      }

      if (expr.isPreposition()) {
        prepositionSeen = true;
      } else if (!prepositionSeen) {
        lastExpr = expr;
      }
    }

    if (lastExpr != null) {
      if (!isNameCompatibleLastType(lastExpr, typeString)) {
        if (typeString.startsWith(ExpressionType.SUBJECT)) {
          // ignore suspicious NPC names for now
        } else {
          logger.warn("last word of name '" + name
            + "' has an unexpected type: "
            + lastExpr.getNormalizedWithTypeString()
            + " expected type: " + typeString);
        }
      }
    }

    // register compound item and subject names to use them when merging expressions
    if (parsed.getExpressions().size() > 1) {
      Expression firstExpr = parsed.expressions.get(0);
      String firstWord = firstExpr.getOriginal().toLowerCase();

      Set<CompoundName> nameSet = compoundNames.get(firstWord);

      if (nameSet == null) {
        nameSet = new HashSet<CompoundName>();
        compoundNames.put(firstWord, nameSet);
      }

      nameSet.add(new CompoundName(parsed, typeString));
    }
  }

  /**
   * Search for compound names.
   * @param expressions list of expressions
   * @param idx start index of the expression list
   * @return compound name or null
   */
  public CompoundName searchCompoundName(AbstractList<Expression> expressions, int idx) {
        Expression first = expressions.get(idx);

      Set<CompoundName> candidates = compoundNames.get(first.getOriginal().toLowerCase());

    if (candidates != null) {
        for(CompoundName compName : candidates) {
          if (compName.matches(expressions, idx)) {
            return compName;
          }
        }
    }

    return null;
  }

  /**
   * De-register a name after all references have been removed.
   * @param name
   */
  private void unregisterName(final String name) {
    // parse item name without merging Expression entries
    final ConversationContext ctx = new ConversationContext();
    ctx.setMergeExpressions(false);
    final Sentence parsed = ConversationParser.parse(name, ctx);

    // remove compound names
    if (parsed.expressions.size() > 1) {
      Expression firstExpr = parsed.expressions.get(0);
      String firstWord = firstExpr.getOriginal().toLowerCase();

      Set<CompoundName> nameSet = compoundNames.get(firstWord);

      if (nameSet != null) {
        for(CompoundName compName : nameSet) {
          if (compName.matches(parsed.expressions, 0)) {
            nameSet.remove(compName);

            if (nameSet.isEmpty()) {
              compoundNames.remove(firstWord);
            }

            break;
          }
        }
      }
    }

    for(Expression expr : parsed.expressions) {
      if (expr.isDynamic()) {
        words.remove(expr.getNormalized());
      }
    }
  }

  /**
   * Check for compatible types.
   * @param lastExpr last word in an expression
   * @param typeString expected type string
   * @return
   */
  private static boolean isNameCompatibleLastType(
      final Expression lastExpr, final String typeString) {
    final ExpressionType lastType = lastExpr.getType();

    if (lastType.getTypeString().startsWith(typeString)) {
      return true;
    }

    if (typeString.startsWith(lastType.getTypeString())) {
      return true;
    }

    if (lastType.isNumeral()) {
      return true;
    }

    if (lastType.isDynamic()) {
      return true;
    }

    // Ignore words like "chicken", "cat" and "incorporeal armor", which are
    // registered as objects, but also used as subjects.
    if (lastType.isObject() && typeString.startsWith(ExpressionType.SUBJECT)) {
      return true;
    }
    if (lastType.isSubject() && typeString.startsWith(ExpressionType.OBJECT)) {
      return true;
    }

    // handle ambiguous cases like "mill"
    if (Grammar.isAmbiguousNounVerb(lastExpr.getNormalized())) {
      if (lastType.isVerb() && typeString.equals(ExpressionType.OBJECT)) {
        return true;
      }
      if (lastType.isObject() && typeString.equals(ExpressionType.VERB)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Register a verb to be recognized by the conversation parser.
   *
   * @param verb
   */
  public void registerVerb(final String verb) {
    final String key = trimWord(verb);
    final WordEntry entry = words.get(key);

    if ((entry == null) || (entry.getType() == null)
        || entry.getType().isEmpty()) {
      final WordEntry newEntry = new WordEntry();

      newEntry.setNormalized(key);
      newEntry.setType(new ExpressionType(VERB_DYNAMIC));

      words.put(key, newEntry);
//    } else if (!checkNameCompatibleLastType(entry, ExpressionType.VERB)) {
//       logger.warn("verb name already registered with incompatible expression type: " +
//      entry.getNormalizedWithTypeString());
    }
  }

  /**
   * Add a new word to the list in order to remember it later.
   *
   * @param str
   * @return the added entry
   */
  public WordEntry addNewWord(final String str) {
    final String key = trimWord(str);
    WordEntry entry = words.get(key);

    if (entry == null) {
      entry = new WordEntry();
      entry.setType(new ExpressionType(""));

      // add the new entry
      entry.setNormalized(key);
      words.put(key, entry);
    } else {
      logger.warn("word already known: " + str + " -> "
          + entry.getNormalized());
    }

    return entry;
  }

  /**
   * Return number of word entries.
   * @return number of entries
   */
  public int getWordCount() {
    return words.size();
  }
}
// TOP
//
// Related classes of games.stendhal.common.parser.WordList$Verb
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.