Package simplenlg.orthography.english

Source Code of simplenlg.orthography.english.OrthographyProcessor

/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
* License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is "Simplenlg".
*
* The Initial Developer of the Original Code is Ehud Reiter, Albert Gatt and Dave Westwater.
* Portions created by Ehud Reiter, Albert Gatt and Dave Westwater are Copyright (C) 2010-11 The University of Aberdeen. All Rights Reserved.
*
* Contributor(s): Ehud Reiter, Albert Gatt, Dave Westwater, Roman Kutlak, Margaret Mitchell.
*/
package simplenlg.orthography.english;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import simplenlg.features.DiscourseFunction;
import simplenlg.features.InternalFeature;
import simplenlg.framework.CoordinatedPhraseElement;
import simplenlg.framework.DocumentCategory;
import simplenlg.framework.DocumentElement;
import simplenlg.framework.ElementCategory;
import simplenlg.framework.ListElement;
import simplenlg.framework.NLGElement;
import simplenlg.framework.NLGModule;
import simplenlg.framework.StringElement;

/**
* <p>
* This processing module deals with punctuation when applied to
* <code>DocumentElement</code>s. The punctuation currently handled by this
* processor includes the following (as of version 4.0):
* <ul>
* <li>Capitalisation of the first letter in sentences.</li>
* <li>Termination of sentences with a period if not interrogative.</li>
* <li>Termination of sentences with a question mark if they are interrogative.</li>
* <li>Replacement of multiple conjunctions with a comma. For example,
* <em>John and Peter and Simon</em> becomes <em>John, Peter and Simon</em>.</li>
* </ul>
* </p>
*
*
* @author D. Westwater, University of Aberdeen.
* @version 4.0
*
*/
public class OrthographyProcessor extends NLGModule {

  @Override
  public void initialise() {
    // No initialisation.
  }

  @Override
  public NLGElement realise(NLGElement element) {
    NLGElement realisedElement = null;

    if (element != null) {
      ElementCategory category = element.getCategory();

      if (category instanceof DocumentCategory
          && element instanceof DocumentElement) {
        List<NLGElement> components = ((DocumentElement) element)
            .getComponents();

        switch ((DocumentCategory) category) {

        case SENTENCE:
          realisedElement = realiseSentence(components, element);
          break;

        case LIST_ITEM:
          if (components != null && components.size() > 0) {
            // recursively realise whatever's in the list item
            // NB: this will realise embedded lists within list
            // items
            realisedElement = new ListElement(realise(components));
          }
          break;

        default:
          ((DocumentElement) element)
              .setComponents(realise(components));
          realisedElement = element;
        }

      } else if (element instanceof ListElement) {
        // AG: changes here: if we have a premodifier, then we ask the
        // realiseList method to separate with a comma.
        StringBuffer buffer = new StringBuffer();
        List<NLGElement> children = element.getChildren();
        Object function = children.isEmpty() ? null : children.get(0)
            .getFeature(InternalFeature.DISCOURSE_FUNCTION);

        if (DiscourseFunction.PRE_MODIFIER.equals(function)) {
          realiseList(buffer, element.getChildren(), ",");
        } else {
          realiseList(buffer, element.getChildren(), "");
        }

        // realiseList(buffer, element.getChildren(), "");
        realisedElement = new StringElement(buffer.toString());

      } else if (element instanceof CoordinatedPhraseElement) {
        realisedElement = realiseCoordinatedPhrase(element
            .getChildren());

      } else {
        realisedElement = element;
      }

      // make the realised element inherit the original category
      // essential if list items are to be properly formatted later
      if (realisedElement != null) {
        realisedElement.setCategory(category);
      }
    }

    return realisedElement;
  }

  /**
   * Performs the realisation on a sentence. This includes adding the
   * terminator and capitalising the first letter.
   *
   * @param components
   *            the <code>List</code> of <code>NLGElement</code>s representing
   *            the components that make up the sentence.
   * @param element
   *            the <code>NLGElement</code> representing the sentence.
   * @return the realised element as an <code>NLGElement</code>.
   */
  private NLGElement realiseSentence(List<NLGElement> components,
      NLGElement element) {

    NLGElement realisedElement = null;
    if (components != null && components.size() > 0) {
      StringBuffer realisation = new StringBuffer();
      realiseList(realisation, components, "");

      capitaliseFirstLetter(realisation);
      terminateSentence(realisation, element.getFeatureAsBoolean(
          InternalFeature.INTERROGATIVE).booleanValue());

      ((DocumentElement) element).clearComponents();
      // realisation.append(' ');
      element.setRealisation(realisation.toString());
      realisedElement = element;
    }
    return realisedElement;
  }

  /**
   * Adds the sentence terminator to the sentence. This is a period ('.') for
   * normal sentences or a question mark ('?') for interrogatives.
   *
   * @param realisation
   *            the <code>StringBuffer<code> containing the current
   * realisation of the sentence.
   * @param interrogative
   *            a <code>boolean</code> flag showing <code>true</code> if the
   *            sentence is an interrogative, <code>false</code> otherwise.
   */
  private void terminateSentence(StringBuffer realisation,
      boolean interrogative) {
    char character = realisation.charAt(realisation.length() - 2);
    if (character != '.' && character != '?') {
      if (interrogative) {
        realisation.append('?');
      } else {
        realisation.append('.');
      }
    }
  }

  /**
   * Capitalises the first character of a sentence if it is a lower case
   * letter.
   *
   * @param realisation
   *            the <code>StringBuffer<code> containing the current
   * realisation of the sentence.
   */
  private void capitaliseFirstLetter(StringBuffer realisation) {
    char character = realisation.charAt(0);
    if (character >= 'a' && character <= 'z') {
      character = (char) ('A' + (character - 'a'));
      realisation.setCharAt(0, character);
    }
  }

  @Override
  public List<NLGElement> realise(List<NLGElement> elements) {
    List<NLGElement> realisedList = new ArrayList<NLGElement>();

    if (elements != null && elements.size() > 0) {
      for (NLGElement eachElement : elements) {
        if (eachElement instanceof DocumentElement) {
          realisedList.add(realise(eachElement));
        } else {
          realisedList.add(eachElement);
        }
      }
    }
    return realisedList;
  }

  /**
   * Realises a list of elements appending the result to the on-going
   * realisation.
   *
   * @param realisation
   *            the <code>StringBuffer<code> containing the current
   *         realisation of the sentence.
   * @param components
   *            the <code>List</code> of <code>NLGElement</code>s representing
   *            the components that make up the sentence.
   * @param listSeparator
   *            the string to use to separate elements of the list, empty if
   *            no separator needed
   */
  private void realiseList(StringBuffer realisation,
      List<NLGElement> components, String listSeparator) {

    NLGElement realisedChild = null;

    for (int i = 0; i < components.size(); i++) {
      NLGElement thisElement = components.get(i);
      realisedChild = realise(thisElement);
      String childRealisation = realisedChild.getRealisation();

      // check that the child realisation is non-empty
      if (childRealisation != null && childRealisation.length() > 0
          && !childRealisation.matches("^[\\s\\n]+$")) {
        realisation.append(realisedChild.getRealisation());

        if (components.size() > 1 && i < components.size() - 1) {
          realisation.append(listSeparator);
        }

        realisation.append(' ');
      }
    }

    if (realisation.length() > 0) {
      realisation.setLength(realisation.length() - 1);
    }
  }

  /**
   * Realises coordinated phrases. Where there are more than two coordinates,
   * then a comma replaces the conjunction word between all the coordinates
   * save the last two. For example, <em>John and Peter and Simon</em> becomes
   * <em>John, Peter and Simon</em>.
   *
   * @param components
   *            the <code>List</code> of <code>NLGElement</code>s representing
   *            the components that make up the sentence.
   * @return the realised element as an <code>NLGElement</code>.
   */
  private NLGElement realiseCoordinatedPhrase(List<NLGElement> components) {
    StringBuffer realisation = new StringBuffer();
    NLGElement realisedChild = null;

    int length = components.size();

    for (int index = 0; index < length; index++) {
      realisedChild = components.get(index);
      if (index < length - 2
          && DiscourseFunction.CONJUNCTION.equals(realisedChild
              .getFeature(InternalFeature.DISCOURSE_FUNCTION))) {

        realisation.append(", "); //$NON-NLS-1$
      } else {
        realisedChild = realise(realisedChild);
        realisation.append(realisedChild.getRealisation()).append(' ');
      }
    }
    realisation.setLength(realisation.length() - 1);
    return new StringElement(realisation.toString().replace(" ,", ",")); //$NON-NLS-1$ //$NON-NLS-2$
  }
}
TOP

Related Classes of simplenlg.orthography.english.OrthographyProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.