Package org.apache.uima.tutorial.ex3

Source Code of org.apache.uima.tutorial.ex3.TutorialDateTime$Maker

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.tutorial.ex3;

import java.text.BreakIterator;
import java.text.DateFormat;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.tutorial.DateAnnot;
import org.apache.uima.tutorial.DateTimeAnnot;
import org.apache.uima.tutorial.TimeAnnot;
import org.apache.uima.util.Level;

/**
* Simple Date/Time annotator.
*/
public class TutorialDateTime extends JCasAnnotator_ImplBase {

  static abstract class Maker {
    abstract Annotation newAnnotation(JCas jcas, int start, int end);
  }

  JCas jcas;

  String input;

  ParsePosition pp = new ParsePosition(0);

  // Static vars holding patterns, and function pointers

  // n:nn nn:nn followed optionally with AM or PM

  // .*? (any number of arbitrary chars, minimum, not greedy)
  // \b followed by a word boundary
  // [0-2]? followed by the optionally the first digit, a 0, 1, or 2
  // \d:[0-6]\d followed by a digit and the colon char,and minutes
  // \s*?(AM|PM)? followed by optional white space (non greedy) and AM or PM
  static final Pattern hoursMinutesPattern = Pattern
          .compile("(?s)\\b([0-2]?\\d:[0-5]\\d\\s*(AM\\W|PM\\W|am\\W|pm\\W)?)");

  //
  static final DateFormat dfTimeShort = DateFormat.getTimeInstance(DateFormat.SHORT, Locale.US);

  // .*? (any number of artibrary chars, non greedy
  // \b word boundary
  // [0-1]? optional first digit
  // \d digit of month
  // /
  // [0-3]? optional day of month 1st digit
  // \d
  // ((/[1-2]\d\d\d)|(/\d\d)|\s) // year is /nnnn or /nn or missing
  static final Pattern numericDatePattern = Pattern
          .compile("(?s)\\b([0-1]?\\d/[0-3]?\\d((/[1-2]\\d\\d\\d)|(/\\d\\d))?)\\W");

  // not static, because DateFormat is not threadsafe
  final DateFormat dfDateShort = DateFormat.getDateInstance(DateFormat.SHORT, Locale.US);

  // .*? (any number of artibrary chars, non greedy
  // \b word boundary
  // [Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec] Month
  // \.? optional period
  // \s+
  // [0-3]? optional day of month 1st digit
  // \d
  // (((,\s+)?[1-2]\d\d\d\W)|((,\s+)?\d\d\W)|\W) // year is /nnnn or /nn or missing
  static final String shortMonthNames = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)";

  static final Pattern mediumDatePattern = Pattern.compile("(?s)\\b(" + shortMonthNames
          + "\\.?\\s[0-3]?\\d(((,\\s+)?[1-2]\\d\\d\\d)|((,\\s+)?\\d\\d))?)\\W");

  static final DateFormat dfDateMedium = DateFormat.getDateInstance(DateFormat.MEDIUM, Locale.US);

  // for long month names, exclude May since it is covered by short month names
  static final String longMonthNames = "(January|February|March|April|June|July|August|September|October|November|December)";

  static final Pattern longDatePattern = Pattern.compile("(?s)\\b(" + longMonthNames
          + "\\s[0-3]?\\d(((,\\s+)?[1-2]\\d\\d\\d)|((,\\s+)?\\d\\d))?)\\W");

  static final DateFormat dfDateLong = DateFormat.getDateInstance(DateFormat.LONG, Locale.US);

  static final NumberFormat numberFormat = NumberFormat.getInstance(Locale.US);

  // function pointers for new instances
  static final Maker dateAnnotationMaker = new Maker() {
    Annotation newAnnotation(JCas jcas, int start, int end) {
      return new DateAnnot(jcas, start, end);
    }
  };

  static final Maker timeAnnotationMaker = new Maker() {
    Annotation newAnnotation(JCas jcas, int start, int end) {
      return new TimeAnnot(jcas, start, end);
    }
  };

  static final String defaultYear = "2003";
 
  private static boolean warningMessageShown = false;

  // PROCESS
  /**
   * The ResultSpecification controls what gets produced. For example, to only produce
   * DateAnnotations, change the descriptor for this component to specify it outputs only that type.
   */
  public void process(JCas aJCas) {
    jcas = aJCas;
    input = jcas.getDocumentText();

    // Create Annotations
    ResultSpecification resultSpec = getResultSpecification();
    boolean timeWanted = resultSpec.containsType("org.apache.uima.tutorial.TimeAnnot", aJCas.getDocumentLanguage());
    boolean dateWanted = resultSpec.containsType("org.apache.uima.tutorial.DateAnnot", aJCas.getDocumentLanguage());

    if (timeWanted)
      makeAnnotations(timeAnnotationMaker, hoursMinutesPattern, dfTimeShort);
   
    if (dateWanted) {   
      makeAnnotations(dateAnnotationMaker, numericDatePattern, dfDateShort);
      makeAnnotations(dateAnnotationMaker, mediumDatePattern, dfDateMedium);
      makeAnnotations(dateAnnotationMaker, longDatePattern, dfDateLong);
    }
   
    if (!timeWanted && !dateWanted && !warningMessageShown) {
      String m = String.format(
        "No output is being produced by the TutorialDateTime annotator because the Result Specification did not contain" +
        " a request for the type%n" +
        "  org.apache.uima.tutorial.TimeAnnot nor%n" +
        "  org.apache.uima.tutorial.DateAnnot" +
        " with the language '%s'%n" +
              "  (Note: this message will only be shown once.)%n",
              aJCas.getDocumentLanguage());             
      System.err.println(m);
      getContext().getLogger().log(Level.WARNING, m);
      warningMessageShown = true;
    }
  }

  // HELPER METHODS

  void makeAnnotations(Maker m, BreakIterator b) {
    b.setText(input);
    for (int end = b.next(), start = b.first(); end != BreakIterator.DONE; start = end, end = b
            .next()) {

      // eliminate all-whitespace tokens
      boolean isWhitespace = true;
      for (int i = start; i < end; i++) {
        if (!Character.isWhitespace(input.charAt(i))) {
          isWhitespace = false;
          break;
        }
      }
      if (!isWhitespace) {
        m.newAnnotation(jcas, start, end).addToIndexes();
      }
    }
  }

  void makeAnnotations(Maker m, Pattern pattern, DateFormat dateFormat) {
    Matcher matcher = pattern.matcher(input);
    String matched;
    while (matcher.find()) {
      int start = matcher.start(1);
      matched = fixUpDateTimeStrings(matcher.group(1));
      DateTimeAnnot dtAnnot = (DateTimeAnnot) m.newAnnotation(jcas, start, matcher.end(1));
      pp.setIndex(0);
      Date dtSpec = dateFormat.parse(matched, pp);
      // System.out.println(dtAnnot.dtSpec);
      if (dtSpec != null) {
        dtAnnot.setShortDateString(dfDateShort.format(dtSpec));
      }
      dtAnnot.addToIndexes();
    }
  }

  String fixUpDateTimeStrings(String s) {
    String av; // append value
    pp.setIndex(0);
    if (-1 < s.indexOf(":")) { // have time string
      if (s.endsWith("AM") || s.endsWith("PM") || s.endsWith("am") || s.endsWith("pm"))
        return s;
      else {
        int hour = numberFormat.parse(s, pp).intValue();
        if (0 == hour)
          av = " AM";
        else if (hour < 9)
          av = " PM";
        else
          av = " AM";
        return s + av;
      }
    }

    // have date string
    return s + ", " + defaultYear; // in case no year available
  }

}
TOP

Related Classes of org.apache.uima.tutorial.ex3.TutorialDateTime$Maker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.