Package org.apache.ctakes.core.util

Source Code of org.apache.ctakes.core.util.DateParser

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.core.util;

import java.text.DateFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;

import org.apache.uima.jcas.JCas;

import org.apache.ctakes.typesystem.type.refsem.Date;

public class DateParser {

  private static DateFormat df = DateFormat.getDateInstance();
  private static Calendar cal = Calendar.getInstance();

  // month names in FULL and SHORT formats
  private static List<String> monthFullNames = new ArrayList<String>();
  private static List<String> monthShortNames = new ArrayList<String>();
 
  static {
    monthFullNames.add("january");
    monthFullNames.add("february");
    monthFullNames.add("march");
    monthFullNames.add("april");
    monthFullNames.add("may");
    monthFullNames.add("june");
    monthFullNames.add("july");
    monthFullNames.add("august");
    monthFullNames.add("september");
    monthFullNames.add("october");
    monthFullNames.add("november");
    monthFullNames.add("december");

    monthShortNames.add("jan");
    monthShortNames.add("feb");
    monthShortNames.add("mar");
    monthShortNames.add("apr");
    monthShortNames.add("may");
    monthShortNames.add("jun");
    monthShortNames.add("jul");
    monthShortNames.add("aug");
    monthShortNames.add("sep");   //monthShortNames.add("sept");
    monthShortNames.add("oct");
    monthShortNames.add("nov");
    monthShortNames.add("dec");
    if (monthShortNames.get(11).equals("dec")) {
      // good, do nothing, order is correct
    } else {
      throw new RuntimeException("Check initialization of monthShortNames");
    }

  }

  /**
   * First try parsing full date (month, day and year) using java.util.Date
   * If that fails, try extracting at least part of the date
   */
  public static Date parse(JCas jcas, String dateString) {
    Date date = new Date(jcas);
    try {
   
      java.util.Date jud = df.parse(dateString);
      // if no parse exception, create the CTS Date
      date = new Date(jcas); // create new CTS Date
      cal.setTime(jud);
      date.setDay(Integer.toString(cal.get(Calendar.DAY_OF_MONTH)));// df.cal.get(Calendar.DAY_OF_MONTH, jud)));
      date.setMonth(Integer.toString(cal.get(Calendar.MONTH)+1));
      date.setYear(Integer.toString(cal.get(Calendar.YEAR)));

    } catch (ParseException e) {
     
      // An exception while parsing the date using java.util.Date
      // Try to get just month and year or month and day etc

      // possibilities to consider just for US dates
      // m dd  (ambiguous pattern with m yy, for at least some values)
      // m yy  (ambiguous pattern with m dd, for at least some values)
      // m yyyy
      // m dd yy
      // m dd yyyy

      dateString = dateString.trim().toLowerCase();
     
      // look for month as string at beginning
      for (int i=0; i<monthShortNames.size(); i++) {
        if (dateString.startsWith(monthShortNames.get(i))) {
          date.setMonth(dateString.substring(0, getIndexFirstNonLetter(dateString)));
        }
      }
     

      // look for month as number

      // look for day
     
      // look for year as yy
     
      // look for year as yyyy at end
      int yearPosition = getIndexAfterLastNonDigit(dateString);
      if (yearPosition+4==dateString.length()) {
        date.setYear(dateString.substring(yearPosition));
      }

     
    }
   
    return date;
  }
 
  /**
   *
   * @return if entire string is letters, returns length of s
   */
  public static int getIndexFirstNonLetter(String s) {
    for (int i=0; i<s.length(); i++) {
      if (!Character.isLetter(s.charAt(i))) {
        return i;
      }
    }
    return s.length();
  }

  /**
   *
   * @return if entire string is letters, returns 0
   */
  public static int getIndexAfterLastNonDigit(String s) {
    for (int i=s.length(); i>0 ;) {
      i--;
      if (!Character.isDigit(s.charAt(i))) {
        return i+1;
      }
    }
    return 0;
  }
}
TOP

Related Classes of org.apache.ctakes.core.util.DateParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.