Package util.misc

Source Code of util.misc.AbstractXmlTvDataHandler

/*
* Copyright Michael Keppler
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
package util.misc;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import util.io.IOUtilities;
import devplugin.Date;
import devplugin.Program;
import devplugin.ProgramFieldType;

/**
* @author bananeweizen
*
*/
public abstract class AbstractXmlTvDataHandler extends DefaultHandler {
  /**
   * Lower-case prefix of "actor" entries that are really presenters. Entries
   * starting with it (compared case-insensitively) are added to the
   * moderation field instead of the actor list.
   */
  private static final String PRESENTED_BY = "presented by";
  /**
   * Separator placed between the two parts of an actor list entry.
   */
  private static final String ACTOR_ROLE_SEPARATOR = "\t\t-\t\t";
  /**
   * Separator passed to addToList() for most list fields.
   */
  private static final String COMMA_SPACE = ", ";
  /**
   * Separator passed to addToList() for the actor list (one entry per line).
   */
  private static final String COMMA_LINE_BREAK = ",\n";
  /**
   * RegEx pattern for actor entries of the form "text (text)". It is only
   * applied when the actor tag has no 'role' attribute; the part inside the
   * parentheses (group 2) is then placed before the separator and the part in
   * front of them (group 1) after it.
   */
  private static final Pattern ACTOR_PATTERN = Pattern.compile("(.*)\\((.*)\\)");
  /**
   * Format of the XMLTV time stamps that are parsed, i.e. 14 digits, a space
   * and a numeric time zone offset. The instance is shared by all handlers;
   * SimpleDateFormat is not thread-safe, so every access has to be serialized.
   */
  private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss ZZZZ");
  /**
   * Holds the text of the current tag. It is filled by characters() and
   * cleared whenever a new tag starts.
   */
  private StringBuffer mText = new StringBuffer();

  /**
   * Text of the last 'value' element. It is consumed (and reset to null) when
   * a 'star-rating' element ends.
   */
  private String mValue = null;

  /**
   * The value of the attribute 'lang' of the current tag. Set on every start
   * tag and reset to null after every end tag.
   */
  private String mLang;

  /**
   * The channel ID of the current program (attribute 'channel' of the
   * 'programme' tag)
   */
  private String mChannelId;

  /**
   * if the currently parsed program is valid, i.e. its 'programme' tag had a
   * channel and a start time that could be parsed
   */
  private boolean mIsValid = false;
  /**
   * Value of the attribute 'system' of the current 'episode-num' tag
   */
  private String mEpisodeType;
  /**
   * title of the currently parsed program (text of the last 'title' element,
   * regardless of its language)
   */
  private String mTitle;
  /**
   * whether the title field has already been set for the current program
   */
  private boolean mSetTitle;
  /**
   * role of the currently parsed actor (attribute 'role', may be null)
   */
  private String mRole;
  /**
   * unit of the program length: seconds | minutes | hours
   */
  private String mLengthUnit;
  /**
   * subtitle kind of the currently parsed program: teletext | onscreen |
   * deaf-signed
   */
  private String mSubtitles;

  /**
   * Handles the occurrence of tag text by buffering it for later analysis
   */
  public void characters(char ch[], int start, int length) throws SAXException {
    // There is some text -> Add it to the text buffer
    mText.append(ch, start, length);
  }

  /**
   * occurrence of a start tag<br>
   * Here we only handle tags with attributes, all other tags are handled when
   * they end.
   */
  public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
    try {
      // A new tag begins -> Clear the text buffer
      mText.setLength(0);

      // Set the lang
      mLang = attributes.getValue("lang");

      // Special tag treatment
      if (qName.equals("programme")) {
        mIsValid = false;
        mSetTitle = false;
        mTitle = null;
        String startDateTime = attributes.getValue("start");
        mChannelId = attributes.getValue("channel");
        if (startDateTime == null) {
          logMessage("Start time missing in programme tag");
        } else if (mChannelId == null) {
          logMessage("Channel missing in programme tag");
        } else {
          Date startDate = extractDate(startDateTime);
          int startTime = extractTime(startDateTime);
          if (startDate != null && startTime > -1) {
            startProgram(startDate, startTime);

            addField(ProgramFieldType.START_TIME_TYPE, startTime);

            String vps = attributes.getValue("vps-start");
            if (vps != null) {
              int time = extractTime(vps);
              addField(ProgramFieldType.VPS_TYPE, time);
            }

            String endDateTime = attributes.getValue("stop");
            if (endDateTime != null) {
              int endTime = extractTime(endDateTime);
              addField(ProgramFieldType.END_TIME_TYPE, endTime);
            }

            mIsValid = true;
          }
        }
      } else if (qName.equals("previously-shown")) {
        Date prevDate = extractDate(attributes.getValue("start"));
        if (prevDate != null) {
          addField(ProgramFieldType.REPETITION_OF_TYPE, prevDate.toString());
        }
      } else if (qName.equals("next-time-shown")) {
        Date nextDate = extractDate(attributes.getValue("start"));
        if (nextDate != null) {
          addField(ProgramFieldType.REPETITION_ON_TYPE, nextDate.toString());
        }
      } else if ("episode-num".equals(qName)) {
        mEpisodeType = attributes.getValue("system");
      } else if ("actor".equals(qName)) {
        mRole = attributes.getValue("role");
      } else if ("length".equals(qName)) {
        mLengthUnit = attributes.getValue("units");
      } else if ("subtitles".equals(qName)) {
        mSubtitles = attributes.getValue("type");
      }
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }

  /**
   * Starts a new program. This is called for every 'programme' tag that has a
   * channel and a start time which could be parsed, before any field of the
   * program is added.
   *
   * @param startDate
   *          start date of the program
   * @param startTime
   *          start time in minutes after midnight
   */
  protected abstract void startProgram(final Date startDate, final int startTime);

  /**
   * Handles the occurrence of an end tag.
   */
  public void endElement(String uri, String localName, String qName) throws SAXException {
    try {
      if (isValid()) {
        // every value shall be trimmed by default
        String text = mText.toString().trim();

        if ("title".equalsIgnoreCase(qName)) {
          mTitle = text;
          if ((mLang == null) || mLang.equals(getChannelCountry()) || getCountries(mLang).contains(getChannelCountry())) {
            addField(ProgramFieldType.TITLE_TYPE, text);
            mSetTitle = true;
          } else {
            addField(ProgramFieldType.ORIGINAL_TITLE_TYPE, text);
          }
        } else if ("sub-title".equalsIgnoreCase(qName)) { // do not mix this up
                                                          // with "subtitles" !
          if ((mLang == null) || mLang.equals(getChannelCountry())) {
            addField(ProgramFieldType.EPISODE_TYPE, text);
          } else {
            addField(ProgramFieldType.ORIGINAL_EPISODE_TYPE, text);
          }
        } else if ("desc".equalsIgnoreCase(qName)) {
          addField(ProgramFieldType.DESCRIPTION_TYPE, text);
        } else if ("date".equalsIgnoreCase(qName)) {
          if (text.length() < 4) {
            logMessage("WARNING: The date value must have at least 4 chars: '" + text + '\'');
          } else {
            try {
              int year = Integer.parseInt(text.substring(0, 4));
              addField(ProgramFieldType.PRODUCTION_YEAR_TYPE, year);
            } catch (NumberFormatException e) {
              logMessage("WARNING: The date value doesn't start with a year: '" + text + '\'');
            }
          }
        } else if ("rating".equalsIgnoreCase(qName)) {
          try {
            int ageLimit = Integer.valueOf(text);
            addField(ProgramFieldType.AGE_LIMIT_TYPE, ageLimit);
          } catch (NumberFormatException exc) {
            addField(ProgramFieldType.AGE_RATING_TYPE, text);
          }
        } else if ("url".equalsIgnoreCase(qName)) {
          addField(ProgramFieldType.URL_TYPE, text);
        } else if ("category".equalsIgnoreCase(qName)) {
          text = text.substring(0, 1).toUpperCase() + text.substring(1);
          if (text.toLowerCase().indexOf("serie") > -1) {
            setInfoBit(Program.INFO_CATEGORIE_SERIES);
          } else if (text.toLowerCase().indexOf("movie") > -1) {
            setInfoBit(Program.INFO_CATEGORIE_MOVIE);
          } else if (text.toLowerCase().indexOf("sport") > -1) {
            setInfoBit(Program.INFO_CATEGORIE_SPORTS);
          } else if (text.toLowerCase().indexOf("music") > -1) {
            setInfoBit(Program.INFO_CATEGORIE_ARTS);
          } else if (text.toLowerCase().indexOf("news") > -1) {
            setInfoBit(Program.INFO_CATEGORIE_NEWS);
          }
          addToList(ProgramFieldType.GENRE_TYPE, text, COMMA_SPACE);
        } else if ("country".equalsIgnoreCase(qName)) {
          addField(ProgramFieldType.ORIGIN_TYPE, text);
        } else if ("subtitles".equalsIgnoreCase(qName)) {
          if ("deaf-signed".equalsIgnoreCase(mSubtitles)) {
            setInfoBit(Program.INFO_SIGN_LANGUAGE);
          } else {
            if ((mLang == null) || mLang.equals(getChannelCountry())) {
              setInfoBit(Program.INFO_SUBTITLE_FOR_AURALLY_HANDICAPPED);
            } else {
              setInfoBit(Program.INFO_ORIGINAL_WITH_SUBTITLE);
            }
          }
        } else if ("live".equalsIgnoreCase(qName)) {
          setInfoBit(Program.INFO_LIVE);
        } else if ("length".equalsIgnoreCase(qName)) {
          try {
            int length = Integer.parseInt(text);
            if ("seconds".equalsIgnoreCase(mLengthUnit)) {
              addField(ProgramFieldType.NET_PLAYING_TIME_TYPE, length / 60);
            } else if ("minutes".equalsIgnoreCase(mLengthUnit)) {
              addField(ProgramFieldType.NET_PLAYING_TIME_TYPE, length);
            } else if ("hours".equalsIgnoreCase(mLengthUnit)) {
              addField(ProgramFieldType.NET_PLAYING_TIME_TYPE, length * 60);
            } else {
              addField(ProgramFieldType.NET_PLAYING_TIME_TYPE, length);
            }
          } catch (NumberFormatException exc) {
            logMessage("WARNING: length is no number: '" + text + "' and will be ignored.");
          }
        } else if ("actor".equalsIgnoreCase(qName)) {
          if (mRole != null && mRole.length() > 0) {
            text += ACTOR_ROLE_SEPARATOR + mRole;
          } else {
            Matcher m = ACTOR_PATTERN.matcher(text);
            if (m.matches()) {
              text = m.group(2).trim() + ACTOR_ROLE_SEPARATOR + m.group(1).trim();
            }
          }
          // some "actors" are really presenters
          if (text.toLowerCase().startsWith(PRESENTED_BY)) {
            text = text.substring(PRESENTED_BY.length()).trim();
            addToList(ProgramFieldType.MODERATION_TYPE, text, COMMA_SPACE);
          } else {
            addToList(ProgramFieldType.ACTOR_LIST_TYPE, text, COMMA_LINE_BREAK);
          }
        } else if ("director".equalsIgnoreCase(qName)) {
          addToList(ProgramFieldType.DIRECTOR_TYPE, text, COMMA_SPACE);
        } else if ("writer".equalsIgnoreCase(qName) || "adapter".equalsIgnoreCase(qName)) {
          addToList(ProgramFieldType.SCRIPT_TYPE, text, COMMA_SPACE);
        } else if ("presenter".equalsIgnoreCase(qName) || "commentator".equalsIgnoreCase(qName)) {
          addToList(ProgramFieldType.MODERATION_TYPE, text, COMMA_SPACE);
        } else if ("music".equalsIgnoreCase(qName) || "composer".equalsIgnoreCase(qName)) {
          addToList(ProgramFieldType.MUSIC_TYPE, text, COMMA_SPACE);
        } else if ("producer".equalsIgnoreCase(qName)) {
          addToList(ProgramFieldType.PRODUCER_TYPE, text, COMMA_SPACE);
        } else if ("guest".equalsIgnoreCase(qName) || "editor".equalsIgnoreCase(qName)) {
          addToList(ProgramFieldType.ADDITIONAL_PERSONS_TYPE, text, COMMA_SPACE);
        } else if ("colour".equalsIgnoreCase(qName)) {
          if (text.equals("no")) {
            setInfoBit(Program.INFO_VISION_BLACK_AND_WHITE);
          } else if (!text.equals("yes")) {
            logMessage("WARNING: value of colour tag must be 'yes' or 'no'," + " but it is '" + text + '\'');
          }
        } else if ("quality".equalsIgnoreCase(qName)) {
          if (text.equals("HDTV") || text.equals("HD")) {
            setInfoBit(Program.INFO_VISION_HD);
          } else if (text.equals("SD")) {
            //do nothing
          } else {
            logMessage("WARNING: unsupported value of quality tag: '" + text + '\'');
          }
        } else if ("aspect".equalsIgnoreCase(qName)) {
          if (text.equals("4:3")) {
            setInfoBit(Program.INFO_VISION_4_TO_3);
          } else if (text.equals("16:9")) {
            setInfoBit(Program.INFO_VISION_16_TO_9);
          } else {
            logMessage("WARNING: value of aspect tag must be '4:3' or '16:9'," + " but it is '" + text + '\'');
          }
        } else if ("stereo".equalsIgnoreCase(qName)) {
          if (text.equals("mono")) {
            setInfoBit(Program.INFO_AUDIO_MONO);
          } else if (text.equals("stereo")) {
            setInfoBit(Program.INFO_AUDIO_STEREO);
          } else if (text.equals("surround")) {
            setInfoBit(Program.INFO_AUDIO_DOLBY_SURROUND);
          } else if (text.equals("dolby")) {
            setInfoBit(Program.INFO_AUDIO_DOLBY_SURROUND);
          } else if (text.equals("5.1")) {
            setInfoBit(Program.INFO_AUDIO_DOLBY_DIGITAL_5_1);
          } else if (text.equals("dolby digital")) {
            setInfoBit(Program.INFO_AUDIO_DOLBY_DIGITAL_5_1);
          } else if (text.equals("two channel tone")) {
            setInfoBit(Program.INFO_AUDIO_TWO_CHANNEL_TONE);
          } else if (text.equals("bilingual")) {
            setInfoBit(Program.INFO_AUDIO_TWO_CHANNEL_TONE);
          } else if (text.equals("audio description")) {
            setInfoBit(Program.INFO_AUDIO_DESCRIPTION);
          } else {
            logMessage("WARNING: value of stereo tag must be one of 'mono', "
                + "'stereo', 'surround', '5.1' or 'two channel tone' but it is '" + text + '\'');
          }
        } else if ("picture".equalsIgnoreCase(qName)) {
          File file = new File(text);
          if (file.exists() && file.isFile()) {
            try {
              addField(ProgramFieldType.PICTURE_TYPE, IOUtilities.getBytesFromFile(file));
            } catch (IOException e) {
              logException(e);
            }
          } else {
            logMessage("Warning: File does not exist: " + text);
          }
        } else if ("picture-copyright".equalsIgnoreCase(qName)) {
          addField(ProgramFieldType.PICTURE_COPYRIGHT_TYPE, text);
        } else if ("picture-description".equalsIgnoreCase(qName)) {
          addField(ProgramFieldType.PICTURE_DESCRIPTION_TYPE, text);
        } else if ("value".equalsIgnoreCase(qName)) {
          mValue = text;
        } else if ("star-rating".equalsIgnoreCase(qName)) {
          if (mValue != null) {

            if (mValue.contains("/")) {
              try {
                int num = Integer.valueOf(mValue.substring(0, mValue.indexOf('/')).trim());
                int max = Integer.valueOf(mValue.substring(mValue.indexOf('/') + 1).trim());
                addField(ProgramFieldType.RATING_TYPE, num * 100 / max);
              } catch (NumberFormatException ex) {
                logException(ex);
              }
            } else {
              logMessage("Star-rating must be in form 8/10");
            }

          }

          mValue = null;
        } else if ("new".equalsIgnoreCase(qName) || "premiere".equalsIgnoreCase(qName)) {
          setInfoBit(Program.INFO_NEW);
        } else if ("programme".equalsIgnoreCase(qName)) {
          // if we only set the original title, then we still need to set the
          // title
          if (!mSetTitle && mTitle != null) {
            addField(ProgramFieldType.TITLE_TYPE, mTitle);
          }
          endProgram();
        } else if ("episode-num".equals(qName)) {
          if ("onscreen".equals(mEpisodeType)) {
            addField(ProgramFieldType.EPISODE_TYPE, text);
          } else if ("xmltv_ns".equals(mEpisodeType)) {
            // format is
            // season/totalseasons.episodenum/totalepisode.part/totalparts
            // where current numbers start at 0, while total numbers start at 1
            try {
              if (text.length() > 0) {
                String[] ep = text.split("\\.");
                if (ep.length > 0 && ep[0].length() > 0) {
                  String[] seasons = ep[0].trim().split("/");
                  if (seasons.length > 0 && seasons[0].trim().length() > 0) {
                    int season = Integer.parseInt(seasons[0].trim()) + 1;
                    if (season > 0) {
                      addField(ProgramFieldType.SEASON_NUMBER_TYPE, season);
                    }
                  }
                }
                if (ep.length > 1 && ep[1].length() > 0) {
                  String[] parts = ep[1].trim().split("/");
                  if (parts.length == 2) {
                    String currentString = parts[0].trim();
                    if (currentString.length() > 0) {
                      int current = Integer.parseInt(currentString) + 1;
                      if (current > 0) {
                        addField(ProgramFieldType.EPISODE_NUMBER_TYPE, current);
                      }
                    }
                    String totalString = parts[1].trim();
                    if (totalString.length() > 0) {
                      int total = Integer.parseInt(totalString);
                      if (total > 0) {
                        addField(ProgramFieldType.EPISODE_TOTAL_NUMBER_TYPE, total);
                      }
                    }
                  }
                }
              }
            } catch (NumberFormatException e) {
              logMessage("WARNING: the value of xmltv_ns doesn't meet the specifications: '" + text + '\'');
            }
          }

        } else if ("credits".equalsIgnoreCase(qName)) {
          // already parsed as actor, director,...
        } else if ("tv".equalsIgnoreCase(qName)) {
          // root element, no useful information
        } else if ("video".equalsIgnoreCase(qName)) {
          // already parsed as colour, aspect, quality
        } else {
          logMessage("Warning: Unknown element '" + qName + '\'');
        }
      }
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    // Clear lang
    mLang = null;
  }

  private static String getCountries(final String language) {
    if (language == null) {
      return null;
    }
    StringBuilder result = new StringBuilder();
    for (Locale locale : Locale.getAvailableLocales()) {
      if (locale.getLanguage().equalsIgnoreCase(language)) {
        String country = locale.getCountry();
        if (country != null && country.length() > 0) {
          result.append(',').append(country);
        }
      }
    }
    return result.toString().toLowerCase();
  }

  /**
   * Country of the channel of the currently parsed program. The value is
   * compared with the 'lang' attribute of title, sub-title and subtitles tags
   * and searched in a lower-cased list of country codes to decide whether a
   * text is in the language of the channel.
   * NOTE(review): presumably a lower-case two-letter country code - confirm
   * against the implementations.
   *
   * @return country of the channel
   */
  protected abstract String getChannelCountry();

  /**
   * program parsing finishes. This is called when the 'programme' tag of a
   * valid program ends, after the title has been added if necessary.
   */
  protected abstract void endProgram();

  /**
   * whether or not the currently parsed program is valid. A program becomes
   * valid when its 'programme' tag contains a channel and a start time that
   * could be parsed; end tags are only evaluated for valid programs.
   *
   * @return true if the program is valid
   */
  protected boolean isValid() {
    return mIsValid;
  }

  /**
   * sets a bit in the info field of the current program
   *
   * @param bit
   *          The bit to add (one of the Program.INFO_* constants)
   */
  protected abstract void setInfoBit(final int bit);

  /**
   * Adds a binary field to the current program (used for the picture)
   *
   * @param fieldType
   *          type of the field to add
   * @param value
   *          binary content of the field
   */
  protected abstract void addField(final ProgramFieldType fieldType, final byte[] value);

  /**
   * adds a String field to the current program
   *
   * @param fieldType
   *          type of the field to add
   * @param value
   *          text of the field
   */
  protected abstract void addField(final ProgramFieldType fieldType, final String value);

  /**
   * adds an Integer field to the current program
   *
   * @param fieldType
   *          type of the field to add
   * @param value
   *          number to store in the field
   */
  protected abstract void addField(final ProgramFieldType fieldType, final int value);

  /**
   * Reports a problem found in the parsed data, e.g. a missing attribute or
   * an unsupported value.
   *
   * @param message
   *          text describing the problem
   */
  protected abstract void logMessage(final String message);

  /**
   * Reports an exception that occurred while evaluating the parsed data.
   *
   * @param exc
   *          the exception to report
   */
  protected abstract void logException(final Exception exc);

  /**
   * Extracts the time from a XMLTV time value.
   *
   * @param dateTime
   *          The value to extract the time from.
   * @return The time in minutes after midnight
   */
  private int extractTime(final String dateTime) {
    Calendar cal = parseDateTime(dateTime);
    if (cal == null) {
      return -1;
    }
    int hour = cal.get(Calendar.HOUR_OF_DAY);
    int minute = cal.get(Calendar.MINUTE);
    return hour * 60 + minute;
  }

  /**
   * Extracts the date from a XMLTV time value.
   *
   * @param dateTime
   *          The value to extract the date from.
   * @return The date.
   */
  private Date extractDate(final String dateTime) {
    Calendar cal = parseDateTime(dateTime);
    if (cal == null) {
      return null;
    }
    return new devplugin.Date(cal.get(Calendar.YEAR), cal.get(Calendar.MONTH) + 1, cal.get(Calendar.DAY_OF_MONTH));
  }

  private synchronized Calendar parseDateTime(final String time) {
    try {
      Calendar calendar = Calendar.getInstance();
      calendar.setTime(DATE_FORMAT.parse(time.substring(0, 20)));
      calendar.setTimeInMillis(calendar.getTimeInMillis() - calendar.getTimeZone().getRawOffset());
      return calendar;
    } catch (ParseException e) {
      logMessage("invalid time format: " + time);
      return null;
    }
  }

  /**
   * XMLTV channel id of currently parsed program, i.e. the value of the
   * 'channel' attribute of the last 'programme' tag (null if it was missing)
   *
   * @return channel id
   */
  protected final String getChannelId() {
    return mChannelId;
  }

  /**
   * Adds a text to a field that builds a separated list of values (e.g. the
   * actor list, the genres or the directors).
   *
   * @param fieldType
   *          The type of the field to add the text to.
   * @param value
   *          The text to add.
   * @param separator
   *          separator between the entries of the list (", " for most fields,
   *          ",\n" for the actor list)
   */
  protected abstract void addToList(final ProgramFieldType fieldType, String value, final String separator);

}
TOP

Related Classes of util.misc.AbstractXmlTvDataHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.