Package com.google.sitebricks.mail.imap

Source Code of com.google.sitebricks.mail.imap.MessageStatusExtractor

package com.google.sitebricks.mail.imap;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.mail.internet.MailDateFormat;
import java.text.ParseException;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Extracts a MessageStatus from a partial IMAP fetch. Specifically
* a "fetch all" command which comes back with subject, sender, uid
* internaldate and rfc822.size (length).
* <p>
* A more robust form of fetch exists for message body parts which
* would handle email body, html mail, attachments, etc.
*
* @author dhanji@gmail.com (Dhanji R. Prasanna)
*/
class MessageStatusExtractor implements Extractor<List<MessageStatus>> {
  private static final Logger log = LoggerFactory.getLogger(MessageStatusExtractor.class);
  // 10 Sep 2011 14:19:55 -0700
  static final Pattern ALTERNATE_RECEIVED_DATE_PATTERN = Pattern.compile(
      "\\d?\\d \\w\\w\\w [0-9]{4} \\d?\\d:\\d?\\d:\\d?\\d [-+]?[0-9]{4}");

  static final DateTimeFormatter INTERNAL_DATE = DateTimeFormat.forPattern(
      "dd-MMM-yyyy HH:mm:ss Z");
  static final DateTimeFormatter ALTERNATE_INTERNAL_DATE = DateTimeFormat.forPattern(
      "MMM-yyyy HH:mm:ss Z");
  static final Pattern HELPFUL_NOTIFICATION_PATTERN = Pattern.compile("[*] \\d+ (EXISTS|EXPUNGE)\\s*",
      Pattern.CASE_INSENSITIVE);
  static final Pattern SIZE_MARKER = Pattern.compile("\\{(\\d+)\\}$", Pattern.MULTILINE);

  @Override
  public List<MessageStatus> extract(List<String> messages) {
    List<MessageStatus> statuses = Lists.newArrayList();
    for (int i = 0, messagesSize = messages.size(); i < messagesSize; i++) {
      String message = messages.get(i);
      if (null == message || message.isEmpty())
        continue;

      // Discard the success token and any EXISTS or EXPUNGE tokens.
      try {
        if (Command.isEndOfSequence(message) || HELPFUL_NOTIFICATION_PATTERN.matcher(message).matches())
          continue;
      } catch (ExtractionException ee) {
        log.error("Warning: error parsing email message status! {}", messages, ee);
        continue;
      }

      // The only newlines allowed are inside strings, so check whether the message
      // might have been split between lines and unfold as appropriate.
      boolean isUnterminatedString = isUnterminatedString(message, false);
      while (isUnterminatedString && (i + 1 < messagesSize)) {
        String next = messages.get(i + 1);
        message = message + '\n' + next;
        isUnterminatedString = isUnterminatedString(next, isUnterminatedString);
        // Skip next.
        i++;
      }

      // Newlines are actually also allowed outside strings if a length marker is specified.
      Matcher matcher = SIZE_MARKER.matcher(message);
      while (matcher.find()) {
        int size = Integer.parseInt(matcher.group(1));
        StringBuilder stringToken = new StringBuilder("\n");
        String rest = "";
        int newlines = 1;
        boolean done = false;
        while (stringToken.length() <= size + 1 && !done && (i + 1 < messagesSize)) {
          String next = messages.get(i + 1).trim();
          ++i;
          if (next.length() + stringToken.length() <= size) {
            stringToken.append(next).append('\n');
            newlines++;
          } else {
            int offset = Math.max(0, size - stringToken.length() - newlines * 2);
            stringToken.append(next.substring(0, offset));
            rest = next.substring(offset);

            // We could have over-counted as newlines are not always counted as 2 characters.
            // For sanity
            int bracketPos = rest.indexOf("((");
            if (bracketPos > 0 && rest.charAt(bracketPos - 1) == ' ')
              bracketPos--;

            int nilPos = rest.indexOf(" NIL");
            int delim = Math.min(bracketPos == -1 ? Integer.MAX_VALUE : bracketPos,
                nilPos == -1 ? Integer.MAX_VALUE : nilPos);
           
            if (delim == Integer.MAX_VALUE) {
              int spacePos = rest.indexOf(" ");
              delim = spacePos == -1 ? Integer.MAX_VALUE : spacePos;
            }
           
            if (delim > 0 && delim != Integer.MAX_VALUE) {
              stringToken.append(rest.substring(0, delim));
              rest = rest.substring(delim);
              done = true;
            }
          }
        }

        // Now take the extracted subject and compose it into the message header as though it
        // were quoted.
        message = matcher.replaceAll("");
        // Escape nested quotes:
        String newToken = stringToken.toString().replaceAll("\"", "\\\\\"");
        message += '"' + newToken + '"' + rest;
        // The new message string might have further size markers.
        matcher = SIZE_MARKER.matcher(message);
      }

      final MessageStatus messageStatus = parseStatus(message.replaceFirst("^[*] ", ""));
      if (messageStatus != null)
        statuses.add(messageStatus);
    }

    return statuses;
  }

  /**
   Check for string termination, will check for quote escaping, but only if it's escaped
   within a string... otherwise it's illegal and we'll treat it as a regular quote.
   A trailing backslash indicates a \CRLF was received (as envisaged in RFC 822 3.4.5).
   */
  @VisibleForTesting
  static boolean isUnterminatedString(String message, boolean alreadyInString) {
    boolean escaped = false;
    boolean inString = alreadyInString;
    for (int i = 0; i < message.length(); i++) {
      final char c = message.charAt(i);
      if (inString) {
        if (c == '\\') {
          escaped = !escaped;
        } else if (c == '"') {
          if (!escaped)
            inString = false;
          escaped = false;
        } else
          escaped = false;
      } else
        inString = c == '"';
    }
    return inString;
  }

  private static MessageStatus parseStatus(String message) {
    if (message.startsWith("BAD")) {
      log.warn("error, got BAD response ignoring: {}", message);
      return null;
    }

    Queue<String> tokens = Parsing.tokenize(message);
    MessageStatus status = new MessageStatus();
    try {
      // Assert that we have an envelope.
      Parsing.match(tokens, int.class);
      Parsing.eat(tokens, "FETCH", "(");

      while (!tokens.isEmpty()) {
        boolean match = parseUid(tokens, status);
        match |= parseEnvelope(tokens, status);
        match |= parseFlags(tokens, status);
        match |= parseInternalDate(tokens, status);
        match |= parseRfc822Size(tokens, status);

        match |= parseGmailUid(tokens, status);
        match |= parseGmailThreadId(tokens, status);
        match |= parseGmailLabels(tokens, status);

        if (!match) {
          break;
        }
      }
    } catch (IllegalArgumentException e) {
      log.warn("Error parsing status: {}", message);
      throw e;
    }

    // We don't really need to bother closing the last ')'

    return status;
  }

  private static boolean parseRfc822Size(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "RFC822.SIZE") == null)
      return false;
    status.setSize(Parsing.match(tokens, int.class));
    return true;
  }

  private static boolean parseGmailThreadId(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "X-GM-THRID") == null)
      return false;
    status.setThreadId(Parsing.match(tokens, long.class));
    return true;
  }

  private static boolean parseGmailUid(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "X-GM-MSGID") == null)
      return false;
    status.setGmailMsgId(Parsing.match(tokens, long.class));
    return true;
  }

  private static boolean parseInternalDate(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "INTERNALDATE") == null)
      return false;

    String internalDate = tokens.peek();
    if (Parsing.isValid(internalDate)) {
      internalDate = Parsing.normalizeDateToken(Parsing.match(tokens, String.class));
      DateTime dateTime;
      try {
        dateTime = INTERNAL_DATE.parseDateTime(internalDate);
      } catch (IllegalArgumentException e) {
        try {
          dateTime = ALTERNATE_INTERNAL_DATE.parseDateTime(internalDate);
        } catch (IllegalArgumentException iae) {
          log.warn("Unable to extract internal date--message may be incomplete", e);
          dateTime = null;
        }
      }

      if (dateTime != null)
        status.setInternalDate(dateTime.toDate());
    }

    return true;
  }

  public static boolean parseFlags(Queue<String> tokens, MessageStatus status) {
    Set<Flag> flags = Flag.parseFlagList(tokens);
    if (flags == null)
      return false;
    status.getFlags().addAll(flags);
    return true;
  }

  private static boolean parseUid(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "UID") == null)
      return false;
    status.setImapUid(Parsing.match(tokens, int.class));
    return true;
  }

  private static boolean parseGmailLabels(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "X-GM-LABELS") == null)
      return false;
    Parsing.eat(tokens, "(");

    // Create a label set for this message status only if there are labels.
    status.setLabels(Sets.<String>newHashSet());

    // Check if there are labels to add.
    while (!")".equals(tokens.peek())) { // \Inbox
      String token = tokens.poll();

      // HACK: horrible hack!!!
      // The original Parser incorrectly left escaped backslashes intact. We now
      // emulate this by putting them back in...
      // this code replaces all single backslashes (escaped here as "\\\\") to double backslashes.
      token = token.replaceAll("\\\\", "\\\\\\\\");

      status.getLabels().add(token);
    }
    Parsing.eat(tokens, ")");
    return true;
  }

  private static boolean parseEnvelope(Queue<String> tokens, MessageStatus status) {
    if (Parsing.matchAnyOf(tokens, "ENVELOPE") == null)
      return false;
    Parsing.eat(tokens, "(");

    String receivedDate = tokens.peek();
    if (Parsing.isValid(receivedDate)) {
      receivedDate = Parsing.normalizeDateToken(Parsing.match(tokens, String.class));
      try {
        MailDateFormat dateFormat = new MailDateFormat();
        dateFormat.setTimeZone(TimeZone.getTimeZone("Australia/Sydney"));
        status.setReceivedDate(dateFormat.parse(receivedDate));
      } catch (ParseException e) {
        log.error("Malformed received date format {}. Unable to parse.", receivedDate, e);
      }
    } else if (receivedDate != null) {
      Parsing.eat(tokens, "NIL");
    }

    status.setSubject(Parsing.decode(Parsing.match(tokens, String.class)));

    status.setFrom(Parsing.readAddresses(tokens));
    status.setSender(Parsing.readAddresses(tokens));
    status.setReplyTo(Parsing.readAddresses(tokens));
    status.setTo(Parsing.readAddresses(tokens));
    status.setCc(Parsing.readAddresses(tokens));
    status.setBcc(Parsing.readAddresses(tokens));

    status.setInReplyTo(Parsing.match(tokens, String.class));
    status.setMessageUid(Parsing.match(tokens, String.class));

    // Close envelope.
    Parsing.eat(tokens, ")");
    return true;
  }
}
TOP

Related Classes of com.google.sitebricks.mail.imap.MessageStatusExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.