Package org.apache.flume.source

Source Code of org.apache.flume.source.SyslogUtils

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.flume.source;

import org.apache.flume.Event;
import org.apache.flume.annotations.InterfaceAudience;
import org.apache.flume.annotations.InterfaceStability;
import org.apache.flume.event.EventBuilder;
import org.jboss.netty.buffer.ChannelBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@InterfaceAudience.Private
@InterfaceStability.Evolving
public class SyslogUtils {
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_2 = "yyyy-MM-dd'T'HH:mm:ss.SZ";
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_1 = "yyyy-MM-dd'T'HH:mm:ss.S";
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_3 = "yyyy-MM-dd'T'HH:mm:ssZ";
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC5424_4 = "yyyy-MM-dd'T'HH:mm:ss";
  final public static String SYSLOG_TIMESTAMP_FORMAT_RFC3164_1 = "yyyyMMM d HH:mm:ss";

  final public static String SYSLOG_MSG_RFC5424_0 =
      "(?:\\<(\\d{1,3})\\>)" + // priority
      "(?:(\\d?)\\s?)" + // version
      /* yyyy-MM-dd'T'HH:mm:ss.SZ or yyyy-MM-dd'T'HH:mm:ss.S+hh:mm or - (null stamp) */
      "(?:" +
        "(\\d{4}[-]\\d{2}[-]\\d{2}[T]\\d{2}[:]\\d{2}[:]\\d{2}" +
        "(?:\\.\\d{1,6})?(?:[+-]\\d{2}[:]\\d{2}|Z)?)|-)" + // stamp
      "\\s" + // separator
      "(?:([\\w][\\w\\d\\.@\\-]*)|-)" + // host name or - (null)
      "\\s" + // separator
      "(.*)$"; // body

  final public static String SYSLOG_MSG_RFC3164_0 =
      "(?:\\<(\\d{1,3})\\>)" +
      "(?:(\\d)?\\s?)" + // version
      // stamp MMM d HH:mm:ss, single digit date has two spaces
      "([A-Z][a-z][a-z]\\s{1,2}\\d{1,2}\\s\\d{2}[:]\\d{2}[:]\\d{2})" +
      "\\s" + // separator
      "([\\w][\\w\\d\\.@-]*)" + // host
      "\\s(.*)$"// body

  final public static int SYSLOG_PRIORITY_POS = 1;
  final public static int SYSLOG_VERSION_POS = 2;
  final public static int SYSLOG_TIMESTAMP_POS = 3;
  final public static int SYSLOG_HOSTNAME_POS = 4;
  final public static int SYSLOG_BODY_POS = 5;

  private Mode m = Mode.START;
  private StringBuilder prio = new StringBuilder();
  private ByteArrayOutputStream baos;
  private static final Logger logger = LoggerFactory
      .getLogger(SyslogUtils.class);

  final public static String SYSLOG_FACILITY = "Facility";
  final public static String SYSLOG_SEVERITY = "Severity";
  final public static String SYSLOG_PRIORITY = "Priority";
  final public static String SYSLOG_VERSION = "Version";
  final public static String EVENT_STATUS = "flume.syslog.status";
  final public static Integer MIN_SIZE = 10;
  final public static Integer DEFAULT_SIZE = 2500;
  private final boolean isUdp;
  private boolean isBadEvent;
  private boolean isIncompleteEvent;
  private Integer maxSize;
  private Set<String> keepFields;

  private class SyslogFormatter {
    public Pattern regexPattern;
    public ArrayList<String> searchPattern = new ArrayList<String>();
    public ArrayList<String> replacePattern = new ArrayList<String>();
    public ArrayList<SimpleDateFormat> dateFormat = new ArrayList<SimpleDateFormat>();
    public boolean addYear;
  }
  private ArrayList<SyslogFormatter> formats = new ArrayList<SyslogFormatter>();

  private String priority = null;
  private String version = null;
  private String timeStamp = null;
  private String hostName = null;
  private String msgBody = null;

  private static final String[] DEFAULT_FIELDS_TO_KEEP = {
    SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_PRIORITY,
    SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_VERSION,
    SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_TIMESTAMP,
    SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_HOSTNAME
  };
  public static final String KEEP_FIELDS_ALL = "--all--";

  public static boolean keepAllFields(Set<String> keepFields) {
    if (keepFields == null) {
      return false;
    }
    return keepFields.contains(KEEP_FIELDS_ALL);
  }

  public static Set<String> chooseFieldsToKeep(String keepFields) {
    if (keepFields == null) {
      return null;
    }

    keepFields = keepFields.trim().toLowerCase(Locale.ENGLISH);

    if (keepFields.equals("false") || keepFields.equals("none")) {
      return null;
    }

    if (keepFields.equals("true") || keepFields.equals("all")) {
      Set<String> fieldsToKeep = new HashSet<String>(1);
      fieldsToKeep.add(KEEP_FIELDS_ALL);
      return fieldsToKeep;
    }

    Set<String> fieldsToKeep = new HashSet<String>(DEFAULT_FIELDS_TO_KEEP.length);

    for (String field : DEFAULT_FIELDS_TO_KEEP) {
      if (keepFields.indexOf(field) != -1) {
        fieldsToKeep.add(field);
      }
    }

    return fieldsToKeep;
  }

  public static String addFieldsToBody(Set<String> keepFields,
                                       String body,
                                       String priority,
                                       String version,
                                       String timestamp,
                                       String hostname) {
    // Prepend fields to be kept in message body.
    if (keepFields != null) {
      if (keepFields.contains(SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_HOSTNAME)) {
        body = hostname + " " + body;
      }
      if (keepFields.contains(SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_TIMESTAMP)) {
        body = timestamp + " " + body;
      }
      if (keepFields.contains(SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_VERSION)) {
        if (version != null && !version.isEmpty()) {
          body = version + " " + body;
        }
      }
      if (keepFields.contains(SyslogSourceConfigurationConstants.CONFIG_KEEP_FIELDS_PRIORITY)) {
        body = "<" + priority + ">" + body;
      }
    }

    return body;
  }

  public SyslogUtils() {
    this(false);
  }

  public SyslogUtils(boolean isUdp) {
    this(DEFAULT_SIZE,
        new HashSet<String>(Arrays.asList(SyslogSourceConfigurationConstants.DEFAULT_KEEP_FIELDS)),
        isUdp);
  }

  public SyslogUtils(Integer eventSize, Set<String> keepFields, boolean isUdp) {
    this.isUdp = isUdp;
    isBadEvent = false;
    isIncompleteEvent = false;
    maxSize = (eventSize < MIN_SIZE) ? MIN_SIZE : eventSize;
    baos = new ByteArrayOutputStream(eventSize);
    this.keepFields = keepFields;
    initHeaderFormats();
  }

  // extend the default header formatter
  public void addFormats(Map<String, String> formatProp) {
    if (formatProp.isEmpty() || !formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_REGEX)) {
      return;
    }
    SyslogFormatter fmt1 = new SyslogFormatter();
    fmt1.regexPattern = Pattern.compile( formatProp.get(
        SyslogSourceConfigurationConstants.CONFIG_REGEX) );
    if (formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_SEARCH)) {
      fmt1.searchPattern.add(formatProp.get(
          SyslogSourceConfigurationConstants.CONFIG_SEARCH));
    }
    if (formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_REPLACE)) {
      fmt1.replacePattern.add(formatProp.get(
          SyslogSourceConfigurationConstants.CONFIG_REPLACE));
    }
    if (formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_DATEFORMAT)) {
        fmt1.dateFormat.add(new SimpleDateFormat(formatProp.get(
            SyslogSourceConfigurationConstants.CONFIG_DATEFORMAT)));
    }
    formats.add(0, fmt1);
  }

  // setup built-in formats
  private void initHeaderFormats() {
    // setup RFC5424 formater
    SyslogFormatter fmt1 = new SyslogFormatter();
    fmt1.regexPattern = Pattern.compile(SYSLOG_MSG_RFC5424_0);
    // 'Z' in timestamp indicates UTC zone, so replace it it with '+0000' for date formatting
    fmt1.searchPattern.add("Z");
    fmt1.replacePattern.add("+0000");
    // timezone in RFC5424 is [+-]tt:tt, so remove the ':' for java date formatting
    fmt1.searchPattern.add("([+-])(\\d{2})[:](\\d{2})");
    fmt1.replacePattern.add("$1$2$3");
    // FLUME-2497: SimpleDateFormat does not handle microseconds, Truncate after 3 digits.
    fmt1.searchPattern.add("(T\\d{2}:\\d{2}:\\d{2}\\.\\d{3})(\\d*)");
    fmt1.replacePattern.add("$1");
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_1, Locale.ENGLISH));
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_2, Locale.ENGLISH));
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_3, Locale.ENGLISH));
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_4, Locale.ENGLISH));
    fmt1.addYear = false;

    // setup RFC3164 formater
    SyslogFormatter fmt2 = new SyslogFormatter();
    fmt2.regexPattern = Pattern.compile(SYSLOG_MSG_RFC3164_0);
    // the single digit date has two spaces, so trim it
    fmt2.searchPattern.add("  ");
    fmt2.replacePattern.add(" ");
    fmt2.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC3164_1, Locale.ENGLISH));
    fmt2.addYear = true;

    formats.add(fmt1);
    formats.add(fmt2);
  }

  enum Mode {
    START, PRIO, DATA
  };

  public enum SyslogStatus{
    OTHER("Unknown"),
    INVALID("Invalid"),
    INCOMPLETE("Incomplete");

    private final String syslogStatus;

    private SyslogStatus(String status){
      syslogStatus = status;
    }

    public String getSyslogStatus(){
      return this.syslogStatus;
    }
  }

  // create the event from syslog data
  Event buildEvent() {
    byte[] body;
    int pri = 0;
    int sev = 0;
    int facility = 0;

    if(!isBadEvent){
      pri = Integer.parseInt(prio.toString());
      sev = pri % 8;
      facility = pri / 8;
      formatHeaders();
    }

    Map <String, String> headers = new HashMap<String, String>();
    headers.put(SYSLOG_FACILITY, String.valueOf(facility));
    headers.put(SYSLOG_SEVERITY, String.valueOf(sev));
    if ((priority != null) && (priority.length() > 0)) {
      headers.put("priority", priority);
    }
    if ((version != null) && (version.length() > 0)) {
      headers.put("version", version);
    }
    if ((timeStamp != null) && timeStamp.length() > 0) {
      headers.put("timestamp", timeStamp);
    }
    if ((hostName != null) && (hostName.length() > 0)) {
      headers.put("host", hostName);
    }
    if(isBadEvent){
      logger.warn("Event created from Invalid Syslog data.");
      headers.put(EVENT_STATUS, SyslogStatus.INVALID.getSyslogStatus());
    } else if(isIncompleteEvent){
      logger.warn("Event size larger than specified event size: {}. You should " +
          "consider increasing your event size.", maxSize);
      headers.put(EVENT_STATUS, SyslogStatus.INCOMPLETE.getSyslogStatus());
    }

    if (!keepAllFields(keepFields)) {
      if ((msgBody != null) && (msgBody.length() > 0)) {
        body = msgBody.getBytes();
      } else {
        // Parse failed.
        body = baos.toByteArray();
      }
    } else {
      body = baos.toByteArray();
    }
    reset();
    // format the message
    return EventBuilder.withBody(body, headers);
  }

  // Apply each known pattern to message
  private void formatHeaders() {
    String eventStr = baos.toString();
    String timeStampString = null;

    for(int p=0; p < formats.size(); p++) {
      SyslogFormatter fmt = formats.get(p);
      Pattern pattern = fmt.regexPattern;
      Matcher matcher = pattern.matcher(eventStr);
      if (! matcher.matches()) {
        continue;
      }
      MatchResult res = matcher.toMatchResult();
      for (int grp=1; grp <= res.groupCount(); grp++) {
        String value = res.group(grp);
        if (grp == SYSLOG_TIMESTAMP_POS) {
          timeStampString = value;

          // apply available format replacements to timestamp
          if (value != null) {
            for (int sp=0; sp < fmt.searchPattern.size(); sp++) {
              value = value.replaceAll(fmt.searchPattern.get(sp), fmt.replacePattern.get(sp));
            }
            // Add year to timestamp if needed
            if (fmt.addYear) {
              value = String.valueOf(Calendar.getInstance().get(Calendar.YEAR)) + value;
            }
            // try the available time formats to timestamp
            for (int dt = 0; dt < fmt.dateFormat.size(); dt++) {
              try {
                timeStamp = String.valueOf(fmt.dateFormat.get(dt).parse(value).getTime());
                break; // done. formatted the time
              } catch (ParseException e) {
                // Error formatting the timeStamp, try next format
                continue;
              }
            }
          }
        } else if (grp == SYSLOG_HOSTNAME_POS) {
          hostName = value;
        } else if (grp == SYSLOG_PRIORITY_POS) {
          priority = value;
        } else if (grp == SYSLOG_VERSION_POS) {
          version = value;
        } else if (grp == SYSLOG_BODY_POS) {
          msgBody = addFieldsToBody(keepFields, value, priority, version, timeStampString, hostName);
        }
      }
      break; // we successfully parsed the message using this pattern
    }
  }

  private void reset(){
    baos.reset();
    m = Mode.START;
    prio.delete(0, prio.length());
    isBadEvent = false;
    isIncompleteEvent = false;
    hostName = null;
    timeStamp = null;
    msgBody = null;
  }

  // extract relevant syslog data needed for building Flume event
  public Event extractEvent(ChannelBuffer in){

    /* for protocol debugging
    ByteBuffer bb = in.toByteBuffer();
    int remaining = bb.remaining();
    byte[] buf = new byte[remaining];
    bb.get(buf);
    HexDump.dump(buf, 0, System.out, 0);
    */

    byte b = 0;
    Event e = null;
    boolean doneReading = false;

    try {
      while (!doneReading && in.readable()) {
        b = in.readByte();
        switch (m) {
        case START:
          if (b == '<') {
            baos.write(b);
            m = Mode.PRIO;
          } else if(b == '\n'){
            //If the character is \n, it was because the last event was exactly
            //as long  as the maximum size allowed and
            //the only remaining character was the delimiter - '\n', or
            //multiple delimiters were sent in a row.
            //Just ignore it, and move forward, don't change the mode.
            //This is a no-op, just ignore it.
            logger.debug("Delimiter found while in START mode, ignoring..");

          } else {
            isBadEvent = true;
            baos.write(b);
            //Bad event, just dump everything as if it is data.
            m = Mode.DATA;
          }
          break;
        case PRIO:
          baos.write(b);
          if (b == '>') {
            m = Mode.DATA;
          } else {
            char ch = (char) b;
            prio.append(ch);
            if (!Character.isDigit(ch)) {
              isBadEvent = true;
              //If we hit a bad priority, just write as if everything is data.
              m = Mode.DATA;
            }
          }
          break;
        case DATA:
          // TCP syslog entries are separated by '\n'
          if (b == '\n') {
            e = buildEvent();
            doneReading = true;
          } else {
            baos.write(b);
          }
          if(baos.size() == this.maxSize && !doneReading){
            isIncompleteEvent = true;
            e = buildEvent();
            doneReading = true;
          }
          break;
        }

      }

      // UDP doesn't send a newline, so just use what we received
      if (e == null && isUdp) {
        doneReading = true;
        e = buildEvent();
      }
    } finally {
      // no-op
    }

    return e;
  }

  public Integer getEventSize() {
    return maxSize;
  }

  public void setEventSize(Integer eventSize) {
    this.maxSize = eventSize;
  }

  public void setKeepFields(Set<String> keepFields) {
    this.keepFields = keepFields;
  }
}

TOP

Related Classes of org.apache.flume.source.SyslogUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.