Package org.apache.flume.serialization

Source Code of org.apache.flume.serialization.SyslogAvroEventSerializer$Builder

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.serialization;

import com.google.common.base.Charsets;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.serialization.SyslogAvroEventSerializer.SyslogEvent;
import org.apache.flume.source.SyslogUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* This class exists to give an idea of how to use the AvroEventWriter
* and is not intended for inclusion in the Flume core.<br/>
* Problems with it are:<br/>
* (1) assumes very little parsing is done at the first hop (more TBD)<br/>
* (2) no field has been defined for use as a UUID for deduping<br/>
* (3) tailored to syslog messages but not specific to any application<br/>
* (4) not efficient about data copying from an implementation perspective<br/>
* Often, it makes more sense to parse your (meta-)data out of the message part
* itself and then store that in an application-specific Avro schema.
*/
public class SyslogAvroEventSerializer
    extends AbstractAvroEventSerializer<SyslogEvent> {

  private static final DateTimeFormatter dateFmt1 =
      DateTimeFormat.forPattern("MMM dd HH:mm:ss").withZoneUTC();
  private static final DateTimeFormatter dateFmt2 =
      DateTimeFormat.forPattern("MMM  d HH:mm:ss").withZoneUTC();

  private static final Logger logger =
      LoggerFactory.getLogger(SyslogAvroEventSerializer.class);

  // It's usually better to embed this schema in the class as a string.
  // Avro does this for you if you generate Java classes from a schema file.
  // But since this is a test class, having the schema in an .avsc file is more
  // readable. Should probably just use the maven avro plugin to generate
  // the inner SyslogEvent class from this file.
  private static final File schemaFile =
      new File("src/test/resources/syslog_event.avsc");

  private final OutputStream out;
  private final Schema schema;

  public SyslogAvroEventSerializer(OutputStream out) throws IOException {
    this.out = out;
    this.schema = new Schema.Parser().parse(schemaFile);
  }

  @Override
  protected OutputStream getOutputStream() {
    return out;
  }

  @Override
  protected Schema getSchema() {
    return schema;
  }

  // very simple rfc3164 parser
  @Override
  protected SyslogEvent convert(Event event) {
    SyslogEvent sle = new SyslogEvent();

    // Stringify body so it's easy to parse.
    // This is a pretty inefficient way to do it.
    String msg = new String(event.getBody(), Charsets.UTF_8);

    // parser read pointer
    int seek = 0;

    // Check Flume headers to see if we came from SyslogTcp(or UDP) Source,
    // which at the time of this writing only parses the priority.
    // This is a bit schizophrenic and it should parse all the fields or none.
    Map<String, String> headers = event.getHeaders();
    boolean fromSyslogSource = false;
    if (headers.containsKey(SyslogUtils.SYSLOG_FACILITY)) {
      fromSyslogSource = true;
      int facility = Integer.parseInt(headers.get(SyslogUtils.SYSLOG_FACILITY));
      sle.setFacility(facility);
    }
    if (headers.containsKey(SyslogUtils.SYSLOG_SEVERITY)) {
      fromSyslogSource = true;
      int severity = Integer.parseInt(headers.get(SyslogUtils.SYSLOG_SEVERITY));
      sle.setSeverity(severity);
    }

    // assume the message was received raw (maybe via NetcatSource)
    // parse the priority string
    if (!fromSyslogSource) {
      if (msg.charAt(0) == '<') {
        int end = msg.indexOf(">");
        if (end > -1) {
          seek = end + 1;
          String priStr = msg.substring(1, end);
          int priority = Integer.parseInt(priStr);
          int severity = priority % 8;
          int facility = (priority - severity) / 8;
          sle.setFacility(facility);
          sle.setSeverity(severity);
        }
      }
    }

    // parse the timestamp
    String timestampStr = msg.substring(seek, seek + 15);
    long ts = parseRfc3164Date(timestampStr);
    if (ts != 0) {
      sle.setTimestamp(ts);
      seek += 15 + 1; // space after timestamp
    }

    // parse the hostname
    int nextSpace = msg.indexOf(' ', seek);
    if (nextSpace > -1) {
      String hostname = msg.substring(seek, nextSpace);
      sle.setHostname(hostname);
      seek = nextSpace + 1;
    }

    // everything else is the message
    String actualMessage = msg.substring(seek);
    sle.setMessage(actualMessage);

    logger.debug("Serialized event as: {}", sle);

    return sle;
  }

  /**
   * Returns epoch time in millis, or 0 if the string cannot be parsed.
   * We use two date formats because the date spec in rfc3164 is kind of weird.
   * <br/>
   * <b>Warning:</b> logic is used here to determine the year even though it's
   * not part of the timestamp format, and we assume that the machine running
   * Flume has a clock that is at least close to the same day as the machine
   * that generated the event. We also assume that the event was generated
   * recently.
   */
  private static long parseRfc3164Date(String in) {
    DateTime date = null;
    try {
      date = dateFmt1.parseDateTime(in);
    } catch (IllegalArgumentException e) {
      // ignore the exception, we act based on nullity of date object
      logger.debug("Date parse failed on ({}), trying single-digit date", in);
    }

    if (date == null) {
      try {
        date = dateFmt2.parseDateTime(in);
      } catch (IllegalArgumentException e) {
        // ignore the exception, we act based on nullity of date object
        logger.debug("2nd date parse failed on ({}), unknown date format", in);
      }
    }

    // hacky stuff to try and deal with boundary cases, i.e. new year's eve.
    // rfc3164 dates are really dumb.
    // NB: cannot handle replaying of old logs or going back to the future
    if (date != null) {
      DateTime now = new DateTime();
      int year = now.getYear();
      DateTime corrected = date.withYear(year);

      // flume clock is ahead or there is some latency, and the year rolled
      if (corrected.isAfter(now) && corrected.minusMonths(1).isAfter(now)) {
        corrected = date.withYear(year - 1);
      // flume clock is behind and the year rolled
      } else if (corrected.isBefore(now) && corrected.plusMonths(1).isBefore(now)) {
        corrected = date.withYear(year + 1);
      }
      date = corrected;
    }

    if (date == null) {
      return 0;
    }

    return date.getMillis();
  }

  public static class Builder implements EventSerializer.Builder {

    @Override
    public EventSerializer build(Context context, OutputStream out) {
      SyslogAvroEventSerializer writer = null;
      try {
        writer = new SyslogAvroEventSerializer(out);
        writer.configure(context);
      } catch (IOException e) {
        logger.error("Unable to parse schema file. Exception follows.", e);
      }
      return writer;
    }

  }

  // This class would ideally be generated from the avro schema file,
  // but we are letting reflection do the work instead.
  // There's no great reason not to let Avro generate it.
  public static class SyslogEvent {
    private int facility;
    private int severity;
    private long timestamp;
    private String hostname = "";
    private String message = "";

    public void setFacility(int f) { facility = f; }
    public int getFacility() { return facility; }

    public void setSeverity(int s) { severity = s; }
    public int getSeverity() { return severity; }

    public void setTimestamp(long t) { timestamp = t; }
    public long getTimestamp() { return timestamp; }

    public void setHostname(String h) { hostname = h; }
    public String getHostname() { return hostname; }

    public void setMessage(String m) { message = m; }
    public String getMessage() { return message; }

    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder();
      builder.append("{ Facility: ").append(facility).append(", ");
      builder.append(" Severity: ").append(severity).append(", ");
      builder.append(" Timestamp: ").append(timestamp).append(", ");
      builder.append(" Hostname: ").append(hostname).append(", ");
      builder.append(" Message: \"").append(message).append("\" }");
      return builder.toString();
    }
  }
}
TOP

Related Classes of org.apache.flume.serialization.SyslogAvroEventSerializer$Builder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.