/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.serialization;
import com.google.common.base.Charsets;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.serialization.SyslogAvroEventSerializer.SyslogEvent;
import org.apache.flume.source.SyslogUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class exists to give an idea of how to use the AvroEventWriter
* and is not intended for inclusion in the Flume core.<br/>
* Problems with it are:<br/>
* (1) assumes very little parsing is done at the first hop (more TBD)<br/>
* (2) no field has been defined for use as a UUID for deduping<br/>
* (3) tailored to syslog messages but not specific to any application<br/>
* (4) not efficient about data copying from an implementation perspective<br/>
* Often, it makes more sense to parse your (meta-)data out of the message part
* itself and then store that in an application-specific Avro schema.
*/
public class SyslogAvroEventSerializer
extends AbstractAvroEventSerializer<SyslogEvent> {
// Timestamp pattern using the "dd" day-of-month field, with the formatter
// pinned to UTC. parseRfc3164Date() tries this formatter first.
private static final DateTimeFormatter dateFmt1 =
DateTimeFormat.forPattern("MMM dd HH:mm:ss").withZoneUTC();
// Fallback timestamp pattern using the "d" day-of-month field, also pinned
// to UTC. Only consulted when dateFmt1 fails to parse the input.
private static final DateTimeFormatter dateFmt2 =
DateTimeFormat.forPattern("MMM d HH:mm:ss").withZoneUTC();
// Class-wide SLF4J logger; also used by the nested Builder.
private static final Logger logger =
LoggerFactory.getLogger(SyslogAvroEventSerializer.class);
// It's usually better to embed this schema in the class as a string.
// Avro does this for you if you generate Java classes from a schema file.
// But since this is a test class, having the schema in an .avsc file is more
// readable. Should probably just use the maven avro plugin to generate
// the inner SyslogEvent class from this file.
// NOTE(review): this is a relative path, so it presumably only resolves when
// the process runs from the module root -- confirm before using elsewhere.
private static final File schemaFile =
new File("src/test/resources/syslog_event.avsc");
// Destination stream supplied to the constructor; exposed via getOutputStream().
private final OutputStream out;
// Avro schema parsed from schemaFile in the constructor; exposed via getSchema().
private final Schema schema;
/**
 * Creates a serializer bound to the given stream and loads the Avro schema
 * from {@code schemaFile}.
 *
 * @param out stream later returned by {@link #getOutputStream()}
 * @throws IOException propagated from {@code Schema.Parser#parse(File)} when
 *         the schema file cannot be parsed
 */
public SyslogAvroEventSerializer(OutputStream out) throws IOException {
  Schema.Parser schemaParser = new Schema.Parser();
  this.schema = schemaParser.parse(schemaFile);
  this.out = out;
}
/**
 * Returns the stream this serializer was constructed with.
 *
 * @return the output stream passed to the constructor
 */
@Override
protected OutputStream getOutputStream() {
  return this.out;
}
/**
 * Returns the Avro schema that was parsed from the schema file at
 * construction time.
 *
 * @return the schema loaded by the constructor
 */
@Override
protected Schema getSchema() {
  return this.schema;
}
/**
 * Converts a Flume event into a {@link SyslogEvent} using a very simple,
 * best-effort rfc3164 parser.
 * <p>
 * Facility and severity are taken from the event headers when present;
 * otherwise a leading {@code <PRI>} token in the body is decoded. The next
 * 15 characters are then tried as a timestamp, the following
 * space-delimited word is stored as the hostname, and whatever remains
 * becomes the message.
 * <p>
 * Parsing is tolerant of short or malformed bodies: an empty body, a body
 * too short to contain a timestamp, or a non-numeric priority no longer
 * raise an exception; the affected field is simply left at its default and
 * the unparsed text stays in the message.
 *
 * @param event the event whose body and headers are parsed
 * @return a populated {@link SyslogEvent}
 */
@Override
protected SyslogEvent convert(Event event) {
  SyslogEvent sle = new SyslogEvent();

  // Stringify body so it's easy to parse.
  // This is a pretty inefficient way to do it.
  String msg = new String(event.getBody(), Charsets.UTF_8);
  final int msgLength = msg.length();

  // parser read pointer
  int seek = 0;

  // Check Flume headers to see if we came from SyslogTcp(or UDP) Source,
  // which at the time of this writing only parses the priority.
  // This is inconsistent: it should parse all the fields or none.
  Map<String, String> headers = event.getHeaders();
  boolean fromSyslogSource = false;
  if (headers.containsKey(SyslogUtils.SYSLOG_FACILITY)) {
    fromSyslogSource = true;
    int facility = Integer.parseInt(headers.get(SyslogUtils.SYSLOG_FACILITY));
    sle.setFacility(facility);
  }
  if (headers.containsKey(SyslogUtils.SYSLOG_SEVERITY)) {
    fromSyslogSource = true;
    int severity = Integer.parseInt(headers.get(SyslogUtils.SYSLOG_SEVERITY));
    sle.setSeverity(severity);
  }

  // assume the message was received raw (maybe via NetcatSource)
  // parse the priority string; guard against an empty body
  if (!fromSyslogSource && msgLength > 0 && msg.charAt(0) == '<') {
    int end = msg.indexOf('>');
    if (end > -1) {
      String priStr = msg.substring(1, end);
      try {
        int priority = Integer.parseInt(priStr);
        int severity = priority % 8;
        int facility = (priority - severity) / 8;
        sle.setFacility(facility);
        sle.setSeverity(severity);
        // only consume the token once we know it really was a priority
        seek = end + 1;
      } catch (NumberFormatException e) {
        // not a priority after all; leave the text in place as message data
        logger.debug("Ignoring unparseable syslog priority ({})", priStr);
      }
    }
  }

  // parse the timestamp, but only if enough characters remain for one
  final int timestampLength = 15;
  int timestampEnd = seek + timestampLength;
  if (timestampEnd <= msgLength) {
    String timestampStr = msg.substring(seek, timestampEnd);
    long ts = parseRfc3164Date(timestampStr);
    if (ts != 0) {
      sle.setTimestamp(ts);
      // skip the space after the timestamp without running past the end
      seek = Math.min(timestampEnd + 1, msgLength);
    }
  }

  // parse the hostname
  int nextSpace = msg.indexOf(' ', seek);
  if (nextSpace > -1) {
    String hostname = msg.substring(seek, nextSpace);
    sle.setHostname(hostname);
    seek = nextSpace + 1;
  }

  // everything else is the message
  String actualMessage = msg.substring(seek);
  sle.setMessage(actualMessage);

  logger.debug("Serialized event as: {}", sle);

  return sle;
}
/**
 * Parses an rfc3164-style timestamp and returns epoch millis, or 0 when
 * neither supported pattern matches.
 * <br/>
 * Two formatters are tried in turn ({@code dateFmt1}, then
 * {@code dateFmt2}) because the date spec in rfc3164 is kind of weird.
 * <br/>
 * <b>Warning:</b> the timestamp carries no year, so one is guessed from the
 * local clock. This assumes the machine running Flume has a clock within
 * roughly a day of the machine that produced the event, and that the event
 * was generated recently. Replayed old logs or future-dated events are not
 * handled correctly.
 *
 * @param in the timestamp text to parse
 * @return epoch time in millis, or 0 if the text cannot be parsed
 */
private static long parseRfc3164Date(String in) {
  DateTime parsed = null;

  try {
    parsed = dateFmt1.parseDateTime(in);
  } catch (IllegalArgumentException firstFailure) {
    // deliberately ignored: a null result below triggers the fallback
    logger.debug("Date parse failed on ({}), trying single-digit date", in);
  }

  if (parsed == null) {
    try {
      parsed = dateFmt2.parseDateTime(in);
    } catch (IllegalArgumentException secondFailure) {
      // deliberately ignored: a null result below means "unparseable"
      logger.debug("2nd date parse failed on ({}), unknown date format", in);
    }
  }

  if (parsed == null) {
    return 0;
  }

  // hacky stuff to try and deal with boundary cases, i.e. new year's eve.
  // rfc3164 dates are really dumb.
  // NB: cannot handle replaying of old logs or going back to the future
  DateTime now = new DateTime();
  int currentYear = now.getYear();
  DateTime candidate = parsed.withYear(currentYear);

  // flume clock is ahead or there is some latency, and the year rolled
  if (candidate.isAfter(now) && candidate.minusMonths(1).isAfter(now)) {
    return parsed.withYear(currentYear - 1).getMillis();
  }

  // flume clock is behind and the year rolled
  if (candidate.isBefore(now) && candidate.plusMonths(1).isBefore(now)) {
    return parsed.withYear(currentYear + 1).getMillis();
  }

  return candidate.getMillis();
}
public static class Builder implements EventSerializer.Builder {
@Override
public EventSerializer build(Context context, OutputStream out) {
SyslogAvroEventSerializer writer = null;
try {
writer = new SyslogAvroEventSerializer(out);
writer.configure(context);
} catch (IOException e) {
logger.error("Unable to parse schema file. Exception follows.", e);
}
return writer;
}
}
// This class would ideally be generated from the avro schema file,
// but we are letting reflection do the work instead.
// There's no great reason not to let Avro generate it.
/**
 * Plain mutable holder for the fields extracted from one syslog message.
 * Numeric fields default to 0 and text fields default to the empty string.
 */
public static class SyslogEvent {
  private int facility;
  private int severity;
  private long timestamp;
  private String hostname = "";
  private String message = "";

  public void setFacility(int f) {
    this.facility = f;
  }

  public int getFacility() {
    return this.facility;
  }

  public void setSeverity(int s) {
    this.severity = s;
  }

  public int getSeverity() {
    return this.severity;
  }

  public void setTimestamp(long t) {
    this.timestamp = t;
  }

  public long getTimestamp() {
    return this.timestamp;
  }

  public void setHostname(String h) {
    this.hostname = h;
  }

  public String getHostname() {
    return this.hostname;
  }

  public void setMessage(String m) {
    this.message = m;
  }

  public String getMessage() {
    return this.message;
  }

  /**
   * Renders all five fields in a brace-delimited, human-readable form,
   * with the message wrapped in double quotes.
   */
  @Override
  public String toString() {
    return "{ Facility: " + facility + ", "
        + " Severity: " + severity + ", "
        + " Timestamp: " + timestamp + ", "
        + " Hostname: " + hostname + ", "
        + " Message: \"" + message + "\" }";
  }
}
}