Package com.cloudera.flume.handlers.text

Source Code of com.cloudera.flume.handlers.text.SyslogEntryFormat

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.text;

import java.io.IOException;
import java.io.OutputStream;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.Event.Priority;
import com.cloudera.flume.handlers.text.FormatFactory.OutputFormatBuilder;
import com.cloudera.flume.handlers.text.output.AbstractOutputFormat;
import com.cloudera.flume.handlers.text.output.OutputFormat;
import com.cloudera.util.Clock;
import com.google.common.base.Preconditions;

/**
* This extracts values from a single text syslog line. The extract function
* needs to have a year specified because the syslog format does not specify a
* year!
*/
public class SyslogEntryFormat extends AbstractOutputFormat implements
    InputFormat {

  final static Pattern SYSLOG_PAT = Pattern
      .compile("(\\S{3} \\d{1,2} \\d{2}:\\d{2}:\\d{2}) (\\S+) ([^:]*?)(:(.*))?");
  private static final String NAME = "syslog";

  // Not static because of concurrency bug in JDK on static DateFormats
  // see: http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6231579
  final DateFormat SYSLOG_DF = new SimpleDateFormat("MMM dd HH:mm:ss");

  int year;

  public SyslogEntryFormat() {
    this(Calendar.getInstance().get(Calendar.YEAR));
  }

  public SyslogEntryFormat(int year) {
    this.year = year;
  }

  public Event extract(String s, int year) throws EventExtractException {
    Matcher m = SYSLOG_PAT.matcher(s);
    if (!m.matches())
      throw new EventExtractException("Does not match syslog format! " + s);

    String date = m.group(1);
    Date d = null;
    try {
      d = SYSLOG_DF.parse(date);
    } catch (ParseException e) {
      throw new EventExtractException("Invalid date format: " + date);
    }

    Calendar c = Calendar.getInstance();
    c.setTime(d);
    c.set(Calendar.YEAR, year);
    d = c.getTime();

    String host = m.group(2);

    // TODO(jon) body should be the raw entry, and a new field should be
    // created for this instead.
    String body = m.group(5); // TODO (jon) make this another field
    String service = m.group(3);
    if (body == null || body.length() == 0) {
      // body = service;
      service = null;
    }

    Map<String, byte[]> fields = new HashMap<String, byte[]>();
    if (service != null)
      fields.put("service", service.getBytes());
    // Event e = new EventImpl(body.getBytes(), d.getTime(), Priority.INFO,
    Event e = new EventImpl(s.getBytes(), d.getTime(), Priority.INFO,
        Clock.nanos(), host, fields);
    return e;

  }

  @Override
  public Event extract(String s) throws EventExtractException {
    return extract(s, this.year);
  }

  /**
   * This outputs a single line log entry similar to that generate by
   * syslog/syslog-ng.
   *
   * It is generally in the form:
   *
   * <date> <sourcehost> <service>: <message body>
   *
   * Here is an example:
   *
   * Aug 21 08:02:39 soundwave NetworkManager: <info> (wlan0): supplicant
   *
   */
  private String format(Event e) {
    StringBuilder b = new StringBuilder();
    b.append(SYSLOG_DF.format(new Date(e.getTimestamp())));
    b.append(" ");
    b.append(e.getHost());
    b.append(" ");
    byte[] svc = e.get("service");
    if (svc != null) {
      b.append(new String(svc));
      b.append(": ");
    }
    b.append(new String(e.getBody()));
    b.append("\n");
    return b.toString();
  }

  @Override
  public void format(OutputStream o, Event e) throws IOException {
    o.write(format(e).getBytes());
  }

  public static OutputFormatBuilder builder() {
    return new OutputFormatBuilder() {

      @Override
      public OutputFormat build(String... args) {
        Preconditions.checkArgument(args.length <= 1,
            "usage: syslogEntry[(year)]");
        int year = Calendar.getInstance().get(Calendar.YEAR);
        if (args.length >= 1) {
          year = Integer.parseInt(args[0]);
        }

        OutputFormat format = new SyslogEntryFormat(year);
        format.setBuilder(this);

        return format;
      }

      @Override
      public String getName() {
        return NAME;
      }

    };
  }

}
TOP

Related Classes of com.cloudera.flume.handlers.text.SyslogEntryFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.