Package com.cloudera.flume.handlers.debug

Source Code of com.cloudera.flume.handlers.debug.Log4jTextFileSource

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.debug;

import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.SourceFactory.SourceBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSource;
import com.cloudera.flume.core.Event.Priority;
import com.google.common.base.Preconditions;

/**
* This adds a parser that breaks out parts of a log4j log file and tries to
* re-concatenate split lines.
*
* TODO (jon) check if this is the hadoop log4j default out of the box.
*/
public class Log4jTextFileSource extends TextFileSource {

  public Log4jTextFileSource(String fname) {
    super(fname);
  }

  // This should parse a log4j line
  Pattern l4jPat = Pattern
      .compile("^(\\S+?) (\\S+?) (\\d\\d/\\d\\d/\\d\\d \\d\\d:\\d\\d:\\d\\d) (\\S+?) (.*)");

  DateFormat dformat = new SimpleDateFormat("yy/MM/dd HH:mm:ss");
  String prevLine = null;

  /**
   * Reads lines until we either reach the end of a file, or a line that looks
   * like the beginning of a new entry. If last line prevLine is set to null. If
   * it is a new event, we make the prev line contain that value and return the
   * completed event.
   */
  Event readUntilNextEvent(Matcher m) throws IOException {
    try {
      // new event!
      String host = m.group(1);
      // String xx = m.group(2); // TODO (jon) I don't know what this is
      Date d;
      d = dformat.parse(m.group(3));
      String prio = m.group(4);
      String body = m.group(5);

      // this is really just to differentiate entries at the "same time" at
      // second/millisecond resolution"
      long nanos = System.nanoTime();
      StringBuilder builder = new StringBuilder(body);
      while (true) {
        String s2 = raf.readLine();
        if (s2 == null) {
          // reached the last line
          prevLine = null;
          Event e = new EventImpl(builder.toString().getBytes(), d.getTime(),
              Priority.valueOf(prio), nanos, host,
              new HashMap<String, byte[]>());
          return e;
        }

        // valid line, need to get more lines.
        Matcher m2 = l4jPat.matcher(s2);
        if (m2.matches()) {
          // a new matching line? event finished, save line for next round.
          Event e = new EventImpl(builder.toString().getBytes(), d.getTime(),
              Priority.valueOf(prio), nanos, host,
              new HashMap<String, byte[]>());
          prevLine = s2;
          return e;
        }
        // didn't match, append to previous
        builder.append("\n");
        builder.append(s2);
      }
    } catch (ParseException e1) {
      // TODO XXX Auto-generated catch block
      e1.printStackTrace();
    }
    return null;
  }

  /**
   * Because of the semantics of next, we need to keep the previous line around.
   */
  public Event next() throws IOException {
    Preconditions.checkState(raf != null,
        "Need to open source before reading from it");
    String s = null;
    if (prevLine == null) {
      s = raf.readLine();

      if (s == null)
        return null; // last line

      // first line (fall through)

    } else {
      // event break
      s = prevLine;
    }

    Matcher m = l4jPat.matcher(s);
    if (m.matches()) {
      Event e = readUntilNextEvent(m);
      updateEventProcessingStats(e);
      return e;
    }

    // This is unreachable.
    return null;
  }

  public static SourceBuilder builder() {
    return new SourceBuilder() {
      @Override
      public EventSource build(Context ctx, String... argv) {
        if (argv.length != 1) {
          throw new IllegalArgumentException("usage: log4jfile(filename) ");
        }
        return new Log4jTextFileSource(argv[0]);
      }

    };
  }
}
TOP

Related Classes of com.cloudera.flume.handlers.debug.Log4jTextFileSource

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.