Package oi.thekraken.grok.api

Source Code of oi.thekraken.grok.api.Grok

/*******************************************************************************
* Copyright 2014 Anthony Corbacho and contributors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package oi.thekraken.grok.api;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import oi.thekraken.grok.api.exception.GrokException;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.code.regexp.Matcher;
import com.google.code.regexp.Pattern;


/**
* {@code Grok} parse arbitrary text and structure it.<p/>
*
* {@code Grok} is simple API that allows you to easily parse logs
* and other files (single line). With {@code Grok},
* you can turn unstructured log and event data into structured data (JSON).
*<p/>
* example:<p/>
* <pre>
*  Grok grok = Grok.create("patterns/patterns");
*  grok.compile("%{USER}");
*  Match gm = grok.match("root");
*  gm.captures();
* </pre>
*
* @since 0.0.1
* @author anthonycorbacho
*/
public class Grok {

  private static final Logger LOG = LoggerFactory.getLogger(Grok.class);
  /**
   * Named regex of the originalGrokPattern.
   */
  private String namedRegex;
  /**
   * Map of the named regex of the originalGrokPattern
   * with id = namedregexid and value = namedregex.
   */
  private Map<String, String> namedRegexCollection;
  /**
   * Original {@code Grok} pattern (expl: %{IP}).
   */
  private String originalGrokPattern;
  /**
   * Pattern of the namedRegex.
   */
  private Pattern compiledNamedRegex;
  /**
   * {@code Grok} discovery.
   */
  private Discovery disco;
  /**
   * {@code Grok} patterns definition.
   */
  private Map<String, String> grokPatternDefinition;

  /** only use in grok discovery. */
  private String savedPattern;

  /**
   * Create Empty {@code Grok}.
   */
  public static final Grok EMPTY = new Grok();

  /**
   * Create a new <i>empty</i>{@code Grok} object.
   */
  public Grok() {
    originalGrokPattern = StringUtils.EMPTY;
    disco = null;
    namedRegex = StringUtils.EMPTY;
    compiledNamedRegex = null;
    grokPatternDefinition = new TreeMap<String, String>();
    namedRegexCollection = new TreeMap<String, String>();
    savedPattern = StringUtils.EMPTY;
  }

  public String getSaved_pattern() {
    return savedPattern;
  }

  public void setSaved_pattern(String savedpattern) {
    this.savedPattern = savedpattern;
  }

  /**
   * Create a {@code Grok} instance with the given patterns file and
   * a {@code Grok} pattern.
   *
   * @param grokPatternPath Path to the pattern file
   * @param grokExpression  - <b>OPTIONAL</b> - Grok pattern to compile ex: %{APACHELOG}
   * @return {@code Grok} instance
   * @throws GrokException
   */
  public static Grok create(String grokPatternPath, String grokExpression)
      throws GrokException {
    if (StringUtils.isBlank(grokPatternPath)) {
      throw new GrokException("{grokPatternPath} should not be empty or null");
    }
    Grok g = new Grok();
    g.addPatternFromFile(grokPatternPath);
    if (StringUtils.isNotBlank(grokExpression)) {
      g.compile(grokExpression);
    }
    return g;
  }

  /**
   * Create a {@code Grok} instance with the given grok patterns file.
   *
   * @param  grokPatternPath : Path to the pattern file
   * @return Grok
   * @throws GrokException
   */
  public static Grok create(String grokPatternPath) throws GrokException {
    return create(grokPatternPath, null);
  }

  /**
   * Add custom pattern to grok in the runtime.
   *
   * @param name : Pattern Name
   * @param pattern : Regular expression Or {@code Grok} pattern
   * @throws GrokException
   **/
  public void addPattern(String name, String pattern) throws GrokException {
    if (StringUtils.isBlank(name)) {
      throw new GrokException("Invalid Pattern name");
    }
    if (StringUtils.isBlank(name)) {
      throw new GrokException("Invalid Pattern");
    }
    grokPatternDefinition.put(name, pattern);
  }

  /**
   * Copy the given Map of patterns (pattern name, regular expression) to {@code Grok},
   * duplicate element will be override.
   *
   * @param cpy : Map to copy
   * @throws GrokException
   **/
  public void copyPatterns(Map<String, String> cpy) throws GrokException {
    if (cpy == null) {
      throw new GrokException("Invalid Patterns");
    }

    if (cpy.isEmpty()) {
      throw new GrokException("Invalid Patterns");
    }
    for (Map.Entry<String, String> entry : cpy.entrySet()) {
      grokPatternDefinition.put(entry.getKey().toString(), entry.getValue().toString());
    }
  }

  /**
   * Get the current map of {@code Grok} pattern.
   *
   * @return Patterns (name, regular expression)
   */
  public Map<String, String> getPatterns() {
    return grokPatternDefinition;
  }

  /**
   * Get the named regex from the {@code Grok} pattern. <p></p>
   * See {@link #compile(String)} for more detail.
   * @return named regex
   */
  public String getNamedRegex() {
    return namedRegex;
  }

  /**
   * Add patterns to {@code Grok} from the given file.
   *
   * @param file : Path of the grok pattern
   * @throws GrokException
   */
  public void addPatternFromFile(String file) throws GrokException {

    File f = new File(file);
    if (!f.exists()) {
      throw new GrokException("Pattern not found");
    }

    if (!f.canRead()) {
      throw new GrokException("Pattern cannot be read");
    }

    FileReader r = null;
    try {
      r = new FileReader(f);
      addPatternFromReader(r);
    } catch (FileNotFoundException e) {
      throw new GrokException(e.getMessage());
    } catch (@SuppressWarnings("hiding") IOException e) {
      throw new GrokException(e.getMessage());
    } finally {
      try {
        if (r != null) {
          r.close();
        }
      } catch (IOException io) {
        // TODO(anthony) : log the error
      }
    }
  }

  /**
   * Add patterns to {@code Grok} from a Reader.
   *
   * @param r : Reader with {@code Grok} patterns
   * @throws GrokException
   */
  public void addPatternFromReader(Reader r) throws GrokException {
    BufferedReader br = new BufferedReader(r);
    String line;
    // We dont want \n and commented line
    Pattern pattern = Pattern.compile("^([A-z0-9_]+)\\s+(.*)$");
    try {
      while ((line = br.readLine()) != null) {
        Matcher m = pattern.matcher(line);
        if (m.matches()) {
          this.addPattern(m.group(1), m.group(2));
        }
      }
      br.close();
    } catch (IOException e) {
      throw new GrokException(e.getMessage());
    } catch (GrokException e) {
      throw new GrokException(e.getMessage());
    }

  }

  /**
   * Match the given <tt>log</tt> with the named regex.
   * And return the json representation of the matched element
   *
   * @param log : log to match
   * @return json representation og the log
   */
  public String capture(String log){
    Match match = match(log);
    match.captures();
    return match.toJson();
  }

  /**
   * Match the given list of <tt>log</tt> with the named regex
   * and return the list of json representation of the matched elements.
   *
   * @param logs : list of log
   * @return list of json representation of the log
   */
  public List<String> captures(List<String> logs){
    List<String> matched = new ArrayList<String>();
    for (String log : logs) {
      Match match = match(log);
      match.captures();
      matched.add(match.toJson());
    }
    return matched;
  }

  /**
   * Match the given <tt>text</tt> with the named regex
   * {@code Grok} will extract data from the string and get an extence of {@link Match}.
   *
   * @param text : Single line of log
   * @return Grok Match
   */
  public Match match(String text) {
    if (compiledNamedRegex == null || StringUtils.isBlank(text)) {
      return Match.EMPTY;
    }

    Matcher m = compiledNamedRegex.matcher(text);
    Match match = new Match();
    if (m.find()) {
      match.setSubject(text);
      match.setGrok(this);
      match.setMatch(m);
      match.setStart(m.start(0));
      match.setEnd(m.end(0));
    }
    return match;
  }

  /**
   * Compile the {@code Grok} pattern to named regex pattern.
   *
   * @param pattern : Grok pattern (ex: %{IP})
   * @throws GrokException
   */
  public void compile(String pattern) throws GrokException {

    if (StringUtils.isBlank(pattern)) {
      throw new GrokException("{pattern} should not be empty or null");
    }

    namedRegex = pattern;
    originalGrokPattern = pattern;
    int index = 0;
    /** flag for infinite recurtion */
    int iterationLeft = 1000;
    Boolean continueIteration = true;

    // Replace %{foo} with the regex (mostly groupname regex)
    // and then compile the regex
    while (continueIteration) {
      continueIteration = false;
      if (iterationLeft <= 0) {
        throw new GrokException("Deep recursion pattern compilation of " + originalGrokPattern);
      }
      iterationLeft--;

      Matcher m = GrokUtils.GROK_PATTERN.matcher(namedRegex);
      // Match %{Foo:bar} -> pattern name and subname
      // Match %{Foo=regex} -> add new regex definition
      if (m.find()) {
        continueIteration = true;
        Map<String, String> group = m.namedGroups();
        if (group.get("definition") != null) {
          try {
            addPattern(group.get("pattern"), group.get("definition"));
            group.put("name", group.get("name") + "=" + group.get("definition"));
          } catch (GrokException e) {
            // Log the exeception
          }
        }
        namedRegexCollection.put("name" + index,
            (group.get("subname") != null ? group.get("subname") : group.get("name")));
        namedRegex =
            StringUtils.replace(namedRegex, "%{" + group.get("name") + "}", "(?<name" + index + ">"
                + grokPatternDefinition.get(group.get("pattern")) + ")");
        // System.out.println(_expanded_pattern);
        index++;
      }
    }

    if (namedRegex.isEmpty()) {
      throw new GrokException("Pattern not fount");
    }
    // Compile the regex
    compiledNamedRegex = Pattern.compile(namedRegex);
  }

  /**
   * {@code Grok} will try to find the best expression that will match your input.
   * {@link Discovery}
   *
   * @param input : Single line of log
   * @return the Grok pattern
   */
  public String discover(String input) {

    if (disco == null) {
      disco = new Discovery(this);
    }
    return disco.discover(input);
  }

  /**
   * Original grok pattern used to compile to the named regex.
   *
   * @return String Original Grok pattern
   */
  public String getOriginalGrokPattern(){
    return originalGrokPattern;
  }

  /**
   * Get the named regex from the given id.
   *
   * @param id : named regex id
   * @return String of the named regex
   */
  public String getNamedRegexCollectionById(String id) {
    return namedRegexCollection.get(id);
  }

  /**
   * Get the full collection of the named regex.
   *
   * @return named RegexCollection
   */
  public Map<String, String> getNamedRegexCollection() {
    return namedRegexCollection;
  }
}
TOP

Related Classes of oi.thekraken.grok.api.Grok

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.