Package asia.redact.bracket.properties

Source Code of asia.redact.bracket.properties.PropertiesLexer

/*
*  This file is part of Bracket Properties
*  Copyright 2011 David R. Smith
*
*/

package asia.redact.bracket.properties;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import asia.redact.bracket.properties.Properties.Mode;
import asia.redact.bracket.util.AsciiToNativeFilterReader;
/**
* <pre>
*
* Parse a properties file generally conformant to the description at
*  http://download.oracle.com/javase/6/docs/api/java/util/Properties.html#load(java.io.Reader)
*  into tokens.
*  The lexer will be slightly more true to the data than java.util. For example, it will preserve
*  whitespace in the value as valid data, which java.util.Properties would silently strip off. The lexer
*  will also happily consume UTF-8 (no need for unicode-style escapes). Use Mode.Compatibility
*  for better compatibility to the java.util package and the "spec" above. 
*
* There is one additional extension in the lexer: a comment line which starts with #;; is treated
* as transient (not read in as a comment or saved as a comment in the properties results). This can be
* used to generate a transient header and footer.
*
* As of version 1.3.2-SNAPSHOT and above, these lines are parsed as META_DATA tokens and are available in
* the parse phase.
*
* This class is an "off-line" (non-streaming) lexer, it is backed by a String as input, which implies it is
* limited by memory resources. That's not a problem for all but unusually large properties files on contemporary
* hardware.
*
* </pre>
*
* @author Dave
*
* @see PropertiesToken
* @see PropertiesTokenType
* @see InputAdapter
*/
public class PropertiesLexer {

  final String input;
  int index;
  final List<PropertiesToken> list = new ArrayList<PropertiesToken>();
  private final Lock lock = new ReentrantLock();

  /**
   * Convenience method, swallows the input whole
   * This method filters for unicode escapes if the mode is Compatibility
   *
   * @param in
   */
  public PropertiesLexer(String input) {
    super();
    //this.input = input;
    if(Properties.Factory.mode == Mode.Compatibility){
      // first filter the entire input for unicode escapes
      AsciiToNativeFilterReader reader = new AsciiToNativeFilterReader(new StringReader(input));
      StringBuilder builder = new StringBuilder();
      char [] array = new char[8120];
      int count = 0;
      try {
        while((count = reader.read(array))!= -1) {
          builder.append(array,0,count);
        }
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }

      this.input = builder.toString();
    }else{
      // normal, String will be in the default encoding, normally UTF-8
      this.input = input; 
    }
  }
 
  /**
   * Convenience method, swallows the input whole
   * This method filters for unicode escapes if the mode is Compatibility
   *
   * @param in
   */
  public PropertiesLexer(Reader in){
    //this(new InputAdapter().asString(in));
   
    if(Properties.Factory.mode == Mode.Compatibility){
      // first filter the entire input for unicode escapes
      AsciiToNativeFilterReader reader = new AsciiToNativeFilterReader(in);
      StringBuilder builder = new StringBuilder();
      char [] array = new char[8120];
      int count = 0;
      try {
        while((count = reader.read(array))!= -1) {
          builder.append(array,0,count);
        }
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }

      input = builder.toString();
    }else{
      // normal, String will be in the default encoding, normally UTF-8
      input =  new InputAdapter().asString(in);
    }
  }
 
  /**
   * Convenience method, swallows the input whole
   * This method filters for unicode escapes if the mode is Compatibility
   * This method requires we specify the explicit charset of the file
   *
   * @param in
   */
  public PropertiesLexer(File in, Charset charset){
     String str = new InputAdapter().asString(in,charset);
    if(Properties.Factory.mode == Mode.Compatibility){
      // first filter the entire input for unicode escapes
      AsciiToNativeFilterReader reader = new AsciiToNativeFilterReader(new StringReader(str));
      StringBuilder builder = new StringBuilder();
      char [] array = new char[8120];
      int count = 0;
      try {
        while((count = reader.read(array))!= -1) {
          builder.append(array,0,count);
        }
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }

      input = builder.toString();
    }else{
      // normal, String will be in the default encoding, normally UTF-8
      input =  str;
    }
  }
 
  /**
   * Convenience method, swallows the input whole
   * This method filters for unicode escapes if the mode is Compatibility
   *
   * @param in
   */
  public PropertiesLexer(InputStream in){

    if(Properties.Factory.mode == Mode.Compatibility){
      // first filter the entire input for unicode escapes
      AsciiToNativeFilterReader reader = new AsciiToNativeFilterReader(new InputStreamReader(in));
      StringBuilder builder = new StringBuilder();
      char [] array = new char[8120];
      int count = 0;
      try {
        while((count = reader.read(array))!= -1) {
          builder.append(array,0,count);
        }
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }

      input = builder.toString();
    }else{
      // normal, String will be in the default encoding, normally UTF-8
      input =  new InputAdapter().asString(in);
    }
  }

  public PropertiesLexer(InputStream in, Charset charset){

    if(Properties.Factory.mode == Mode.Compatibility){
      // first filter the entire input for unicode escapes
      AsciiToNativeFilterReader reader = new AsciiToNativeFilterReader(new InputStreamReader(in,charset));
      StringBuilder builder = new StringBuilder();
      char [] array = new char[8120];
      int count = 0;
      try {
        while((count = reader.read(array))!= -1) {
          builder.append(array,0,count);
        }
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }

      input = builder.toString();
    }else{
      // normal, String will be in the default encoding, normally UTF-8
      input =  new InputAdapter().asString(in);
    }
  }
 
  public void lex() {
    lock.lock();
    long count = 0;
    try {
      if(input==null)return;
      PropertiesToken lastEol = null;
      while(hasNext()){
        StringBuffer buf = new StringBuffer();
        PropertiesToken eol = null;
        while(hasNext() && ((eol=scanLineBreak())==null)){
            buf.append(next());
        }
        analyzeLine(buf.toString(),lastEol, count);
        count++;
        if(eol == null) {
          return;
        }
        lastEol = eol;
        list.add(eol);
        index+=eol.text.length();
      }
    }finally{
      list.add(PropertiesToken.eof());
      lock.unlock();
    }
  }
 
  private void analyzeLine(String buf, PropertiesToken tok,long count){
    lock.lock();
    try {
      //strip off any leading white space
      int countBlank = 0;
      for(int i =0;i<buf.length();i++){
        if(Character.isWhitespace(buf.charAt(i))){
          countBlank++;
          continue;
        }else{
          break;
        }
      }
      if(countBlank>0)buf=buf.substring(countBlank);
      // this should handle blank lines
      if(buf.isEmpty())return;
     
      // do not keep our Last Generated header banner or our End footer banner
      // as of version 1.3.2-SNAPSHOT, parse this as META_DATA if we are in Explicit mode
      if((buf.length() > 3) && (buf.charAt(0)== '#' && buf.charAt(1)== ';' && buf.charAt(2)==';')){
       
        if(Properties.Factory.mode == Mode.Explicit){
       
          // collect all the contents of the line up to the line break
          String meta = buf.substring(3, buf.length());
          list.add(new PropertiesToken(PropertiesTokenType.META_DATA,meta));
         
        }else{
          // skip this line if not explicit mode
          return;
        }
       
      }
     
      char ch = buf.charAt(0);
      switch(ch){
        case '#':
        case '!': comment(buf); break;
        default: {
          scanKeyValue(buf,tok);
        }
      }
    }finally{
      lock.unlock();
    }
  }
 
  private void scanKeyValue(String buf,PropertiesToken eol){
    lock.lock();
    try {
      int sepIndex=-1;
      char previous = '\0';
      char ch = '\0';
      boolean sawEscapedDelimiter=false;
      for(int i=0;i<buf.length();i++){
          previous=ch;
        ch = buf.charAt(i);
        if((ch == '='||ch==':') && previous == '\\'){
          sawEscapedDelimiter=true;
        }
        if((ch == '='||ch==':') && previous != '\\'){
          //I see an actual delimiter
          sepIndex=i;
          break;
        }
      }
      if(sepIndex == -1){
        //no separator found, test if continuation...
        if(eol !=null && eol.type==PropertiesTokenType.LOGICAL_LINE_BREAK){
          list.add(new PropertiesToken(PropertiesTokenType.VALUE,buf));
        }
      }else{
        // key and value
       
        //first purge escaped delimiters from the key, if needed
        if(sawEscapedDelimiter){
          String key = buf.substring(0, sepIndex);
          previous = '\0';
          ch = '\0';
          StringBuilder b = new StringBuilder();
          for(int i=0;i<key.length();i++){
              previous=ch;
            ch = buf.charAt(i);
            if((ch == '='||ch==':') && previous == '\\'){
              b.deleteCharAt(b.length()-1);
            }
            b.append(ch);
          }
          // use cleaned up key
          // Issue #1 - need to trim key also of whitespace at end, should be trimmed at front above
          list.add(new PropertiesToken(PropertiesTokenType.KEY,b.toString().trim()));
         
        }else{
          //delimiter not escaped, so just use key as is
          // Issue #1 - but need to trim key of whitespace
          list.add(new PropertiesToken(PropertiesTokenType.KEY,buf.substring(0, sepIndex).trim()));
        }
       
       
        list.add(new PropertiesToken(PropertiesTokenType.SEPARATOR,String.valueOf(buf.charAt(sepIndex))));
        list.add(new PropertiesToken(PropertiesTokenType.VALUE,buf.substring(sepIndex+1, buf.length())));
      }
    }finally{
      lock.unlock();
    }
  }
 
  private void comment(String buf){
    lock.lock();
    try {
      list.add(new PropertiesToken(PropertiesTokenType.COMMENT,buf));
    }finally{
      lock.unlock();
    }
  }
 
  private PropertiesToken scanLineBreak(){
    lock.lock();
    try {
      if(la(0)=='\\' && la(1) == '\r'&&la(2)=='\n'){
        return new PropertiesToken(PropertiesTokenType.LOGICAL_LINE_BREAK,"\\\r\n");
      }else if(la(0)=='\\' && la(1) == '\r'){
        return new PropertiesToken(PropertiesTokenType.LOGICAL_LINE_BREAK,"\\\r");
      }else if(la(0)=='\\' && la(1) == '\n'){
        return new PropertiesToken(PropertiesTokenType.LOGICAL_LINE_BREAK,"\\\n");
      }else if(la(0) == '\r'&&la(1)=='\n'){
        return new PropertiesToken(PropertiesTokenType.NATURAL_LINE_BREAK,"\r\n");
      }else if(la(0) == '\r'){
        return new PropertiesToken(PropertiesTokenType.NATURAL_LINE_BREAK,"\r");
      }else if(la(0) == '\n'){
        return new PropertiesToken(PropertiesTokenType.NATURAL_LINE_BREAK,"\n");
      }
      return null;
    }finally{
      lock.unlock();
    }
  }
 

  private boolean hasNext() {
    lock.lock();
    try {
      return index < input.length();
    }finally{
      lock.unlock();
    }
  }
 
  private char next() {
    lock.lock();
    try {
      if(index >= input.length()) {
        throw new RuntimeException("problem, index >= "+input.length());
      }
      char ch = input.charAt(index);
      index++;
      return ch;
    }finally{
      lock.unlock();
    }
  }
 
  private Character la(int count) {
    lock.lock();
    try {
      try {
        return input.charAt(index+count);
      }catch(IndexOutOfBoundsException x){
        return '\0';
      }
    }finally{
      lock.unlock();
    }
  }

  public List<PropertiesToken> getList() {
    return list;
  }
}
TOP

Related Classes of asia.redact.bracket.properties.PropertiesLexer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.