Package codec.x501

Source Code of codec.x501.RFC2253Parser

/* ========================================================================
*
*  This file is part of CODEC, which is a Java package for encoding
*  and decoding ASN.1 data structures.
*
*  Author: Fraunhofer Institute for Computer Graphics Research IGD
*          Department A8: Security Technology
*          Fraunhoferstr. 5, 64283 Darmstadt, Germany
*
*  Rights: Copyright (c) 2004 by Fraunhofer-Gesellschaft
*          zur Foerderung der angewandten Forschung e.V.
*          Hansastr. 27c, 80686 Munich, Germany.
*
* ------------------------------------------------------------------------
*
*  The software package is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Lesser General Public License as
*  published by the Free Software Foundation; either version 2.1 of the
*  License, or (at your option) any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
*  Lesser General Public License for more details.
*
*  You should have received a copy of the GNU Lesser General Public
*  License along with this software package; if not, write to the Free
*  Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
*  MA 02110-1301, USA or obtain a copy of the license at
*  http://www.fsf.org/licensing/licenses/lgpl.txt.
*
* ------------------------------------------------------------------------
*
*  The CODEC library can solely be used and distributed according to
*  the terms and conditions of the GNU Lesser General Public License .
*
*  The CODEC library has not been tested for the use or application
*  for a determined purpose. It is a developing version that can
*  possibly contain errors. Therefore, Fraunhofer-Gesellschaft zur
*  Foerderung der angewandten Forschung e.V. does not warrant that the
*  operation of the CODEC library will be uninterrupted or error-free.
*  Neither does Fraunhofer-Gesellschaft zur Foerderung der angewandten
*  Forschung e.V. warrant that the CODEC library will operate and
*  interact in an uninterrupted or error-free way together with the
*  computer program libraries of third parties which the CODEC library
*  accesses and which are distributed together with the CODEC library.
*
*  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V.
*  does not warrant that the operation of the third parties's computer
*  program libraries themselves which the CODEC library accesses will
*  be uninterrupted or error-free.
*
*  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V.
*  shall not be liable for any errors or direct, indirect, special,
*  incidental or consequential damages, including lost profits resulting
*  from the combination of the CODEC library with software of any user
*  or of any third party or resulting from the implementation of the
*  CODEC library in any products, systems or services of any user or
*  of any third party.
*
*  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V.
*  does not provide any warranty nor any liability that utilization of
*  the CODEC library will not interfere with third party intellectual
*  property rights or with any other protected third party rights or will
*  cause damage to third parties. Fraunhofer Gesellschaft zur Foerderung
*  der angewandten Forschung e.V. is currently not aware of any such
*  rights.
*
*  The CODEC library is supplied without any accompanying services.
*
* ========================================================================
*/
package codec.x501;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;

import codec.Hex;
import codec.UTF8InputStreamReader;

/**
* A parser to process RFC2253 conform Distinguished Names (cf.
* ftp://ftp.rfc-editor.org/in-notes/rfc2253.txt).
*
* The BNC grammer of Distinguished Name string structure: <quote>
*           
* <pre>
*   distinguishedName = [name]  ; may be empty string
*  
*   name = name-component *(&quot;,&quot; name-component)
*  
*   name-component = attributeTypeAndValue *(&quot;+&quot; attributeTypeAndValue)
*  
*   attributeTypeAndValue = attributeType &quot;=&quot; attributeValue
*  
*   attributeType = (ALPHA 1*keychar) / oid
*   keychar       = ALPHA / DIGIT / &quot;-&quot;
*  
*   oid        = 1*DIGIT *(&quot;.&quot; 1*DIGIT)
*   attributeValue = string
*   string     = *( stringchar / pair )
*                / &quot;#&quot; hexstring
*                / QUOTATION *( quotechar / pair ) QUOTATION  ; only from v2
*           
*   quotechar  = &lt;any character except &quot;\&quot; or QUOTATION &gt;
*  
*   special    = &quot;,&quot; / &quot;=&quot; / &quot;+&quot; / &quot;&lt;&quot; /  &quot;&gt;&quot; / &quot;#&quot; / &quot;;&quot;
*  
*   pair       = &quot;\&quot; ( special / &quot;\&quot; / QUOTATION / hexpair )
*   stringchar = &lt;any character except one of special, &quot;\&quot; or QUOTATION &gt;
*  
*   hexstring  = 1*hexpair
*   hexpair    = hexchar hexchar
*  
*   hexchar    = DIGIT / &quot;A&quot; / &quot;B&quot; / &quot;C&quot; / &quot;D&quot; / &quot;E&quot; / &quot;F&quot;
*                / &quot;a&quot; / &quot;b&quot; / &quot;c&quot; / &quot;d&quot; / &quot;e&quot; / &quot;f&quot;
*   ALPHA      =  &lt;any ASCII alphabetic character&gt; ; (decimal 65-90 and 97-122)
*   DIGIT      =  &lt;any ASCII decimal digit&gt;  ; (decimal 48-57)
*   QUOTATION  =  &lt;the ASCII double quotation mark character '&quot;' decimal 34&gt;
* </pre></quote>
*
* @author Zaharina Velikova
* @author Jan Peters
* @version "$Id: RFC2253Parser.java,v 1.1 2007/08/30 08:45:05 pebinger Exp $"
*/
public class RFC2253Parser {

    /**
     * Characters that are treated in a special way. Some of them must be
     * escaped or quoted, some serve as delimiters and more.
     */
    public static final String TOKENIZERS = " ,+=\"#;\\<>\r\n";

    /**
     * Characters that are treated in a special way. Some of them must be
     * escaped or quoted, some serve as delimiters and more.
     */
    public static final String SPECIALS = "\"\\,=+<>#;";

    /**
     * Delimiters for the <code>StringTokenizer</code>.
     */
    public static final String SEPARATORS = ";,+";

    /**
     * Valid hex characters.
     */
    public static final String HEXCHAR = "0123456789ABCDEFabcdef";

    /**
     * The pair introducer.
     */
    public static final String ESCAPE = "\\";

    /**
     * The quote character.
     */
    public static final String QUOTE = "\"";

    /**
     * The plus character.
     */
    public static final String PLUS = "+";

    /**
     * The space character.
     */
    public static final String SPC = " ";

    /**
     * The line break character.
     */
    public static final String LINEBREAK = "\r";

    /**
     * The carriage return character.
     */
    public static final String RETURN = "\n";

    /**
     * Whitespace characters.
     */
    public static final String WHITESPACE = SPC + LINEBREAK + RETURN;

    /**
     * This method parses the given name.
     *
     * @param rfc2253name
     *                The name string that is parsed into its components.
     * @throws BadNameException
     *                 if the syntax of the name string is not correct.
     * @return A <code>List</code> with elements of type {@link AVA}.
     */
    public List parse(String rfc2253name) throws BadNameException {
  UTF8InputStreamReader utfReader;
  ByteArrayOutputStream baos;
  ByteArrayInputStream bais;
  StringTokenizer st;
  StringBuffer value;
  boolean tokParsed;
  boolean utfParsed;
  boolean plus;
  String str;
  String tok;
  String key;
  String val;
  String t;
  byte[] ascii;
  byte[] buf;
  byte[] utf;
  char[] chs;
  byte b;
  int returnState;
  int trunc;
  int state;
  int i;

  trunc = -1;
  returnState = -1;
  state = 0;
  tokParsed = true;
  utfParsed = true;
  key = new String();
  value = new StringBuffer();
  tok = "";
  baos = new ByteArrayOutputStream();

  LinkedList ava_ = new LinkedList();

  if (rfc2253name.equals("")) {
      return ava_;
  }
  st = new StringTokenizer(rfc2253name, TOKENIZERS, true);

  while (st.hasMoreTokens() || !tokParsed) {
      if (tokParsed) {
    tok = st.nextToken();
      }

      switch (state) {
      case 0:
    /*
     * We consume whitespace characters and wait for an attribute
     * keyword in this state.
     */
    if (WHITESPACE.indexOf(tok) >= 0) {
        continue;
    }
    if (SPECIALS.indexOf(tok.charAt(0)) < 0) {
        tok = tok.trim();

        if (tok.length() >= 4
          && tok.substring(0, 4).equalsIgnoreCase("OID.")) {
      tok = tok.substring(4);
        }

        if (Character.isDigit(tok.charAt(0))) {
      chs = tok.toCharArray();

      for (i = 1; i < chs.length; i++) {
          if (!Character.isDigit(chs[i]) && !(chs[i] == '.')) {
        throw new BadNameException("(" + state
          + ") The key '" + tok
          + "' seems to be an OID, but it "
          + "contains the illegal character '"
          + chs[i] + "'!");
          }
      }
        } else {
      chs = tok.toCharArray();

      for (i = 1; i < chs.length; i++) {
          if (!Character.isDigit(chs[i])
            && !Character.isLetter(chs[i])
            && chs[i] != '-') {
        throw new BadNameException("(" + state
          + ") The key '" + tok
          + "' contains the the illegal "
          + "character '" + chs[i] + "'!");
          }
      }
        }
        key = tok;
        state = 1;

        continue;
    }
    throw new BadNameException("(" + state
      + ") Key starts with SPECIAL '" + tok + "'!");

      case 1:
    /*
     * We again consume whitespace characters until we encounter an
     * equals sign ('='). Then we advance our state.
     */
    if (WHITESPACE.indexOf(tok) >= 0) {
        continue;
    }
    if (tok.equals("=")) {
        state = 2;
        continue;
    }
    throw new BadNameException("(" + state
      + ") '=' expected after '" + key + "'!");

      case 2:
    /*
     * We again consume whitespace characters until we hit the first
     * non-space character. In that case we emulate an epsilon
     * transition to state number 3. In other words, we fall through
     * with the current token still being valid.
     */
    if (WHITESPACE.indexOf(tok) >= 0) {
        continue;
    }
    /*
     * If the first token is a hash mark ('#') then we have n
     * hexadecimal encoding that is treated in state 7.
     */
    if (tok.equals("#")) {
        state = 7;

        continue;
    }
    /*
     * Fall through, new state, token stays valid.
     */
    state = 3;

      case 3:
    /*
     * The central state. It distinguishes between quoted and
     * unquoted substrings, handles the truncation counter for
     * trailing whitespace characters and more.
     */
    if (!tok.equals(ESCAPE) && !utfParsed) {
        throw new BadNameException("(" + state
          + ") Invalid UTF-8 code '"
          + Hex.encode(baos.toByteArray()) + "'!");
    }

    if (!tokParsed) {
        tokParsed = true;
    }

    /*
     * Check if we hit a whitespace character. This is a nasty case
     * because the DN could be 'cn= "foo" ,...' which must be
     * transformed into 'cn=foo'. that means all whitespace
     * characters before the delimiter must be truncated.
     */
    if (WHITESPACE.indexOf(tok) >= 0) {
        /*
         * Ignore leading whitespace characters.
         */
        if (value.length() == 0) {
      continue;
        }

        /*
         * Remember start of trailing whitespace characters.
         */
        if (trunc == -1) {
      trunc = value.length();
        }

        value.append(tok);
        continue;
    }

    /*
     * If we hit upon a RDN separator then we can ship out the
     * attribute and value.
     */
    if (SEPARATORS.indexOf(tok.charAt(0)) >= 0) {
        /*
         * Remove trailing whitespace characters, if existent
         */
        if (trunc != -1) {
      value.setLength(trunc);
        }

        /*
         * If the separator is a PLUS then we have to set the flag
         * that says: "this AVA is followed by another one at the
         * same level."
         */
        val = value.toString();
        state = 0;
        plus = tok.equals(PLUS);

        value.setLength(0);

        /*
         * We got a key and an empty value. Now we ship it out and
         * go on in state 0.
         */
        ava_.add(new AVA(key, val, plus));
        continue;
    }

    if (trunc != -1) {
        trunc = -1;
    }

    /*
     * An ESCAPE brings us into a state that simply returns after
     * having read the escaped special character.
     */
    if (tok.equals(ESCAPE)) {
        returnState = state;

        state = 4;
        continue;
    }
    /*
     * If we hit upon a QUOTE then we have to parse a quoted
     * substring. The state for that simply returns as well.
     */
    if (tok.equals(QUOTE)) {
        if (value.length() > 0) {
      throw new BadNameException("(" + state
        + ") Only whitespace characters "
        + "are allowed before the first unescaped "
        + "quotation mark (\")!");
        }
        state = 5;
        continue;
    }
    /*
     * Last not least, we check for a special character that is not
     * escaped. If there isn't then we have plain chars that we
     * append to the current value.
     */
    if (SPECIALS.indexOf(tok.charAt(0)) < 0) {
        if (tok.length() > 0) {
      value.append(tok);
        }
        continue;
    }
    throw new BadNameException("(" + state
      + ") Unquoted special character '" + tok + "' after '"
      + key + "'!");

      case 4:
    /*
     * This state handles escaped SPECIAL characters, backslashes
     * (\), quotations ("), and UTF-8 code. It returns to the set
     * 'returnState'.
     */
    if (SPECIALS.indexOf(tok.charAt(0)) >= 0) {
        /*
         * We got an escaped special character, so we append it to
         * the value.
         */
        value.append(tok);

        state = returnState;
        continue;
    }
    if (tok.length() > 1) {
        t = tok.substring(0, 2);
        /*
         * we first have to check whether we are in a situation like
         * 'CN=Before\0DAfter' (escaped non printable ascii
         * character)
         */
        try {
      b = (Hex.decode(t))[0];
        } catch (Exception e) {
      throw new IllegalArgumentException("(" + state
        + ") Invalid hex character '" + t + "'!");
        }

        if (b >= 0 && b <= 37) {
      try {
          if (baos.size() == 0) {
        ascii = new byte[1];
        ascii[0] = b;
        str = new String(ascii);
        value.append(str);
          } else {
        baos.write(b);

        throw new BadNameException("(" + state
          + ") Invalid UTF-8 code '"
          + Hex.encode(baos.toByteArray()) + "'!");
          }
      } catch (IllegalArgumentException iae) {
          throw new BadNameException("(" + state
            + ") Invalid hex character '" + t + "'!");
      }
        } else {
      /*
       * now we have to try to parse the UTF-8 code
       */
      try {
          baos.write(b);
          utf = baos.toByteArray();
          bais = new ByteArrayInputStream(utf);

          utfReader = new UTF8InputStreamReader(bais, 2048);

          str = utfReader.readLine();
          value.append(str);

          baos.reset();
          utfParsed = true;
      } catch (Exception e) {
          if (tok.length() == 2) {
        tokParsed = true;
        utfParsed = false;

        state = returnState;
        continue;
          }
          throw new BadNameException("(" + state
            + ") Invalid UTF-8 code '"
            + Hex.encode(baos.toByteArray()) + "'!");
      }
        }

        if (tok.length() > 2) {
      tokParsed = false;

      tok = tok.substring(2);
        } else {
      tokParsed = true;
        }

        state = returnState;
        continue;
    }
    throw new BadNameException("(" + state
      + ") Can't ESCAPE non-special character '"
      + tok.charAt(0) + "'!");

      case 5:
    /*
     * This state means, that we are parsing a quoted value, upon
     * hitting another unescaped quote.
     */
    if (!tok.equals(ESCAPE) && !utfParsed) {
        throw new BadNameException("(" + state
          + ") Invalid UTF-8 code '"
          + Hex.encode(baos.toByteArray()) + "'!");
    }

    if (!tokParsed) {
        tokParsed = true;
    }
    if (tok.equals(QUOTE)) {
        state = 6;
        continue;
    }

    /*
     * If we read an escape character then we again have to call a
     * substate for handling that.
     */
    if (tok.equals(ESCAPE)) {
        returnState = state;

        state = 4;
        continue;
    }
    /*
     * Since we are in quotation marks, we add what we find to the
     * current value.
     */
    value.append(tok);
    continue;

      case 6:
    /*
     * This state means, that we have found the closing unescaped
     * quotation mark, wait for a RDN separator, and handle
     * whitespace characters.
     */
    if (SEPARATORS.indexOf(tok.charAt(0)) >= 0) {
        /*
         * Remove trailing whitespace characters, if existent
         */
        if (trunc != -1) {
      value.setLength(trunc);
        }

        /*
         * If the separator is a PLUS then we have to set the flag
         * that says: "this AVA is followed by another one at the
         * same level."
         */
        val = value.toString();
        state = 0;
        plus = tok.equals(PLUS);

        value.setLength(0);

        /*
         * We got a key and an empty value. Now we ship it out and
         * go on in state 0.
         */
        ava_.add(new AVA(key, val, plus));
        continue;
    }
    if (WHITESPACE.indexOf(tok) < 0) {
        throw new BadNameException("(" + state
          + ") Only whitespace characters are "
          + "allowed after the second unescaped quotation "
          + "mark (\")!");
    }
    continue;

      case 7:
    /*
     * This state decodes a string that represents a binary value.
     * If we hit upon a separator then we ship out the accumulated
     * hexadecimal string.
     */
    if (SEPARATORS.indexOf(tok.charAt(0)) >= 0) {
        try {
      val = value.toString();
      state = 0;
      plus = tok.equals(PLUS);
      buf = Hex.decode(val);

      value.setLength(0);
        } catch (Exception e) {
      throw new BadNameException("(" + state
        + ") Bad hexadecimal code '" + value.toString()
        + "'!");
        }
        if (buf.length == 0) {
      throw new BadNameException("(" + state
        + ") Empty hexadecimal code '"
        + value.toString() + "'!");
        }
        ava_.add(new AVA(key, buf, plus));

        continue;
    }

    /*
     * Remove trailing spaces.
     */
    if (WHITESPACE.indexOf(tok) >= 0) {
        if (trunc == -1) {
      trunc = value.length();
        }
        continue;
    }
    if (trunc != -1) {
        throw new BadNameException("(" + state
          + ") Non-trailing whitespace characters "
          + "after hexadecimal code '" + value.toString()
          + "'!");
    }
    /*
     * We check for specials now. If we do not hit one then we
     * assume everything is fine and we go on. The hexadecimal
     * encoding is checked when we ship out the string.
     */
    if (HEXCHAR.indexOf(tok.charAt(0)) >= 0) {
        value.append(tok);
        continue;
    }
    /*
     * Everything else is an error.
     */
    throw new BadNameException("(" + state
      + ") Bad hexadecimal encoding '" + value.toString()
      + "'!");

      default:
    throw new IllegalStateException("(" + state
      + ") Illegal state!");
      }
  }

  if (!utfParsed) {
      throw new BadNameException("(" + state + ") Invalid UTF-8 code '"
        + Hex.encode(baos.toByteArray()) + "'!");
  }
  if (trunc != -1) {
      value.setLength(trunc);
  }

  /*
   * We first check if the state machine is in a final state.
   */
  if (state != 2 && state != 3 && state != 6 && state != 7) {
      throw new BadNameException("(" + state + ") Not in a final state!");
  }
  /*
   * We have to check for the epsilon transitions of the final states.
   */
  switch (state) {
  case 7:
      try {
    val = value.toString();
    buf = Hex.decode(val);
      } catch (Exception e) {
    throw new BadNameException("(" + state
      + ") Bad hexadecimal code '" + value.toString() + "'!");
      }
      if (buf.length == 0) {
    throw new BadNameException("(" + state
      + ") Empty hexadecimal code '" + value.toString()
      + "'!");
      }
      ava_.add(new AVA(key, buf, false));
      break;

  case 6:
      val = value.toString();

      ava_.add(new AVA(key, val, false));
      break;

  case 3:
      val = value.toString();

      ava_.add(new AVA(key, val, false));
      break;

  case 2:
      ava_.add(new AVA(key, new String(), false));
      break;
  }
  return ava_;
    }

    /**
     * Main method of the class.
     *
     * @param argv
     *                a sequence of RFC2253 strings (e.g. "CN=DE")
     */
    public static void main(String[] argv) throws Exception {
  RFC2253Parser parser = new RFC2253Parser();
  for (int n = 0; n < argv.length; n++) {
      System.out.println("Input: '" + argv[n] + "'");
      Iterator it = parser.parse(argv[n]).iterator();
      while (it.hasNext()) {
    System.out.println(it.next());
      }
  }
    }

}
TOP

Related Classes of codec.x501.RFC2253Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.