/* ========================================================================
*
* This file is part of CODEC, which is a Java package for encoding
* and decoding ASN.1 data structures.
*
* Author: Fraunhofer Institute for Computer Graphics Research IGD
* Department A8: Security Technology
* Fraunhoferstr. 5, 64283 Darmstadt, Germany
*
* Rights: Copyright (c) 2004 by Fraunhofer-Gesellschaft
* zur Foerderung der angewandten Forschung e.V.
* Hansastr. 27c, 80686 Munich, Germany.
*
* ------------------------------------------------------------------------
*
* The software package is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software package; if not, write to the Free
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA or obtain a copy of the license at
* http://www.fsf.org/licensing/licenses/lgpl.txt.
*
* ------------------------------------------------------------------------
*
* The CODEC library can solely be used and distributed according to
* the terms and conditions of the GNU Lesser General Public License .
*
* The CODEC library has not been tested for the use or application
* for a determined purpose. It is a developing version that can
* possibly contain errors. Therefore, Fraunhofer-Gesellschaft zur
* Foerderung der angewandten Forschung e.V. does not warrant that the
* operation of the CODEC library will be uninterrupted or error-free.
* Neither does Fraunhofer-Gesellschaft zur Foerderung der angewandten
* Forschung e.V. warrant that the CODEC library will operate and
* interact in an uninterrupted or error-free way together with the
* computer program libraries of third parties which the CODEC library
* accesses and which are distributed together with the CODEC library.
*
* Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V.
* does not warrant that the operation of the third parties's computer
* program libraries themselves which the CODEC library accesses will
* be uninterrupted or error-free.
*
* Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V.
* shall not be liable for any errors or direct, indirect, special,
* incidental or consequential damages, including lost profits resulting
* from the combination of the CODEC library with software of any user
* or of any third party or resulting from the implementation of the
* CODEC library in any products, systems or services of any user or
* of any third party.
*
* Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V.
* does not provide any warranty nor any liability that utilization of
* the CODEC library will not interfere with third party intellectual
* property rights or with any other protected third party rights or will
* cause damage to third parties. Fraunhofer Gesellschaft zur Foerderung
* der angewandten Forschung e.V. is currently not aware of any such
* rights.
*
* The CODEC library is supplied without any accompanying services.
*
* ========================================================================
*/
package codec.x501;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;
import codec.Hex;
import codec.UTF8InputStreamReader;
/**
* A parser for Distinguished Names that conform to RFC 2253 (cf.
* ftp://ftp.rfc-editor.org/in-notes/rfc2253.txt).
*
* The BNF grammar of the Distinguished Name string structure: <quote>
*
* <pre>
* distinguishedName = [name] ; may be empty string
*
* name = name-component *("," name-component)
*
* name-component = attributeTypeAndValue *("+" attributeTypeAndValue)
*
* attributeTypeAndValue = attributeType "=" attributeValue
*
* attributeType = (ALPHA 1*keychar) / oid
* keychar = ALPHA / DIGIT / "-"
*
* oid = 1*DIGIT *("." 1*DIGIT)
* attributeValue = string
* string = *( stringchar / pair )
* / "#" hexstring
* / QUOTATION *( quotechar / pair ) QUOTATION ; only from v2
*
* quotechar = <any character except "\" or QUOTATION >
*
* special = "," / "=" / "+" / "<" / ">" / "#" / ";"
*
* pair = "\" ( special / "\" / QUOTATION / hexpair )
* stringchar = <any character except one of special, "\" or QUOTATION >
*
* hexstring = 1*hexpair
* hexpair = hexchar hexchar
*
* hexchar = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
* / "a" / "b" / "c" / "d" / "e" / "f"
* ALPHA = <any ASCII alphabetic character> ; (decimal 65-90 and 97-122)
* DIGIT = <any ASCII decimal digit> ; (decimal 48-57)
* QUOTATION = <the ASCII double quotation mark character '"' decimal 34>
* </pre></quote>
*
* @author Zaharina Velikova
* @author Jan Peters
* @version "$Id: RFC2253Parser.java,v 1.1 2007/08/30 08:45:05 pebinger Exp $"
*/
public class RFC2253Parser {

    /**
     * The delimiter set handed to the <code>StringTokenizer</code> in
     * {@link #parse(String)}. Every character listed here is returned by
     * the tokenizer as a token of its own: the special characters plus
     * space, carriage return and line feed.
     */
    public static final String TOKENIZERS = " ,+=\"#;\\<>\r\n";

    /**
     * Characters that are treated in a special way. Inside a value they
     * must be escaped with a backslash or enclosed in quotation marks; a
     * key must not start with one of them.
     */
    public static final String SPECIALS = "\"\\,=+<>#;";

    /**
     * Characters that terminate an attribute value and thereby separate
     * two attribute/value pairs from each other.
     */
    public static final String SEPARATORS = ";,+";

    /**
     * Valid hex characters.
     */
    public static final String HEXCHAR = "0123456789ABCDEFabcdef";

    /**
     * The pair introducer (a single backslash).
     */
    public static final String ESCAPE = "\\";

    /**
     * The quote character.
     */
    public static final String QUOTE = "\"";

    /**
     * The plus character. A value terminated by it is followed by another
     * attribute/value pair at the same level.
     */
    public static final String PLUS = "+";

    /**
     * The space character.
     */
    public static final String SPC = " ";

    /**
     * The carriage return character ('\r'). The name of the constant is
     * kept as it is for compatibility.
     */
    public static final String LINEBREAK = "\r";

    /**
     * The line feed character ('\n'). The name of the constant is kept as
     * it is for compatibility.
     */
    public static final String RETURN = "\n";

    /**
     * Whitespace characters: space, carriage return and line feed.
     */
    public static final String WHITESPACE = SPC + LINEBREAK + RETURN;

    /**
     * Parses the given name into its attribute/value pairs.
     * <p>
     * The input is split at the characters of {@link #TOKENIZERS} and fed
     * token by token into a small state machine. The number of the state
     * in which an error was detected is the prefix of every error message:
     * <ul>
     * <li>0 - skip whitespace and read the attribute key (a keyword or a
     * dotted number, an optional leading "OID." is stripped)</li>
     * <li>1 - skip whitespace and wait for the equals sign</li>
     * <li>2 - skip whitespace in front of the value; a hash mark leads to
     * state 7, everything else is handled by state 3</li>
     * <li>3 - read an unquoted value</li>
     * <li>4 - handle the token that follows a backslash, then return to
     * the state that was active before (3 or 5)</li>
     * <li>5 - read the inside of a quoted value</li>
     * <li>6 - after the closing quotation mark, wait for a separator</li>
     * <li>7 - read a hexadecimal value introduced by a hash mark</li>
     * </ul>
     * The input may end in the states 2, 3, 6 and 7 only.
     *
     * @param rfc2253name
     *                The name string that is parsed into its components.
     * @throws BadNameException
     *                 if the syntax of the name string is not correct.
     * @return A <code>List</code> with elements of type {@link AVA}, in the
     *         order in which they appear in the name string. The list is
     *         empty if the name string is empty.
     */
    public List parse(String rfc2253name) throws BadNameException {
        ByteArrayOutputStream baos;
        StringTokenizer st;
        StringBuffer value;
        boolean tokParsed;
        boolean utfParsed;
        String tok;
        String key;
        int returnState;
        int trunc;
        int state;

        /*
         * trunc is the length of the value in front of a run of trailing
         * whitespace characters, or -1 if there is no such run.
         */
        trunc = -1;
        returnState = -1;
        state = 0;
        /*
         * tokParsed is false while the remainder of a token still has to
         * be processed; in that case no new token is fetched.
         */
        tokParsed = true;
        /*
         * utfParsed is false while baos holds escaped bytes that could not
         * yet be decoded as UTF-8.
         */
        utfParsed = true;
        key = "";
        value = new StringBuffer();
        tok = "";
        baos = new ByteArrayOutputStream();

        LinkedList ava_ = new LinkedList();

        if (rfc2253name.equals("")) {
            return ava_;
        }
        st = new StringTokenizer(rfc2253name, TOKENIZERS, true);

        while (st.hasMoreTokens() || !tokParsed) {
            if (tokParsed) {
                tok = st.nextToken();
            }
            switch (state) {
            case 0:
                /*
                 * We consume whitespace characters and wait for an
                 * attribute keyword in this state.
                 */
                if (WHITESPACE.indexOf(tok) >= 0) {
                    continue;
                }
                if (SPECIALS.indexOf(tok.charAt(0)) < 0) {
                    tok = tok.trim();

                    if (tok.length() >= 4
                            && tok.substring(0, 4).equalsIgnoreCase("OID.")) {
                        tok = tok.substring(4);
                    }
                    /*
                     * Trimming or stripping the "OID." prefix may leave
                     * nothing behind. That is a syntax error and must not
                     * surface as an index exception below.
                     */
                    if (tok.length() == 0) {
                        throw new BadNameException("(" + state
                                + ") Empty attribute key!");
                    }
                    char[] chs = tok.toCharArray();

                    if (Character.isDigit(chs[0])) {
                        /*
                         * A leading digit means a dotted number; only
                         * digits and dots may follow.
                         */
                        for (int i = 1; i < chs.length; i++) {
                            if (!Character.isDigit(chs[i])
                                    && !(chs[i] == '.')) {
                                throw new BadNameException("(" + state
                                        + ") The key '" + tok
                                        + "' seems to be an OID, but it "
                                        + "contains the illegal character '"
                                        + chs[i] + "'!");
                            }
                        }
                    } else {
                        /*
                         * A keyword; digits, letters and the minus sign
                         * may follow the first character.
                         */
                        for (int i = 1; i < chs.length; i++) {
                            if (!Character.isDigit(chs[i])
                                    && !Character.isLetter(chs[i])
                                    && chs[i] != '-') {
                                throw new BadNameException("(" + state
                                        + ") The key '" + tok
                                        + "' contains the the illegal "
                                        + "character '" + chs[i] + "'!");
                            }
                        }
                    }
                    key = tok;
                    state = 1;
                    continue;
                }
                throw new BadNameException("(" + state
                        + ") Key starts with SPECIAL '" + tok + "'!");

            case 1:
                /*
                 * We again consume whitespace characters until we
                 * encounter an equals sign ('='). Then we advance our
                 * state.
                 */
                if (WHITESPACE.indexOf(tok) >= 0) {
                    continue;
                }
                if (tok.equals("=")) {
                    state = 2;
                    continue;
                }
                throw new BadNameException("(" + state
                        + ") '=' expected after '" + key + "'!");

            case 2:
                /*
                 * We again consume whitespace characters until we hit the
                 * first non-space character. In that case we emulate an
                 * epsilon transition to state number 3. In other words, we
                 * fall through with the current token still being valid.
                 */
                if (WHITESPACE.indexOf(tok) >= 0) {
                    continue;
                }
                /*
                 * If the first token is a hash mark ('#') then we have a
                 * hexadecimal encoding that is treated in state 7.
                 */
                if (tok.equals("#")) {
                    state = 7;
                    continue;
                }
                /*
                 * Fall through, new state, token stays valid.
                 */
                state = 3;

            case 3:
                /*
                 * The central state. It distinguishes between quoted and
                 * unquoted substrings, handles the truncation counter for
                 * trailing whitespace characters and more.
                 */
                if (!tok.equals(ESCAPE) && !utfParsed) {
                    /*
                     * Pending undecoded bytes may be continued by another
                     * escaped hex pair only.
                     */
                    throw new BadNameException("(" + state
                            + ") Invalid UTF-8 code '"
                            + Hex.encode(baos.toByteArray()) + "'!");
                }
                if (!tokParsed) {
                    tokParsed = true;
                }
                /*
                 * Check if we hit a whitespace character. This is a nasty
                 * case because the DN could be 'cn= "foo" ,...' which must
                 * be transformed into 'cn=foo'. That means all whitespace
                 * characters before the delimiter must be truncated.
                 */
                if (WHITESPACE.indexOf(tok) >= 0) {
                    /*
                     * Ignore leading whitespace characters.
                     */
                    if (value.length() == 0) {
                        continue;
                    }
                    /*
                     * Remember start of trailing whitespace characters.
                     */
                    if (trunc == -1) {
                        trunc = value.length();
                    }
                    value.append(tok);
                    continue;
                }
                /*
                 * If we hit upon a separator then we can ship out the
                 * attribute and value.
                 */
                if (SEPARATORS.indexOf(tok.charAt(0)) >= 0) {
                    /*
                     * Remove trailing whitespace characters, if existent.
                     * The marker is reset so that it cannot leak into the
                     * next attribute/value pair.
                     */
                    if (trunc != -1) {
                        value.setLength(trunc);
                        trunc = -1;
                    }
                    /*
                     * If the separator is a PLUS then we have to set the
                     * flag that says: "this AVA is followed by another one
                     * at the same level."
                     */
                    String val = value.toString();
                    boolean plus = tok.equals(PLUS);

                    state = 0;
                    value.setLength(0);
                    /*
                     * We got a key and a (possibly empty) value. Now we
                     * ship it out and go on in state 0.
                     */
                    ava_.add(new AVA(key, val, plus));
                    continue;
                }
                /*
                 * Anything else ends a run of whitespace characters, which
                 * therefore was not a trailing one.
                 */
                if (trunc != -1) {
                    trunc = -1;
                }
                /*
                 * An ESCAPE brings us into a state that simply returns
                 * after having read the escaped special character.
                 */
                if (tok.equals(ESCAPE)) {
                    returnState = state;
                    state = 4;
                    continue;
                }
                /*
                 * If we hit upon a QUOTE then we have to parse a quoted
                 * substring. The state for that simply returns as well.
                 */
                if (tok.equals(QUOTE)) {
                    if (value.length() > 0) {
                        throw new BadNameException("(" + state
                                + ") Only whitespace characters "
                                + "are allowed before the first unescaped "
                                + "quotation mark (\")!");
                    }
                    state = 5;
                    continue;
                }
                /*
                 * Last not least, we check for a special character that is
                 * not escaped. If there isn't then we have plain chars
                 * that we append to the current value.
                 */
                if (SPECIALS.indexOf(tok.charAt(0)) < 0) {
                    if (tok.length() > 0) {
                        value.append(tok);
                    }
                    continue;
                }
                throw new BadNameException("(" + state
                        + ") Unquoted special character '" + tok
                        + "' after '" + key + "'!");

            case 4:
                /*
                 * This state handles escaped SPECIAL characters,
                 * backslashes (\), quotations ("), and UTF-8 code. It
                 * returns to the set 'returnState'.
                 */
                if (SPECIALS.indexOf(tok.charAt(0)) >= 0) {
                    /*
                     * We got an escaped special character, so we append it
                     * to the value.
                     */
                    value.append(tok);
                    state = returnState;
                    continue;
                }
                if (tok.length() > 1) {
                    String t = tok.substring(0, 2);
                    byte b;

                    /*
                     * We first have to check whether we are in a situation
                     * like 'CN=Before\0DAfter' (escaped non printable
                     * ascii character).
                     */
                    try {
                        b = (Hex.decode(t))[0];
                    } catch (Exception e) {
                        /*
                         * A malformed hex pair is a syntax error of the
                         * name and is reported like all the others.
                         */
                        throw new BadNameException("(" + state
                                + ") Invalid hex character '" + t + "'!");
                    }
                    if (b >= 0 && b <= 37) {
                        /*
                         * NOTE(review): nothing in this try block visibly
                         * throws an IllegalArgumentException; the handler
                         * is kept as it was.
                         */
                        try {
                            if (baos.size() == 0) {
                                /*
                                 * A single byte in this range is appended
                                 * as the character with the same code,
                                 * independent of the platform charset.
                                 */
                                value.append((char) b);
                            } else {
                                /*
                                 * A byte of this range cannot continue
                                 * the pending bytes.
                                 */
                                baos.write(b);
                                throw new BadNameException("(" + state
                                        + ") Invalid UTF-8 code '"
                                        + Hex.encode(baos.toByteArray())
                                        + "'!");
                            }
                        } catch (IllegalArgumentException iae) {
                            throw new BadNameException("(" + state
                                    + ") Invalid hex character '" + t
                                    + "'!");
                        }
                    } else {
                        /*
                         * Now we have to try to parse the UTF-8 code.
                         */
                        try {
                            baos.write(b);

                            byte[] utf = baos.toByteArray();
                            ByteArrayInputStream bais =
                                    new ByteArrayInputStream(utf);
                            UTF8InputStreamReader utfReader =
                                    new UTF8InputStreamReader(bais, 2048);
                            String str = utfReader.readLine();

                            value.append(str);
                            baos.reset();
                            utfParsed = true;
                        } catch (Exception e) {
                            /*
                             * The bytes collected so far could not be
                             * decoded. If the token consists of the hex
                             * pair only then another escaped pair may
                             * follow, so we keep the bytes and go back.
                             */
                            if (tok.length() == 2) {
                                tokParsed = true;
                                utfParsed = false;
                                state = returnState;
                                continue;
                            }
                            throw new BadNameException("(" + state
                                    + ") Invalid UTF-8 code '"
                                    + Hex.encode(baos.toByteArray())
                                    + "'!");
                        }
                    }
                    /*
                     * What follows the hex pair within the same token is
                     * handed to the state we return to.
                     */
                    if (tok.length() > 2) {
                        tokParsed = false;
                        tok = tok.substring(2);
                    } else {
                        tokParsed = true;
                    }
                    state = returnState;
                    continue;
                }
                throw new BadNameException("(" + state
                        + ") Can't ESCAPE non-special character '"
                        + tok.charAt(0) + "'!");

            case 5:
                /*
                 * This state means that we are parsing a quoted value. It
                 * is left upon hitting another unescaped quote.
                 */
                if (!tok.equals(ESCAPE) && !utfParsed) {
                    throw new BadNameException("(" + state
                            + ") Invalid UTF-8 code '"
                            + Hex.encode(baos.toByteArray()) + "'!");
                }
                if (!tokParsed) {
                    tokParsed = true;
                }
                if (tok.equals(QUOTE)) {
                    state = 6;
                    continue;
                }
                /*
                 * If we read an escape character then we again have to
                 * call a substate for handling that.
                 */
                if (tok.equals(ESCAPE)) {
                    returnState = state;
                    state = 4;
                    continue;
                }
                /*
                 * Since we are in quotation marks, we add what we find to
                 * the current value.
                 */
                value.append(tok);
                continue;

            case 6:
                /*
                 * This state means that we have found the closing
                 * unescaped quotation mark, wait for a separator, and
                 * handle whitespace characters.
                 */
                if (SEPARATORS.indexOf(tok.charAt(0)) >= 0) {
                    /*
                     * Remove trailing whitespace characters, if existent,
                     * and reset the marker for the next pair.
                     */
                    if (trunc != -1) {
                        value.setLength(trunc);
                        trunc = -1;
                    }
                    /*
                     * If the separator is a PLUS then we have to set the
                     * flag that says: "this AVA is followed by another one
                     * at the same level."
                     */
                    String val = value.toString();
                    boolean plus = tok.equals(PLUS);

                    state = 0;
                    value.setLength(0);
                    /*
                     * We got a key and a (possibly empty) value. Now we
                     * ship it out and go on in state 0.
                     */
                    ava_.add(new AVA(key, val, plus));
                    continue;
                }
                if (WHITESPACE.indexOf(tok) < 0) {
                    throw new BadNameException("(" + state
                            + ") Only whitespace characters are "
                            + "allowed after the second unescaped quotation "
                            + "mark (\")!");
                }
                continue;

            case 7:
                /*
                 * This state decodes a string that represents a binary
                 * value. If we hit upon a separator then we ship out the
                 * accumulated hexadecimal string.
                 */
                if (SEPARATORS.indexOf(tok.charAt(0)) >= 0) {
                    String val = value.toString();
                    boolean plus = tok.equals(PLUS);
                    byte[] buf;

                    /*
                     * The state is changed after decoding only, so that
                     * the error messages name the state we are in.
                     */
                    try {
                        buf = Hex.decode(val);
                    } catch (Exception e) {
                        throw new BadNameException("(" + state
                                + ") Bad hexadecimal code '" + val + "'!");
                    }
                    if (buf.length == 0) {
                        throw new BadNameException("(" + state
                                + ") Empty hexadecimal code '" + val
                                + "'!");
                    }
                    state = 0;
                    value.setLength(0);
                    /*
                     * Trailing whitespace has been seen for this value
                     * only; the next pair starts without it.
                     */
                    trunc = -1;
                    ava_.add(new AVA(key, buf, plus));
                    continue;
                }
                /*
                 * Whitespace is not added to the value. We only remember
                 * that we have seen some.
                 */
                if (WHITESPACE.indexOf(tok) >= 0) {
                    if (trunc == -1) {
                        trunc = value.length();
                    }
                    continue;
                }
                if (trunc != -1) {
                    throw new BadNameException("(" + state
                            + ") Non-trailing whitespace characters "
                            + "after hexadecimal code '" + value.toString()
                            + "'!");
                }
                /*
                 * Only the first character of the token is checked here.
                 * The hexadecimal encoding as a whole is checked when we
                 * ship out the string.
                 */
                if (HEXCHAR.indexOf(tok.charAt(0)) >= 0) {
                    value.append(tok);
                    continue;
                }
                /*
                 * Everything else is an error.
                 */
                throw new BadNameException("(" + state
                        + ") Bad hexadecimal encoding '" + value.toString()
                        + "'!");

            default:
                throw new IllegalStateException("(" + state
                        + ") Illegal state!");
            }
        }
        /*
         * The input is exhausted. Escaped bytes that never formed valid
         * UTF-8 code are an error.
         */
        if (!utfParsed) {
            throw new BadNameException("(" + state
                    + ") Invalid UTF-8 code '"
                    + Hex.encode(baos.toByteArray()) + "'!");
        }
        /*
         * Remove trailing whitespace characters of the last value.
         */
        if (trunc != -1) {
            value.setLength(trunc);
        }
        /*
         * We first check if the state machine is in a final state.
         */
        if (state != 2 && state != 3 && state != 6 && state != 7) {
            throw new BadNameException("(" + state
                    + ") Not in a final state!");
        }
        /*
         * We have to check for the epsilon transitions of the final
         * states, that is we ship out the last pair. It is never followed
         * by another one at the same level.
         */
        switch (state) {
        case 7: {
            String val = value.toString();
            byte[] buf;

            try {
                buf = Hex.decode(val);
            } catch (Exception e) {
                throw new BadNameException("(" + state
                        + ") Bad hexadecimal code '" + value.toString()
                        + "'!");
            }
            if (buf.length == 0) {
                throw new BadNameException("(" + state
                        + ") Empty hexadecimal code '" + value.toString()
                        + "'!");
            }
            ava_.add(new AVA(key, buf, false));
            break;
        }
        case 6:
            ava_.add(new AVA(key, value.toString(), false));
            break;
        case 3:
            ava_.add(new AVA(key, value.toString(), false));
            break;
        case 2:
            /*
             * The input ended right after the equals sign: empty value.
             */
            ava_.add(new AVA(key, "", false));
            break;
        }
        return ava_;
    }

    /**
     * Parses each command line argument and prints the argument followed
     * by the resulting attribute/value pairs, one per line, to standard
     * output.
     *
     * @param argv
     *                a sequence of RFC2253 strings (e.g. "CN=DE")
     * @throws Exception
     *                 if one of the arguments cannot be parsed.
     */
    public static void main(String[] argv) throws Exception {
        RFC2253Parser parser = new RFC2253Parser();

        for (int n = 0; n < argv.length; n++) {
            System.out.println("Input: '" + argv[n] + "'");

            Iterator it = parser.parse(argv[n]).iterator();

            while (it.hasNext()) {
                System.out.println(it.next());
            }
        }
    }
}