// MimeParser.java
// $Id: MimeParser.java,v 1.16 2003/02/24 10:31:10 ylafon Exp $
// (c) COPYRIGHT MIT and INRIA, 1996.
// Please first read the full copyright statement in file COPYRIGHT.html
package org.w3c.www.mime ;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import org.w3c.www.http.HttpAccept;
import org.w3c.www.http.HttpAcceptCharset;
import org.w3c.www.http.HttpAcceptLanguage;
import org.w3c.www.http.HttpMessage;
import org.w3c.www.http.HttpRequestMessage;
/**
* The MimeParser class parses an input MIME stream.
*/
public class MimeParser {
/**
* Lookahead: the byte most recently read from <code>input</code>.
* Starts at -1 and is -1 again once the stream reports end of input.
*/
protected int ch = -1 ;
/**
* The stream being parsed; every parse method pulls its bytes from here.
*/
protected InputStream input = null ;
/**
* Scratch area accumulating the bytes of the element currently being
* parsed (header name, header body or token). Grown by <code>append</code>.
*/
protected byte buffer[] = new byte[128] ;
/**
* Number of valid bytes in <code>buffer</code>; the parse methods reset it
* to 0 before they start accumulating.
*/
protected int bsize = 0 ;
/**
* The factory used to create new MIME header holders.
*/
protected MimeParserFactory factory = null ;
/**
 * Check that the lookahead character is the expected one, then advance
 * the lookahead to the next byte of the input stream.
 * @param car The character expected at the current position.
 * @exception MimeParserException If the lookahead is not <code>car</code>;
 * the message shows both characters and the bytes accumulated so far.
 * @exception IOException If reading the next byte fails.
 */
protected void expect (int car)
    throws MimeParserException, IOException
{
    if ( car != ch ) {
        String sc = String.valueOf((char) car) ;
        // At end of stream ch is -1, which has no printable character form.
        String se = (ch < 0) ? "EOF" : String.valueOf((char) ch) ;
        throw new MimeParserException ("expecting "
                                       + sc + "("+car+")"
                                       + " got "
                                       + se + "("+ch+")\n"
                                       + "context: "
                                       // one byte -> one char, as the
                                       // deprecated hibyte constructor did
                                       + new String (buffer, 0, bsize,
                                                     "ISO-8859-1")
                                       + "\n") ;
    }
    ch = input.read() ;
}
/**
 * Advance the lookahead past any run of space and horizontal tab
 * characters, leaving <code>ch</code> on the first byte that is neither.
 * @exception IOException If reading from the input stream fails.
 */
protected void skipSpaces ()
    throws MimeParserException, IOException
{
    for (;;) {
        if ( (ch != ' ') && (ch != '\t') ) {
            return ;
        }
        ch = input.read() ;
    }
}
/**
 * Store one byte at the end of the accumulation buffer, doubling the
 * buffer first when fewer than two free slots remain.
 * @param c The value to store; only its low eight bits are kept.
 */
protected final void append (int c) {
    final int capacity = buffer.length ;
    if ( capacity <= bsize + 1 ) {
        byte grown[] = new byte[capacity * 2] ;
        System.arraycopy (buffer, 0, grown, 0, capacity) ;
        buffer = grown ;
    }
    buffer[bsize++] = (byte) c ;
}
/**
 * Parse a header name and the colon that terminates it.
 * Every byte from the current lookahead up to the first colon or the first
 * byte below 32 is taken as part of the name; tabs and spaces in front of
 * the colon are then skipped. On return the lookahead is the first byte
 * after the colon.
 * @return The header name, each byte mapped to the character of the same
 * code (ISO-8859-1).
 * @exception MimeParserException If no colon follows the name, or if the
 * name is empty.
 * @exception IOException If reading from the input stream fails.
 */
protected String parse822HeaderName ()
    throws MimeParserException, IOException
{
    bsize = 0 ;
    while ( (ch >= 32) && (ch != ':') ) {
        append (ch) ;
        ch = input.read() ;
    }
    skipSpaces();
    expect (':') ;
    if ( bsize <= 0 )
        throw new MimeParserException ("expected a header name.") ;
    // Explicit charset replaces the deprecated hibyte constructor; the
    // resulting string is the same.
    return new String (buffer, 0, bsize, "ISO-8859-1") ;
}
/**
 * Parse a header body in lenient mode.
 * Shorthand for <code>parse822HeaderBody(true)</code>; the parsed bytes are
 * left in <code>buffer</code>, with their count in <code>bsize</code>.
 * @exception MimeParserException Declared for the strict variant.
 * @exception IOException If reading from the input stream fails.
 */
protected void parse822HeaderBody ()
    throws MimeParserException, IOException
{
    final boolean lenient = true ;
    parse822HeaderBody(lenient) ;
}
/**
 * Get the header body, still trying to be 822 compliant *and* HTTP
 * robust, which is unfortunately a contradiction.
 * <p>Lenient variant: a bare LF ends a line just like CRLF does. A line
 * end followed by a space or tab is a continuation; the line end and the
 * white space are replaced by a single space, or by nothing when the
 * continuation line is empty. Any other line end, or the end of the
 * stream, terminates the body.
 * <p>The body is left in <code>buffer</code> (<code>bsize</code> bytes);
 * on return the lookahead is the first byte of the next line, or -1.
 * @exception IOException If reading from the input stream fails.
 */
protected void parse822HeaderBodyLenient ()
    throws MimeParserException, IOException
{
    bsize = 0 ;
    skipSpaces () ;
    boolean quoted = false;
  loop:
    while ( true ) {
        switch (ch) {
          case -1:
              break loop ;
          case '\r':
              if ( (ch = input.read()) != '\n' ) {
                  // lone CR: keep it and dispatch the byte that followed
                  append ('\r') ;
                  continue ;
              }
              // no break intentional
          case '\n':
              // do as if '\r' had been received. This defeats 822, but
              // makes HTTP more "robust". I wish HTTP were a binary
              // protocol.
              ch = input.read() ;
              if ( (ch != ' ') && (ch != '\t') ) {
                  // not a continuation: the header body is complete
                  break loop ;
              }
              // header continuation, eat LWS then add a SP
              do {
                  ch = input.read () ;
              } while ((ch == ' ') || (ch == '\t')) ;
              if ( (ch != '\r') && (ch != '\n') ) {
                  append(' ');
              }
              // Dispatch the first byte of the continuation through the
              // switch like any other byte, so that a quote still toggles
              // the quoted state, a line end is re-examined and the end
              // of the stream is not stored as a data byte.
              continue ;
          case '\\':
              append (ch) ;
              if (quoted) {
                  // quoted-pair: the next byte is taken literally
                  ch = input.read();
                  if (ch == -1) {
                      // stream ended right after the backslash
                      break loop ;
                  }
                  append (ch) ;
              }
              break;
          case '\"':
              quoted = !quoted;
              // no break intentional: the quote itself is part of the body
          default:
              append (ch) ;
              break ;
        }
        ch = input.read() ;
    }
}
/**
 * Parse a header body, choosing between the lenient and the strict
 * parser. The parsed bytes are left in <code>buffer</code>, with their
 * count in <code>bsize</code>.
 * @param lenient boolean, true for robustness, false for stricter spec
 * adherence
 * @exception MimeParserException If the strict parser rejects the body.
 * @exception IOException If reading from the input stream fails.
 */
protected void parse822HeaderBody (boolean lenient)
    throws MimeParserException, IOException
{
    if ( ! lenient ) {
        parse822HeaderBodyStrict() ;
        return ;
    }
    parse822HeaderBodyLenient() ;
}
/**
 * Get the header body, favouring spec adherence over robustness.
 * <p>Strict variant: only CRLF ends a line. A bare LF outside a quoted
 * string is kept as data; inside a quoted string it is rejected, and so
 * is any control character other than tab. A CRLF inside a quoted string
 * is kept as is. Outside quotes, a CRLF followed by a space or tab is a
 * continuation, replaced by a single space; any other CRLF, or the end
 * of the stream, terminates the body.
 * <p>The body is left in <code>buffer</code> (<code>bsize</code> bytes);
 * on return the lookahead is the first byte of the next line, or -1.
 * @exception MimeParserException If a quoted string contains a bare LF
 * or a control character.
 * @exception IOException If reading from the input stream fails.
 */
protected void parse822HeaderBodyStrict ()
    throws MimeParserException, IOException
{
    bsize = 0 ;
    skipSpaces () ;
    boolean quoted = false;
    // true while the LF being examined was immediately preceded by a CR
    boolean gotr = false;
  loop:
    while ( true ) {
        switch (ch) {
          case -1:
              break loop ;
          case '\r':
              if ( (ch = input.read()) != '\n' ) {
                  // lone CR: keep it and dispatch the byte that followed
                  append ('\r') ;
                  continue ;
              }
              // CRLF seen: let the '\n' case decide what it means
              gotr = true;
              continue;
          case '\n':
              if (quoted) {
                  if (gotr) {
                      append('\r');
                      append('\n');
                      // This CR is used up; without the reset a bare LF
                      // right after would wrongly pass as another CRLF.
                      gotr = false;
                      break;
                  }
                  throw new MimeParserException("MimeParser: "+
                                                "\\n not allowed in "+
                                                "quoted string");
              }
              if ( ! gotr ) {
                  // bare LF outside quotes is plain data in strict mode
                  append('\n');
                  break;
              }
              ch = input.read() ;
              if ( (ch != ' ') && (ch != '\t') ) {
                  // CRLF not followed by LWS: the header body is complete
                  break loop ;
              }
              // header continuation, eat LWS then add a SP
              do {
                  ch = input.read () ;
              } while ((ch == ' ') || (ch == '\t')) ;
              if (ch == '\r') {
                  // empty continuation line, restart to check what follows
                  continue;
              }
              gotr = false;
              append(' ');
              // Dispatch the first byte of the continuation through the
              // switch like any other byte, so that a quote still toggles
              // the quoted state and the end of the stream is not stored
              // as a data byte.
              continue ;
          case '\\':
              gotr = false;
              append (ch) ;
              if (quoted) {
                  // quoted-pair: the next byte is taken literally
                  ch = input.read();
                  if (ch == -1) {
                      // stream ended right after the backslash
                      break loop ;
                  }
                  append (ch) ;
              }
              break;
          case '\"':
              gotr = false;
              quoted = !quoted;
              // no break intentional: the quote itself is part of the body
          default:
              if (quoted) {
                  if ((ch < 32) && (ch != '\t')) {
                      throw new MimeParserException("MimeParser: "+
                                                    "CTRL not allowed in "+
                                                    "quoted string");
                  }
              }
              gotr = false;
              append (ch) ;
              break ;
        }
        ch = input.read() ;
    }
}
/**
 * Parse the given input stream for an HTTP 1.1 token.
 * Bytes are accumulated from the current lookahead until a delimiter is
 * met; the delimiter is left as the lookahead and is not consumed.
 * @param lower If true, each character is converted to lower case before
 * being stored.
 * @return The token, possibly empty, each byte mapped to the character of
 * the same code (ISO-8859-1).
 * @exception IOException If reading from the input stream fails.
 */
protected String parseToken (boolean lower)
    throws MimeParserException, IOException
{
    bsize = 0 ;
    while ( ! isTokenDelimiter(ch) ) {
        int c = lower ? Character.toLowerCase((char) ch) : ch ;
        append (c) ;
        ch = input.read() ;
    }
    // Explicit charset replaces the deprecated hibyte constructor; the
    // resulting string is the same.
    return new String (buffer, 0, bsize, "ISO-8859-1") ;
}

/**
 * Tell whether a value ends a token: the end of stream marker (-1), a
 * control character (0 to 31), a space, or one of the tspecials.
 */
private static boolean isTokenDelimiter (int c) {
    if ( (c >= -1) && (c < 32) )
        return true ;
    switch (c) {
      case '(': case ')': case '<': case '>': case '@':
      case ',': case ';': case ':': case '\\': case '\"':
      case '/': case '[': case ']': case '?': case '=':
      case '{': case '}': case ' ':
          return true ;
      default:
          return false ;
    }
}
/**
 * Parse header lines until the empty line that ends the header section,
 * handing each header to the holder through <code>notifyHeader</code>.
 * The section ends on CRLF at the start of a line, or, in lenient mode
 * only, on a bare LF there.
 * @param msg The holder notified of each header name and raw body bytes.
 * @param lenient true to accept a bare LF as the empty line and to parse
 * bodies leniently.
 * @exception MimeParserException If a header line is malformed.
 * @exception IOException If reading from the input stream fails.
 */
protected void parse822Headers(MimeHeaderHolder msg, boolean lenient)
    throws MimeParserException, IOException
{
    for (;;) {
        if ( ch == '\r' ) {
            ch = input.read() ;
            if ( ch == '\n' ) {
                return ;
            }
        } else if ( lenient && (ch == '\n') ) {
            return ;
        }
        final String headerName = parse822HeaderName() ;
        skipSpaces() ;
        parse822HeaderBody (lenient) ;
        // the body just parsed occupies buffer[0..bsize)
        msg.notifyHeader(headerName, buffer, 0, bsize) ;
    }
}
/**
 * Parse the header section in lenient mode.
 * Shorthand for <code>parse822Headers(msg, true)</code>.
 * @param msg The holder notified of each parsed header.
 * @exception MimeParserException If a header line is malformed.
 * @exception IOException If reading from the input stream fails.
 */
protected void parse822Headers(MimeHeaderHolder msg)
    throws MimeParserException, IOException
{
    final boolean lenient = true ;
    parse822Headers(msg, lenient) ;
}
/**
 * Parse the stream, and create a MimeHeaderHolder containing all
 * the parsed headers.
 * The holder is obtained from the factory and notified before and after
 * parsing; headers are parsed only when its
 * <code>notifyBeginParsing</code> returns false. In strict mode an invalid
 * header triggers an exception. In lenient mode it silently ends header
 * parsing instead, and the holder keeps the headers notified so far.
 * @param lenient a boolean, true if we want to be kind with bad people
 * @return a MimeHeaderHolder instance containing the parsed headers
 * @exception MimeParserException In strict mode, if a header is invalid.
 * @exception IOException If reading from the input stream fails.
 */
public MimeHeaderHolder parse(boolean lenient)
    throws MimeParserException, IOException
{
    final MimeHeaderHolder holder = factory.createHeaderHolder(this) ;
    // Prime the lookahead and mark it as pending, so that a read() issued
    // by the holder gets this byte first.
    ch = input.read() ;
    cached = true ;
    final boolean skipHeaders = holder.notifyBeginParsing(this) ;
    if ( ! skipHeaders ) {
        // the holder consumed the pending byte: fetch a fresh lookahead
        if ( ! cached ) {
            ch = input.read() ;
        }
        try {
            parse822Headers (holder, lenient) ;
        } catch (MimeParserException ex) {
            if ( ! lenient ) {
                throw ex ;
            }
            // be lenient ;)
        }
    }
    holder.notifyEndParsing(this) ;
    return holder ;
}
/**
 * Parse the stream, and create a MimeHeaderHolder containing all
 * the parsed headers, in lenient mode.
 * Always be lenient by default (general rule is: be lenient in what you
 * accept, conservative with what you generate).
 * @return a MimeHeaderHolder instance containing the parsed headers
 * @exception MimeParserException Declared for the strict variant.
 * @exception IOException If reading from the input stream fails.
 */
public MimeHeaderHolder parse()
    throws MimeParserException, IOException
{
    final boolean lenient = true ;
    return parse(lenient) ;
}
/**
* True when <code>ch</code> holds a byte that the next call to
* <code>read</code> must return without reading from the stream.
*/
boolean cached = false ;
/**
 * Read the next byte through the parser.
 * If a byte is pending (primed by <code>parse</code> or pushed back with
 * <code>unread</code>) it is returned and the pending mark is cleared;
 * otherwise a new byte is read from the input stream.
 * @return The byte, which also becomes the parser's lookahead.
 * @exception IOException If reading from the input stream fails.
 */
public int read()
    throws IOException
{
    if ( ! cached ) {
        ch = input.read() ;
    } else {
        cached = false ;
    }
    return ch ;
}
/**
 * Push one byte back, so that the next call to <code>read</code> returns
 * it. Only a single byte can be pending at any time.
 * @param ch The byte to push back; it becomes the parser's lookahead.
 * @exception RuntimeException If a byte is already pending.
 */
public void unread(int ch) {
    if ( ! cached ) {
        this.ch = ch ;
        cached = true ;
        return ;
    }
    throw new RuntimeException("cannot unread more then once !") ;
}
/**
 * Get the message body, as an input stream.
 * @return The input stream used by the parser to get data. After
 * a call to <code>parse</code>, this input stream contains exactly
 * the body of the message.
 */
public InputStream getInputStream () {
    return this.input ;
}
/**
 * Create an instance of the MimeParser class.
 * Nothing is read from the stream until <code>parse</code> is called.
 * @param input The input stream to be parsed as a MIME stream.
 * @param factory The factory used to create MIME header holders.
 */
public MimeParser (InputStream input, MimeParserFactory factory) {
    this.factory = factory ;
    this.input   = input ;
}
/**
 * Debugging entry point: parse a file as an HTTP request and print its
 * Accept, Accept-Language and Accept-Charset values, then the message.
 * Usage: <code>MimeParser &lt;factory&gt; &lt;file&gt;</code>
 * @param args The factory class name, then the name of the file to parse.
 */
public static void main(String args[]) {
    // Missing arguments are a usage error, not something worth a stack trace.
    if ( args.length < 2 ) {
        System.out.println("MimeParser <factory> <file>");
        return ;
    }
    try {
        String factoryname = args[0];
        String filename    = args[1];
        // Create the factory:
        MimeParserFactory f = null;
        f = (MimeParserFactory) Class.forName(factoryname).newInstance();
        // Create the parser:
        InputStream in = (new BufferedInputStream
                          (new FileInputStream (filename)));
        try {
            MimeParser p = new MimeParser(in, f);
            HttpRequestMessage m = (HttpRequestMessage) p.parse();
            HttpAccept a[] = m.getAccept();
            for (int i = 0 ; i < a.length ; i++) {
                System.out.println("accept: "+a[i].getMimeType());
            }
            HttpAcceptLanguage l[] = m.getAcceptLanguage();
            for (int i = 0 ; i < l.length ; i++) {
                System.out.println("accept-lang: "+l[i].getLanguage());
            }
            HttpAcceptCharset c[] = m.getAcceptCharset();
            for (int i = 0 ; i < c.length ; i++) {
                System.out.println("accept-charset: "+c[i].getCharset());
            }
            m.emit(System.out);
        } finally {
            // release the file whether or not parsing succeeded
            in.close();
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        System.out.println("MimeParser <factory> <file>");
    }
}
}