package edu.stanford.nlp.process;
import edu.stanford.nlp.io.Lexer;
import edu.stanford.nlp.io.RuntimeIOException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
/**
* An implementation of {@link Tokenizer} designed to work with
* {@link Lexer} implementing classes. Throw in a {@link Lexer} on
* construction and you get a {@link Tokenizer}.
*
* @author Roger Levy
*/
public class LexerTokenizer extends AbstractTokenizer<String> {

  /** The lexer that produces the tokens; assigned once by the constructor and never null. */
  private final Lexer lexer;

  /**
   * Internally fetches the next token, skipping every result the lexer
   * reports as {@link Lexer#IGNORE}.
   *
   * @return the text of the next token in the token stream, or null if
   *         the lexer reported end of input or an {@link IOException}
   *         was raised while lexing
   */
  @Override
  protected String getNext() {
    try {
      int code;
      do {
        code = lexer.yylex();
      } while (code == Lexer.IGNORE); // skip tokens to be ignored
      return (code == lexer.getYYEOF()) ? null : lexer.yytext();
    } catch (IOException ignored) {
      // Deliberate best effort kept from the original implementation:
      // a read failure is reported to the caller the same way as end
      // of input, i.e. by returning null.
      return null;
    }
  }

  /**
   * Constructs a tokenizer from a {@link Lexer}.
   *
   * @param l the lexer to read tokens from; must not be null
   * @throws IllegalArgumentException if {@code l} is null
   */
  public LexerTokenizer(Lexer l) {
    if (l == null) {
      throw new IllegalArgumentException("You can't make a Tokenizer out of a null Lexer!");
    }
    this.lexer = l;
  }

  /**
   * Constructs a tokenizer from a {@link Lexer} and makes a {@link
   * Reader} the active input stream for the tokenizer.
   *
   * @param l the lexer to read tokens from; must not be null
   * @param r the reader handed to the lexer via {@code yyreset}
   * @throws IllegalArgumentException if {@code l} is null
   * @throws RuntimeIOException if resetting the lexer onto {@code r}
   *         raises an {@link IOException}; the original exception is
   *         kept as the cause
   */
  public LexerTokenizer(Lexer l, Reader r) {
    this(l);
    try {
      l.yyreset(r);
    } catch (IOException e) {
      // Same message as before, but keep the cause so the stack trace survives.
      throw new RuntimeIOException(e.getMessage(), e);
    }
    // No priming read here: this constructor used to call getNext() and
    // throw the result away, which consumed the first token of the input
    // before any caller could see it (and invoked an overridable method
    // from a constructor).
  }

  /**
   * For testing only: tokenizes the file named by the first argument
   * with a {@code JFlexDummyLexer} and prints one line per token.
   *
   * @param args command-line arguments; {@code args[0]} is the path of
   *        the file to tokenize
   * @throws IOException if the file cannot be opened or closed
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      System.err.println("usage: java edu.stanford.nlp.process.LexerTokenizer <file>");
      return;
    }
    Reader in = new BufferedReader(new FileReader(args[0]));
    try {
      Tokenizer<String> t = new LexerTokenizer(new JFlexDummyLexer((Reader) null), in);
      while (t.hasNext()) {
        System.out.println("token " + t.next());
      }
    } finally {
      // Release the file handle even if tokenizing fails part-way.
      in.close();
    }
  }
}