package net.sf.jpluck.handlers;
import net.sf.jpluck.plucker.Paragraph;
import net.sf.jpluck.plucker.TextRecord;
import net.sf.jpluck.spider.Resource;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
/**
 * Content handler that turns a plain-text resource into a single
 * {@link TextRecord} and adds it to the Plucker document.
 *
 * <p>The resource bytes are decoded with the charset reported by the resource
 * (falling back to ISO-8859-1 when none is reported) and processed line by line:
 * an empty line starts a new paragraph, any other line is appended to the
 * current paragraph as preformatted text followed by a newline. Every
 * <code>http://</code> URL found in a line is wrapped in a link; the text
 * surrounding the URLs is kept as ordinary preformatted text.</p>
 */
public class PlainTextHandler extends ContentHandler {
    /** Charset used to decode the resource when it does not report one. */
    private static final String DEFAULT_CHARSET = "ISO-8859-1";

    /** Prefix that marks the start of a URL inside a line of text. */
    private static final String URL_PREFIX = "http://";

    /**
     * Creates a handler for the given resource.
     *
     * @param pluckerDocument the Plucker document that receives the generated record
     * @param jxlDocument the JXL document supplying output encoding and margin settings
     * @param resource the plain-text resource to convert
     */
    public PlainTextHandler(net.sf.jpluck.plucker.Document pluckerDocument, net.sf.jpluck.jxl.Document jxlDocument,
                            Resource resource) {
        super(pluckerDocument, jxlDocument, resource);
    }

    /**
     * Converts the resource's text into a {@link TextRecord} and adds that record
     * to the Plucker document.
     *
     * @throws HandlingException if decoding or reading the resource data fails
     *         with an {@link IOException} (this includes an unsupported charset name)
     */
    public void handle() throws HandlingException {
        try {
            TextRecord textRecord = new TextRecord(resource.getURI(), jxlDocument.getOutputEncoding(),
                                                   jxlDocument.isUseHiresMargins());

            String charset = resource.getCharset();
            if (charset == null) {
                charset = DEFAULT_CHARSET;
            }

            LineNumberReader rdr = new LineNumberReader(
                    new InputStreamReader(new ByteArrayInputStream(resource.getData()), charset));

            Paragraph paragraph = textRecord.addParagraph(Paragraph.DEFAULT_SPACING);
            for (String line; (line = rdr.readLine()) != null;) {
                if (line.length() == 0) {
                    // A blank line separates paragraphs.
                    paragraph = textRecord.addParagraph(Paragraph.DEFAULT_SPACING);
                    continue;
                }

                // Walk the line left to right, alternating between plain text and
                // URLs, so that nothing before, between or after URLs is lost.
                int pos = 0;
                int start;
                while ((start = line.indexOf(URL_PREFIX, pos)) > -1) {
                    if (start > pos) {
                        paragraph.addPreformattedText(line.substring(pos, start));
                    }

                    // A URL runs up to the next space or to the end of the line.
                    int end = line.indexOf(' ', start);
                    if (end == -1) {
                        end = line.length();
                    }
                    String url = line.substring(start, end);
                    paragraph.addLinkStart(url);
                    paragraph.addPreformattedText(url);
                    paragraph.addLinkEnd();

                    // end is always greater than start, so the scan always advances.
                    pos = end;
                }

                // Remaining text after the last URL (or the whole line if it had none).
                if (pos < line.length()) {
                    paragraph.addPreformattedText(line.substring(pos));
                }
                paragraph.addNewline();
            }

            pluckerDocument.addRecord(textRecord);
        } catch (IOException e) {
            throw new HandlingException(e);
        }
    }
}