package edu.stanford.nlp.ie.ner.webapp;
import java.awt.Color;
import java.io.*;
import java.util.*;
import java.util.zip.*;
import javax.servlet.*;
import javax.servlet.http.*;
import org.apache.commons.lang3.StringEscapeUtils;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ie.crf.NERGUI;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.ling.CoreAnnotations;
/**
* This is a servlet interface to the CRFClassifier.
*
* @author Dat Hoang 2011
* @author John Bauer
*
**/
public class NERServlet extends HttpServlet
{
private String format;
private boolean spacing;
private String defaultClassifier;
private List<String> classifiers = new ArrayList<String>();
private Map<String, CRFClassifier> ners;
private static final int MAXIMUM_QUERY_LENGTH = 3000;
public void init()
throws ServletException
{
format = getServletConfig().getInitParameter("outputFormat");
if (format == null || format.trim().equals(""))
throw new ServletException("Invalid outputFormat setting.");
String spacingStr = getServletConfig().getInitParameter("preserveSpacing");
if (spacingStr == null || spacingStr.trim().equals(""))
throw new ServletException("Invalid preserveSpacing setting.");
//spacing = Boolean.valueOf(spacingStr).booleanValue();
spacingStr = spacingStr.trim().toLowerCase();
spacing = "true".equals(spacingStr);
String path = getServletContext().getRealPath("/WEB-INF/data/models");
for (String classifier : new File(path).list()) {
classifiers.add(classifier);
}
// TODO: get this from somewhere more interesting?
defaultClassifier = classifiers.get(0);
for (String classifier : classifiers) {
log(classifier);
}
ners = Generics.newHashMap();
for (String classifier : classifiers) {
CRFClassifier model = null;
String filename = "/WEB-INF/data/models/" + classifier;
InputStream is = getServletConfig().getServletContext().getResourceAsStream(filename);
if (is == null)
throw new ServletException("File not found. Filename = " + filename);
try {
if (filename.endsWith(".gz")) {
is = new BufferedInputStream(new GZIPInputStream(is));
} else {
is = new BufferedInputStream(is);
}
model = CRFClassifier.getClassifier(is);
} catch (IOException e) {
throw new ServletException("IO problem reading classifier.");
} catch (ClassCastException e) {
throw new ServletException("Classifier class casting problem.");
} catch (ClassNotFoundException e) {
throw new ServletException("Classifier class not found problem.");
} finally {
try {
is.close();
} catch (IOException e) {
//do nothing
}
}
ners.put(classifier, model);
}
}
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException
{
if (request.getCharacterEncoding() == null) {
request.setCharacterEncoding("utf-8");
}
response.setContentType("text/html; charset=UTF-8");
this.getServletContext().getRequestDispatcher("/header.jsp").
include(request, response);
request.setAttribute("classifiers", classifiers);
this.getServletContext().getRequestDispatcher("/ner.jsp").
include(request, response);
addResults(request, response);
this.getServletContext().getRequestDispatcher("/footer.jsp").
include(request, response);
}
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException
{
doGet(request, response);
}
public void addResults(HttpServletRequest request,
HttpServletResponse response)
throws IOException
{
String input = request.getParameter("input");
if (input == null) {
return;
}
input = input.trim();
if (input.equals("")) {
return;
}
PrintWriter out = response.getWriter();
if (input.length() > MAXIMUM_QUERY_LENGTH) {
out.print("This query is too long. If you want to run very long queries, please download and use our <a href=\"http://nlp.stanford.edu/software/CRF-NER.shtml\">publicly released distribution</a>.");
return;
}
String outputFormat = request.getParameter("outputFormat");
if (outputFormat == null || outputFormat.trim().equals("")) {
outputFormat = this.format;
}
boolean preserveSpacing;
String preserveSpacingStr = request.getParameter("preserveSpacing");
if (preserveSpacingStr == null || preserveSpacingStr.trim().equals("")) {
preserveSpacing = this.spacing;
} else {
preserveSpacingStr = preserveSpacingStr.trim();
preserveSpacing = Boolean.valueOf(preserveSpacingStr);
}
String classifier = request.getParameter("classifier");
if (classifier == null || classifier.trim().equals("")) {
classifier = this.defaultClassifier;
}
response.addHeader("classifier", classifier);
response.addHeader("outputFormat", outputFormat);
response.addHeader("preserveSpacing", String.valueOf(preserveSpacing));
if (outputFormat.equals("highlighted")) {
outputHighlighting(out, ners.get(classifier), input);
} else {
out.print(StringEscapeUtils.escapeHtml4(ners.get(classifier).classifyToString(input, outputFormat, preserveSpacing)));
}
}
public void outputHighlighting(PrintWriter out,
CRFClassifier classifier,
String input) {
Set<String> labels = classifier.labels();
String background = classifier.backgroundSymbol();
List<List<CoreMap>> sentences = classifier.classify(input);
Map<String, Color> tagToColorMap =
NERGUI.makeTagToColorMap(labels, background);
StringBuilder result = new StringBuilder();
int lastEndOffset = 0;
for (List<CoreMap> sentence : sentences) {
for (CoreMap word : sentence) {
int beginOffset = word.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
int endOffset = word.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
String answer = word.get(CoreAnnotations.AnswerAnnotation.class);
if (beginOffset > lastEndOffset) {
result.append(StringEscapeUtils.escapeHtml4(input.substring(lastEndOffset, beginOffset)));
}
// Add a color bar for any tagged words
if (!background.equals(answer)) {
Color color = tagToColorMap.get(answer);
result.append("<span style=\"color:#ffffff;background:" +
NERGUI.colorToHTML(color) + "\">");
}
result.append(StringEscapeUtils.escapeHtml4(input.substring(beginOffset, endOffset)));
// Turn off the color bar
if (!background.equals(answer)) {
result.append("</span>");
}
lastEndOffset = endOffset;
}
}
if (lastEndOffset < input.length()) {
result.append(StringEscapeUtils.escapeHtml4(input.substring(lastEndOffset)));
}
result.append("<br><br>");
result.append("Potential tags:");
for (String label : tagToColorMap.keySet()) {
result.append("<br> ");
Color color = tagToColorMap.get(label);
result.append("<span style=\"color:#ffffff;background:" +
NERGUI.colorToHTML(color) + "\">");
result.append(StringEscapeUtils.escapeHtml4(label));
result.append("</span>");
}
out.print(result.toString());
}
}