Factory.deleteResource(pipeline);
// extract sentences from text
AnnotationSet sentences = inputAS.get(sequenceType);
ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
Alphabet labelAlphabet = tagger.getYAlphabet();
OffsetComparator oc = new OffsetComparator();
SparseVector[] x;
Object[] y;
int[] labels;
ElementSequence<Element<Object>> sequence;
Element<Object> element;
ArrayList<Annotation> tokens;
String chunk, label;
for (Annotation sentence : sentences) {
// extract tokens from sentence
tokens = new ArrayList(inputAS.get(sentence.getStartNode().getOffset(),
sentence.getEndNode().getOffset()).get(elementType));
Collections.sort(tokens, oc);
for (Annotation token : tokens) {
// extract the chunk of the token
chunk = document.getContent().getContent(token.getStartNode().getOffset(),
token.getEndNode().getOffset()).toString();
label = "?";
// create an element with the chunk and label
element = new Element<Object>(chunk, label, token);
elements.add(element);
}
x = new SparseVector[elements.size()];
y = new Object[elements.size()];
// fill in the labels
for (int i = 0; i < y.length; i++)
y[i] = "?";
// create an element sequence and extract the features
sequence = new ElementSequence(elements, xAlphabet, yAlphabet, x,
y, document, inputAS);
elements = new ArrayList<Element<Object>>();
// extract the labels for the tokens in the sentence
labels = tagger.label(extractor.process(sequence).x);
FeatureMap features = Factory.newFeatureMap();
long start, end;
if (labels.length == tokens.size()) {
for (int i = 0; i < tokens.size(); i++) {
if (labels[i] == labelAlphabet.lookupObject("B-DAT")) {
start = tokens.get(i).getStartNode().getOffset();
end = tokens.get(i).getEndNode().getOffset();
while ((i + 1 < tokens.size()) &&
(labels[i + 1] == labelAlphabet.lookupObject("I-DAT"))) {
end = tokens.get(i + 1).getEndNode().getOffset();
i++;
}
outputAS.add(start, end, datType, features);
}
else if (labels[i] == labelAlphabet.lookupObject("B-LOC")) {
start = tokens.get(i).getStartNode().getOffset();
end = tokens.get(i).getEndNode().getOffset();
while ((i + 1 < tokens.size()) &&
(labels[i + 1] == labelAlphabet.lookupObject("I-LOC"))) {
end = tokens.get(i + 1).getEndNode().getOffset();
i++;
}
outputAS.add(start, end, locType, features);
}
else if (labels[i] == labelAlphabet.lookupObject("B-ORG")) {
start = tokens.get(i).getStartNode().getOffset();
end = tokens.get(i).getEndNode().getOffset();
while ((i + 1 < tokens.size()) &&
(labels[i + 1] == labelAlphabet.lookupObject("I-ORG"))) {
end = tokens.get(i + 1).getEndNode().getOffset();
i++;
}
outputAS.add(start, end, orgType, features);
}
else if (labels[i] == labelAlphabet.lookupObject("B-PER")) {
start = tokens.get(i).getStartNode().getOffset();
end = tokens.get(i).getEndNode().getOffset();
while ((i + 1 < tokens.size()) &&
(labels[i + 1] == labelAlphabet.lookupObject("I-PER"))) {
end = tokens.get(i + 1).getEndNode().getOffset();
i++;
}
outputAS.add(start, end, perType, features);