// Segmenter on its own (NewlineSegmenter.xml): the three-line document is
// expected to come back as three output CASes, one per line, in order.
AnalysisEngineDescription segmenterDesc = UIMAFramework.getXMLParser()
    .parseAnalysisEngineDescription(new XMLInputSource(
        JUnitExtension.getFile("TextAnalysisEngineImplTest/NewlineSegmenter.xml")));
AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
CAS cas = ae.newCAS();
cas.setDocumentText("Line one\nLine two\nLine three");
CasIterator iter = ae.processAndOutputNewCASes(cas);
// Declared outside the loop so the variable stays in scope for the code that follows.
CAS outCas;
for (String expectedSegment : new String[] { "Line one", "Line two", "Line three" }) {
  assertTrue(iter.hasNext());
  outCas = iter.next();
  assertEquals(expectedSegment, outCas.getDocumentText());
  // Release each output CAS before asking the iterator for the next one.
  outCas.release();
}
// No further output is expected once all three lines have been seen.
assertFalse(iter.hasNext());
// Aggregate (AggregateWithSegmenter.xml): same three segments as above, and
// after each one TestAnnotator.lastDocument must equal that segment's text.
AnalysisEngineDescription aggSegDesc = UIMAFramework.getXMLParser()
    .parseAnalysisEngineDescription(new XMLInputSource(
        JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
cas = ae.newCAS();
cas.setDocumentText("Line one\nLine two\nLine three");
iter = ae.processAndOutputNewCASes(cas);
for (String expectedSegment : new String[] { "Line one", "Line two", "Line three" }) {
  assertTrue(iter.hasNext());
  outCas = iter.next();
  assertEquals(expectedSegment, outCas.getDocumentText());
  assertEquals(expectedSegment, TestAnnotator.lastDocument);
  // Release each output CAS before asking the iterator for the next one.
  outCas.release();
}
assertFalse(iter.hasNext());
// Annotator should NOT get the original CAS according to the default flow,
// so the last text it recorded is still the final segment.
assertEquals("Line three", TestAnnotator.lastDocument);
// Nested aggregate (AggregateContainingAggregateSegmenter.xml): the expected
// output is the same as for the plain aggregate above.
AnalysisEngineDescription nestedAggSegDesc = UIMAFramework.getXMLParser()
    .parseAnalysisEngineDescription(new XMLInputSource(JUnitExtension.getFile(
        "TextAnalysisEngineImplTest/AggregateContainingAggregateSegmenter.xml")));
ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
cas = ae.newCAS();
cas.setDocumentText("Line one\nLine two\nLine three");
iter = ae.processAndOutputNewCASes(cas);
for (String expectedSegment : new String[] { "Line one", "Line two", "Line three" }) {
  assertTrue(iter.hasNext());
  outCas = iter.next();
  assertEquals(expectedSegment, outCas.getDocumentText());
  assertEquals(expectedSegment, TestAnnotator.lastDocument);
  // Release each output CAS before asking the iterator for the next one.
  outCas.release();
}
assertFalse(iter.hasNext());
// Annotator should NOT get the original CAS according to the default flow,
// so the last text it recorded is still the final segment.
assertEquals("Line three", TestAnnotator.lastDocument);
// Two segmenters (AggregateWith2Segmenters.xml): a document containing both
// tabs and newlines is expected to come back as four single-word CASes.
AnalysisEngineDescription twoSegDesc = UIMAFramework.getXMLParser()
    .parseAnalysisEngineDescription(new XMLInputSource(
        JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateWith2Segmenters.xml")));
ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
cas = ae.newCAS();
cas.setDocumentText("One\tTwo\nThree\tFour");
iter = ae.processAndOutputNewCASes(cas);
for (String expectedSegment : new String[] { "One", "Two", "Three", "Four" }) {
  assertTrue(iter.hasNext());
  outCas = iter.next();
  assertEquals(expectedSegment, outCas.getDocumentText());
  assertEquals(expectedSegment, TestAnnotator.lastDocument);
  // Release each output CAS before asking the iterator for the next one.
  outCas.release();
}
assertFalse(iter.hasNext());
// Annotator should NOT get the original CAS according to the default flow,
// so the last text it recorded is still the final segment.
assertEquals("Four", TestAnnotator.lastDocument);
// Dropping segments (AggregateSegmenterForDropTest.xml).
aggSegDesc = UIMAFramework.getXMLParser()
    .parseAnalysisEngineDescription(new XMLInputSource(JUnitExtension.getFile(
        "TextAnalysisEngineImplTest/AggregateSegmenterForDropTest.xml")));
ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
cas = ae.newCAS();
cas.setDocumentText("Line one\nDROP\nLine two\nDROP\nLine three");
// Results should be the same as in the first aggregate segmenter test:
// segments whose text is DROP should not be output.
iter = ae.processAndOutputNewCASes(cas);
for (String expectedSegment : new String[] { "Line one", "Line two", "Line three" }) {
  assertTrue(iter.hasNext());
  outCas = iter.next();
  assertEquals(expectedSegment, outCas.getDocumentText());
  assertEquals(expectedSegment, TestAnnotator.lastDocument);
  // Release each output CAS before asking the iterator for the next one.
  outCas.release();
}
assertFalse(iter.hasNext());
// Annotator should NOT get the original CAS according to the default flow,
// so the last text it recorded is still the final segment.
assertEquals("Line three", TestAnnotator.lastDocument);
// With ParallelStep (AggregateForParallelStepCasMultiplierTest.xml): the output
// order is not checked here, only that exactly the expected set of document
// texts is produced, each one once.
AnalysisEngineDescription desc = UIMAFramework.getXMLParser()
    .parseAnalysisEngineDescription(new XMLInputSource(JUnitExtension.getFile(
        "TextAnalysisEngineImplTest/AggregateForParallelStepCasMultiplierTest.xml")));
ae = UIMAFramework.produceAnalysisEngine(desc);
// The CAS from the previous scenario is reset and reused rather than replaced.
cas.reset();
cas.setDocumentText("One\tTwo\nThree\tFour");
iter = ae.processAndOutputNewCASes(cas);
Set<String> expectedOutputs = new HashSet<String>();
for (String expectedOutput : new String[] {
    "One", "Two\nThree", "Four", "One\tTwo", "Three\tFour" }) {
  expectedOutputs.add(expectedOutput);
}
while (iter.hasNext()) {
  outCas = iter.next();
  // remove() returns false for an unexpected or duplicated text, failing the test.
  boolean wasExpected = expectedOutputs.remove(outCas.getDocumentText());
  assertTrue(wasExpected);
  outCas.release();
}
// Anything left over is an expected output that never arrived.
assertTrue(expectedOutputs.isEmpty());
// test aggregate with 2 AEs sharing resource manager
// Both engines below are produced from the same parsed description and are
// given the same ResourceManager instance and the same (empty) parameter map.
// The statement order in this scenario is deliberate: ae1's iteration is
// started, ae2's iteration is then run to completion, and only afterwards is
// ae1's iteration resumed.
AnalysisEngineDescription aggregateSegDesc = UIMAFramework.getXMLParser()
.parseAnalysisEngineDescription(
new XMLInputSource(JUnitExtension
.getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
Map<String, Object> params = new HashMap<String, Object>();
AnalysisEngine ae1 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
AnalysisEngine ae2 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
// start with testing first ae
// Only the first output CAS is taken from ae1 here; it is intentionally NOT
// released yet, so it is still held while ae2 runs below.
CAS cas1 = ae1.newCAS();
cas1.setDocumentText("Line one\nLine two\nLine three");
CasIterator iter1 = ae1.processAndOutputNewCASes(cas1);
assertTrue(iter1.hasNext());
CAS outCas1 = iter1.next();
assertEquals("Line one", outCas1.getDocumentText());
// now test second ae
// ae2 must produce all three segments and then report no more output while
// ae1's first output CAS is still outstanding.
CAS cas2 = ae2.newCAS();
cas2.setDocumentText("Line one\nLine two\nLine three");
CasIterator iter2 = ae2.processAndOutputNewCASes(cas2);
assertTrue(iter2.hasNext());
CAS outCas2 = iter2.next();
assertEquals("Line one", outCas2.getDocumentText());
outCas2.release();
assertTrue(iter2.hasNext());
outCas2 = iter2.next();
assertEquals("Line two", outCas2.getDocumentText());
outCas2.release();
assertTrue(iter2.hasNext());
outCas2 = iter2.next();
assertEquals("Line three", outCas2.getDocumentText());
outCas2.release();
assertFalse(iter2.hasNext());
// continue testing first ae
// ae1's held CAS is released only now, after ae2 has finished; ae1 must still
// have further output available.
outCas1.release();
assertTrue(iter1.hasNext());