// (LEGACY) Gets metadata using the extractors and appends to documents
//
private void enrichSource(SourcePojo source, List<DocumentPojo> toAdd, List<DocumentPojo> toUpdate, List<DocumentPojo> toRemove)
{
StructuredAnalysisHarvester sah = null;
UnstructuredAnalysisHarvester usah = null;
// Create metadata from the text using regex (also calculate header/footer information if desired)
if (source.getUnstructuredAnalysisConfig() != null)
{
usah = new UnstructuredAnalysisHarvester();
// If structured analysis is also configured, the two harvesters need to be muxed together:
// the UAH will run on the body/description potentially created by the SAH,
// and the SAH will use the metadata generated by the UAH to create entities and events
if (source.getStructuredAnalysisConfig() != null) {
sah = new StructuredAnalysisHarvester();
sah.addUnstructuredHandler(usah);
}
else {
toAdd = usah.executeHarvest(this, source, toAdd);
}
}
// For sources that generate structured data, we can turn that into entities and events
// and fill in document fields from the metadata (that can be used by entity extraction)
if (source.getStructuredAnalysisConfig() != null)
{
if (null == sah) {
sah = new StructuredAnalysisHarvester();
}
toAdd = sah.executeHarvest(this, source, toAdd);
// (if usah exists then this runs usah)
}
// Perform text and entity extraction
if (source.getStructuredAnalysisConfig() == null) // (Else is performed during SAH above)