}
}
@Override
public void train(CollectionReader collectionReader, File directory) throws Exception {
AggregateBuilder builder = new AggregateBuilder();
//builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));
// AnalysisEngineDescription assertionDescription = AssertionCleartkAnalysisEngine.getDescription(
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// //MultiClassLIBSVMDataWriterFactory.class.getName(),
// MaxentStringOutcomeDataWriter.class.getName(),
// DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
// directory.getPath());
// builder.add(assertionDescription);
AnalysisEngineDescription documentIdPrinterAnnotator = AnalysisEngineFactory.createPrimitiveDescription(DocumentIdPrinterAnalysisEngine.class);
builder.add(documentIdPrinterAnnotator);
AnalysisEngineDescription goldCopierIdentifiedAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
builder.add(goldCopierIdentifiedAnnotsAnnotator);
AnalysisEngineDescription goldCopierSupportingAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceSupportingAnnotationsSystemToGoldCopier.class);
builder.add(goldCopierSupportingAnnotsAnnotator);
AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
builder.add(assertionAttributeClearerAnnotator);
String generalSectionRegexFileUri =
"org/mitre/medfacts/zoner/section_regex.xml";
AnalysisEngineDescription zonerAnnotator =
AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
generalSectionRegexFileUri
);
// builder.add(zonerAnnotator);
String mayoSectionRegexFileUri =
"org/mitre/medfacts/uima/mayo_sections.xml";
AnalysisEngineDescription mayoZonerAnnotator =
AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
mayoSectionRegexFileUri
);
// builder.add(mayoZonerAnnotator);
// URL assertionCuePhraseLookupAnnotatorDescriptorUrl1 = this.getClass().getClassLoader().getResource("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml");
// logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl1 (slashes): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl1));
// URL assertionCuePhraseLookupAnnotatorDescriptorUrl2 = this.getClass().getClassLoader().getResource("org.apache.ctakes.dictionary.lookup.AssertionCuePhraseDictionaryLookupAnnotator.xml");
// logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl2 (periods): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl2));
//
//
// AnalysisEngineDescription cuePhraseLookupAnnotator =
// AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
// builder.add(cuePhraseLookupAnnotator);
// Set up feature selection parameters; a null threshold option falls back to 0.
Float featureSelectionThreshold = options.featureSelectionThreshold;
Class<? extends DataWriter> dataWriterClassFirstPass = getDataWriterClass();
if (options.featureSelectionThreshold==null) {
featureSelectionThreshold = 0f;
}
// Add each assertion Analysis Engine to the pipeline!
builder.add(AnalysisEngineFactory.createPrimitiveDescription(AlternateCuePhraseAnnotator.class, new Object[]{}));
if (!options.ignorePolarity)
{
if (options.useYtexNegation) {
AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createAnalysisEngineDescription(YTEX_NEGATION_DESCRIPTOR);
builder.add(polarityAnnotator);
} else {
AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //, this.additionalParamemters);
ConfigurationParameterFactory.addConfigurationParameters(
polarityAnnotator,
AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
AssertionEvaluation.GOLD_VIEW_NAME,
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// this.dataWriterFactoryClass.getName(),
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
dataWriterClassFirstPass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
new File(directory, "polarity").getPath(),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
PolarityCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "polarity")),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
featureSelectionThreshold
);
builder.add(polarityAnnotator);
}
}
if (!options.ignoreConditional)
{
AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //, this.additionalParamemters);
ConfigurationParameterFactory.addConfigurationParameters(
conditionalAnnotator,
AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
AssertionEvaluation.GOLD_VIEW_NAME,
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// this.dataWriterFactoryClass.getName(),
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
dataWriterClassFirstPass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
new File(directory, "conditional").getPath(),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
ConditionalCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "conditional")),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
featureSelectionThreshold
);
builder.add(conditionalAnnotator);
}
if (!options.ignoreUncertainty)
{
AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //, this.additionalParamemters);
ConfigurationParameterFactory.addConfigurationParameters(
uncertaintyAnnotator,
AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
AssertionEvaluation.GOLD_VIEW_NAME,
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// this.dataWriterFactoryClass.getName(),
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
dataWriterClassFirstPass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
new File(directory, "uncertainty").getPath(),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
UncertaintyCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "uncertainty")),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
featureSelectionThreshold
);
builder.add(uncertaintyAnnotator);
}
if (!options.ignoreSubject)
{
AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //, this.additionalParamemters);
ConfigurationParameterFactory.addConfigurationParameters(
subjectAnnotator,
AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
AssertionEvaluation.GOLD_VIEW_NAME,
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// this.dataWriterFactoryClass.getName(),
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
dataWriterClassFirstPass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
new File(directory, "subject").getPath(),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
SubjectCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "subject")),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
featureSelectionThreshold
);
builder.add(subjectAnnotator);
}
if (!options.ignoreGeneric)
{
AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //, this.additionalParamemters);
ConfigurationParameterFactory.addConfigurationParameters(
genericAnnotator,
AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
AssertionEvaluation.GOLD_VIEW_NAME,
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// this.dataWriterFactoryClass.getName(),
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
dataWriterClassFirstPass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
new File(directory, "generic").getPath(),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
GenericCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "generic")),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
featureSelectionThreshold
);
builder.add(genericAnnotator);
}
// Added 2/20/13 (srh): history annotator, skipped when options.ignoreHistory is set.
if (!options.ignoreHistory) {
AnalysisEngineDescription historyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(HistoryCleartkAnalysisEngine.class);
ConfigurationParameterFactory.addConfigurationParameters(
historyAnnotator,
AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
AssertionEvaluation.GOLD_VIEW_NAME,
// CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
// this.dataWriterFactoryClass.getName(),
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
dataWriterClassFirstPass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
new File(directory, "historyOf").getPath(),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
HistoryCleartkAnalysisEngine.createFeatureSelectionURI(new File(directory, "historyOf")),
AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
featureSelectionThreshold
);
builder.add(historyAnnotator);
}
/*
AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
this.classifierAnnotatorClass,
this.additionalParameters);
ConfigurationParameterFactory.addConfigurationParameters(
classifierAnnotator,
RelationExtractorAnnotator.PARAM_GOLD_VIEW_NAME,
RelationExtractorEvaluation.GOLD_VIEW_NAME,
CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
this.dataWriterFactoryClass.getName(),
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
directory.getPath());
builder.add(classifierAnnotator);
*/
SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
//HideOutput hider = new HideOutput();
for (String currentAssertionAttribute : annotationTypes)
{
File currentDirectory = new File(directory, currentAssertionAttribute);