SemEvalCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString(), SemEvalCorpusReader.PARAM_LANGUAGE, "en");
// Tokenization: a single BreakIteratorSegmenter description is added to the
// aggregate twice, once for VIEW_1 and once for VIEW_2.
AnalysisEngineDescription seg = createEngineDescription(
BreakIteratorSegmenter.class);
AggregateBuilder builder = new AggregateBuilder();
// Each add() passes the description plus a pair of view names.
// NOTE(review): in uimaFIT this pair is read as (component view, aggregate view),
// i.e. the segmenter's INITIAL_VIEW is mapped onto VIEW_1 / VIEW_2 - confirm.
builder.add(seg, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_1);
builder.add(seg, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_2);
// Instantiate the aggregate engine holding both segmenter registrations.
AnalysisEngine aggr_seg = builder.createAggregate();
// POS Tagging: OpenNlpPosTagger configured for English ("en"), added to its own
// aggregate once for VIEW_1 and once for VIEW_2.
AnalysisEngineDescription pos = createEngineDescription(
OpenNlpPosTagger.class,
OpenNlpPosTagger.PARAM_LANGUAGE, "en");
// Reuse the existing builder variable with a fresh AggregateBuilder so this
// aggregate contains only the tagger registrations.
builder = new AggregateBuilder();
builder.add(pos, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_1);
builder.add(pos, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_2);
AnalysisEngine aggr_pos = builder.createAggregate();
// Lemmatization: GateLemmatizer, added to its own aggregate once for VIEW_1
// and once for VIEW_2.
AnalysisEngineDescription lem = createEngineDescription(
// Disabled alternative kept for reference: StanfordLemmatizer.class
GateLemmatizer.class);
// Fresh AggregateBuilder so this aggregate contains only the lemmatizer registrations.
builder = new AggregateBuilder();
builder.add(lem, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_1);
builder.add(lem, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_2);
AnalysisEngine aggr_lem = builder.createAggregate();
// Stopword Filter (if applicable): StopwordFilter configured with the English
// stopword-and-punctuation list from the classpath, added to its own aggregate
// once for VIEW_1 and once for VIEW_2.
AnalysisEngineDescription stopw = createEngineDescription(
StopwordFilter.class,
StopwordFilter.PARAM_STOPWORD_LIST, "classpath:/stopwords/stopwords_english_punctuation.txt",
// The filter is pointed at Lemma annotations and told to use "getValue" as the
// string representation method.
// NOTE(review): presumably lemmas whose getValue() matches a list entry are
// removed - confirm against StopwordFilter's documentation.
StopwordFilter.PARAM_ANNOTATION_TYPE_NAME, Lemma.class.getName(),
StopwordFilter.PARAM_STRING_REPRESENTATION_METHOD_NAME, "getValue");
// Fresh AggregateBuilder so this aggregate contains only the filter registrations.
builder = new AggregateBuilder();
builder.add(stopw, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_1);
builder.add(stopw, CombinationReader.INITIAL_VIEW, CombinationReader.VIEW_2);
AnalysisEngine aggr_stopw = builder.createAggregate();
// Similarity Scorer
AnalysisEngine scorer = createEngine(SimilarityScorer.class,
SimilarityScorer.PARAM_NAME_VIEW_1, CombinationReader.VIEW_1,
SimilarityScorer.PARAM_NAME_VIEW_2, CombinationReader.VIEW_2,