* At least one noun:
*/
// Wrap the lexSpotterT3 spotter with an AtLeastOneNounSelector. The third argument is a
// LingPipeTaggedTokenProvider built from lingPipeFactory.
Spotter npSpotter = SpotterWithSelector.getInstance(
lexSpotterT3,
new AtLeastOneNounSelector(),
new LingPipeTaggedTokenProvider(lingPipeFactory)
);
// The name is a LaTeX macro (the backslash is escaped for Java); presumably it becomes the row label -- confirm in getLatexTableRow.
// NOTE(review): unlike the other spotter names set below (e.g. "\\cw "), this one has no trailing space -- confirm that is intended.
npSpotter.setName("\\atLeastOneNoun");
// Append whatever getLatexTableRow produces for this spotter, the documents, the gold occurrences and baseResult.
latexTable.append(getLatexTableRow(npSpotter, documents, goldSurfaceFormOccurrences,baseResult));
/**
* OpenNLP Chunker: shared settings, followed by the spotter that uses the gold surface-form dictionary.
*/
// Settings reused by every OpenNLPChunkerSpotter.fromDir(...) call in this section.
// NOTE(review): the model directory and language code are hard-coded here, whereas the NER spotter
// further down reads both from `configuration` -- confirm whether these should come from there too.
String openNLPDir = "/data/spotlight/3.7/opennlp/english/";
String i18nLanguageCode = "en";
// NOTE(review): relative path, so it is resolved against the process working directory.
File stopwords = new File("data/stopwords/stopwords_en.txt");
// Load the surface-form dictionary from lexSpotterGoldFile. The meaning of the `false` flag is not
// visible from here -- check ProbabilisticSurfaceFormDictionary.fromLingPipeDictionaryFile.
SurfaceFormDictionary sfDictProbThreshGold = ProbabilisticSurfaceFormDictionary.fromLingPipeDictionaryFile(lexSpotterGoldFile,false);
Spotter onlpChunksSpotterGold = OpenNLPChunkerSpotter.fromDir(openNLPDir,i18nLanguageCode,sfDictProbThreshGold,stopwords);
// LaTeX macro \joNPL with argument "gold"; presumably the row label -- confirm in getLatexTableRow.
onlpChunksSpotterGold.setName("\\joNPL{gold} ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(onlpChunksSpotterGold, documents, goldSurfaceFormOccurrences,baseResult));
// Disabled alternative: load the dictionary with fromFile(...) from a developer-local .set file.
//File sfDictThresh3 = new File("/home/pablo/workspace/spotlight/index/output/surfaceForms-fromOccs-thresh3-TRD.set");
//SurfaceFormDictionary sfDictProbThresh3 = ProbabilisticSurfaceFormDictionary.fromFile(sfDictThresh3, false);
// Active variant: build the dictionary from lexSpotterT3File (presumably the threshold-3 lexicon -- confirm where that file is defined).
SurfaceFormDictionary sfDictProbThresh3 = ProbabilisticSurfaceFormDictionary.fromLingPipeDictionaryFile(lexSpotterT3File,false);
// Same OpenNLP directory, language code and stopwords file as the other chunker spotters; only the dictionary differs.
Spotter onlpChunksSpotter3 = OpenNLPChunkerSpotter.fromDir(openNLPDir,i18nLanguageCode,sfDictProbThresh3,stopwords);
// LaTeX macro \joNPL with argument ">3".
onlpChunksSpotter3.setName("\\joNPL{>3} ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(onlpChunksSpotter3, documents, goldSurfaceFormOccurrences,baseResult));
// Disabled alternative: load the dictionary with fromFile(...) from a developer-local .set file.
//File sfDictThresh10 = new File("/home/pablo/workspace/spotlight/index/output/surfaceForms-fromOccs-thresh10-TRD.set");
//SurfaceFormDictionary sfDictProbThresh10 = ProbabilisticSurfaceFormDictionary.fromFile(sfDictThresh10, false);
// Active variant: build the dictionary from lexSpotterT10File (presumably the threshold-10 lexicon -- confirm where that file is defined).
SurfaceFormDictionary sfDictProbThresh10 = ProbabilisticSurfaceFormDictionary.fromLingPipeDictionaryFile(lexSpotterT10File,false);
// Same OpenNLP directory, language code and stopwords file as the other chunker spotters; only the dictionary differs.
Spotter onlpChunksSpotter10 = OpenNLPChunkerSpotter.fromDir(openNLPDir,i18nLanguageCode,sfDictProbThresh10,stopwords);
// LaTeX macro \joNPL with argument ">10".
onlpChunksSpotter10.setName("\\joNPL{>10} ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(onlpChunksSpotter10, documents, goldSurfaceFormOccurrences,baseResult));
// Disabled alternative: load the dictionary with fromFile(...) from a developer-local .tsv file.
//File sfDictThresh75 = new File("/home/pablo/workspace/spotlight/index/output/surfaceForms-fromOccs-thresh75.tsv");
//SurfaceFormDictionary sfDictProbThresh75 = ProbabilisticSurfaceFormDictionary.fromFile(sfDictThresh75, false);
// Active variant: build the dictionary from lexSpotterT75File (presumably the threshold-75 lexicon -- confirm where that file is defined).
SurfaceFormDictionary sfDictProbThresh75 = ProbabilisticSurfaceFormDictionary.fromLingPipeDictionaryFile(lexSpotterT75File,false);
// Same OpenNLP directory, language code and stopwords file as the other chunker spotters; only the dictionary differs.
Spotter onlpChunksSpotter75 = OpenNLPChunkerSpotter.fromDir(openNLPDir,i18nLanguageCode,sfDictProbThresh75,stopwords);
// LaTeX macro \joNPL with argument ">75".
onlpChunksSpotter75.setName("\\joNPL{>75} ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(onlpChunksSpotter75, documents, goldSurfaceFormOccurrences,baseResult));
/**
* No common words: lexSpotterT3 combined with a CoOccurrenceBasedSelector.
*/
// The selector is built from the spotter configuration; the tagged-token provider comes from lingPipeFactory.
Spotter noCommonSpotter = SpotterWithSelector.getInstance(
lexSpotterT3,
new CoOccurrenceBasedSelector(configuration.getSpotterConfiguration()),
new LingPipeTaggedTokenProvider(lingPipeFactory)
);
// LaTeX macro \cw; presumably the row label -- confirm in getLatexTableRow.
noCommonSpotter.setName("\\cw ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(noCommonSpotter, documents, goldSurfaceFormOccurrences,baseResult));
/**
* Kea: only the first variant is active; the remaining variants below are commented out.
*/
// NOTE(review): the meaning of the numeric arguments (1000, -1) is not visible from here. Judging by
// the names of the disabled variants below, the last argument looks like a score threshold -- confirm
// against the KeaSpotter constructor.
Spotter keaSpotter1 = new KeaSpotter("/data/spotlight/3.7/kea/keaModel-1-3-1", 1000, -1);
// LaTeX math-mode label "Kea" with subscript ">0".
keaSpotter1.setName("$Kea_{>0}$ ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(keaSpotter1, documents, goldSurfaceFormOccurrences,baseResult));
// Disabled variants: same model path and second argument, with last argument 0.015, 0.075, 0.15 and 0.3.
// Spotter keaSpotter2 = new KeaSpotter("/data/spotlight/3.7/kea/keaModel-1-3-1", 1000, 0.015);
// keaSpotter2.setName("$Kea_{>0.015}$ ");
// latexTable.append(getLatexTableRow(keaSpotter2, documents, goldSurfaceFormOccurrences,baseResult));
//
// Spotter keaSpotter3 = new KeaSpotter("/data/spotlight/3.7/kea/keaModel-1-3-1", 1000, 0.075);
// keaSpotter3.setName("$Kea_{>0.075}$ ");
// latexTable.append(getLatexTableRow(keaSpotter3, documents, goldSurfaceFormOccurrences,baseResult));
//
// Spotter keaSpotter4 = new KeaSpotter("/data/spotlight/3.7/kea/keaModel-1-3-1", 1000, 0.15);
// keaSpotter4.setName("$Kea_{>0.15}$ ");
// latexTable.append(getLatexTableRow(keaSpotter4, documents, goldSurfaceFormOccurrences,baseResult));
//
// Spotter keaSpotter5 = new KeaSpotter("/data/spotlight/3.7/kea/keaModel-1-3-1", 1000, 0.3);
// keaSpotter5.setName("$Kea_{>0.3}$ ");
// latexTable.append(getLatexTableRow(keaSpotter5,documents,goldSurfaceFormOccurrences,baseResult));
/**
* NER
*/
// Unlike the OpenNLP chunker spotters above, every constructor argument here is read from
// `configuration`: the OpenNLP model directory, the i18n language code and the OpenNLP models URI.
Spotter neSpotter = new NESpotter(configuration.getSpotterConfiguration().getOpenNLPModelDir(), configuration.getI18nLanguageCode(), configuration.getSpotterConfiguration().getOpenNLPModelsURI());
// LaTeX macro \ner; presumably the row label -- confirm in getLatexTableRow.
neSpotter.setName("\\ner ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(neSpotter, documents, goldSurfaceFormOccurrences,baseResult));
/**
* NER+NP
*/
// The model directory is the configured OpenNLP model dir plus a sub-directory named after the
// lower-cased language. Lower-casing is done with Locale.ROOT so the resulting path does not depend
// on the JVM's default locale (under a Turkish or Azeri default locale, "I" would otherwise
// lower-case to a dotless i, U+0131, and the directory lookup would miss).
Spotter onlpSpotter = new OpenNLPNGramSpotter(
configuration.getSpotterConfiguration().getOpenNLPModelDir() + "/" + configuration.getLanguage().toLowerCase(java.util.Locale.ROOT),
configuration.getI18nLanguageCode()
);
// LaTeX macro \nerNP; presumably the row label -- confirm in getLatexTableRow.
onlpSpotter.setName("\\nerNP ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(onlpSpotter, documents, goldSurfaceFormOccurrences,baseResult));
/**
* NER+NP+NG-CW: the onlpSpotter built above, combined with a CoOccurrenceBasedSelector.
*/
// The selector is built from the spotter configuration; the tagged-token provider comes from lingPipeFactory.
Spotter onlpNoCommonSpotter = SpotterWithSelector.getInstance(
onlpSpotter,
new CoOccurrenceBasedSelector(configuration.getSpotterConfiguration()),
new LingPipeTaggedTokenProvider(lingPipeFactory)
);
// NOTE(review): this name is plain text, whereas most other spotters in this section use a LaTeX macro -- confirm that is intended.
onlpNoCommonSpotter.setName("NER+NP+NG-CW ");
// Append the row produced for this spotter to the table.
latexTable.append(getLatexTableRow(onlpNoCommonSpotter, documents, goldSurfaceFormOccurrences,baseResult));
// Print the accumulated table (every row appended above) to standard output.
System.out.println(latexTable);