/*
* Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* -------------------
* To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
* http://www.manning.com/ingersoll
*/
package com.tamingtext.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.tamingtext.opennlp.PooledTokenNameFinderModel;
/** Encapsulates OpenNLP's NameFinder by providing a mechanism to load
* all of the name finder models files found in a single directory into memory
* and instantiating an array of NameFinderME objects.
*/
public class NameFinderFactory {
private static final Logger log = LoggerFactory.getLogger(NameFinderFactory.class);
NameFinderME[] finders;
String[] modelNames;
/** Create a NameFinderEngine that loads models from the directory specified
* in the system property <code>model.dir</code> system property for the
* english language
*
* @param modelDirectory
* the directory containing the model files, can be null to force
* use of the model.dir system property.
* @throws IOException
*/
public NameFinderFactory() throws IOException {
this(null);
}
public NameFinderFactory(Map<String,String> param) throws IOException {
String language = OpenNLPUtil.getModelLanguage(param);
String modelDirectory = OpenNLPUtil.getModelDirectory(param);
loadNameFinders(language, modelDirectory);
}
/** Create a NameFinderEngine that loads models from the specified directory,
* or, reads the <code>model.dir</code> system property in order to determine
* if the <code>modelDirectory</code> is <code>null</code> or empty.
* @param language
* two letter language prefix from the model file names.
* @param modelDirectory
* the directory containing the model files, can be null to force
* use of the model.dir system property.
* @throws IOException
*/
public NameFinderFactory(String language, String modelDirectory) throws IOException {
loadNameFinders(language, modelDirectory);
}
/** Load the name finder models. Currently any file in the model directory
* that starts with (lang)-ner
* @param language
* @param modelDirectory
* can be null to use the value of the system property model.dir
* @return
*/
protected File[] findNameFinderModels(String language, String modelDirectory) {
final String modelPrefix = language + "-ner";
log.info("Loading name finder models from {} using prefix {} ",
new Object[] { modelDirectory, modelPrefix } );
File[] models = new File(modelDirectory).listFiles(new FilenameFilter() {
public boolean accept(File file, String name) {
if (name.startsWith(modelPrefix)) {
return true;
}
return false;
}
});
if (models == null || models.length < 1) {
throw new RuntimeException("Configuration Error: No models in " + modelDirectory);
}
return models;
}
/** Load name finder models based upon models for the specified language
* in the specified model directory.
*
* @param language
* @param modelDirectory
* can be null to use the value of the system property model.dir
* @throws IOException
*/
protected void loadNameFinders(String language, String modelDirectory) throws IOException {
//<start id="maxent.examples.namefinder.setup"/>
File modelFile;
File[] models //<co id="nfe.findmodels"/>
= findNameFinderModels(language, modelDirectory);
modelNames = new String[models.length];
finders = new NameFinderME[models.length];
for (int fi = 0; fi < models.length; fi++) {
modelFile = models[fi];
modelNames[fi] = modelNameFromFile(language, modelFile); //<co id="nfe.modelname"/>
log.info("Loading model {}", modelFile);
InputStream modelStream = new FileInputStream(modelFile);
TokenNameFinderModel model = //<co id="nfe.modelreader"/>
new PooledTokenNameFinderModel(modelStream);
finders[fi] = new NameFinderME(model);
}
/*<calloutlist>
<callout arearefs="nfe.findmodels">Find Models</callout>
<callout arearefs="nfe.modelname">Determine Model Name</callout>
<callout arearefs="nfe.modelreader">Read Model</callout>
</calloutlist>*/
//<end id="maxent.examples.namefinder.setup"/>
}
/** Extract the model name from the model file, this is used to display
* the type of named entity found
* @param language
* @param modelFile
* @return
*/
protected String modelNameFromFile(String language, File modelFile) {
String modelName = modelFile.getName();
return modelName.replace(language + "-ner-", "").replace(".bin", "");
}
/** Obtain a reference to the array of NameFinderME's loaded by the engine.
* @return
*/
public NameFinderME[] getNameFinders() {
return finders;
}
/** Returns the names of each of the models loaded by the engine, an array
* parallel with the array returned by {@link #getFinders()}
* @return
*/
public String[] getModelNames() {
return modelNames;
}
}