package letweb.semanticum.aes;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.FileHandler;
import java.util.logging.Logger;
import letweb.semanticum.tsd.Audio;
import letweb.semanticum.tsd.STText;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import edu.cmu.sphinx.frontend.util.ConcatAudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;
/**
 * UIMA annotator that transcribes the audio files referenced by an {@link Audio}
 * annotation with a Sphinx4 {@link Recognizer}.
 *
 * <p>The recognized words are concatenated (space separated) into the document text
 * of the CAS, and every word that does not match {@code TextAndTime.PATTERN_SIL}
 * gets an {@link STText} annotation carrying its character offsets and its start/end
 * time as reported by {@link TextAndTime}.
 */
public class SphinxTranscriber extends JCasAnnotator_ImplBase {

    /** Names of the UIMA configuration parameters, in the same order as {@link #SPHINX_PROPERTY}. */
    public static final String[] paramName = new String[]{"LanguageModel",
            "AcousticModel", "Dict", "FillerDict", "AMDef", "ABWidth",
            "RBWidth", "AWBWidth", "RWBWidth", "WordInsertionProb",
            "UnitInsertionProb", "SilInsertionProb", "FillerInsertionProb",
            "langWeight", "GrowSkip", "lookAhead", "LogLevel"};

    /**
     * {@code {sphinxProperty, value}} pairs of the most recently initialized instance.
     * Kept public and static for backward compatibility; the annotator itself only
     * relies on an instance-local copy. Writes are synchronized on the list.
     */
    public static final ArrayList<String[]> paramList = new ArrayList<String[]>();

    /** Sphinx4 global property names; entry {@code i} is fed from {@code paramName[i]}. */
    private static final String[] SPHINX_PROPERTY = new String[]{"lm",
            "am", "dict", "filler", "mdef", "absoluteBeamWidth",
            "relativeBeamWidth", "absoluteWordBeamWidth", "relativeWordBeamWidth",
            "wordInsertionProbability", "unitInsertionProbability",
            "silenceInsertionProbability", "fillerInsertionProbability",
            "languageWeight", "skip", "lookahead", "logLevel"};

    private static final String CF = "ConfigFile";
    private static final String sphinxLog = "sphinxLogFilename";

    private String ConfigFile;
    private ConfigurationManager cm;
    private Logger sphinxLogger;
    /** File handler attached to the Sphinx4 root logger; closed in {@link #destroy()}. */
    private FileHandler sphinxLogHandler;

    /**
     * Reads the annotator parameters, builds the template Sphinx4 configuration manager
     * and redirects the Sphinx4 root logger to its own log file.
     *
     * @param aContext the UIMA context providing the configuration parameters
     * @throws ResourceInitializationException if the base class initialization fails
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);

        // Collect the Sphinx4 properties in an instance-local list so that several
        // annotator instances never iterate a list another instance is still filling.
        List<String[]> properties = new ArrayList<String[]>(paramName.length);
        for (int i = 0; i < paramName.length; i++) {
            properties.add(new String[]{SPHINX_PROPERTY[i],
                    (String) aContext.getConfigParameterValue(paramName[i])});
        }
        // Publish the values on the legacy static list without letting it grow on
        // every (re)initialization.
        synchronized (paramList) {
            paramList.clear();
            paramList.addAll(properties);
        }

        // Sphinx4 configuration parameters
        ConfigFile = (String) aContext.getConfigParameterValue(CF);
        cm = new ConfigurationManager(ConfigFile);
        for (String[] property : properties) {
            cm.setGlobalProperty(property[0], property[1]);
        }

        // Create a dedicated log file for Sphinx4
        sphinxLogger = cm.getRootLogger();
        sphinxLogger.setUseParentHandlers(false);
        String logFile = (String) aContext.getConfigParameterValue(sphinxLog);
        if (logFile == null || logFile.length() == 0) {
            // A missing file name is treated like any other log-file failure: warn and go on.
            getContext().getLogger().log(Level.WARNING,
                    "Parameter " + sphinxLog + " is not set, Sphinx4 log file not created");
            return;
        }
        try {
            FileHandler handler = new FileHandler(logFile);
            sphinxLogger.addHandler(handler);
            sphinxLogHandler = handler;
        } catch (SecurityException e) {
            getContext().getLogger().log(Level.WARNING, "SecurityError: " + e.getMessage(), e);
        } catch (IOException e) {
            getContext().getLogger().log(Level.WARNING, "IOError: " + e.getMessage(), e);
        }
    }

    /**
     * Transcribes the audio files of the first {@link Audio} annotation found in the CAS.
     *
     * <p>If the CAS holds no {@link Audio} annotation, or the configuration manager cannot
     * be cloned, a warning is logged and the CAS is left untouched.
     *
     * @param aJCas the CAS to read the {@link Audio} annotation from and to write the
     *              document text and {@link STText} annotations to
     * @throws AnalysisEngineProcessException declared by the UIMA contract
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        Iterator<?> audioIt = aJCas.getAnnotationIndex(Audio.type).iterator();
        if (!audioIt.hasNext()) {
            getContext().getLogger().log(Level.WARNING,
                    "No Audio annotation found in the CAS, nothing to transcribe");
            return;
        }
        Audio audio = (Audio) audioIt.next();

        // A clone is required: every process() call works on its own ConfigurationManager.
        // When several Sphinx annotators run, the resources are duplicated from the one
        // built in initialize() so that the parameters do not have to be set again.
        ConfigurationManager localCm;
        try {
            localCm = this.cm.clone();
        } catch (CloneNotSupportedException e) {
            getContext().getLogger().log(Level.WARNING,
                    "Configuration Manager CloneError: " + e.getMessage(), e);
            return;
        }

        Recognizer recognizer = (Recognizer) localCm.lookup("recognizer");
        ConcatAudioFileDataSource data = (ConcatAudioFileDataSource) localCm.lookup("dataSource");

        List<File> batch = new ArrayList<File>();
        for (String path : audio.getAudioPath().toArray()) {
            batch.add(new File(path));
        }
        data.setBatchFiles(batch);

        List<Integer> beginAnnot = new ArrayList<Integer>();
        List<Integer> endAnnot = new ArrayList<Integer>();
        List<Long> beginTimeAnnot = new ArrayList<Long>();
        List<Long> endTimeAnnot = new ArrayList<Long>();
        StringBuilder document = new StringBuilder();

        recognizer.allocate();
        try {
            getContext().getLogger().log(Level.INFO, "Start recognition of " + audio.getAudioPath().toString(0));
            long started = System.currentTimeMillis();
            Result result;
            while ((result = recognizer.recognize()) != null) {
                List<TextAndTime> words = TextAndTime.convert(result);
                if (words == null) {
                    continue;
                }
                for (TextAndTime t : words) {
                    String word = t.getWord();
                    if (word.matches(TextAndTime.PATTERN_SIL)) {
                        // Silence tokens stay in the text but are not annotated.
                        document.append(word);
                    } else {
                        beginAnnot.add(document.length());
                        document.append(word);
                        endAnnot.add(document.length());
                        beginTimeAnnot.add(t.getIniTime());
                        endTimeAnnot.add(t.getEndTime());
                    }
                    document.append(" ");
                }
            }
            long finished = System.currentTimeMillis();

            getContext().getLogger().log(Level.INFO, "End recognition of " + audio.getAudioPath().toString(0));
            getContext().getLogger().log(Level.INFO, TextAndTime.getFormattedLong(finished - started)
                    + " is total recognition time of this audio files " + audio.getAudioPath().toString(0));
            getContext().getLogger().log(Level.INFO, "Text " + document.toString());

            aJCas.setDocumentText(document.toString());
            addWordAnnotations(aJCas, beginAnnot, endAnnot, beginTimeAnnot, endTimeAnnot);
        } finally {
            // Always release the recognizer, even when recognition or annotation fails.
            recognizer.deallocate();
        }
    }

    /**
     * Adds one {@link STText} annotation per recognized word; the four lists are parallel
     * and entry {@code i} of each describes the same word.
     */
    private void addWordAnnotations(JCas aJCas, List<Integer> begins, List<Integer> ends,
            List<Long> startTimes, List<Long> endTimes) {
        for (int i = 0; i < startTimes.size(); i++) {
            STText stt = new STText(aJCas);
            stt.setBegin(begins.get(i));
            stt.setEnd(ends.get(i));
            stt.setStartTime(startTimes.get(i));
            stt.setEndTime(endTimes.get(i));
            stt.addToIndexes();
        }
    }

    /**
     * Detaches and closes the Sphinx4 log file handler created in
     * {@link #initialize(UimaContext)}, then delegates to the base class.
     */
    @Override
    public void destroy() {
        if (sphinxLogHandler != null) {
            if (sphinxLogger != null) {
                sphinxLogger.removeHandler(sphinxLogHandler);
            }
            sphinxLogHandler.close();
            sphinxLogHandler = null;
        }
        super.destroy();
    }
}