/*
* Copyright (c) 2011, Yahoo! Inc. All rights reserved.
*
* Redistribution and use of this software in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list of conditions
* and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions
* and the following disclaimer in the documentation and/or other materials provided with the
* distribution.
*
* Neither the name of Yahoo! Inc. nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior written permission of Yahoo!
* Inc.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.yahoo.labs.taxomo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.yahoo.labs.taxomo.util.StateSet;
import com.yahoo.labs.taxomo.util.Taxonomy;
import com.yahoo.labs.taxomo.util.Util;
/**
 * Computes the probability of a set of sequences given a model.
 * <p>
 * The model and the taxonomy are loaded from the files named on the command line. The sequences
 * are taken either from an input file (one sequence per line) or from a single sequence passed
 * directly as an option.
 * <p>
 * See {@code --help} for command-line options.
 *
 * @author chato
 */
public class ComputeProbabilities {

    static final Logger logger = Logger.getLogger(ComputeProbabilities.class);

    static {
        Util.configureLogger(logger, Level.INFO);
    }

    /**
     * Command-line entry point.
     * <p>
     * If {@code --input-file} is given, every line of that file is scored and the sum of the
     * per-line values is logged as the log probability. Otherwise, if {@code --sequence} is
     * given, the exponential of the value for that single sequence is logged as its probability.
     * When both options are given, the input file takes precedence.
     *
     * @param args the command-line arguments; see {@code --help}
     * @throws IOException if the input file cannot be read (the model and taxonomy loaders
     *         presumably raise it as well; confirm against their declarations)
     * @throws JSAPException if the command-line parser cannot be set up
     * @throws IllegalArgumentException if neither an input file nor a sequence is specified
     */
    public static void main(String[] args) throws IOException, JSAPException {
        logger.setLevel(Level.INFO);

        final SimpleJSAP jsap = new SimpleJSAP(ComputeProbabilities.class.getName(), "Computes the probabilities of a set of sequences given a HMM.", new Parameter[] {
                new Switch("verbose", 'v', "verbose", "Set verbose output"),
                new FlaggedOption("model-file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'm', "model-file", "File containing the description of the HMM."),
                new FlaggedOption("taxonomy-file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 't', "taxonomy-file", "File containing the taxonomy."),
                new FlaggedOption("input-file", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'i', "input-file", "File containing the input sequences."),
                new FlaggedOption("sequence", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "sequence",
                        "Optionally, pass a single sequence as a string. Place it between quotes, with space-separated symbols.") });

        final JSAPResult jsapResult = jsap.parse(args);
        // The parser has already printed help or an error message; nothing more to do.
        if (jsap.messagePrinted()) {
            return;
        }

        if (jsapResult.getBoolean("verbose")) {
            logger.setLevel(Level.DEBUG);
        }

        // The state set is built from the model file together with the taxonomy.
        File modelFile = new File(jsapResult.getString("model-file"));
        File taxoFile = new File(jsapResult.getString("taxonomy-file"));
        StateSet stateSet = new StateSet(modelFile, new Taxonomy(taxoFile));
        Model hmm = new Model(modelFile, stateSet);

        if (jsapResult.userSpecified("input-file")) {
            File inputFile = new File(jsapResult.getString("input-file"));
            logger.info("Log probability = " + sumLogProbabilities(hmm, inputFile));
        } else if (jsapResult.userSpecified("sequence")) {
            String sequenceStr = jsapResult.getString("sequence");
            logger.info("Probability = " + Math.exp(hmm.viterbiCalculateNonOverlap(sequenceStr)));
        } else {
            throw new IllegalArgumentException("You must specify either an input file or a sequence");
        }
    }

    /**
     * Scores every line of a file with the model and adds up the results.
     *
     * @param hmm the model used to score each line
     * @param inputFile the file to read, one sequence per line
     * @return the sum of {@code hmm.viterbiCalculateNonOverlap(line)} over all lines;
     *         {@code 0.0} for an empty file
     * @throws IOException if the file cannot be opened or read
     */
    private static double sumLogProbabilities(Model hmm, File inputFile) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(inputFile));
        try {
            double logProb = 0.0;
            String sequenceStr;
            while ((sequenceStr = reader.readLine()) != null) {
                logProb += hmm.viterbiCalculateNonOverlap(sequenceStr);
            }
            return logProb;
        } finally {
            // Release the file handle even when reading or scoring fails.
            reader.close();
        }
    }
}