/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/
package cc.mallet.fst.confidence;
import java.util.logging.*;
import java.util.*;
import java.io.Serializable;
import cc.mallet.extract.LabeledSpan;
import cc.mallet.fst.*;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.MalletLogger;
/**
 * Abstract class that estimates the confidence of a {@link Segment}
 * extracted by a {@link Transducer}.
 *
 * <p>Subclasses supply the actual estimate by implementing
 * {@link #estimateConfidenceFor(Segment, SumLatticeDefault)}. This class adds
 * convenience methods that score every segment found in an {@link Instance}
 * or {@link InstanceList} and return the segments sorted.
 *
 * <p>NOTE(review): the class is {@link Serializable} and declares no explicit
 * <code>serialVersionUID</code>; non-private fields and method signatures are
 * intentionally left untouched so the default serialized form is not disturbed.
 */
abstract public class TransducerConfidenceEstimator implements Serializable
{
  private static final Logger logger =
    MalletLogger.getLogger (TransducerConfidenceEstimator.class.getName ());

  /** The trained Transducer which performed the extractions. */
  protected Transducer model;

  /**
   * Lazily allocated by
   * {@link #rankSegmentsByConfidence(InstanceList, Object[], Object[])}.
   * Nothing in this class adds elements to it; presumably subclasses fill it
   * while estimating -- verify against the concrete estimators.
   */
  java.util.Vector segmentConfidences;

  /**
   * Creates an estimator bound to the given model.
   *
   * @param model the transducer stored in {@link #model} and handed to the
   *              {@link SegmentIterator} when ranking segments
   */
  public TransducerConfidenceEstimator (Transducer model) {
    this.model = model;
  }

  /**
   * Calculates the confidence in the tagging of a {@link Segment}.
   * Delegates to {@link #estimateConfidenceFor(Segment, SumLatticeDefault)}
   * with a <code>null</code> lattice.
   *
   * @param segment the segment to score
   * @return the confidence computed by the subclass
   */
  public double estimateConfidenceFor (Segment segment) {
    return estimateConfidenceFor (segment, null);
  }

  /**
   * Calculates the confidence in the tagging of a {@link Segment}.
   *
   * @param segment the segment to score
   * @param lattice a lattice the implementation may use; implementations must
   *                accept <code>null</code>, because
   *                {@link #estimateConfidenceFor(Segment)} passes
   *                <code>null</code> here
   * @return the confidence estimate for <code>segment</code>
   */
  abstract public double estimateConfidenceFor (Segment segment, SumLatticeDefault lattice);

  /**
   * Returns the internal confidence vector itself (not a copy).
   *
   * @return the vector, or <code>null</code> if it has not been allocated yet
   */
  public java.util.Vector getSegmentConfidences () {
    return this.segmentConfidences;
  }

  /**
   * Ranks all {@link Segment}s in this {@link InstanceList} by
   * confidence estimate.
   *
   * <p>Each segment produced by a {@link SegmentIterator} over
   * <code>ilist</code> is scored with {@link #estimateConfidenceFor(Segment)},
   * the score is stored on the segment via <code>setConfidence</code>, and the
   * segments are then sorted by their natural ordering.
   *
   * @param ilist list of segmentation instances
   * @param startTags represent the labels for the start states (B-)
   *                  of all segments
   * @param continueTags represent the labels for the continue state
   *                     (I-) of all segments
   * @return array of {@link Segment}s in their natural sort order (intended
   *         to be non-decreasing confidence, as calculated by
   *         <code>estimateConfidenceFor</code>); an empty array when no
   *         segments are found
   */
  public Segment[] rankSegmentsByConfidence (InstanceList ilist, Object[] startTags,
                                             Object[] continueTags) {
    // Raw list kept on purpose: Segment's Comparable declaration is not
    // visible from this file, and the raw Collections.sort call works with it.
    ArrayList segmentList = new ArrayList ();
    SegmentIterator iter = new SegmentIterator (this.model, ilist, startTags, continueTags);
    if (this.segmentConfidences == null) {
      segmentConfidences = new java.util.Vector ();
    }
    while (iter.hasNext ()) {
      Segment segment = (Segment) iter.nextSegment ();
      double confidence = estimateConfidenceFor (segment);
      segment.setConfidence (confidence);
      // Only render the (potentially long) segment text when it will be logged.
      if (logger.isLoggable (java.util.logging.Level.FINE)) {
        logger.fine ("confidence=" + segment.getConfidence() + " for segment\n"
                     + segment.sequenceToString() + "\n");
      }
      segmentList.add (segment);
    }
    Collections.sort (segmentList);
    // Size the target array from the list. A fixed one-element array would be
    // returned as-is, holding a single null, when the list is empty.
    Segment[] ret = new Segment[segmentList.size ()];
    return (Segment[]) segmentList.toArray (ret);
  }

  /**
   * Ranks the segments in one {@link Instance}.
   *
   * <p>Wraps the instance in a single-element {@link InstanceList} built on a
   * {@link Noop} pipe carrying the instance's data and target alphabets, then
   * delegates to
   * {@link #rankSegmentsByConfidence(InstanceList, Object[], Object[])}.
   *
   * @param instance instance to be segmented
   * @param startTags represent the labels for the start states (e.g. B-)
   *                  of all segments
   * @param continueTags represent the labels for the continue state
   *                     (e.g. I-) of all segments
   * @return array of {@link Segment}s in their natural sort order (intended
   *         to be non-decreasing confidence, as calculated by
   *         <code>estimateConfidenceFor</code>); an empty array when no
   *         segments are found
   */
  public Segment[] rankSegmentsByConfidence (Instance instance, Object[] startTags,
                                             Object[] continueTags) {
    InstanceList ilist = new InstanceList (
      new Noop (instance.getDataAlphabet(), instance.getTargetAlphabet()));
    ilist.add (instance);
    return rankSegmentsByConfidence (ilist, startTags, continueTags);
  }

  /**
   * @return the transducer this estimator was constructed with
   */
  public Transducer getTransducer() {
    return this.model;
  }
}