/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/** @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a> */
package cc.mallet.fst.confidence;
import java.util.logging.*;
import java.util.*;
import java.io.Serializable;
import cc.mallet.extract.LabeledSpan;
import cc.mallet.fst.*;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.MalletLogger;
* Abstract class that estimates the confidence of a {@link Segment}
* extracted by a {@link Transducer}.
abstract public class TransducerConfidenceEstimator implements Serializable
private static Logger logger = MalletLogger.getLogger(TransducerConfidenceEstimator.class.getName());
protected Transducer model; // the trained Transducer which
// performed the extractions.
java.util.Vector segmentConfidences;
public TransducerConfidenceEstimator (Transducer model) {
this.model = model;
Calculates the confidence in the tagging of a {@link Segment}.
public double estimateConfidenceFor (Segment segment) {
return estimateConfidenceFor (segment, null);
abstract public double estimateConfidenceFor (Segment segment, SumLatticeDefault lattice);
public java.util.Vector getSegmentConfidences () {return this.segmentConfidences;}
Ranks all {@link Segment}s in this {@link InstanceList} by
confidence estimate.
@param ilist list of segmentation instances
@param startTags represent the labels for the start states (B-)
of all segments
@param continueTags represent the labels for the continue state
(I-) of all segments
@return array of {@link Segment}s ordered by non-decreasing
confidence scores, as calculated by <code>estimateConfidenceFor</code>
public Segment[] rankSegmentsByConfidence (InstanceList ilist, Object[] startTags,
Object[] continueTags) {
ArrayList segmentList = new ArrayList ();
SegmentIterator iter = new SegmentIterator (this.model, ilist, startTags, continueTags);
if (this.segmentConfidences == null)
segmentConfidences = new java.util.Vector ();
while (iter.hasNext ()) {
Segment segment = (Segment) iter.nextSegment ();
double confidence = estimateConfidenceFor (segment);
segment.setConfidence (confidence);
logger.fine ("confidence=" + segment.getConfidence() + " for segment\n"
+ segment.sequenceToString() + "\n");
segmentList.add (segment);
Collections.sort (segmentList);
Segment[] ret = new Segment[1];
ret = (Segment[]) segmentList.toArray (ret);
return ret;
ranks the segments in one {@link Instance}
@param instance instances to be segmented
@param startTags represent the labels for the start states (e.g. B-)
of all segments
@param continueTags represent the labels for the continue state
(e.g. I-) of all segments
@return array of {@link Segment}s ordered by non-decreasing
confidence scores, as calculated by <code>estimateConfidenceFor</code>
public Segment[] rankSegmentsByConfidence (Instance instance, Object[] startTags,
Object[] continueTags) {
InstanceList ilist = new InstanceList (new Noop(instance.getDataAlphabet(),instance.getTargetAlphabet()));
ilist.add (instance);
return rankSegmentsByConfidence (ilist, startTags, continueTags);
public Transducer getTransducer() { return this.model; }