// Source code of edu.washington.cs.knowitall.nlp.extraction.SpanExtraction

package edu.washington.cs.knowitall.nlp.extraction;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;


import com.google.common.collect.ImmutableList;


import edu.washington.cs.knowitall.commonlib.Range;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;


/***
 * <p>
 * This class represents an extraction from a single sentence that is made up of
 * one or more fields, each of which corresponds to a span in the sentence.
 * </p>
 * <p>
 * For example, a binary subject-verb-object relationship can be represented as
 * a {@code SentenceSpanExtraction} with three fields: one for the subject, one
 * for the verb phrase, and one for the object. This class keeps a reference to
 * the sentence the extraction originated from, and the ranges of each field.
 * Each field is represented as a {@link ChunkedExtraction}.
 * </p>
 * <p>
 * A SpanExtraction object is also equipped with a set of properties, mapping a
 * String key to a String value.
 * </p>
 *
 * @author afader
 *
 */
public class SpanExtraction {


    private ChunkedSentence sent;
    private int numFields;
    private ImmutableList<Range> fieldRanges;
    private ImmutableList<ChunkedExtraction> fields;
    private ImmutableList<String> fieldNames;
    private HashMap<String, ChunkedExtraction> namedFields;
    private Map<String, String> props;


    /**
     * Constructs a new extraction from the given sentence, with fields defined
     * by the given ranges and names.
     *
     * @param sent
     * @param ranges
     * @param fieldNames
     */
    public SpanExtraction(ChunkedSentence sent, List<Range> fieldRanges,
            List<String> fieldNames) {
        initFromRanges(sent, fieldRanges, fieldNames);
    }


    /**
     * Constructs a new extraction from the given sentence, with fields defined
     * by the given ranges. Uses the default field names of field0, field1,
     * field2, etc.
     *
     * @param sent
     * @param ranges
     */
    public SpanExtraction(ChunkedSentence sent, List<Range> fieldRanges) {
        int n = fieldRanges.size();
        initFromRanges(sent, fieldRanges, getDefaultFieldNames(n));
    }


    /**
     * Constructs a new extraction from the given {@link ChunkedExtraction}s.
     * These must all come from the same sentence. Uses the default field names
     * of field0, field1, field2, etc.
     *
     * @param fields
     */
    public SpanExtraction(List<ChunkedExtraction> fields) {
        int n = fields.size();
        initFromFields(fields, getDefaultFieldNames(n));
    }


    /**
     * Constructs a new extraction from the given {@link ChunkedExtraction}s.
     * These must all come from the same sentence. Uses the default field names
     * of field0, field1, field2, etc.
     *
     * @param fields
     */
    public SpanExtraction(ChunkedExtraction[] fields) {
        List<ChunkedExtraction> fieldsList = new ArrayList<ChunkedExtraction>(
                fields.length);
        for (ChunkedExtraction field : fields)
            fieldsList.add(field);
        initFromFields(fieldsList, getDefaultFieldNames(fieldsList.size()));
    }


    /**
     * Constructs a new extraction from the given {@link ChunkedExtraction}s.
     * These must all come from the same sentence.
     *
     * @param fields
     * @param fieldNames
     */
    public SpanExtraction(List<ChunkedExtraction> fields,
            List<String> fieldNames) {
        initFromFields(fields, fieldNames);
    }


    /**
     * Constructs a new extraction from the given {@link ChunkedExtraction}s.
     * These must all come from the same sentence.
     *
     * @param fields
     * @param fieldNames
     */
    public SpanExtraction(ChunkedExtraction[] fields, String[] fieldNames) {
        List<ChunkedExtraction> fieldsList = new ArrayList<ChunkedExtraction>(
                fields.length);
        for (ChunkedExtraction field : fields)
            fieldsList.add(field);
        List<String> fieldNamesList = new ArrayList<String>(fieldNames.length);
        for (String name : fieldNames)
            fieldNamesList.add(name);
        initFromFields(fieldsList, fieldNamesList);
    }


    /**
     * @return the number of fields in this extraction
     */
    public int getNumFields() {
        return fieldRanges.size();
    }


    /**
     * @return the field names of this extraction
     */
    public List<String> getFieldNames() {
        return fieldNames;
    }


    /**
     * @param name
     * @return true if this extraction has the given named field
     */
    public boolean hasField(String name) {
        return namedFields.containsKey(name);
    }


    /**
     * @return the sentence
     */
    public ChunkedSentence getSentence() {
        return sent;
    }


    /**
     * @return the ranges of each field
     */
    public List<Range> getFieldRanges() {
        return fieldRanges;
    }


    /**
     * @return the fields
     */
    public List<ChunkedExtraction> getFields() {
        return fields;
    }


    /**
     * @param i
     * @return the range of the ith field
     */
    public Range getFieldRange(int i) {
        return getFieldRanges().get(i);
    }


    /**
     * @param name
     * @return the range of the field with the given name
     */
    public Range getFieldRange(String name) {
        return namedFields.get(name).getRange();
    }


    /**
     * @param name
     * @return the field with the given name
     */
    public ChunkedExtraction getField(String name) {
        return namedFields.get(name);
    }


    /**
     * @param i
     * @return the ith field
     */
    public ChunkedExtraction getField(int i) {
        return getFields().get(i);
    }


    /**
     * @param i
     * @return the name of the ith field
     */
    public String getFieldName(int i) {
        return getFieldNames().get(i);
    }


    /**
     * @return the map of property names to property values
     */
    public Map<String, String> getProperties() {
        return props;
    }


    /**
     * Sets the given property
     *
     * @param name
     *            the name of the property
     * @param value
     *            the value
     */
    public void setProperty(String name, String value) {
        props.put(name, value);
    }


    /**
     * Sets the properties to the given map.
     *
     * @param props
     */
    public void setProperties(Map<String, String> props) {
        this.props = props;
    }


    /**
     * Checks whether this extraction has the given property.
     *
     * @param name
     * @return true if this extraction has the given property
     */
    public boolean hasProperty(String name) {
        return props.containsKey(name);
    }


    /**
     * @param name
     * @return the value of the given property
     */
    public String getProperty(String name) {
        if (hasProperty(name)) {
            return props.get(name);
        } else {
            throw new IllegalArgumentException("Invalid property name: " + name);
        }
    }


    /**
     * @return the names of any properties that have been set
     */
    public Set<String> getPropertyNames() {
        return props.keySet();
    }


    /**
     * Returns a B/I/O encoding of this extraction as a list of strings. The
     * list has the same length as the underlying sentence. The tags used in the
     * B/I/O encoding are in the form B-FieldName0, I-FieldName0, ... for each
     * field name.
     *
     * @return the B/I/O encoding as a list of strings
     */
    public List<String> toBIOLayer() {
        int n = getSentence().getLength();
        ArrayList<String> result = new ArrayList<String>(n);
        for (int i = 0; i < n; i++)
            result.add("O");
        for (String fieldName : getFieldNames()) {
            Range r = getFieldRange(fieldName);
            int start = r.getStart();
            int end = r.getEnd();
            result.set(start, "B-" + fieldName);
            for (int j = start + 1; j < end; j++) {
                result.set(j, "I-" + fieldName);
            }
        }
        return result;
    }


    private void initFromRanges(ChunkedSentence sent, List<Range> fieldRanges,
            List<String> fieldNames) {


        if (fieldRanges.size() < 1) {
            throw new IllegalArgumentException("must have at least 1 field");
        }
        if (fieldNames.size() != fieldRanges.size()) {
            throw new IllegalArgumentException(
                    "length of ranges must equal length of fieldNames");
        }


        this.sent = sent;
        this.numFields = fieldRanges.size();
        this.fieldRanges = ImmutableList.copyOf(fieldRanges);


        List<ChunkedExtraction> tFields = new ArrayList<ChunkedExtraction>(
                numFields);
        this.namedFields = new HashMap<String, ChunkedExtraction>(numFields);


        for (int i = 0; i < numFields; i++) {
            Range range = fieldRanges.get(i);
            ChunkedExtraction field = new ChunkedExtraction(sent, range);
            String fieldName = fieldNames.get(i);
            tFields.add(field);
            namedFields.put(fieldName, field);
        }


        this.fields = ImmutableList.copyOf(tFields);
        this.fieldNames = ImmutableList.copyOf(fieldNames);
        this.props = new HashMap<String, String>();
    }


    private void initFromFields(List<ChunkedExtraction> fields,
            List<String> fieldNames) {


        List<Range> tFieldRanges = new ArrayList<Range>(fields.size());
        for (ChunkedExtraction field : fields) {
            tFieldRanges.add(field.getRange());
        }


        if (fields.size() < 1) {
            throw new IllegalArgumentException("must have at least 1 field");
        }


        for (int i = 0; i < fields.size(); i++) {
            ChunkedSentence sent1 = fields.get(i).getSentence();
            ChunkedSentence sent2 = fields.get((i + 1) % fields.size())
                    .getSentence();
            if (!sent1.equals(sent2)) {
                throw new IllegalArgumentException(
                        "fields must come from the same sentence");
            }
        }


        this.sent = fields.get(0).getSentence();
        this.numFields = fields.size();
        this.fieldRanges = ImmutableList.copyOf(tFieldRanges);
        this.fields = ImmutableList.copyOf(fields);
        this.fieldNames = ImmutableList.copyOf(fieldNames);


        this.namedFields = new HashMap<String, ChunkedExtraction>(
                this.numFields);
        for (int i = 0; i < this.numFields; i++) {
            this.namedFields.put(fieldNames.get(i), fields.get(i));
        }


        this.props = new HashMap<String, String>();


    }


    private static List<String> getDefaultFieldNames(int n) {
        List<String> result = new ArrayList<String>(n);
        for (int i = 0; i < n; i++)
            result.add("field" + i);
        return result;
    }


    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result
                + ((fieldNames == null) ? 0 : fieldNames.hashCode());
        result = prime * result
                + ((fieldRanges == null) ? 0 : fieldRanges.hashCode());
        result = prime * result + ((fields == null) ? 0 : fields.hashCode());
        result = prime * result
                + ((namedFields == null) ? 0 : namedFields.hashCode());
        result = prime * result + numFields;
        result = prime * result + ((props == null) ? 0 : props.hashCode());
        result = prime * result + ((sent == null) ? 0 : sent.hashCode());
        return result;
    }


    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        SpanExtraction other = (SpanExtraction) obj;
        if (fieldNames == null) {
            if (other.fieldNames != null)
                return false;
        } else if (!fieldNames.equals(other.fieldNames))
            return false;
        if (fieldRanges == null) {
            if (other.fieldRanges != null)
                return false;
        } else if (!fieldRanges.equals(other.fieldRanges))
            return false;
        if (fields == null) {
            if (other.fields != null)
                return false;
        } else if (!fields.equals(other.fields))
            return false;
        if (namedFields == null) {
            if (other.namedFields != null)
                return false;
        } else if (!namedFields.equals(other.namedFields))
            return false;
        if (numFields != other.numFields)
            return false;
        if (props == null) {
            if (other.props != null)
                return false;
        } else if (!props.equals(other.props))
            return false;
        if (sent == null) {
            if (other.sent != null)
                return false;
        } else if (!sent.equals(other.sent))
            return false;
        return true;
    }


}
// Source code of edu.washington.cs.knowitall.nlp.extraction.SpanExtraction
//
// Related classes of edu.washington.cs.knowitall.nlp.extraction.SpanExtraction