package edu.stanford.nlp.tagger.io;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import java.util.function.Predicate;
public class TreeTaggedFileReader implements TaggedFileReader {
final Treebank treebank;
final String filename;
final TreeReaderFactory trf;
final TreeTransformer transformer;
final TreeNormalizer normalizer;
final Predicate<Tree> treeFilter;
final Iterator<Tree> treeIterator;
Tree next = null;
// int numSentences = 0;
public TreeTaggedFileReader(TaggedFileRecord record) {
filename = record.file;
trf = record.trf == null ? new LabeledScoredTreeReaderFactory() : record.trf;
transformer = record.treeTransformer;
normalizer = record.treeNormalizer;
treeFilter = record.treeFilter;
treebank = new DiskTreebank(trf, record.encoding);
if (record.treeRange != null) {
treebank.loadPath(filename, record.treeRange);
} else {
treebank.loadPath(filename);
}
treeIterator = treebank.iterator();
findNext();
}
public Iterator<List<TaggedWord>> iterator() { return this; }
public String filename() { return filename; }
public boolean hasNext() { return next != null; }
public List<TaggedWord> next() {
if (next == null) {
throw new NoSuchElementException("Iterator exhausted.");
}
Tree t = next;
if (normalizer != null) {
t = normalizer.normalizeWholeTree(t, t.treeFactory());
}
if (transformer != null) {
t = t.transform(transformer);
}
findNext();
return t.taggedYield();
}
/**
* Skips ahead in the iterator to the next non-filtered tree.
*/
private void findNext() {
while (treeIterator.hasNext()) {
next = treeIterator.next();
if (treeFilter == null || treeFilter.test(next)) {
return;
}
}
next = null;
}
public void remove() { throw new UnsupportedOperationException(); }
}