package edu.stanford.nlp.sentiment;
import java.util.List;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.CollectionUtils;
import java.util.function.Predicate;
import edu.stanford.nlp.util.Generics;
/**
 * In the Sentiment dataset converted to tree form, the labels on the
 * intermediate nodes are the sentiment scores and the leaves are the
 * text of the sentence.  This class provides routines to read a file
 * of those trees and attach the sentiment score as the GoldLabel
 * annotation.
 *
 * @author John Bauer
 */
public class SentimentUtils {
private SentimentUtils() {} // static methods only
public static void attachGoldLabels(Tree tree) {
if (tree.isLeaf()) {
for (Tree child : tree.children()) {
// In the sentiment data set, the node labels are simply the gold
// class labels. There are no categories encoded.
RNNCoreAnnotations.setGoldClass(tree, Integer.valueOf(tree.label().value()));
* Given a file name, reads in those trees and returns them as a List
public static List<Tree> readTreesWithGoldLabels(String path) {
List<Tree> trees = Generics.newArrayList();
MemoryTreebank treebank = new MemoryTreebank("utf-8");
treebank.loadPath(path, null);
for (Tree tree : treebank) {
return trees;
static final Predicate<Tree> UNKNOWN_ROOT_FILTER = tree -> {
int gold = RNNCoreAnnotations.getGoldClass(tree);
return gold != -1;
public static List<Tree> filterUnknownRoots(List<Tree> trees) {
return CollectionUtils.filterAsList(trees, UNKNOWN_ROOT_FILTER);
public static String sentimentString(SentimentModel model, int sentiment) {
String[] classNames = model.op.classNames;
if (sentiment < 0 || sentiment > classNames.length) {
return "Unknown sentiment label " + sentiment;
return classNames[sentiment];