package edu.stanford.nlp.process;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.util.Generics;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Set;
/**
* Simple stoplist class.
*
* @author Sepandar Kamvar
*/
public class StopList {
private Set<Word> wordSet;
/*
* Constructs a stoplist with very few stopwords.
*/
public StopList() {
wordSet = Generics.newHashSet();
addGenericWords();
}
/**
* Constructs a new stoplist from the contents of a file. It is
* assumed that the file contains stopwords, one on a line.
* The stopwords need not be in any order.
*/
public StopList(File list) {
wordSet = Generics.newHashSet();
try {
BufferedReader reader = new BufferedReader(new FileReader(list));
while (reader.ready()) {
wordSet.add(new Word(reader.readLine()));
}
} catch (IOException e) {
throw new RuntimeException(e);
//e.printStackTrace(System.err);
//addGenericWords();
}
}
/**
* Adds some extremely common words to the stoplist.
*/
private void addGenericWords() {
String[] genericWords = {"a", "an", "the", "and", "or", "but", "nor"};
for (int i = 1; i < 7; i++) {
wordSet.add(new Word(genericWords[i]));
}
}
/**
* Returns true if the word is in the stoplist.
*/
public boolean contains(Word word) {
return wordSet.contains(word);
}
/**
* Returns true if the word is in the stoplist.
*/
public boolean contains(String word) {
return wordSet.contains(new Word(word));
}
}