Package

Source Code of WVToolYaleOutput

import java.io.FileWriter;

import edu.udo.cs.wvtool.config.WVTConfiguration;
import edu.udo.cs.wvtool.config.WVTConfigurationFact;
import edu.udo.cs.wvtool.config.WVTConfigurationRule;
import edu.udo.cs.wvtool.generic.output.WordVectorWriter;
import edu.udo.cs.wvtool.generic.stemmer.DummyStemmer;
import edu.udo.cs.wvtool.generic.stemmer.LovinsStemmerWrapper;
import edu.udo.cs.wvtool.generic.stemmer.PorterStemmerWrapper;
import edu.udo.cs.wvtool.generic.stemmer.WVTStemmer;
import edu.udo.cs.wvtool.generic.vectorcreation.TFIDF;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTFileInputList;
import edu.udo.cs.wvtool.main.WVTool;
import edu.udo.cs.wvtool.util.WordList2AMLFile;
import edu.udo.cs.wvtool.wordlist.WVTWordList;

public class WVToolYaleOutput {

    public static void main(String[] args) throws Exception {

        // EXAMPLE HOW TO CALL THE PROGRAM FROM JAVA

        // Initialize the WVTool
        WVTool wvt = new WVTool(false);

        // Initialize the configuration
        WVTConfiguration config = new WVTConfiguration();

        final WVTStemmer dummyStemmer = new DummyStemmer();
        final WVTStemmer porterStemmer = new PorterStemmerWrapper();

        config.setConfigurationRule(WVTConfiguration.STEP_STEMMER, new WVTConfigurationRule() {
            public Object getMatchingComponent(WVTDocumentInfo d) {

                if (d.getContentLanguage().equals("english"))
                    return porterStemmer;
                else
                    return dummyStemmer;
            }
        });

        WVTStemmer stemmer = new LovinsStemmerWrapper();

        config.setConfigurationRule(WVTConfiguration.STEP_STEMMER, new WVTConfigurationFact(stemmer));

        // Initialize the input list with two classes
        WVTFileInputList list = new WVTFileInputList(2);

        // Add entries
        list.addEntry(new WVTDocumentInfo("../data/alt.atheism", "txt", "", "german", 0));
        list.addEntry(new WVTDocumentInfo("../data/soc.religion.christian", "txt", "", "english", 1));

        // Generate the word list

        WVTWordList wordList = wvt.createWordList(list, config);

        // Prune the word list
        wordList.pruneByFrequency(2, 5);

        // Store the aml file
        WordList2AMLFile.storeWordList(wordList, new FileWriter("test_wv.aml"), true, "wv.dat");
        // Create the word vectors

        // Set up an output filter (write sparse vectors to a file)
        FileWriter outFile = new FileWriter("wv.dat");
        WordVectorWriter wvw = new WordVectorWriter(outFile, true, true, true, 1);

        config.setConfigurationRule(WVTConfiguration.STEP_OUTPUT, new WVTConfigurationFact(wvw));

        config.setConfigurationRule(WVTConfiguration.STEP_VECTOR_CREATION, new WVTConfigurationFact(new TFIDF()));

        // Create the vectors
        wvt.createVectors(list, config, wordList);

        // Close the output file
        wvw.close();
        outFile.close();

      
    }
   
   
}
TOP

Related Classes of WVToolYaleOutput

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.