Examples of Vocabulary

com.github.pmerienne.trident.ml.nlp.Vocabulary
com.luxoft.dnepr.courses.regular.unit14.model.Vocabulary
joshua.corpus.vocab.Vocabulary
Vocabulary is the class that keeps track of the unique words that occur in a corpus of text for a particular language. It assigns integer IDs to Words, which is useful when we are creating suffix arrays or doing similar things. @author Chris Callison-Burch @since 8 February 2005 @author Lane Schwartz @version $LastChangedDate:2008-07-30 17:15:52 -0400 (Wed, 30 Jul 2008) $
kea.vocab.Vocabulary
maui.vocab.Vocabulary
Builds an index with the content of the controlled vocabulary. Accepts vocabularies as rdf files (SKOS format) and in plain text format: vocabulary_name.en (with "ID TERM" per line) - descriptors & non-descriptors vocabulary_name.use (with "ID_NON-DESCR \t ID_DESCRIPTOR" per line) vocabulary_name.rel (with "ID \t RELATED_ID1 RELATED_ID2 ... " per line) See KEA's homepage for more details. @author Olena Medelyan
net.sourceforge.align.model.vocabulary.Vocabulary
Represents a vocabulary mapping words to identifiers. @author Jarek Lipski (loomchild)
org.antlr.v4.runtime.Vocabulary
This interface provides information about the vocabulary used by a recognizer. @see Recognizer#getVocabulary() @author Sam Harwell
org.apache.sis.util.resources.Vocabulary
Locale-dependent resources for single words or short sentences. @author Martin Desruisseaux (IRD, Geomatys) @since 0.3 (derived from geotk-2.2) @version 0.3 @module
org.deri.grefine.rdf.vocab.Vocabulary
org.geotools.resources.i18n.Vocabulary
Base class for locale-dependent resources. Instances of this class should never be created directly. Use the factory method {@link #getResources} or use static convenience methods instead. @since 2.2 @source $URL$ @version $Id$ @author Martin Desruisseaux (IRD)
syntaxLearner.corpus.Vocabulary
@author Omer Shapira. This class is meant to keep track of vocabulary operations, including a data structure for words, hash functions, word affinity in a language, and other helper functions.

Examples of joshua.corpus.vocab.Vocabulary

   * Constructs an empty corpus.
   * <p>
   * NOTE: Primarily needed for Externalizable interface.
   */
  public CorpusArray() {
    super(new Vocabulary());
//    this.symbolTable = new Vocabulary();
    this.sentences = new int[]{};
    this.corpus = new int[]{};
  }

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

    String corpusFileName = args[0];
    String binaryVocabFilename = args[1];
    String binaryCorpusFilename = args[2];
    String charset = (args.length > 3) ? args[3] : "UTF-8";
    
    Vocabulary symbolTable = new Vocabulary();
    int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, symbolTable, true);
    
    CorpusArray corpusArray = SuffixArrayFactory.createCorpusArray(corpusFileName, symbolTable, lengths[0], lengths[1]);
    
    corpusArray.write(binaryCorpusFilename, binaryVocabFilename, charset);

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

  private int[]      words;
  
  
  public BasicPhrase(byte language, String sentence) {
    this.language   = language;
    this.vocabulary = new Vocabulary();
    this.words = splitSentence(sentence, vocabulary);
  }

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

    int lastSentence = Integer.parseInt(argv[3]);
    HashMap<Integer,Integer> chosenSentences = new HashMap<Integer,Integer>();
    for (int i = firstSentence; i < lastSentence; i++) {
      chosenSentences.put(i, i);
    }
    Vocabulary vocab = new Vocabulary();
    DiskHyperGraph dhg = new DiskHyperGraph(vocab, 0, true, null);
    dhg.initRead(itemsFile, rulesFile, chosenSentences);
    JungHyperGraph hg = new JungHyperGraph(dhg.readHyperGraph(), vocab);
    JFrame frame = new JFrame("Joshua Hypergraph");
    frame.getContentPane().add(new HyperGraphViewer(hg, vocab));

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

    int lastSentence = Integer.parseInt(argv[3]);
    HashMap<Integer,Integer> chosenSentences = new HashMap<Integer,Integer>();
    for (int i = firstSentence; i < lastSentence; i++) {
      chosenSentences.put(i, i);
    }
    Vocabulary vocab = new Vocabulary();
    DiskHyperGraph dhg = new DiskHyperGraph(vocab, 0, true, null);
    dhg.initRead(itemsFile, rulesFile, chosenSentences);
    JungHyperGraph hg = new JungHyperGraph(dhg.readHyperGraph(), vocab);
    return;
  }

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

          String binaryTargetFileName = joshDirName + File.separator + "target.corpus";
//          String binaryTargetSuffixesFileName = joshDirName + File.separator + "target.suffixes";
          String binaryAlignmentFileName = joshDirName + File.separator + "alignment.grids";


          logger.fine("Loading vocabulary...");
          Vocabulary commonVocab = new Vocabulary();
          ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
          commonVocab.readExternal(in);


          logger.fine("Loading source corpus...");
          Corpus sourceCorpus = new MemoryMappedCorpusArray(commonVocab, binarySourceFileName);


          logger.fine("Loading source suffix array...");

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

  //===============================================================
  
  public static void main(String[] args) throws IOException, ClassNotFoundException {




    Vocabulary symbolTable;
    Corpus corpusArray;
    Suffixes suffixArray;
    FrequentPhrases frequentPhrases;


    if (args.length == 1) {


      String corpusFileName = args[0];


      logger.info("Constructing vocabulary from file " + corpusFileName);
      symbolTable = new Vocabulary();
      int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, symbolTable, true);


      logger.info("Constructing corpus array from file " + corpusFileName);
      corpusArray = SuffixArrayFactory.createCorpusArray(corpusFileName, symbolTable, lengths[0], lengths[1]);


      logger.info("Constructing suffix array from file " + corpusFileName);
      suffixArray = new SuffixArray(corpusArray, Cache.DEFAULT_CAPACITY);


    } else if (args.length == 3) {


      String binarySourceVocabFileName = args[0];
      String binaryCorpusFileName = args[1];
      String binarySuffixArrayFileName = args[2];


      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language vocabulary from binary file " + binarySourceVocabFileName);
      ObjectInput in = BinaryIn.vocabulary(binarySourceVocabFileName);
      symbolTable = new Vocabulary();
      symbolTable.readExternal(in);


      logger.info("Constructing corpus array from file " + binaryCorpusFileName);
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing memory mapped source language corpus array.");
      corpusArray = new MemoryMappedCorpusArray(symbolTable, binaryCorpusFileName);

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

      for (String sentence : to_be_or_not_to_be) {
        String[] array = sentence.split("\\s+");
        Arrays.sort(array);
        for (String s : array) { set.add(s); }
      }
      symbolTableToBe = new Vocabulary(set);
      int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, new Vocabulary(), true);


      logger.fine("Constructing corpus array from file " + corpusFileName);
      corpusToBe = SuffixArrayFactory.createCorpusArray(corpusFileName, symbolTableToBe, lengths[0], lengths[1]);


      logger.fine("Constructing suffix array from file " + corpusFileName);

View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

        sourcePrintStream.println(sentence);
      }
      sourcePrintStream.close();
      corpusFileName = sourceFile.getAbsolutePath();
      
      Vocabulary symbolTable;
      
      logger.fine("Constructing vocabulary from file " + corpusFileName);
      ArrayList<String> words = new ArrayList<String>();
      for (String sentence : sentences) {
        String[] array = sentence.split("\\s+");
        for (String s : array) { 
          if (! words.contains(s)) {
            words.add(s); 
          }
        }
      }
      Collections.sort(words);
      LinkedHashSet<String> set = new LinkedHashSet<String>(words);
      symbolTable = new Vocabulary(set);
      int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, new Vocabulary(), true);


      logger.fine("Constructing corpus array from file " + corpusFileName);
      corpusArray = SuffixArrayFactory.createCorpusArray(corpusFileName, symbolTable, lengths[0], lengths[1]);


      logger.fine("Constructing suffix array from file " + corpusFileName);

View Full Code Here

Examples of kea.vocab.Vocabulary

    m_DisallowInternalPeriods = disallow;
  }




  public void loadThesaurus(Stemmer st, Stopwords sw) {
    m_Vocabulary = new Vocabulary(m_vocabulary,m_vocabularyFormat, m_documentLanguage);


    m_Vocabulary.setStemmer(st);
    m_Vocabulary.setStopwords(sw);
    m_Vocabulary.initialize();
    try {

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of their respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.