Examples of net.sourceforge.align.model.vocabulary.Vocabulary

Package net.sourceforge.align.model.vocabulary

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

net.sourceforge.align.model.vocabulary.Vocabulary
Represents a vocabulary mapping words to identifiers. @author Jarek Lipski (loomchild)

   * Tests translation model parsing.
   */
  @Test
  public void testParse() {
    StringReader reader = new StringReader(TRANSLATION_MODEL);
    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    TranslationModel translationModel = TranslationModelUtil.parse(reader, 
        sourceVocabulary, targetVocabulary);
    SourceData sourceData;
    sourceData = translationModel.get(sourceVocabulary.getWid("a"));
    assertEquals(1, sourceData.getTranslationList().size());
    assertEquals(0.0, sourceData.getTranslationProbability(targetVocabulary.getWid("A")), 0.0001);
    assertEquals(1.0, sourceData.getTranslationProbability(targetVocabulary.getWid("D")), 0.0001);
    sourceData = translationModel.get(sourceVocabulary.getWid("b"));
    assertEquals(2, sourceData.getTranslationList().size());
    assertEquals(0.25, sourceData.getTranslationProbability(targetVocabulary.getWid("C")), 0.0001);
    assertEquals(0.75, sourceData.getTranslationProbability(targetVocabulary.getWid("D")), 0.0001);
    sourceData = translationModel.get(sourceVocabulary.getWid("c"));
    assertEquals(1, sourceData.getTranslationList().size());
    assertEquals(1.0, sourceData.getTranslationProbability(targetVocabulary.getWid("A")), 0.0001);
    sourceData = translationModel.get(100);
    assertEquals(0, sourceData.getTranslationList().size());
  }

View Full Code Here

      throw new IllegalArgumentException("Reference corpus cannot be empty");
    }
    
    this.splitAlgorithm = splitAlgorithm;
    
    sourceVocabulary = new Vocabulary();
    targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();


    VocabularyUtil.tokenize(splitAlgorithm, alignmentList, sourceVocabulary, 
        targetVocabulary, sourceWidList, targetWidList);

View Full Code Here

   */
  private List<Alignment> unifyRareWords(List<Alignment> alignmentList) {
    
    SplitAlgorithm splitAlgorithm = DEFAULT_TOKENIZE_ALGORITHM;


    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>(); 
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();


    VocabularyUtil.tokenize(splitAlgorithm, alignmentList, 
        sourceVocabulary, targetVocabulary,

View Full Code Here

    int maxWordCount = createInt(commandLine, "max-word-count", 
        DEFAULT_MAX_WORD_COUNT);
    int minOccurenceCount = createInt(commandLine, "min-occurrence-count", 
        DEFAULT_MIN_OCCURRENCE_COUNT);


    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>(); 
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();


    VocabularyUtil.tokenize(splitAlgorithm, alignmentList, 
        sourceVocabulary, targetVocabulary,

View Full Code Here

      // A simple tokenizing algorithm is used because the input should
      // already be tokenized - tokens separated by a space.
      // TODO: Is this right? Tokens cannot contain a space inside them.
      SplitAlgorithm splitAlgorithm = 
        VocabularyUtil.DEFAULT_TOKENIZE_ALGORITHM;
      Vocabulary sourceVocabulary = new Vocabulary();
      Vocabulary targetVocabulary = new Vocabulary();
      
      Parser parser = new AlParser(getIn());
      List<Alignment> alignmentList = parser.parse();


      List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();

View Full Code Here

        TranslationModelUtil.DEFAULT_TRAIN_ITERATION_COUNT);
    String translationCorpus = commandLine.getOptionValue('t');
    String languageModels = commandLine.getOptionValue("x");
    String transModel = commandLine.getOptionValue("y");
    
    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();


    if (translationCorpus != null) {
      List<Alignment> translationAlignmentList = loadAlignmentList(translationCorpus);

View Full Code Here

TOP

Related Classes of net.sourceforge.align.model.vocabulary.Vocabulary

net.sourceforge.align.calculator.content.TranslationCalculator

net.sourceforge.align.filter.macro.MooreMacro

net.sourceforge.align.model.translation.TranslationModelUtilTest

net.sourceforge.align.ui.console.command.AlignCommand

net.sourceforge.align.ui.console.command.ModelCommand

net.sourceforge.align.ui.console.command.ModifyCommand

java.io.PrintWriter

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.