Package net.sourceforge.align.model.vocabulary

Examples of net.sourceforge.align.model.vocabulary.Vocabulary


   * Tests translation model parsing.
   */
  @Test
  public void testParse() {
    StringReader reader = new StringReader(TRANSLATION_MODEL);
    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    TranslationModel translationModel = TranslationModelUtil.parse(reader,
        sourceVocabulary, targetVocabulary);
    SourceData sourceData;
    sourceData = translationModel.get(sourceVocabulary.getWid("a"));
    assertEquals(1, sourceData.getTranslationList().size());
    assertEquals(0.0, sourceData.getTranslationProbability(targetVocabulary.getWid("A")), 0.0001);
    assertEquals(1.0, sourceData.getTranslationProbability(targetVocabulary.getWid("D")), 0.0001);
    sourceData = translationModel.get(sourceVocabulary.getWid("b"));
    assertEquals(2, sourceData.getTranslationList().size());
    assertEquals(0.25, sourceData.getTranslationProbability(targetVocabulary.getWid("C")), 0.0001);
    assertEquals(0.75, sourceData.getTranslationProbability(targetVocabulary.getWid("D")), 0.0001);
    sourceData = translationModel.get(sourceVocabulary.getWid("c"));
    assertEquals(1, sourceData.getTranslationList().size());
    assertEquals(1.0, sourceData.getTranslationProbability(targetVocabulary.getWid("A")), 0.0001);
    sourceData = translationModel.get(100);
    assertEquals(0, sourceData.getTranslationList().size());
  }
View Full Code Here


      throw new IllegalArgumentException("Reference corpus cannot be empty");
    }
   
    this.splitAlgorithm = splitAlgorithm;
   
    sourceVocabulary = new Vocabulary();
    targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    VocabularyUtil.tokenize(splitAlgorithm, alignmentList, sourceVocabulary,
        targetVocabulary, sourceWidList, targetWidList);
View Full Code Here

   */
  private List<Alignment> unifyRareWords(List<Alignment> alignmentList) {
   
    SplitAlgorithm splitAlgorithm = DEFAULT_TOKENIZE_ALGORITHM;

    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    VocabularyUtil.tokenize(splitAlgorithm, alignmentList,
        sourceVocabulary, targetVocabulary,
View Full Code Here

    int maxWordCount = createInt(commandLine, "max-word-count",
        DEFAULT_MAX_WORD_COUNT);
    int minOccurenceCount = createInt(commandLine, "min-occurrence-count",
        DEFAULT_MIN_OCCURRENCE_COUNT);

    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    VocabularyUtil.tokenize(splitAlgorithm, alignmentList,
        sourceVocabulary, targetVocabulary,
View Full Code Here

      //A simple tokenization algorithm, because the input should already be
      //tokenized - tokens separated by a space.
      //TODO: Is this correct? Tokens containing a space are not possible.
      SplitAlgorithm splitAlgorithm =
        VocabularyUtil.DEFAULT_TOKENIZE_ALGORITHM;
      Vocabulary sourceVocabulary = new Vocabulary();
      Vocabulary targetVocabulary = new Vocabulary();
     
      Parser parser = new AlParser(getIn());
      List<Alignment> alignmentList = parser.parse();

      List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
View Full Code Here

        TranslationModelUtil.DEFAULT_TRAIN_ITERATION_COUNT);
    String translationCorpus = commandLine.getOptionValue('t');
    String languageModels = commandLine.getOptionValue("x");
    String transModel = commandLine.getOptionValue("y");
   
    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    if (translationCorpus != null) {
      List<Alignment> translationAlignmentList = loadAlignmentList(translationCorpus);
View Full Code Here

TOP

Related Classes of net.sourceforge.align.model.vocabulary.Vocabulary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.