Examples of Vocabulary


Examples of maui.vocab.Vocabulary

    try {

      if (debugMode) {
        System.err.println("--- Loading the vocabulary...");
      }
      vocabulary = new Vocabulary(vocabularyName, vocabularyFormat);
      vocabulary.setStemmer(stemmer);
      vocabulary.setStopwords(stopwords);
      vocabulary.setDebug(debugMode);
      vocabulary.initialize();
    } catch (Exception e) {
View Full Code Here

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

   * Tests translation model parsing.
   */
  @Test
  public void testParse() {
    StringReader reader = new StringReader(TRANSLATION_MODEL);
    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    TranslationModel translationModel = TranslationModelUtil.parse(reader,
        sourceVocabulary, targetVocabulary);
    SourceData sourceData;
    sourceData = translationModel.get(sourceVocabulary.getWid("a"));
    assertEquals(1, sourceData.getTranslationList().size());
    assertEquals(0.0, sourceData.getTranslationProbability(targetVocabulary.getWid("A")), 0.0001);
    assertEquals(1.0, sourceData.getTranslationProbability(targetVocabulary.getWid("D")), 0.0001);
    sourceData = translationModel.get(sourceVocabulary.getWid("b"));
    assertEquals(2, sourceData.getTranslationList().size());
    assertEquals(0.25, sourceData.getTranslationProbability(targetVocabulary.getWid("C")), 0.0001);
    assertEquals(0.75, sourceData.getTranslationProbability(targetVocabulary.getWid("D")), 0.0001);
    sourceData = translationModel.get(sourceVocabulary.getWid("c"));
    assertEquals(1, sourceData.getTranslationList().size());
    assertEquals(1.0, sourceData.getTranslationProbability(targetVocabulary.getWid("A")), 0.0001);
    sourceData = translationModel.get(100);
    assertEquals(0, sourceData.getTranslationList().size());
  }
View Full Code Here

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

      throw new IllegalArgumentException("Reference corpus cannot be empty");
    }
   
    this.splitAlgorithm = splitAlgorithm;
   
    sourceVocabulary = new Vocabulary();
    targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    VocabularyUtil.tokenize(splitAlgorithm, alignmentList, sourceVocabulary,
        targetVocabulary, sourceWidList, targetWidList);
View Full Code Here

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

   */
  private List<Alignment> unifyRareWords(List<Alignment> alignmentList) {
   
    SplitAlgorithm splitAlgorithm = DEFAULT_TOKENIZE_ALGORITHM;

    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    VocabularyUtil.tokenize(splitAlgorithm, alignmentList,
        sourceVocabulary, targetVocabulary,
View Full Code Here

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

    int maxWordCount = createInt(commandLine, "max-word-count",
        DEFAULT_MAX_WORD_COUNT);
    int minOccurenceCount = createInt(commandLine, "min-occurrence-count",
        DEFAULT_MIN_OCCURRENCE_COUNT);

    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    VocabularyUtil.tokenize(splitAlgorithm, alignmentList,
        sourceVocabulary, targetVocabulary,
View Full Code Here

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

      // A simple tokenizing algorithm is used because the input should already
      // be tokenized, with tokens separated by spaces.
      // TODO: Is this correct? Tokens cannot contain a space.
      SplitAlgorithm splitAlgorithm =
        VocabularyUtil.DEFAULT_TOKENIZE_ALGORITHM;
      Vocabulary sourceVocabulary = new Vocabulary();
      Vocabulary targetVocabulary = new Vocabulary();
     
      Parser parser = new AlParser(getIn());
      List<Alignment> alignmentList = parser.parse();

      List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
View Full Code Here

Examples of net.sourceforge.align.model.vocabulary.Vocabulary

        TranslationModelUtil.DEFAULT_TRAIN_ITERATION_COUNT);
    String translationCorpus = commandLine.getOptionValue('t');
    String languageModels = commandLine.getOptionValue("x");
    String transModel = commandLine.getOptionValue("y");
   
    Vocabulary sourceVocabulary = new Vocabulary();
    Vocabulary targetVocabulary = new Vocabulary();
    List<List<Integer>> sourceWidList = new ArrayList<List<Integer>>();
    List<List<Integer>> targetWidList = new ArrayList<List<Integer>>();

    if (translationCorpus != null) {
      List<Alignment> translationAlignmentList = loadAlignmentList(translationCorpus);
View Full Code Here

Examples of org.antlr.v4.runtime.Vocabulary

  public String getTokenName(int t) {
    if (t == Token.EOF) {
      return "EOF";
    }

    Vocabulary vocabulary = parser != null ? parser.getVocabulary() : VocabularyImpl.EMPTY_VOCABULARY;
    String displayName = vocabulary.getDisplayName(t);
    if (displayName.equals(Integer.toString(t))) {
      return displayName;
    }

    return displayName + "<" + t + ">";
View Full Code Here

Examples of org.antlr.v4.runtime.Vocabulary

    String[] tokenNames = {
      "<INVALID>",
      "TOKEN_REF", "RULE_REF", "'//'", "'/'", "'*'", "'!'", "ID", "STRING"
    };

    Vocabulary vocabulary = VocabularyImpl.fromTokenNames(tokenNames);
    Assert.assertNotNull(vocabulary);
    Assert.assertEquals("EOF", vocabulary.getSymbolicName(Token.EOF));
    for (int i = 0; i < tokenNames.length; i++) {
      Assert.assertEquals(tokenNames[i], vocabulary.getDisplayName(i));

      if (tokenNames[i].startsWith("'")) {
        Assert.assertEquals(tokenNames[i], vocabulary.getLiteralName(i));
        Assert.assertNull(vocabulary.getSymbolicName(i));
      }
      else if (Character.isUpperCase(tokenNames[i].charAt(0))) {
        Assert.assertNull(vocabulary.getLiteralName(i));
        Assert.assertEquals(tokenNames[i], vocabulary.getSymbolicName(i));
      }
      else {
        Assert.assertNull(vocabulary.getLiteralName(i));
        Assert.assertNull(vocabulary.getSymbolicName(i));
      }
    }
  }
View Full Code Here

Examples of org.apache.sis.util.resources.Vocabulary

        switch (borderWidth) {
            case 1: horizontalLine = '─'; separator += "│ "; break;
            case 2: horizontalLine = '═'; separator += "║ "; break;
        }
        final TableAppender table = new TableAppender(toAppendTo, separator);
        final Vocabulary resources = Vocabulary.getResources(headerLocale);
        /*
         * If there is a header for at least one statistics, write the full headers row.
         */
        if (horizontalLine != 0) {
            table.nextLine(horizontalLine);
        }
        if (showHeaders) {
            table.nextColumn();
            for (final String header : headers) {
                if (header != null) {
                    table.append(header);
                    table.setCellAlignment(TableAppender.ALIGN_CENTER);
                }
                table.nextColumn();
            }
            table.append(lineSeparator);
            if (horizontalLine != 0) {
                table.nextLine(horizontalLine);
            }
        }
        /*
         * Initialize the NumberFormat for formatting integers without scientific notation.
         * This is necessary since the format may have been modified by a previous execution
         * of this method.
         */
        final Format format = getFormat(Double.class);
        if (format instanceof DecimalFormat) {
            ((DecimalFormat) format).applyPattern("#0"); // Also disable scientific notation.
        } else if (format instanceof NumberFormat) {
            setFractionDigits((NumberFormat) format, 0);
        }
        /*
         * Iterates over the rows to format (count, minimum, maximum, mean, RMS, standard deviation),
         * then iterate over columns (statistics on sample values, on the first derivatives, etc.)
         * The NumberFormat configuration may be different for each column, but we can skip many
         * reconfiguration in the common case where there is only one column.
         */
        boolean needsConfigure = false;
        for (int i=0; i<KEYS.length; i++) {
            switch (i) {
                case 1: if (!showNaNCount) continue; else break;
                // Case 0 and 1 use the above configuration for integers.
                // Case 2 unconditionally needs a reconfiguration for floating point values.
                // Case 3 and others need reconfiguration only if there is more than one column.
                case 2: needsConfigure = true; break;
                case 3: needsConfigure = (stats[0].differences() != null); break;
            }
            table.setCellAlignment(TableAppender.ALIGN_LEFT);
            table.append(resources.getString(KEYS[i])).append(':');
            for (final Statistics s : stats) {
                final Number value;
                switch (i) {
                    case 0:  value = s.count();    break;
                    case 1:  value = s.countNaN(); break;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.