Package joshua.corpus.vocab

Examples of joshua.corpus.vocab.SymbolTable


    this.alignments = alignments;
    this.sourceSuffixArray = sourceSuffixArray;
    this.sampleSize = sampleSize;
    this.models = models;
   
    SymbolTable vocab = sourceSuffixArray.getVocabulary();
    this.nonterminalIDs = new int[]{vocab.addNonterminal(SymbolTable.X1_STRING), vocab.addNonterminal(SymbolTable.X2_STRING)};
  }
View Full Code Here


        targetSuffixArray, alignments,
        lexProbs, models, sampleSize,
        maxPhraseSpan, maxPhraseLength,
        minNonterminalSpan, maxNonterminalSpan);
   
    SymbolTable vocab = new Vocabulary();
   
    Corpus corpus = suffixArray.getCorpus();
   
    NGramLanguageModel largeLM = new LMGrammarJAVA(
        vocab,
View Full Code Here

public abstract class AbstractLexProbs implements LexicalProbabilities {

  /* See Javadoc for LexicalProbabilities#getTargetGivenSourceAlignments(Pattern,Pattern). */
  public AlignmentGrid getTargetGivenSourceAlignments(Pattern targetPattern, Pattern sourcePattern) {
   
    SymbolTable sourceVocab = getSourceVocab();
    SymbolTable targetVocab = getTargetVocab();
   
    StringBuilder alignmentPoints = new StringBuilder();
   
    for (IndexedInt indexedTarget : Lists.eachWithIndex(targetPattern.getWordIDs())) {
      final int targetWord = indexedTarget.getValue();
      final int targetIndex = indexedTarget.getIndex();
     
      if (targetVocab.isNonterminal(targetWord)) {
        //TODO Do something special here
       
      } else {
       
        float max = targetGivenSource(targetWord, null);
View Full Code Here

  }
 
  /* See Javadoc for LexicalProbabilities#getSourceGivenTargetAlignments(Pattern,Pattern). */
  public AlignmentGrid getSourceGivenTargetAlignments(Pattern sourcePattern, Pattern targetPattern) {
   
    SymbolTable sourceVocab = getSourceVocab();
    SymbolTable targetVocab = getTargetVocab();
   
    StringBuilder alignmentPoints = new StringBuilder();
   
    for (IndexedInt indexedSource : Lists.eachWithIndex(sourcePattern.getWordIDs())) {     
      int sourceWord = indexedSource.getValue();
      int sourceIndex = indexedSource.getIndex();
     
      if (sourceVocab.isNonterminal(sourceWord)) {
        //TODO Do something special here
       
      } else {
     
        float max = sourceGivenTarget(sourceWord, null);
        Integer bestTargetIndex = null;
       
        for (IndexedInt indexedTarget : Lists.eachWithIndex(targetPattern.getWordIDs())) {       
          int targetWord = indexedTarget.getValue();
          int targetIndex = indexedTarget.getIndex();
         
          if (! targetVocab.isNonterminal(targetWord)) {
            float score = this.sourceGivenTarget(sourceWord, targetWord);
            if (score > max) {
              max = score;
              bestTargetIndex = targetIndex;
            }
View Full Code Here

    String outVocabFile = args[1].trim();
    
   
    // Load the lm file so that the SRI toolkit will set up the map
    int lmOrder = 1;
    SymbolTable symbolTable = new SrilmSymbol(lmOrder);
    new LMGrammarSRILM((SrilmSymbol)symbolTable, lmOrder, lmFile);
   
   
    // Write the map to a temporary file
    File tmpFile = File.createTempFile("srilm", "out");
View Full Code Here

      System.out.println("num of args is "+ args.length);
      for(int i=0; i <args.length; i++)System.out.println("arg is: " + args[i]);
      System.exit(0);   
    }
    long start_time = System.currentTimeMillis();
    SymbolTable symbolTbl = new BuildinSymbol(null)
    boolean is_using_crf =  new Boolean(args[0].trim());
    HGDiscriminativeLearner.usingCRF=is_using_crf;
    String f_l_train_items=args[1].trim();
    String f_l_train_rules=args[2].trim();
    String f_l_orc_items=args[3].trim();
View Full Code Here

  }
 
 
  public static void main(String[] args) {
   
    SymbolTable symbolTable = new BuildinSymbol();
    //init symbol
   
    NULL_ALIGN_WRD_SYM_ID = symbolTable.addTerminal(NULL_ALIGN_WRD_SYM);
    NON_TERMINAL_TAG_SYM_ID = symbolTable.addNonterminal(NON_TERMINAL_TAG_SYM);
   
    //read weights files
    eweights_table = readWeightFile("C:\\data_disk\\java_work_space\\SyntaxMT\\phraseExtraction\\lex.f2e.gz");
    fweights_table = readWeightFile("C:\\data_disk\\java_work_space\\SyntaxMT\\phraseExtraction\\lex.e2f.gz");
   
View Full Code Here

    int num_sents=new Integer(args[2].trim());
    String f_config=args[3].trim();//be careful with the weights
   
    //set up models
    VariationalDecoderConfiguration.readConfigFile(f_config);
    SymbolTable symbolTbl = new BuildinSymbol(null)
    List<FeatureFunction> featFunctions = new ArrayList<FeatureFunction>();
    HashMap<VariationalNgramApproximator, FeatureTemplateBasedFF> approximatorMap = new HashMap<VariationalNgramApproximator, FeatureTemplateBasedFF> ();
    VariationalDecoderConfiguration.initializeModels(f_config, symbolTbl, featFunctions, approximatorMap);   
    double insideOutsideScalingFactor =  VariationalDecoderConfiguration.insideoutsideScalingFactor;       
   
View Full Code Here

 
    int baseline_lm_feat_id = 0;//???????
   
    int max_num_words =25;
   
    SymbolTable p_symbol = new BuildinSymbol(null);
    KBestExtractor kbest_extractor = new KBestExtractor(p_symbol, true, false, false, false,  false, true);//????????????
    ApproximateFilterHGByOneString filter = new ApproximateFilterHGByOneString(p_symbol,baseline_lm_feat_id,baseline_lm_order);
    StringSumInHG p_sumer = new StringSumInHG(p_symbol, kbest_extractor, filter);
   
    //#### process test set
View Full Code Here

  private final MatchedHierarchicalPhrases matchedPhrases;
 
  RootNode(PrefixTree tree, int incomingArcValue) {
    super(tree.parallelCorpus, 1);
    this.tree = tree;
    SymbolTable vocab = tree.vocab;
    this.matchedPhrases = HierarchicalPhrases.emptyList(vocab);
    Suffixes suffixArray = tree.suffixArray;
    if (suffixArray != null) {
      setBounds(0, suffixArray.size()-1);
    }
View Full Code Here

TOP

Related Classes of joshua.corpus.vocab.SymbolTable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.