Package joshua.corpus.suffix_array

Examples of joshua.corpus.suffix_array.Pattern
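The snippets below exercise the main ways of building a Pattern: from a SymbolTable and an array of word IDs, by appending a token (such as the nonterminal SymbolTable.X) to an existing Pattern, by copying a Phrase, and with no tokens at all (the empty "epsilon" pattern). The following is a minimal sketch tying those constructors together; the class name, the concrete Vocabulary implementation with its no-argument constructor, and the import paths are assumptions, while the constructors and the accessors addTerminal(), getWordIDs(), and arity() all appear in the snippets below.

import joshua.corpus.Phrase;
import joshua.corpus.suffix_array.Pattern;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;

public class PatternSketch {

  public static void main(String[] args) {

    // Assumption: Vocabulary serves here as a concrete SymbolTable implementation.
    SymbolTable vocab = new Vocabulary();

    // Register terminals and build a terminal-only pattern from their IDs.
    int[] wordIDs = {
        vocab.addTerminal("de"),
        vocab.addTerminal("sesiones"),
        vocab.addTerminal("del"),
        vocab.addTerminal("parlamento"),
        vocab.addTerminal("europeo")
    };
    Pattern pattern = new Pattern(vocab, wordIDs);

    // Empty ("epsilon") pattern, as used for the prefix-tree root further down.
    Pattern epsilon = new Pattern(vocab);

    // Extend an existing pattern by one token; SymbolTable.X is the nonterminal.
    Pattern extended = new Pattern(pattern, SymbolTable.X);

    // Copy an arbitrary Phrase into a Pattern.
    Phrase phrase = pattern;
    Pattern copied = new Pattern(phrase);

    System.out.println("epsilon length:  " + epsilon.getWordIDs().length);   // 0
    System.out.println("extended arity:  " + extended.arity());              // 1 nonterminal
    System.out.println("copied length:   " + copied.getWordIDs().length);    // 5 terminals
  }
}

Each snippet below shows one of these constructors in context: rule extraction keyed on a source Pattern, decoding over a Pattern built from an input sentence, prefix-tree construction, and the unit tests for the class.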


  }

  /* See Javadoc for RuleExtractor class. */
  public List<Rule> extractRules(MatchedHierarchicalPhrases sourceHierarchicalPhrases) {

    Pattern sourcePattern = sourceHierarchicalPhrases.getPattern();
   
    if (logger.isLoggable(Level.FINE)) logger.fine("Extracting rules for source pattern: " + sourcePattern);
     
    Cache<Pattern,List<Rule>> cache = sourceSuffixArray.getCachedRules();
   
    if (cache.containsKey(sourcePattern)) {
      return cache.get(sourcePattern);
    } else {
     
      ArrayList<HierarchicalPhrase> translations = getTranslations(sourceHierarchicalPhrases);
     
      Map<Pattern,Integer> counts = new HashMap<Pattern,Integer>();
      for (Pattern translation : translations) {
        if (translation != null) {
          Integer count = counts.get(translation);
          if (null == count) {
            count = 1;
          } else {
            count++;
          }
          counts.put(translation, count);
        }
      }

      if (logger.isLoggable(Level.FINER)) { logger.finer(
          translations.size() + " actual translations of " +
          sourcePattern + " being stored.");
      }


      float p_e_given_f_denominator = translations.size();

      // We don't want to produce duplicate rules
      HashSet<HierarchicalPhrase> uniqueTranslations = new HashSet<HierarchicalPhrase>(translations);
     
      List<Rule> results = new ArrayList<Rule>(sourceHierarchicalPhrases.size());
     
      int sourcePatternCount = sourceHierarchicalPhrases.size();
      for (HierarchicalPhrase translation : uniqueTranslations) {
        float[] featureScores =
          calculateFeatureValues(
              sourcePattern,
              sourcePatternCount,
              translation,
              counts, p_e_given_f_denominator);

        Rule rule = new BilingualRule(
            SymbolTable.X,
            sourcePattern.getWordIDs(),
            translation.getWordIDs(),
            featureScores,
            translation.arity(),
            sourceSuffixArray.getVocabulary().addTerminal(JoshuaConfiguration.phrase_owner),
            0.0f,
View Full Code Here


   
    Chart chart; {
      //TODO: we should not use "(((" to decide whether it is a lattice input
      final boolean looksLikeLattice = segment.sentence().startsWith("(((");
      Lattice<Integer> inputLattice = null;
      Pattern sentence = null;
      if (looksLikeLattice) {
        inputLattice = Lattice.createFromString(segment.sentence(),
                  this.symbolTable);
        sentence = null; // TODO SA needs to accept lattices!
      } else {
        int[] intSentence = this.symbolTable.getIDs(segment.sentence());
        if (logger.isLoggable(Level.FINEST))
          logger.finest("Converted \"" + segment.sentence() + "\" into " + Arrays.toString(intSentence));
        inputLattice = Lattice.createLattice(intSentence);
        sentence = new Pattern(this.symbolTable, intSentence);
      }
      if (logger.isLoggable(Level.FINEST))
        logger.finest("Translating input lattice:\n" + inputLattice.toString());

      Grammar[] grammars = new Grammar[grammarFactories.size()];
View Full Code Here

   
    Grammar[] grammars = new Grammar[grammarFactories.size()];
    int i = 0;
    for (GrammarFactory factory : this.grammarFactories) {
      grammars[i] = factory.getGrammarForSentence(
          new Pattern(this.symbolTable, intSentence));
     
      // For batch grammar, we do not want to sort it every time
      if (! grammars[i].isSorted()) {
        grammars[i].sortGrammar(this.featureFunctions);
      }
View Full Code Here

//      root.setBounds(0, suffixArray.size()-1);
//    }
//    root.sourceHierarchicalPhrases = HierarchicalPhrases.emptyList(vocab);

    // Define epsilon to be an empty pattern
    epsilon = new Pattern(vocab);

   
    // 1: children(p_eps) <-- children(p_eps) U p_x

    if (maxNonterminals > 0) {  // Create and set up the X node that comes off of ROOT
View Full Code Here

     
      // 3: Add <f_i, i, i+1, p_eps> to queue
      queue.add(new Tuple(epsilon, i, i, root));
    }

    if (this.maxNonterminals > 0) {
      Pattern xpattern = new Pattern(vocab, X);
     
      int start = START_OF_SENTENCE;
      if (!sentenceInitialX) start += 1;
   
      // 4: for i from 1 to I
      for (int i=start; i<=END_OF_SENTENCE; i++) {
        //if (logger.isLoggable(Level.FINEST)) logger.finest("Adding tuple (" + (i-1) + ","+(i)+","+root+",{"+X+","+intToString(sentence[i])+"})");
        if (logger.isLoggable(Level.FINEST)) logger.finest("Adding tuple (X," + (i-1) + ","+ i +","+xnode.toShortString(vocab) +")");
       
        // 5: Add <X f_i, i-1, i+1, p_x> to queue
        if (edgeXMayViolatePhraseSpan) {
          queue.add(new Tuple(xpattern, i, i, xnode));
        } else {
          queue.add(new Tuple(xpattern, i-1, i, xnode));
        }
      }
    }


    // 6: While queue is not empty do
    while (! queue.isEmpty()) {

      if (logger.isLoggable(Level.FINER)) {
        logger.finer("\n");
        if (logger.isLoggable(Level.FINEST)) logger.finest("CURRENT TREE:      " + root);
      }
     
      // 7: Pop <alpha, i, j, p_alphaBeta> from queue
      Tuple tuple = queue.remove();

      int i = tuple.spanStart;
      int j = tuple.spanEnd;
      Node prefixNode = tuple.prefixNode;
      Pattern prefixPattern = tuple.pattern;

//      if (prefixNode.objectID==329 //) {
//          || (prefixNode.objectID==28 && i==13 && j==17)) {
//        int x = -1;
//        x++;
//      }
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Have tuple (" +prefixPattern+","+ i + ","+j+","+prefixNode.toShortString(vocab)+")");

      if (j <= END_OF_SENTENCE) {

        // 8: If p_alphaBetaF_i elementOf children(p_alphaBeta) then
        if (prefixNode.hasChild(sentence[j])) {

          if (logger.isLoggable(Level.FINER)) logger.finer("EXISTING node for \"" + sentence[j] + "\" from " + prefixNode.toShortString(vocab) + " to node " + prefixNode.getChild(sentence[j]).toShortString(vocab) + " with pattern " + prefixPattern);

          // child is p_alphaBetaF_j
          Node child = prefixNode.getChild(sentence[j]);
         
          // 9: If p_alphaBetaF_j is inactive then
          if (! child.active) {
           
            // 10: Continue to next item in queue
            continue;
           
            // 11: Else
          } else {
           
            // 12: EXTEND_QUEUE(alpha beta f_j, i, j, f_1^I)
            if (logger.isLoggable(Level.FINER)) {
              logger.finer("Calling EXTEND_QUEUE("+i+","+j+","+prefixPattern+","+prefixNode.toShortString(vocab));
              if (logger.isLoggable(Level.FINEST)) logger.finest("TREE BEFOR EXTEND: " + root);
            }
            extendQueue(queue, i, j, sentence, new Pattern(prefixPattern,sentence[j]), child);
            if (logger.isLoggable(Level.FINEST)) logger.finest("TREE AFTER EXTEND: " + root);
           
          }

        } else { // 13: Else

          // 14: children(alphaBeta) <-- children(alphaBeta) U p_alphaBetaF_j
          //     (Add new child node)
          if (logger.isLoggable(Level.FINER)) logger.finer("Adding new node to node " + prefixNode.toShortString(vocab));
          Node newNode = prefixNode.addChild(sentence[j]);
          if (logger.isLoggable(Level.FINER)) {
            String word = (suffixArray==null) ? ""+sentence[j] : suffixArray.getVocabulary().getWord(sentence[j]);
            logger.finer("Created new node " + newNode.toShortString(vocab) +" for \"" + word + "\" and \n  added it to " + prefixNode.toShortString(vocab));
          }


          // 15: p_beta <-- suffix_link(p_alpha_beta)
          //     suffixNode in this code is p_beta_f_j, not p_beta
          Node suffixNode = prefixNode.calculateSuffixLink(sentence[j]);

          if (logger.isLoggable(Level.FINEST)) {
            String oldSuffixLink = (newNode.suffixLink==null) ? "null" : "id"+newNode.suffixLink.objectID;
            String newSuffixLink = (suffixNode==null) ? "null" : "id"+suffixNode.objectID;
            logger.finest("Changing suffix link from " + oldSuffixLink + " to " + newSuffixLink + " for node " + newNode.toShortString(vocab) + " (prefix node " + prefixNode.toShortString(vocab) + " ) with token " + sentence[j]);
          }
         
          newNode.linkToSuffix( suffixNode );


          // 16: if p_beta_f_j is inactive then
          if (! suffixNode.active) {
           
            // 17: Mark p_alpha_beta_f_j inactive
            newNode.active = false; //Node.INACTIVE;
           
            // 18: else
          } else {

            Pattern extendedPattern = new Pattern(prefixPattern,sentence[j]);

            MatchedHierarchicalPhrases result = null;
           
            if (suffixArray != null) {
             
View Full Code Here

        int[] patternWords = pattern.getWordIDs();
       
        // 6: Q_alphaX <-- Q_alpha
        {
          SymbolTable vocab = (suffixArray==null) ? null : suffixArray.getVocabulary();
          Pattern xpattern = new Pattern(vocab, patternWords, X);
         
//          HierarchicalPhrases phrasesWithFinalX = new HierarchicalPhrases(xpattern, node.sourceHierarchicalPhrases);
          MatchedHierarchicalPhrases phrasesWithFinalX;
          if (suffixArray==null) {
            // This should only happen in certain unit tests
            logger.severe("This should only be encountered during unit testing!");
            if (node.sourceHierarchicalPhrases==null) {
              node.sourceHierarchicalPhrases = HierarchicalPhrases.emptyList((SymbolTable) null);
              node.sourcePattern = node.sourceHierarchicalPhrases.getPattern();
            }
            phrasesWithFinalX = node.getMatchedPhrases().copyWithFinalX();
          } else {
            Cache<Pattern,MatchedHierarchicalPhrases> cache = suffixArray.getCachedHierarchicalPhrases();
            if (cache.containsKey(xpattern)) {
              phrasesWithFinalX = cache.get(xpattern);
            } else {
              phrasesWithFinalX = node.getMatchedPhrases().copyWithFinalX();
              suffixArray.cacheMatchingPhrases(phrasesWithFinalX);
            }
          } 
         
          List<Rule> rules = (ruleExtractor==null) ?
                Collections.<Rule>emptyList() :
                ruleExtractor.extractRules(phrasesWithFinalX);
          //xNode.storeResults(phrasesWithFinalX, rules);
          storeResults(xNode, phrasesWithFinalX, rules);
        }
     
        if (logger.isLoggable(Level.FINEST)) logger.finest("Alpha pattern is " + pattern);

        // For efficiency, don't add any tuples to the queue whose patterns would exceed the max allowed number of tokens
        if (patternWords.length+2 <= maxPhraseLength) {
         
          int I = sentence.length;
          if (!sentenceFinalX) I -= 1;
         
          int min = (I<i+maxPhraseSpan) ? I : i+maxPhraseSpan-1;
          Pattern patternX = new Pattern(pattern, X);

          // 7: for k from j+1 to min(I, i+MaxPhraseLength) do
          for (int k=j+2; k<=min; k++) {

            // 8: Add <alpha f_j X, i, k, p_alphaX> to queue
View Full Code Here

 
  @Test(dependsOnMethods={"setup"})
  public void arity() {
    SymbolTable vocab = sourceCorpusArray.getVocabulary();
   
    Pattern     pattern = new Pattern(vocab, vocab.getIDs("de sesiones del parlamento europeo"));
    int[]       terminalSequenceStartIndices = {4};
    int[]       sentenceNumbers = {0};
   
    HierarchicalPhrases phrases =
      new HierarchicalPhrases(pattern, terminalSequenceStartIndices, sentenceNumbers);
View Full Code Here

  Pattern pattern;
 
  @Test
  public void basicPattern() {
   
    pattern = new Pattern(vocab, words);
   
    Assert.assertEquals(pattern.getWordIDs(), words);
    Assert.assertEquals(pattern.getVocab(), vocab);
    Assert.assertEquals(pattern.arity(), 2);
   
View Full Code Here

 
  @Test(dependsOnMethods = {"basicPattern"})
  public void extendedPattern() {
   
    Pattern extendedPattern = new Pattern(pattern, extra);
   
    Assert.assertEquals(extendedPattern.getWordIDs().length, extendedWords.length);
   
    for (int i=0; i<extendedWords.length; i++) {
      Assert.assertEquals(extendedPattern.getWordIDs()[i], extendedWords[i]);
    }
   
    Assert.assertEquals(extendedPattern.getVocab(), vocab);
    Assert.assertEquals(extendedPattern.arity(), 2);
     
  }
View Full Code Here

  @Test(dependsOnMethods = {"basicPattern"})
  public void copiedPattern() {
   
    Phrase phrase = pattern;
   
    Pattern copiedPattern = new Pattern(phrase);
   
    Assert.assertEquals(copiedPattern.getWordIDs().length, words.length);
    for (int i=0; i<words.length; i++) {
      Assert.assertEquals(copiedPattern.getWordIDs()[i], words[i]);
    }
    Assert.assertEquals(copiedPattern.getVocab(), vocab);
    Assert.assertEquals(copiedPattern.arity(), 2);
  }
View Full Code Here
