Package joshua.corpus

Examples of joshua.corpus.MatchedHierarchicalPhrases


            // 18: else
          } else {

            Pattern extendedPattern = new Pattern(prefixPattern,sentence[j]);

            MatchedHierarchicalPhrases result = null;
           
            if (suffixArray != null) {
             
              // 19: Q_alpha-beta-f_j <-- query(alpha-beta-f_j, Q_alpha-beta, Q_beta-f_j)
              result = query(extendedPattern, newNode, prefixNode, suffixNode);
             
            }

            // 20: if Q_alpha_beta_f_j = ∅ (meaning that no results were found for this query)
            //if (result != null && result.isEmpty()) {// && prefixNode != xnode) {
            if (result != null && result.isEmpty()) {
             
              // 21: Mark p_alpha_beta_f_j inactive
              newNode.active = false; //Node.INACTIVE;
             
              // 22: else
View Full Code Here


  public MatchedHierarchicalPhrases query(Pattern pattern, Node node, Node prefixNode, Node suffixNode) {

    if (logger.isLoggable(Level.FINER)) logger.finer("PrefixTree.query( " + pattern + ",\n\t   new node " + node + ",\n\tprefix node " + prefixNode + ",\n\tsuffix node " + suffixNode + ")");
    long startTime = System.nanoTime();
   
    MatchedHierarchicalPhrases result;

//    boolean stop = false;
//    if (pattern.toString().startsWith("[de ")) {
//      logger.warning("Found it! " + pattern.toString() + " yahoo");
//      int x;
//      x=5;
//      x+=1;
//      stop = true;
//    }
//   
//    if (stop) {
//      if (stop) {
//        logger.info("Stopping");
//        logger.info("Did you stop?");
//      }
//    }
//   
   
    if (suffixArray.getCachedHierarchicalPhrases().containsKey(pattern)) {
      result = suffixArray.getCachedHierarchicalPhrases().get(pattern);
      int[] bounds = suffixArray.findPhrase(pattern, 0, pattern.size(), prefixNode.lowBoundIndex, prefixNode.highBoundIndex);
      if (bounds!=null) {
        node.setBounds(bounds[0],bounds[1]);
      }
    } else {
      if (pattern.toString().startsWith("[de ")) {
        int x = 5;
        x++;
      }

      int arity = pattern.arity();

      // 1: if alpha=u then
      //    If the pattern is contiguous, look up the pattern in the suffix array
      if (arity == 0) {

        // 2: SUFFIX-ARRAY-LOOKUP(SA_f, a alpha b, l_a_alpha, h_a_alpha
        // Get the first and last index in the suffix array for the specified pattern
        int[] bounds = suffixArray.findPhrase(pattern, 0, pattern.size(), prefixNode.lowBoundIndex, prefixNode.highBoundIndex);
        if (bounds==null) {
          result = HierarchicalPhrases.emptyList(pattern);
          suffixArray.cacheMatchingPhrases(result);
          //TODO Should node.setBounds(bounds) be called here?
        } else {
          node.setBounds(bounds[0],bounds[1]);
          int[] startingPositions = suffixArray.getAllPositions(bounds);
          result = suffixArray.createTriviallyHierarchicalPhrases(startingPositions, pattern, vocab);
        }


      } else { // 3: else --- alpha is a discontiguous pattern

        // 8: If M_a_alpha_b has been precomputed (then result will be non-null)
        // 9: Retrieve M_a_alpha_b from cache of precomputations


        // 10: else
        if (suffixArray.getCachedHierarchicalPhrases().containsKey(pattern)) { 
          result = suffixArray.getMatchingPhrases(pattern);
        } else {

          // 16: M_a_alpha_b <-- QUERY_INTERSECT(M_a_alpha, M_alpha_b)

          int[] sourceWords = prefixNode.getSourcePattern().getWordIDs();

          // Special handling of case when prefixNode is the X off of root (hierarchicalPhrases for that node is empty)
          if (arity==1 && sourceWords[0] < 0 && sourceWords[sourceWords.length-1] < 0){

            result = suffixNode.getMatchedPhrases().copyWithInitialX();

          } else {

            // Normal query intersection case (when prefixNode != X off of root)

            if (logger.isLoggable(Level.FINEST)) logger.finest("Calling queryIntersect("+pattern+" M_a_alpha.pattern=="+prefixNode.getSourcePattern() + ", M_alpha_b.pattern=="+suffixNode.getSourcePattern()+")");

            result = HierarchicalPhrases.queryIntersect(pattern, prefixNode.getMatchedPhrases(), suffixNode.getMatchedPhrases(), minNonterminalSpan, maxPhraseSpan, suffixArray);

          }

          suffixArray.cacheMatchingPhrases(result);
        }
      }
    }
   
    long finalQueryTime = System.nanoTime();
    if (logger.isLoggable(Level.FINE)) {
      long elapsedQueryTime = finalQueryTime - startTime;
      long microseconds = elapsedQueryTime / 1000;
      float milliseconds = microseconds / 1000.0f;
      logger.fine("Time to query pattern:\t" + pattern.toString() + "\t" + milliseconds + " milliseconds\t" + result.size() + " instances");
    }
   
    // 17: Return M_a_alpha_b
    List<Rule> rules = ruleExtractor.extractRules(result);
//    node.storeResults(result, rules);
    storeResults(node, result, rules);
   
    if (logger.isLoggable(Level.FINE)) {
      long elapsedTime = System.nanoTime() - finalQueryTime;
      long microseconds = elapsedTime / 1000;
      float milliseconds = microseconds / 1000.0f;
      logger.fine("Time to extract rules for pattern:\t" + pattern.toString() + "\t" + milliseconds + " milliseconds\t" + result.size() + " instances");
    }

    return result;

  }
View Full Code Here

        {
          SymbolTable vocab = (suffixArray==null) ? null : suffixArray.getVocabulary();
          Pattern xpattern = new Pattern(vocab, patternWords, X);
         
//          HierarchicalPhrases phrasesWithFinalX = new HierarchicalPhrases(xpattern, node.sourceHierarchicalPhrases);
          MatchedHierarchicalPhrases phrasesWithFinalX;
          if (suffixArray==null) {
            // This should only happen in certain unit tests
            logger.severe("This should only be encountered during unit testing!");
            if (node.sourceHierarchicalPhrases==null) {
              node.sourceHierarchicalPhrases = HierarchicalPhrases.emptyList((SymbolTable) null);
View Full Code Here

 
  /* See Javadoc for joshua.decoder.ff.tm.Trie#hasRules */
  public boolean hasRules() {
   
    if (active) {
      MatchedHierarchicalPhrases sourceHierarchicalPhrases = this.getMatchedPhrases();

      return ! sourceHierarchicalPhrases.isEmpty();
    } else {
      return false;
    }
  }
View Full Code Here

      SymbolTable vocab = corpus.getVocabulary();
     
      if (arity==0) {
        int[] bounds = this.findPhrase(pattern, 0, pattern.size(), 0, this.size()-1);
        int[] startPositions = this.getAllPositions(bounds);
        MatchedHierarchicalPhrases result = this.createTriviallyHierarchicalPhrases(startPositions, pattern, vocab);
        return result;
      } else if (arity==size) {
        int[] startPositions = new int[]{};
        MatchedHierarchicalPhrases result = this.createTriviallyHierarchicalPhrases(startPositions, pattern, vocab);
        return result;
      } else if (arity==1 && pattern.startsWithNonterminal()) {
        int[] terminals = new int[size-1];
        for (int i=1; i<size; i++) {
          terminals[i-1] = patternTokens[i];
        }
        Pattern terminalsPattern = new Pattern(vocab, terminals);
        MatchedHierarchicalPhrases terminalsMatch = this.createHierarchicalPhrases(terminalsPattern, minNonterminalSpan, maxPhraseSpan);
        MatchedHierarchicalPhrases result = terminalsMatch.copyWithInitialX();
        hierarchicalPhraseCache.put(pattern, result);
        return result;
      } else if (arity==1 && pattern.endsWithNonterminal()) {
        int[] terminals = new int[size-1];
        for (int i=0, n=size-1; i<n; i++) {
          terminals[i] = patternTokens[i];
        }
        Pattern terminalsPattern = new Pattern(vocab, terminals);
        MatchedHierarchicalPhrases terminalsMatch = this.createHierarchicalPhrases(terminalsPattern, minNonterminalSpan, maxPhraseSpan);
        MatchedHierarchicalPhrases result = terminalsMatch.copyWithFinalX();
        hierarchicalPhraseCache.put(pattern, result);
        return result;
//        int[] bounds = this.findPhrase(pattern, 0, size, 0, this.size());
//        int[] startPositions = this.getAllPositions(bounds);
////        Pattern patternX = new Pattern(pattern, PrefixTree.X);
//        MatchedHierarchicalPhrases result = this.createHierarchicalPhrases(startPositions, pattern, vocab);
//        return result;
      else {
       
        int[] prefixTokens = new int[patternTokens.length - 1];
        for (int i=0, n=patternTokens.length-1; i<n; i++) {
          prefixTokens[i] = patternTokens[i];
        }
       
        int[] suffixTokens = new int[patternTokens.length - 1];
        for (int i=1, n=patternTokens.length; i<n; i++) {
          suffixTokens[i-1] = patternTokens[i];
        }
       
        Pattern prefix = new Pattern(vocab, prefixTokens);
        Pattern suffix = new Pattern(vocab, suffixTokens);
       
        MatchedHierarchicalPhrases prefixMatches = createHierarchicalPhrases(prefix, minNonterminalSpan, maxPhraseSpan);
        MatchedHierarchicalPhrases suffixMatches = createHierarchicalPhrases(suffix, minNonterminalSpan, maxPhraseSpan);
       
        MatchedHierarchicalPhrases result =
          HierarchicalPhrases.queryIntersect(
              pattern, prefixMatches, suffixMatches,
              minNonterminalSpan, maxPhraseSpan, this);
     
        hierarchicalPhraseCache.put(pattern, result);
View Full Code Here

    Assert.assertEquals(pattern.size(), 3);
   
    int minNonterminalSpan = 2;
    int maxPhraseSpan = 5;
   
    MatchedHierarchicalPhrases matches =
      suffixArray.createHierarchicalPhrases(pattern, minNonterminalSpan, maxPhraseSpan);
   
    Assert.assertNotNull(matches);
    Assert.assertEquals(matches.getPattern(), pattern);
    Assert.assertEquals(matches.arity(), 0);
    Assert.assertEquals(matches.size(), 1);
  }
View Full Code Here

      Assert.assertEquals(pattern.size(), 2);
     
      int minNonterminalSpan = 2;
      int maxPhraseSpan = 5;

      MatchedHierarchicalPhrases matches =
        suffixArray.createHierarchicalPhrases(pattern, minNonterminalSpan, maxPhraseSpan);

      Assert.assertNotNull(matches);
      Assert.assertEquals(matches.getPattern(), pattern);
      Assert.assertEquals(matches.arity(), 1);
      Assert.assertEquals(matches.size(), 4);
    }
   
    {
      Pattern pattern = new Pattern(vocab, vocab.getID("it"), vocab.getID(SymbolTable.X_STRING), vocab.getID("and"));
      Assert.assertEquals(pattern.arity(), 1);
      Assert.assertEquals(pattern.size(), 3);

      int minNonterminalSpan = 2;
      int maxPhraseSpan = 5;

      MatchedHierarchicalPhrases matches =
        suffixArray.createHierarchicalPhrases(pattern, minNonterminalSpan, maxPhraseSpan);

      Assert.assertNotNull(matches);
      Assert.assertEquals(matches.getPattern(), pattern);
      Assert.assertEquals(matches.arity(), 1);
      Assert.assertEquals(matches.size(), 2);
    }
  }
View Full Code Here

    int[] M_a_alpha_startPositions = {25,30,27,30};
    int[] M_a_alpha_sentenceNumbers = {2,2};
    Pattern M_a_alpha_pattern = new Pattern(vocab, en, X, de, X);
    Assert.assertEquals(M_a_alpha_pattern.arity(),2);
   
    MatchedHierarchicalPhrases a =
      new HierarchicalPhrases(M_a_alpha_pattern, M_a_alpha_startPositions, M_a_alpha_sentenceNumbers);
   
    MatchedHierarchicalPhrases b =
      new HierarchicalPhrases(M_a_alpha_pattern, M_a_alpha_startPositions, M_a_alpha_sentenceNumbers);
   
    MatchedHierarchicalPhrases c =
      new HierarchicalPhrases(M_a_alpha_pattern, M_a_alpha_startPositions, M_a_alpha_sentenceNumbers);
   
   
    Assert.assertTrue(a.equals(a));
    Assert.assertTrue(b.equals(b));
    Assert.assertTrue(c.equals(c));
   
    Assert.assertFalse(a==b);
    Assert.assertFalse(a==c);
    Assert.assertFalse(b==a);
    Assert.assertFalse(b==c);
   
    Assert.assertTrue(a.equals(b));
    Assert.assertTrue(a.equals(c));
    Assert.assertTrue(b.equals(a));
    Assert.assertTrue(b.equals(c));
    Assert.assertTrue(c.equals(a));
    Assert.assertTrue(c.equals(b));
   
   
    int[] M_alpha_b_startPositions = {2,5,30,33,30,36,700,703,952,956};
    int[] M_alpha_b_sentenceNumbers = {1,2,2,57,94};
    Pattern M_alpha_b_pattern = new Pattern(vocab, X, de, X, en);
    Assert.assertEquals(M_alpha_b_pattern.arity(),2);
    MatchedHierarchicalPhrases d =
      new HierarchicalPhrases(M_alpha_b_pattern, M_alpha_b_startPositions, M_alpha_b_sentenceNumbers);
   
    Assert.assertFalse(a.equals(d));
    Assert.assertFalse(b.equals(d));
    Assert.assertFalse(c.equals(d));
   
    Assert.assertFalse(d.equals(a));
    Assert.assertFalse(d.equals(b));
    Assert.assertFalse(d.equals(c));
  }
View Full Code Here

   
    int[] M_a_alpha_startPositions = {25,30,27,30};
    int[] M_a_alpha_sentenceNumbers = {2,2};
    Pattern M_a_alpha_pattern = new Pattern(vocab, en, X, de, X);
    Assert.assertEquals(M_a_alpha_pattern.arity(),2);
    MatchedHierarchicalPhrases M_a_alpha =
      new HierarchicalPhrases(M_a_alpha_pattern, M_a_alpha_startPositions, M_a_alpha_sentenceNumbers);
    Assert.assertEquals(M_a_alpha.size(),2);
    Assert.assertEquals(M_a_alpha.arity(),2);
    Assert.assertEquals(M_a_alpha.getNumberOfTerminalSequences(),2);
    Assert.assertFalse(M_a_alpha.startsWithNonterminal());
    Assert.assertFalse(M_a_alpha.secondTokenIsTerminal());
    Assert.assertTrue(M_a_alpha.endsWithNonterminal());
    Assert.assertFalse(M_a_alpha.endsWithTwoTerminals());
    Assert.assertFalse(M_a_alpha.isEmpty());
    Assert.assertEquals(M_a_alpha.getFirstTerminalIndex(0), 25);
    Assert.assertEquals(M_a_alpha.getFirstTerminalIndex(1), 27);
    Assert.assertEquals(M_a_alpha.getLastTerminalIndex(0), 30+1);
    Assert.assertEquals(M_a_alpha.getLastTerminalIndex(1), 30+1);
   
   
    int[] M_alpha_b_startPositions = {2,5,30,33,30,36,700,703,952,956};
    int[] M_alpha_b_sentenceNumbers = {1,2,2,57,94};
    Pattern M_alpha_b_pattern = new Pattern(vocab, X, de, X, en);
    Assert.assertEquals(M_alpha_b_pattern.arity(),2);
    MatchedHierarchicalPhrases M_alpha_b =
      new HierarchicalPhrases(M_alpha_b_pattern, M_alpha_b_startPositions, M_alpha_b_sentenceNumbers);
    Assert.assertEquals(M_alpha_b.size(), 5);
    Assert.assertEquals(M_alpha_b.arity(),2);
    Assert.assertEquals(M_alpha_b.getNumberOfTerminalSequences(),2);
    Assert.assertTrue(M_alpha_b.startsWithNonterminal());
    Assert.assertTrue(M_alpha_b.secondTokenIsTerminal());
    Assert.assertFalse(M_alpha_b.endsWithNonterminal());
    Assert.assertFalse(M_alpha_b.endsWithTwoTerminals());
    Assert.assertFalse(M_alpha_b.isEmpty());
    Assert.assertEquals(M_alpha_b.getFirstTerminalIndex(0), 2);
    Assert.assertEquals(M_alpha_b.getFirstTerminalIndex(1), 30);
    Assert.assertEquals(M_alpha_b.getFirstTerminalIndex(2), 30);
    Assert.assertEquals(M_alpha_b.getFirstTerminalIndex(3), 700);
    Assert.assertEquals(M_alpha_b.getFirstTerminalIndex(4), 952);
    Assert.assertEquals(M_alpha_b.getLastTerminalIndex(0), 5+1);
    Assert.assertEquals(M_alpha_b.getLastTerminalIndex(1), 33+1);
    Assert.assertEquals(M_alpha_b.getLastTerminalIndex(2), 36+1);
    Assert.assertEquals(M_alpha_b.getLastTerminalIndex(3), 703+1);
    Assert.assertEquals(M_alpha_b.getLastTerminalIndex(4), 956+1);
   
    int minNonterminalSpan = 2;
    int maxPhraseSpan = 10;
   
    MatchedHierarchicalPhrases M_a_alpha_b =
      AbstractHierarchicalPhrases.queryIntersect(new Pattern(vocab, en, X, de, X, en), M_a_alpha, M_alpha_b, minNonterminalSpan, maxPhraseSpan, null);
 
    Assert.assertNotNull(M_a_alpha_b);
    Assert.assertEquals(M_a_alpha_b.size(), 3);
    Assert.assertEquals(M_a_alpha_b.arity(),2);
    Assert.assertEquals(M_a_alpha_b.getNumberOfTerminalSequences(),3);
    Assert.assertFalse(M_a_alpha_b.isEmpty());
    Assert.assertEquals(M_a_alpha_b.getFirstTerminalIndex(0), 25);
    Assert.assertEquals(M_a_alpha_b.getFirstTerminalIndex(1), 27);
    Assert.assertEquals(M_a_alpha_b.getFirstTerminalIndex(2), 27);
    Assert.assertEquals(M_a_alpha_b.getLastTerminalIndex(0), 33+1);
    Assert.assertEquals(M_a_alpha_b.getLastTerminalIndex(1), 33+1);
    Assert.assertEquals(M_a_alpha_b.getLastTerminalIndex(2), 36+1);
   
    Assert.assertFalse(M_a_alpha_b.startsWithNonterminal());
    Assert.assertFalse(M_a_alpha_b.secondTokenIsTerminal());
    Assert.assertFalse(M_a_alpha_b.endsWithNonterminal());
    Assert.assertFalse(M_a_alpha_b.endsWithTwoTerminals());
   
  }
View Full Code Here

TOP

Related Classes of joshua.corpus.MatchedHierarchicalPhrases

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.