Package org.apache.nutch.searcher.Summary

Examples of org.apache.nutch.searcher.Summary.Ellipsis


        tokenCount += tokenFraction;
      }
    }
   
    if (tokenCount > 0 && lastExcerptPos < tokens.length)
      s.add(new Ellipsis());
    return s;
  }
View Full Code Here


          } else {
            summary.add(new Fragment(parts[j]));
          }
          highlight = !highlight;
        }
        summary.add(new Ellipsis());
      }
    } catch (Exception e) {
      // Nothing to do...
    }
    return summary;
View Full Code Here

          } else {
            summary.add(new Fragment(parts[j]));
          }
          highlight = !highlight;
        }
        summary.add(new Ellipsis());
      }
     
      /* TODO MC  BUG resolved 0000029 - if query terms do not occur on text, an empty summary is returned. Now it sends the first tokens. */
      if (result==null || result.length==0) {
        tokens = analyzer.tokenStream("content", new StringReader(text));
             
        Token firstToken=null, lastToken=null;
        Token token=null;
        int maxLen=100; // the same as defined in SimpleFragmenter but it is private
       
        /*
        ArrayList<Token> titleTokens=new ArrayList<Token>();
        ArrayList<Token> textTokens=new ArrayList<Token>();
        boolean titleMatched=false;
        boolean hasMatched=false; // exit match after match title the first time            
       
        // remove title from text. compares pairs of text
        while ((titleMatched || !hasMatched) && (token=tokens.next())!=null) {
         
          if (token.type().equals("<WORD>")) {
         
            if (titleTokens.size()==0) {
              titleTokens.add(token);
            }
            else if (textTokens.size()<titleTokens.size()) {
              textTokens.add(token);
            }
         
            if (textTokens.size()==titleTokens.size()) {
              // compare
              titleMatched=true;
              for (int i=0;i<textTokens.size() && titleMatched;i++) {
                if (!textTokens.get(i).termText().equals(titleTokens.get(i).termText())) {
                  titleMatched=false;   
                }               
              }
              if (titleMatched) { // try to match a larger pattern
                titleTokens.add(textTokens.get(0));
                textTokens.remove(0);
                hasMatched=true;
              }
              else { // remove rest of title from text
                if (hasMatched) {
                  firstToken=textTokens.get(titleTokens.size()-2);                                 
                }
                else { // add one more token to title
                  titleTokens.add(textTokens.get(0));
                    textTokens.remove(0);
                }
              }
            }
          }       
        }
       
        if (textTokens.size()==0) {
          return summary;
        }
                             
        for (int i=0;i<textTokens.size() && textTokens.get(i).endOffset()-firstToken.startOffset()<maxLen;i++) {
          lastToken=textTokens.get(i);
        }
        */
                     
        // read tokens until maxLen
        while ((token=tokens.next())!=null) {       
          if (token.type().equals("<WORD>")) {
            if (firstToken==null) {
              firstToken=token;
            }
            else if (token.endOffset()-firstToken.startOffset()<maxLen) {         
              lastToken=token;                         
            }                   
            else {
              break;
            }
          }
        }       
        if (lastToken==null) {
          lastToken=firstToken;
        }
       
        summary.add(new Fragment(text.substring(firstToken.startOffset(), lastToken.endOffset())));
        summary.add(new Ellipsis());
      }
      /* TODO MC */
     
    } catch (Exception e) {
      // Nothing to do...
View Full Code Here

    assertEquals("fragment text", fragment.toString());
    assertFalse(fragment.isEllipsis());
    assertFalse(fragment.isHighlight());
    assertTrue(fragment.equals(new Fragment("fragment text")));
    assertFalse(fragment.equals(new Fragment("some text")));
    assertFalse(fragment.equals(new Ellipsis()));
    assertFalse(fragment.equals(new Highlight("fragment text")));
  }
View Full Code Here

    assertFalse(fragment.equals(new Highlight("fragment text")));
  }

  /** Test of <code>Ellipsis</code> inner class */
  public void testEllipsis() {
    Fragment fragment = new Ellipsis();
    assertEquals(" ... ", fragment.getText());
    assertEquals(" ... ", fragment.toString());
    assertTrue(fragment.isEllipsis());
    assertFalse(fragment.isHighlight());
    assertFalse(fragment.equals(new Fragment("fragment text")));
    assertTrue(fragment.equals(new Ellipsis()));
    assertFalse(fragment.equals(new Highlight("fragment text")));
  }
View Full Code Here

    assertEquals("highlight text", fragment.getText());
    assertEquals("highlight text", fragment.toString());
    assertFalse(fragment.isEllipsis());
    assertTrue(fragment.isHighlight());
    assertFalse(fragment.equals(new Fragment("fragment text")));
    assertFalse(fragment.equals(new Ellipsis()));
    assertFalse(fragment.equals(new Highlight("fragment text")));
    assertTrue(fragment.equals(new Highlight("highlight text")));
  }
View Full Code Here

  public void testToString() {
    Summary summary = new Summary();
    assertEquals("", summary.toString());
    summary.add(new Fragment("fragment1"));
    assertEquals("fragment1", summary.toString());
    summary.add(new Ellipsis());
    assertEquals("fragment1 ... ", summary.toString());
    summary.add(new Highlight("highlight"));
    assertEquals("fragment1 ... highlight", summary.toString());
    summary.add(new Fragment("fragment2"));
    assertEquals("fragment1 ... highlightfragment2", summary.toString());   
View Full Code Here

  /** Test of <code>toStrings</code>. */
  public void testToStrings() {
    Summary[] summaries = { new Summary(), new Summary() };
    summaries[0].add(new Fragment("fragment1.1"));
    summaries[0].add(new Ellipsis());
    summaries[0].add(new Highlight("highlight1"));
    summaries[0].add(new Fragment("fragment1.2"));
    summaries[1].add(new Fragment("fragment2.1"));
    summaries[1].add(new Ellipsis());
    summaries[1].add(new Highlight("highlight2"));
    summaries[1].add(new Fragment("fragment2.2"));
    String[] strings = Summary.toStrings(summaries);
    assertEquals(2, strings.length);
    assertEquals("fragment1.1 ... highlight1fragment1.2", strings[0]);
View Full Code Here

    assertTrue(summary1.equals(summary2));
    summary1.add(new Fragment("text fragment"));
    assertFalse(summary1.equals(summary2));
    summary2.add(new Fragment("text fragment"));
    assertTrue(summary1.equals(summary2));
    summary1.add(new Ellipsis());
    assertFalse(summary1.equals(summary2));
    summary2.add(new Ellipsis());
    assertTrue(summary1.equals(summary2));
    summary1.add(new Highlight("highlight"));
    assertFalse(summary1.equals(summary2));
    summary2.add(new Highlight("highlight"));
    assertTrue(summary1.equals(summary2));
View Full Code Here

 
  /** Test of <code>writable</code> implementation. */
  public void testWritable() throws Exception {
    Summary summary = new Summary();
    summary.add(new Fragment("fragment1.1"));
    summary.add(new Ellipsis());
    summary.add(new Highlight("highlight1"));
    summary.add(new Fragment("fragment1.2"));
    WritableTestUtils.testWritable(summary);
  }
View Full Code Here

TOP

Related Classes of org.apache.nutch.searcher.Summary.Ellipsis

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.