Class org.apache.lucene.search.highlight.SynonymTokenizer

Examples of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner
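
These snippets come from Lucene's HighlighterTest, which drives each case through a small TestHighlightRunner harness: the anonymous subclass overrides run(), and helper.start() executes it. Identifiers such as FIELD_NAME, analyzer, searcher, hits and texts, the getHighlighter(...) factory, and the numHighlights counter (incremented by the test's formatter each time a term is highlighted) are members of the surrounding test class.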



  public void testGetBestSingleFragment() throws Exception {

    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
        numHighlights = 0;
        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);

          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          highlighter.setTextFragmenter(new SimpleFragmenter(40));
          String result = highlighter.getBestFragment(tokenStream, text);
          if (VERBOSE) System.out.println("\t" + result);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);

        numHighlights = 0;
        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          highlighter.getBestFragment(analyzer, FIELD_NAME, text);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);

        numHighlights = 0;
        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);

          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);

      }

    };

    helper.start();

  }
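
The three loops above exercise three overloads of the same operation: getBestFragment(TokenStream, String), getBestFragment(Analyzer, String, String), which tokenizes the text for you, and getBestFragments(Analyzer, String, String, int) for the top-n fragments. Outside the harness, the construction that getHighlighter(...) hides is roughly the following sketch (hedged, assuming Lucene 4.x; "contents", analyzer and text are placeholder names, not part of the test):

    Query query = new TermQuery(new Term("contents", "kennedy"));
    QueryScorer scorer = new QueryScorer(query, "contents");
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(40)); // 40-char fragments

    TokenStream tokenStream = analyzer.tokenStream("contents", text);
    String best = highlighter.getBestFragment(tokenStream, text);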

  public void testGetBestSingleFragmentWithWeights() throws Exception {

    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        WeightedSpanTerm[] wTerms = new WeightedSpanTerm[2];
        wTerms[0] = new WeightedSpanTerm(10f, "hello");

        List<PositionSpan> positionSpans = new ArrayList<PositionSpan>();
        positionSpans.add(new PositionSpan(0, 0));
        wTerms[0].addPositionSpans(positionSpans);

        wTerms[1] = new WeightedSpanTerm(1f, "kennedy");
        positionSpans = new ArrayList<PositionSpan>();
        positionSpans.add(new PositionSpan(14, 14));
        wTerms[1].addPositionSpans(positionSpans);

        Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);
        // i.e. new Highlighter(new QueryTermScorer(wTerms));
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]);
        highlighter.setTextFragmenter(new SimpleFragmenter(2));

        String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
        assertTrue("Failed to find best section using weighted terms. Found: [" + result + "]",
            "<B>Hello</B>".equals(result));

        // readjust weights
        wTerms[1].setWeight(50f);
        tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]);
        highlighter = getHighlighter(wTerms, HighlighterTest.this);
        highlighter.setTextFragmenter(new SimpleFragmenter(2));

        result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
        assertTrue("Failed to find best section using weighted terms. Found: " + result,
            "<B>kennedy</B>".equals(result));
      }

    };

    helper.start();

  }
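
Here the scorer works from hand-built WeightedSpanTerms rather than a query, which is why raising wTerms[1] from 1f to 50f flips the winning fragment from "<B>Hello</B>" to "<B>kennedy</B>". Per the inline comment, the factory call is shorthand for roughly this (hedged sketch):

    // build the highlighter straight from the weighted terms
    Highlighter highlighter = new Highlighter(new QueryTermScorer(wTerms));
    highlighter.setTextFragmenter(new SimpleFragmenter(2));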

  // tests a "complex" analyzer that produces multiple
  // overlapping tokens
  public void testOverlapAnalyzer() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        HashMap<String,String> synonyms = new HashMap<String,String>();
        synonyms.put("football", "soccer,footie");
        Analyzer analyzer = new SynonymAnalyzer(synonyms);

        String s = "football-soccer in the euro 2004 footie competition";

        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("bookid", "football")), Occur.SHOULD);
        query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
        query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);

        Highlighter highlighter = getHighlighter(query, null, HighlighterTest.this);

        // Get 3 best fragments and separate with a "..."
        TokenStream tokenStream = analyzer.tokenStream(null, s);

        String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
        String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
        assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult
            + " actual:" + result, expectedResult.equals(result));
      }

    };

    helper.start();

  }
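
SynonymAnalyzer is a test-local class (built on the SynonymTokenizer this page is named for), not part of the Lucene API. The property the test depends on is that each synonym is emitted as an extra token with a position increment of 0, stacked on the original token, so all three query terms can be matched and highlighted in one pass. A hedged sketch of a filter with that behavior (SimpleSynonymFilter and its lookup map are illustrative, not Lucene classes):

    import java.io.IOException;
    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.Map;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public final class SimpleSynonymFilter extends TokenFilter {
      private final Map<String, String[]> synonyms; // e.g. "football" -> {"soccer", "footie"}
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
      private final Deque<String> pending = new ArrayDeque<String>();
      private State savedState;

      public SimpleSynonymFilter(TokenStream input, Map<String, String[]> synonyms) {
        super(input);
        this.synonyms = synonyms;
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!pending.isEmpty()) {
          restoreState(savedState);                 // reuse the original token's offsets
          termAtt.setEmpty().append(pending.pop()); // swap in the synonym text
          posIncAtt.setPositionIncrement(0);        // stack it on the same position
          return true;
        }
        if (!input.incrementToken()) {
          return false;
        }
        String[] syns = synonyms.get(termAtt.toString());
        if (syns != null) {
          for (String syn : syns) {
            pending.push(syn);
          }
          savedState = captureState(); // remember the original token's attributes
        }
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        pending.clear();
        savedState = null;
      }
    }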

  public void testGetSimpleHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        numHighlights = 0;
        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
        // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          String result = highlighter.getBestFragment(tokenStream, text);
          if (VERBOSE) System.out.println("\t" + result);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);
      }
    };
    helper.start();
  }
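
Note that this variant never calls setTextFragmenter: Highlighter falls back to its default SimpleFragmenter (100-character fragments), which is fine here because the assertion counts highlighted terms, not fragment boundaries.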

  public void testGetTextFragments() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {

        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);

          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this); // i.e. new Highlighter(this, new QueryTermScorer(query));
          highlighter.setTextFragmenter(new SimpleFragmenter(20));
          String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);

          tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
              true, 10);

          assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length
              + " vs " + stringResults.length, fragmentResults.length == stringResults.length);
          for (int j = 0; j < stringResults.length; j++) {
            if (VERBOSE) System.out.println(fragmentResults[j]);
            assertTrue("Failed to find same text Fragments: " + fragmentResults[j] + " found",
                fragmentResults[j].toString().equals(stringResults[j]));

          }

        }
      }
    };
    helper.start();
  }
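
getBestTextFragments is the richer of the two calls: it returns TextFragment objects that carry a score, so callers can filter instead of taking the pre-assembled strings. A typical usage sketch (hedged; variable names are placeholders):

    TokenStream ts = analyzer.tokenStream(FIELD_NAME, text);
    TextFragment[] frags = highlighter.getBestTextFragments(ts, text, true, 10);
    for (TextFragment frag : frags) {
      if (frag != null && frag.getScore() > 0) {
        System.out.println(frag.toString()); // the highlighted text of this fragment
      }
    }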

  public void testMaxSizeHighlight() throws Exception {
    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    // we disable MockTokenizer checks because we will forcefully limit the
    // tokenstream and call end() before incrementToken() returns false.
    analyzer.setEnableChecks(false);
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        numHighlights = 0;
        doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]);
        Highlighter highlighter = getHighlighter(query, FIELD_NAME,
            HighlighterTest.this); // i.e. new Highlighter(this, new QueryTermScorer(query));
        highlighter.setMaxDocCharsToAnalyze(30);

        highlighter.getBestFragment(tokenStream, texts[0]);
        assertTrue("Setting MaxDocBytesToAnalyze should have prevented "
            + "us from finding matches for this record: " + numHighlights + " found",
            numHighlights == 0);
      }
    };

    helper.start();
  }
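
The only new call here is setMaxDocCharsToAnalyze(30): the highlighter stops consuming the token stream after roughly the first 30 characters, so the later occurrence of "meat" in texts[0] is never seen and the highlight count stays at zero.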

  public void testMaxSizeHighlightTruncates() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        String goodWord = "goodtoken";
        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
        // we disable MockTokenizer checks because we will forcefully limit the
        // tokenstream and call end() before incrementToken() returns false.
        final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
        analyzer.setEnableChecks(false);
        TermQuery query = new TermQuery(new Term("data", goodWord));

        String match;
        StringBuilder sb = new StringBuilder();
        sb.append(goodWord);
        for (int i = 0; i < 10000; i++) {
          sb.append(" ");
          // "stoppedtoken" is the only stopword in the set
          sb.append("stoppedtoken");
        }
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "data", fm);
        // i.e. new Highlighter(fm, new QueryTermScorer(query));
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(100);
        match = hg.getBestFragment(analyzer, "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
            .getMaxDocCharsToAnalyze());

        // add another matching token to the overall length, but place it well
        // beyond the range of text under consideration (after a large slug of
        // stopwords and whitespace)
        sb.append(" ");
        sb.append(goodWord);
        match = hg.getBestFragment(analyzer, "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
            .getMaxDocCharsToAnalyze());
      }
    };

    helper.start();

  }
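
NullFragmenter does no fragmenting at all and returns the text as a single fragment, so the returned match is bounded only by maxDocCharsToAnalyze. The second half of the test confirms the bound still holds after a second matching token is appended far past the cutoff.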
 
  public void testMaxSizeEndHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {
      @Override
      public void run() throws Exception {
        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
        TermQuery query = new TermQuery(new Term("text", "searchterm"));

        String text = "this is a text with searchterm in it";
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "text", fm);
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(36);
        String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
        assertTrue(
            "Matched text should contain remainder of text after highlighted query ",
            match.endsWith("in it"));
      }
    };
    helper.start();
  }
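
The stopword automaton i[nt] drops "in" and "it", so no tokens are ever emitted for the tail of the text; the assertion verifies that the highlighter still appends that un-tokenized remainder (within the 36-char analysis limit) to the fragment.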

  public void testUnRewrittenQuery() throws Exception {
    final TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        numHighlights = 0;
        // test to show how rewritten query can still be used
        searcher = newSearcher(reader);
        Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
       
        BooleanQuery query = new BooleanQuery();
        query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
        query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);

        if (VERBOSE) System.out.println("Searching with primitive query");
        // forget to set this and...
        // query=query.rewrite(reader);
        TopDocs hits = searcher.search(query, null, 1000);

        // create an instance of the highlighter with the tags used to surround
        // highlighted text, e.g.
        // QueryHighlightExtractor highlighter =
        //     new QueryHighlightExtractor(this, query, new StandardAnalyzer(TEST_VERSION));

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
              maxNumFragmentsRequired, "...");

          if (VERBOSE) System.out.println(highlightedText);
        }
        // We expect zero highlights because the query contains multi-term
        // (wildcard) clauses and has not been rewritten!
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 0);
      }
    };

    helper.start();
  }
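
The deliberately skipped line is the commented-out query.rewrite(reader). Rewriting expands the jf? and kenned* wildcards into the concrete terms they match, which is what a non-expanding scorer needs in order to extract highlightable terms. A hedged sketch of the missing step (reader is the test's IndexReader):

    // the step the test deliberately omits
    Query rewritten = query.rewrite(reader);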

  public void testNoFragments() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));

        for (String text : texts) {
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          String result = highlighter.getBestFragment(tokenStream, text);
          assertNull("The highlight result should be null for text with no query terms", result);
        }
      }
    };

    helper.start();
  }
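
When none of the query terms occur in the text, getBestFragment returns null rather than an empty string, which is why the test asserts assertNull; production callers need the same null check.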