Class org.apache.lucene.search.highlight.SynonymTokenizer

Examples of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner
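
These snippets come from Lucene's HighlighterTest, which drives each case through a small TestHighlightRunner harness: the anonymous subclass overrides run(), and helper.start() executes it. Identifiers such as FIELD_NAME, analyzer, searcher, hits and texts, the getHighlighter(...) factory, and the numHighlights counter (incremented by the test's formatter each time a term is highlighted) are members of the surrounding test class.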



  public void testGetBestSingleFragment() throws Exception {

    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
        numHighlights = 0;
        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);

          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          highlighter.setTextFragmenter(new SimpleFragmenter(40));
          String result = highlighter.getBestFragment(tokenStream, text);
          if (VERBOSE) System.out.println("\t" + result);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);

        numHighlights = 0;
        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          highlighter.getBestFragment(analyzer, FIELD_NAME, text);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);

        numHighlights = 0;
        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);

          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);

      }

    };

    helper.start();

  }
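
The three loops above exercise three overloads of the same operation: getBestFragment(TokenStream, String), getBestFragment(Analyzer, String, String), which tokenizes the text for you, and getBestFragments(Analyzer, String, String, int) for the top-n fragments. Outside the harness, the construction that getHighlighter(...) hides is roughly the following sketch (hedged, assuming Lucene 4.x; "contents", analyzer and text are placeholder names, not part of the test):

    Query query = new TermQuery(new Term("contents", "kennedy"));
    QueryScorer scorer = new QueryScorer(query, "contents");
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(40)); // 40-char fragments

    TokenStream tokenStream = analyzer.tokenStream("contents", text);
    String best = highlighter.getBestFragment(tokenStream, text);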

  public void testGetBestSingleFragmentWithWeights() throws Exception {

    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        WeightedSpanTerm[] wTerms = new WeightedSpanTerm[2];
        wTerms[0] = new WeightedSpanTerm(10f, "hello");

        List<PositionSpan> positionSpans = new ArrayList<PositionSpan>();
        positionSpans.add(new PositionSpan(0, 0));
        wTerms[0].addPositionSpans(positionSpans);

        wTerms[1] = new WeightedSpanTerm(1f, "kennedy");
        positionSpans = new ArrayList<PositionSpan>();
        positionSpans.add(new PositionSpan(14, 14));
        wTerms[1].addPositionSpans(positionSpans);

        Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);
        // i.e. new Highlighter(new QueryTermScorer(wTerms));
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]);
        highlighter.setTextFragmenter(new SimpleFragmenter(2));

        String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
        assertTrue("Failed to find best section using weighted terms. Found: [" + result + "]",
            "<B>Hello</B>".equals(result));

        // readjust weights
        wTerms[1].setWeight(50f);
        tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]);
        highlighter = getHighlighter(wTerms, HighlighterTest.this);
        highlighter.setTextFragmenter(new SimpleFragmenter(2));

        result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
        assertTrue("Failed to find best section using weighted terms. Found: " + result,
            "<B>kennedy</B>".equals(result));
      }

    };

    helper.start();

  }
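
Here the scorer works from hand-built WeightedSpanTerms rather than a query, which is why raising wTerms[1] from 1f to 50f flips the winning fragment from "<B>Hello</B>" to "<B>kennedy</B>". Per the inline comment, the factory call is shorthand for roughly this (hedged sketch):

    // build the highlighter straight from the weighted terms
    Highlighter highlighter = new Highlighter(new QueryTermScorer(wTerms));
    highlighter.setTextFragmenter(new SimpleFragmenter(2));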

  // tests a "complex" analyzer that produces multiple
  // overlapping tokens
  public void testOverlapAnalyzer() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        HashMap<String,String> synonyms = new HashMap<String,String>();
        synonyms.put("football", "soccer,footie");
        Analyzer analyzer = new SynonymAnalyzer(synonyms);

        String s = "football-soccer in the euro 2004 footie competition";

        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("bookid", "football")), Occur.SHOULD);
        query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
        query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);

        Highlighter highlighter = getHighlighter(query, null, HighlighterTest.this);

        // Get 3 best fragments and separate with a "..."
        TokenStream tokenStream = analyzer.tokenStream(null, s);

        String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
        String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
        assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult
            + " actual:" + result, expectedResult.equals(result));
      }

    };

    helper.start();

  }
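
SynonymAnalyzer is a test-local class (built on the SynonymTokenizer this page is named for), not part of the Lucene API. The property the test depends on is that each synonym is emitted as an extra token with a position increment of 0, stacked on the original token, so all three query terms can be matched and highlighted in one pass. A hedged sketch of a filter with that behavior (SimpleSynonymFilter and its lookup map are illustrative, not Lucene classes):

    import java.io.IOException;
    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.Map;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public final class SimpleSynonymFilter extends TokenFilter {
      private final Map<String, String[]> synonyms; // e.g. "football" -> {"soccer", "footie"}
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
      private final Deque<String> pending = new ArrayDeque<String>();
      private State savedState;

      public SimpleSynonymFilter(TokenStream input, Map<String, String[]> synonyms) {
        super(input);
        this.synonyms = synonyms;
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!pending.isEmpty()) {
          restoreState(savedState);                 // reuse the original token's offsets
          termAtt.setEmpty().append(pending.pop()); // swap in the synonym text
          posIncAtt.setPositionIncrement(0);        // stack it on the same position
          return true;
        }
        if (!input.incrementToken()) {
          return false;
        }
        String[] syns = synonyms.get(termAtt.toString());
        if (syns != null) {
          for (String syn : syns) {
            pending.push(syn);
          }
          savedState = captureState(); // remember the original token's attributes
        }
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        pending.clear();
        savedState = null;
      }
    }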

  public void testGetSimpleHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        numHighlights = 0;
        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
        // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          String result = highlighter.getBestFragment(tokenStream, text);
          if (VERBOSE) System.out.println("\t" + result);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 4);
      }
    };
    helper.start();
  }
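
Note that this variant never calls setTextFragmenter: Highlighter falls back to its default SimpleFragmenter (100-character fragments), which is fine here because the assertion counts highlighted terms, not fragment boundaries.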

  public void testGetTextFragments() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {

        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);

          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this); // i.e. new Highlighter(this, new QueryTermScorer(query));
          highlighter.setTextFragmenter(new SimpleFragmenter(20));
          String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);

          tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
              true, 10);

          assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length
              + " vs " + stringResults.length, fragmentResults.length == stringResults.length);
          for (int j = 0; j < stringResults.length; j++) {
            if (VERBOSE) System.out.println(fragmentResults[j]);
            assertTrue("Failed to find same text Fragments: " + fragmentResults[j] + " found",
                fragmentResults[j].toString().equals(stringResults[j]));

          }

        }
      }
    };
    helper.start();
  }
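
getBestTextFragments is the richer of the two calls: it returns TextFragment objects that carry a score, so callers can filter instead of taking the pre-assembled strings. A typical usage sketch (hedged; variable names are placeholders):

    TokenStream ts = analyzer.tokenStream(FIELD_NAME, text);
    TextFragment[] frags = highlighter.getBestTextFragments(ts, text, true, 10);
    for (TextFragment frag : frags) {
      if (frag != null && frag.getScore() > 0) {
        System.out.println(frag.toString()); // the highlighted text of this fragment
      }
    }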

  public void testMaxSizeHighlight() throws Exception {
    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    // we disable MockTokenizer checks because we will forcefully limit the
    // tokenstream and call end() before incrementToken() returns false.
    analyzer.setEnableChecks(false);
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        numHighlights = 0;
        doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]);
        Highlighter highlighter = getHighlighter(query, FIELD_NAME,
            HighlighterTest.this); // i.e. new Highlighter(this, new QueryTermScorer(query));
        highlighter.setMaxDocCharsToAnalyze(30);

        highlighter.getBestFragment(tokenStream, texts[0]);
        assertTrue("Setting MaxDocBytesToAnalyze should have prevented "
            + "us from finding matches for this record: " + numHighlights + " found",
            numHighlights == 0);
      }
    };

    helper.start();
  }
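
The only new call here is setMaxDocCharsToAnalyze(30): the highlighter stops consuming the token stream after roughly the first 30 characters, so the later occurrence of "meat" in texts[0] is never seen and the highlight count stays at zero.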

  public void testMaxSizeHighlightTruncates() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        String goodWord = "goodtoken";
        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
        // we disable MockTokenizer checks because we will forcefully limit the
        // tokenstream and call end() before incrementToken() returns false.
        final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
        analyzer.setEnableChecks(false);
        TermQuery query = new TermQuery(new Term("data", goodWord));

        String match;
        StringBuilder sb = new StringBuilder();
        sb.append(goodWord);
        for (int i = 0; i < 10000; i++) {
          sb.append(" ");
          // "stoppedtoken" is the only stopword in the set
          sb.append("stoppedtoken");
        }
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "data", fm);
        // i.e. new Highlighter(fm, new QueryTermScorer(query));
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(100);
        match = hg.getBestFragment(analyzer, "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
            .getMaxDocCharsToAnalyze());

        // add another matching token to the overall length, but place it well
        // beyond the range of text under consideration (after a large slug of
        // stopwords and whitespace)
        sb.append(" ");
        sb.append(goodWord);
        match = hg.getBestFragment(analyzer, "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
            .getMaxDocCharsToAnalyze());
      }
    };

    helper.start();

  }
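
NullFragmenter does no fragmenting at all and returns the text as a single fragment, so the returned match is bounded only by maxDocCharsToAnalyze. The second half of the test confirms the bound still holds after a second matching token is appended far past the cutoff.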
 
  public void testMaxSizeEndHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {
      @Override
      public void run() throws Exception {
        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
        TermQuery query = new TermQuery(new Term("text", "searchterm"));

        String text = "this is a text with searchterm in it";
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "text", fm);
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(36);
        String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
        assertTrue(
            "Matched text should contain remainder of text after highlighted query ",
            match.endsWith("in it"));
      }
    };
    helper.start();
  }
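
The stopword automaton i[nt] drops "in" and "it", so no tokens are ever emitted for the tail of the text; the assertion verifies that the highlighter still appends that un-tokenized remainder (within the 36-char analysis limit) to the fragment.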

  public void testUnRewrittenQuery() throws Exception {
    final TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        numHighlights = 0;
        // test to show how rewritten query can still be used
        searcher = newSearcher(reader);
        Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
       
        BooleanQuery query = new BooleanQuery();
        query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
        query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);

        if (VERBOSE) System.out.println("Searching with primitive query");
        // forget to set this and...
        // query=query.rewrite(reader);
        TopDocs hits = searcher.search(query, null, 1000);

        // create an instance of the highlighter with the tags used to surround
        // highlighted text, e.g.
        // QueryHighlightExtractor highlighter =
        //     new QueryHighlightExtractor(this, query, new StandardAnalyzer(TEST_VERSION));

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
              maxNumFragmentsRequired, "...");

          if (VERBOSE) System.out.println(highlightedText);
        }
        // We expect zero highlights because the query contains multi-term
        // (wildcard) clauses and has not been rewritten!
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
            numHighlights == 0);
      }
    };

    helper.start();
  }
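
The deliberately skipped line is the commented-out query.rewrite(reader). Rewriting expands the jf? and kenned* wildcards into the concrete terms they match, which is what a non-expanding scorer needs in order to extract highlightable terms. A hedged sketch of the missing step (reader is the test's IndexReader):

    // the step the test deliberately omits
    Query rewritten = query.rewrite(reader);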

  public void testNoFragments() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {

      @Override
      public void run() throws Exception {
        doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));

        for (String text : texts) {
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
          Highlighter highlighter = getHighlighter(query, FIELD_NAME,
              HighlighterTest.this);
          String result = highlighter.getBestFragment(tokenStream, text);
          assertNull("The highlight result should be null for text with no query terms", result);
        }
      }
    };

    helper.start();
  }
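
When none of the query terms occur in the text, getBestFragment returns null rather than an empty string, which is why the test asserts assertNull; production callers need the same null check.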