Package org.apache.lucene.analysis.standard

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer


 
  public void testHighlightingWithDefaultField() throws Exception {

    String s1 = "I call our world Flatland, not because we call it so,";

    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));

    // Verify that a query against the default field results in text being
    // highlighted regardless of the field name.
    Query q = parser.parse("\"world Flatland\"~3");
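
A self-contained way to see what StandardAnalyzer feeds such queries is to consume its TokenStream directly. A minimal sketch, assuming the Lucene 3.x attribute API (TermAttribute and its term() method); Version.LUCENE_30 stands in for the tests' TEST_VERSION_CURRENT, and the class name is illustrative:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class StandardAnalyzerDump {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    TokenStream ts = analyzer.tokenStream("f",
        new StringReader("I call our world Flatland, not because we call it so,"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // prints lowercased terms; default English stop words ("it", "not", ...) are dropped
      System.out.println(term.term());
    }
    ts.end();
    ts.close();
  }
}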


  /**
   * This method is intended for use with <tt>testHighlightingWithDefaultField()</tt>.
   * @throws InvalidTokenOffsetsException
   */
  private static String highlightField(Query query, String fieldName, String text)
      throws IOException, InvalidTokenOffsetsException {
    TokenStream tokenStream = new StandardAnalyzer(TEST_VERSION_CURRENT).tokenStream(fieldName, new StringReader(text));
    // Assuming "<B>", "</B>" used to highlight
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
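    // Plausible completion (a sketch; the excerpt stops here): return the
    // best-scoring fragment, falling back to the raw text when nothing matches.
    String fragment = highlighter.getBestFragment(tokenStream, text);
    return fragment == null ? text : fragment;
  }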

          sb.append(" ");
          // only one stopword
          sb.append(stopWords.iterator().next());
        }
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "data",
            new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords).tokenStream(
                "data", new StringReader(sb.toString())), fm);
        // was: new Highlighter(fm, new QueryTermScorer(query));
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(100);
        match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
            .getMaxDocCharsToAnalyze());

        // Add another tokenized word to the overall length, but place it well
        // beyond the length of text under consideration (after a large slug
        // of stop words + whitespace).
        sb.append(" ");
        sb.append(goodWord);
        match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
            .getMaxDocCharsToAnalyze());
      }
    };

        stopWords.add("it");
        TermQuery query = new TermQuery(new Term("text", "searchterm"));

        String text = "this is a text with searchterm in it";
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "text", new StandardAnalyzer(TEST_VERSION_CURRENT,
            stopWords).tokenStream("text", new StringReader(text)), fm);
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(36);
        String match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "text", text);
        assertTrue(
            "Matched text should contain remainder of text after highlighted query ",
            match.endsWith("in it"));
      }
    };
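
Both tests above rely on the StandardAnalyzer(Version, Set) constructor, which replaces the default English stop set with the caller's own. A minimal sketch of the effect (Lucene 3.x API assumed; class name illustrative):

import java.io.StringReader;
import java.util.Collections;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class CustomStopWords {
  public static void main(String[] args) throws Exception {
    // Only "it" is a stop word here; every other token survives.
    StandardAnalyzer analyzer =
        new StandardAnalyzer(Version.LUCENE_30, Collections.singleton("it"));
    TokenStream ts = analyzer.tokenStream("text",
        new StringReader("this is a text with searchterm in it"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.print(term.term() + " "); // this is a text with searchterm in
    }
    System.out.println();
    ts.close();
  }
}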

      public void run() throws Exception {
        numHighlights = 0;
        // test to show how rewritten query can still be used
        if (searcher != null) searcher.close();
        searcher = new IndexSearcher(ramDir, true);
        Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);

        QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
        Query query = parser.parse("JF? or Kenned*");
        if (VERBOSE) System.out.println("Searching with primitive query");
        // deliberately "forget" to rewrite the query here:
        // query = query.rewrite(reader);
        TopDocs hits = searcher.search(query, null, 1000);

        // create an instance of the highlighter with the tags used to
        // surround highlighted text
        // QueryHighlightExtractor highlighter = new QueryHighlightExtractor(
        //     this, query, new StandardAnalyzer(TEST_VERSION));

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
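              maxNumFragmentsRequired, "..."); // continuation assumed; the excerpt ends mid-call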

  public void testMultiSearcher() throws Exception {
    // setup index 1
    Directory ramDir1 = newDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer1.addDocument(d);
    writer1.optimize();
    writer1.close();
    IndexReader reader1 = IndexReader.open(ramDir1, true);

    // setup index 2
    Directory ramDir2 = newDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer2.addDocument(d);
    writer2.optimize();
    writer2.close();
    IndexReader reader2 = IndexReader.open(ramDir2, true);

    IndexSearcher searchers[] = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1, true);
    searchers[1] = new IndexSearcher(ramDir2, true);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
    parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.parse("multi*");
    if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
    // at this point the multisearcher calls combine(query[])
    hits = multiSearcher.search(query, null, 1000);
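    // The wildcard query still has to be expanded into concrete terms before
    // highlighting; a sketch, assuming the Lucene 3.x Query.combine(Query[])
    // API that the comment above alludes to:
    Query[] expanded = new Query[2];
    expanded[0] = query.rewrite(reader1);
    expanded[1] = query.rewrite(reader2);
    query = query.combine(expanded);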

  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    ramDir = newDirectory();
    IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
    for (int i = 0; i < texts.length; i++) {
      addDoc(writer, texts[i]);
    }
    Document doc = new Document();
    NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
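
The addDoc helper called in the loop is not shown on this page; a minimal version consistent with the call sites (a sketch, with the Field construction borrowed from testMultiSearcher above) might be:

  private void addDoc(IndexWriter writer, String text) throws IOException {
    Document d = new Document();
    Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer.addDocument(d);
  }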

  /**
   * Wraps {@link StandardAnalyzer}.
   */
  public ShingleAnalyzerWrapper(Version matchVersion) {
    super();
    this.defaultAnalyzer = new StandardAnalyzer(matchVersion);
    setOverridesTokenStreamMethod(ShingleAnalyzerWrapper.class);
  }
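
A usage sketch for the wrapper (Lucene 3.x contrib shingle API assumed; class name illustrative). With the default settings the stream emits both the original unigrams and two-word shingles:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class ShingleDemo {
  public static void main(String[] args) throws Exception {
    ShingleAnalyzerWrapper shingles = new ShingleAnalyzerWrapper(Version.LUCENE_30);
    TokenStream ts = shingles.tokenStream("f",
        new StringReader("please divide sentences properly"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // "please", "please divide", "divide", "divide sentences", ...
      System.out.println(term.term());
    }
    ts.close();
  }
}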

    // The numbers go away because SimpleAnalyzer ignores them
    assertQueryEquals("3", null, "");
    assertQueryEquals("term 1.0 1 2", null, "term");
    assertQueryEquals("term term1 term2", null, "term term term");

    Analyzer a = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
    assertQueryEquals("3", a, "3");
    assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
    assertQueryEquals("term term1 term2", a, "term term1 term2");
  }
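
The contrast is easy to reproduce outside the test harness; a minimal sketch (Lucene 3.x packages assumed):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;

public class NumberTokens {
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser(Version.LUCENE_30, "field",
        new StandardAnalyzer(Version.LUCENE_30));
    // StandardAnalyzer keeps numeric tokens, so all four terms survive,
    // printing something like: field:term field:1.0 field:1 field:2
    System.out.println(qp.parse("term 1.0 1 2"));
  }
}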

    assertQueryEqualsDOA("term +term +term", null, "+term +term +term");
    assertQueryEqualsDOA("-term term term", null, "-term +term +term");
  }

  public void testBoost() throws Exception {
    StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(Version.LUCENE_CURRENT, Collections.singleton("on"));
    StandardQueryParser qp = new StandardQueryParser();
    qp.setAnalyzer(oneStopAnalyzer);

    Query q = qp.parse("on^1.0", "field");
    assertNotNull(q);
    q = qp.parse("\"hello\"^2.0", "field");
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = qp.parse("hello^2.0", "field");
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = qp.parse("\"on\"^1.0", "field");
    assertNotNull(q);

    StandardQueryParser qp2 = new StandardQueryParser();
    qp2.setAnalyzer(new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));

    q = qp2.parse("the^3", "field");
    // "the" is a stop word so the result is an empty query:
    assertNotNull(q);
    assertEquals("", q.toString());
