Examples of org.apache.lucene.analysis.MockAnalyzer

org.apache.lucene.analysis.MockAnalyzer
Analyzer for testing
This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers for unit tests. If you are testing a custom component such as a queryparser or analyzer-wrapper that consumes analysis streams, its a great idea to test it with this analyzer instead. MockAnalyzer has the following behavior:
- By default, the assertions in {@link MockTokenizer} are turned on for extrachecks that the consumer is consuming properly. These checks can be disabled with {@link #setEnableChecks(boolean)}.
- Payload data is randomly injected into the stream for more thorough testing of payloads.
@see MockTokenizer

public class TestLongPostings extends LuceneTestCase {


  // Produces a realistic unicode random string that
  // survives MockAnalyzer unchanged:
  private String getRandomTerm(String other) throws IOException {
    Analyzer a = new MockAnalyzer(random);
    while(true) {
      String s = _TestUtil.randomRealisticUnicodeString(random);
      if (other != null && s.equals(other)) {
        continue;
      }
      final TokenStream ts = a.tokenStream("foo", new StringReader(s));
      final TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
      int count = 0;
      ts.reset();
      while(ts.incrementToken()) {
        if (count == 0 && !termAtt.term().equals(s)) {

View Full Code Here

      }
    }


    final IndexReader r;
    if (true) { 
      final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        .setMergePolicy(newLogMergePolicy());
      iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
      iwc.setMaxBufferedDocs(-1);
      final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);

View Full Code Here

    mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
    conf.setMergePolicy(mergePolicy);


    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMaxBufferedDocs(10).
            setMergePolicy(mergePolicy)
    );


    writer.setInfoStream(VERBOSE ? System.out : null);


    int i;
    for(i=0;i<35;i++) {
      addDoc(writer, i);
    }
    mergePolicy.setUseCompoundFile(false);
    for(;i<45;i++) {
      addDoc(writer, i);
    }
    writer.close();


    // Delete one doc so we get a .del file:
    IndexReader reader = IndexReader.open(dir, false);
    Term searchTerm = new Term("id", "7");
    int delCount = reader.deleteDocuments(searchTerm);
    assertEquals("didn't delete the right number of documents", 1, delCount);


    // Set one norm so we get a .s0 file:
    reader.setNorm(21, "content", (float) 1.5);
    reader.close();


    // Now, artificially create an extra .del file & extra
    // .s0 file:
    String[] files = dir.listAll();


    /*
    for(int j=0;j<files.length;j++) {
      System.out.println(j + ": " + files[j]);
    }
    */


    // The numbering of fields can vary depending on which
    // JRE is in use.  On some JREs we see content bound to
    // field 0; on others, field 1.  So, here we have to
    // figure out which field number corresponds to
    // "content", and then set our expected file names below
    // accordingly:
    CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
    FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
    int contentFieldIndex = -1;
    for(i=0;i<fieldInfos.size();i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.name.equals("content")) {
        contentFieldIndex = i;
        break;
      }
    }
    cfsReader.close();
    assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);


    String normSuffix = "s" + contentFieldIndex;


    // Create a bogus separate norms file for a
    // segment/field that actually has a separate norms file
    // already:
    copyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);


    // Create a bogus separate norms file for a
    // segment/field that actually has a separate norms file
    // already, using the "not compound file" extension:
    copyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);


    // Create a bogus separate norms file for a
    // segment/field that does not have a separate norms
    // file already:
    copyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);


    // Create a bogus separate norms file for a
    // segment/field that does not have a separate norms
    // file already using the "not compound file" extension:
    copyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);


    // Create a bogus separate del file for a
    // segment that already has a separate del file: 
    copyFile(dir, "_0_1.del", "_0_2.del");


    // Create a bogus separate del file for a
    // segment that does not yet have a separate del file:
    copyFile(dir, "_0_1.del", "_1_1.del");


    // Create a bogus separate del file for a
    // non-existent segment:
    copyFile(dir, "_0_1.del", "_188_1.del");


    // Create a bogus segment file:
    copyFile(dir, "_0.cfs", "_188.cfs");


    // Create a bogus fnm file when the CFS already exists:
    copyFile(dir, "_0.cfs", "_0.fnm");
    
    // Create a deletable file:
    copyFile(dir, "_0.cfs", "deletable");


    // Create some old segments file:
    copyFile(dir, "segments_2", "segments");
    copyFile(dir, "segments_2", "segments_1");


    // Create a bogus cfs file shadowing a non-cfs segment:
    assertTrue(dir.fileExists("_3.fdt"));
    assertTrue(!dir.fileExists("_3.cfs"));
    copyFile(dir, "_1.cfs", "_3.cfs");
    
    String[] filesPre = dir.listAll();


    // Open & close a writer: it should delete the above 4
    // files and nothing more:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    writer.close();


    String[] files2 = dir.listAll();
    dir.close();

View Full Code Here


  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));


    //Add series of docs with filterable fields : url, text and dates  flags
    addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
    addDoc(writer, "http://lucene.apache.org", "New release pending", "20040102");
    addDoc(writer, "http://lucene.apache.org", "Lucene 1.9 out now", "20050101");

View Full Code Here


  @Override
  public void setUp() throws Exception {
    super.setUp();


    analyzer = new MockAnalyzer(random());
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));


    //Add series of docs with misspelt names
    addDoc(writer, "jonathon smythe", "1");
    addDoc(writer, "jonathan smith", "2");
    addDoc(writer, "johnathon smyth", "3");

View Full Code Here

    Document doc = searcher.doc(sd[0].doc);
    assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
  }


  public void testFuzzyLikeThisQueryEquals() {
    Analyzer analyzer = new MockAnalyzer(random());
    FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
    fltq1.addTerms("javi", "subject", 0.5f, 2);
    FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
    fltq2.addTerms("javi", "subject", 0.5f, 2);
    assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,

View Full Code Here

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
    List<Document> documents = new ArrayList<Document>();
    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);

View Full Code Here

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    doc.add(newField("group", "foo", StringField.TYPE_NOT_STORED));
    w.addDocument(doc);


    IndexSearcher indexSearcher = newSearcher(w.getReader());

View Full Code Here

    
    int bits = Integer.parseInt(reader.readLine());
    int terms = (int) Math.pow(2, bits);
    
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()));


    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);

View Full Code Here

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
                               random(),
                               dir,
                               newIndexWriterConfig(TEST_VERSION_CURRENT,
                                                    new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "random text", Field.Store.YES));

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.analysis.MockAnalyzer

org.apache.lucene.analysis.core.TestDuelingAnalyzers

org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapperTest

org.apache.lucene.benchmark.byTask.TestPerfTasksLogic

org.apache.lucene.codecs.idversion.TestIDVersionPostingsFormat

org.apache.lucene.codecs.lucene40.TestReuseDocsEnum

org.apache.lucene.codecs.lucene41.TestBlockPostingsFormat2

org.apache.lucene.codecs.perfield.TestPerFieldPostingsFormat2

org.apache.lucene.facet.search.CategoryListIteratorTest

org.apache.lucene.facet.search.TestDemoFacets

org.apache.lucene.facet.search.TestFacetsCollector

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.