Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.MockTokenizer
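MockTokenizer is the test framework's drop-in for simple tokenizers such as WhitespaceTokenizer: it splits input with a fixed automaton (MockTokenizer.WHITESPACE and MockTokenizer.SIMPLE appear below), optionally lowercases tokens, and checks that consumers follow the TokenStream workflow (reset, incrementToken, end, close); setEnableChecks(false) turns those checks off. A minimal sketch of basic usage, assuming a test class that extends BaseTokenStreamTestCase (the method name is hypothetical):

  public void testMockTokenizerSketch() throws IOException {
    // WHITESPACE splits on whitespace; the boolean argument controls lowercasing.
    Tokenizer tokenizer = new MockTokenizer(new StringReader("Now is The Time"),
                                            MockTokenizer.WHITESPACE, false);
    assertTokenStreamContents(tokenizer, new String[] { "Now", "is", "The", "Time" });
  }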


  // With ignoreCase=true the stop set matches case-insensitively, so "is", "The" and "Time" are all removed and only "Now" survives.
  public void testIgnoreCase() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    Set<Object> stopWords = new HashSet<Object>(Arrays.asList( "is", "the", "Time" ));
    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
    assertTokenStreamContents(stream, new String[] { "Now" });
  }


  // Without the ignoreCase flag matching is case-sensitive: "The" survives because the set contains only lowercase "the".
  public void testStopFilt() throws IOException {
    StringReader reader = new StringReader("Now is The Time");
    String[] stopWords = new String[] { "is", "the", "Time" };
    Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
    assertTokenStreamContents(stream, new String[] { "Now", "The" });
  }
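
A related sketch: StopFilter.makeStopSet also takes an ignoreCase flag, giving the same case-insensitive behaviour as testIgnoreCase above without the four-argument constructor. This assumes the makeStopSet(Version, String[], boolean) overload, and the method name is hypothetical:

  public void testStopFiltIgnoreCaseSketch() throws IOException {
    Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT,
        new String[] { "is", "the", "Time" }, true); // true => case-insensitive set
    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT,
        new MockTokenizer(new StringReader("Now is The Time"), MockTokenizer.WHITESPACE, false),
        stopSet);
    assertTokenStreamContents(stream, new String[] { "Now" });
  }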

    // Excerpt from a position-increment test: the List<String> a and the StringBuilder sb are built by elided setup code.
    String stopWords[] = a.toArray(new String[0]);
    for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
    Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
    // with increments
    StringReader reader = new StringReader(sb.toString());
    StopFilter stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
    doTestStopPositons(stpf, true);
    // without increments (Version.LUCENE_24 predates 2.9, so position increments are disabled by default)
    reader = new StringReader(sb.toString());
    stpf = new StopFilter(Version.LUCENE_24, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
    doTestStopPositons(stpf, false);
    // with increments, concatenating two stop filters
    ArrayList<String> a0 = new ArrayList<String>();
    ArrayList<String> a1 = new ArrayList<String>();
    for (int i=0; i<a.size(); i++) {
      if (i%2==0) {
        a0.add(a.get(i));
      } else {
        a1.add(a.get(i));
      }
    }
    String stopWords0[] = a0.toArray(new String[0]);
    for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
    String stopWords1[] = a1.toArray(new String[0]);
    for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
    Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
    Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
    reader = new StringReader(sb.toString());
    StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
    stpf0.setEnablePositionIncrements(true);
    StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
    doTestStopPositons(stpf01, true);
  }
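
To make the increment behaviour concrete, here is a hedged sketch (hypothetical method name; assumes the assertTokenStreamContents overload that also checks position increments): with increments enabled, a removed stop word leaves a positional gap, so the token after it reports an increment of 2.

  public void testStopPositionIncrementSketch() throws IOException {
    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT,
        new MockTokenizer(new StringReader("one is two"), MockTokenizer.WHITESPACE, false),
        StopFilter.makeStopSet(TEST_VERSION_CURRENT, new String[] { "is" }));
    // "is" is removed but its position is preserved: "two" skips one position.
    assertTokenStreamContents(stream, new String[] { "one", "two" }, new int[] { 1, 2 });
  }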

    w.addDocument(doc);

    // This analyzer ends in CrashingFilter, a test filter (defined elsewhere in the test class) that throws while a document is being analyzed; a sketch of such a filter follows the snippet.
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        return new CrashingFilter(fieldName, tokenizer);
      }
    };

    Document crashDoc = new Document();
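
CrashingFilter itself is defined elsewhere in the test class and is not shown on this page; the following is a hypothetical sketch of such a filter (name, failure point and message invented for illustration), mirroring the (fieldName, TokenStream) constructor used above:

  static class CrashingFilterSketch extends TokenFilter {
    private final String fieldName;
    private int count;

    CrashingFilterSketch(String fieldName, TokenStream in) {
      super(in);
      this.fieldName = fieldName;
    }

    @Override
    public boolean incrementToken() throws IOException {
      // Simulate a broken analyzer: throw once a few tokens of the "crash" field have been consumed.
      if ("crash".equals(fieldName) && count++ == 4) {
        throw new IOException("crashed on purpose");
      }
      return input.incrementToken();
    }
  }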

    // Excerpt: the IndexWriterConfig below installs an analyzer that fails partway through the token stream (see the anonymous TokenFilter).
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {

      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        return new TokenFilter(tokenizer) {
          private int count = 0;

          @Override
          public boolean incrementToken() throws IOException {
            // (elided in the excerpt) a sketch of the body: fail after a few tokens to simulate a broken analyzer
            if (count++ == 5) {
              throw new IOException("simulated failure");
            }
            return input.incrementToken();
          }
        };
      }
    });

  // The crashing analyzer lets this test verify that IndexWriter's DocumentsWriter recovers when analysis throws mid-document.
  public void testDocumentsWriterExceptions() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        return new CrashingFilter(fieldName, tokenizer);
      }
    };

    for(int i=0;i<2;i++) {
      // ... (excerpt truncated)

  // The same crashing analyzer, exercised concurrently from several indexing threads.
  public void testDocumentsWriterExceptionThreads() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        return new CrashingFilter(fieldName, tokenizer);
      }
    };

    final int NUM_THREAD = 3;
    // ... (excerpt truncated)

      // Excerpt from a loop building a batch of documents: the document with docCount == 4 gets a field whose token stream crashes on the 4th token.
      doc.add(newField("id", docCount+"", Field.Index.NOT_ANALYZED));
      doc.add(newField("content", "silly content " + docCount, Field.Index.ANALYZED));
      if (docCount == 4) {
        Field f = newField("crash", "", Field.Index.ANALYZED);
        doc.add(f);
        MockTokenizer tokenizer = new MockTokenizer(new StringReader("crash me on the 4th token"), MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        f.setTokenStream(new CrashingFilter("crash", tokenizer));
      }
    }
    try {
      w.addDocuments(docs);
      // (elided in the excerpt) a sketch of the expected continuation: the crash should abort the entire addDocuments() batch.
      fail("did not hit expected exception");
    } catch (IOException ioe) {
      // expected
    }

      // Variant of the previous excerpt: the crashing document's position in the batch is parameterized via crashAt.
      doc.add(newField("id", docCount+"", Field.Index.NOT_ANALYZED));
      doc.add(newField("content", "silly content " + docCount, Field.Index.ANALYZED));
      if (docCount == crashAt) {
        Field f = newField("crash", "", Field.Index.ANALYZED);
        doc.add(f);
        MockTokenizer tokenizer = new MockTokenizer(new StringReader("crash me on the 4th token"), MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
        f.setTokenStream(new CrashingFilter("crash", tokenizer));
      }
    }

    try {
      w.addDocuments(docs);
      // (elided in the excerpt) same sketch as above: expect the crash to abort the batch.
      fail("did not hit expected exception");
    } catch (IOException ioe) {
      // expected
    }

  }
 
  // KeywordMarkerFilter protects entries of the CharArraySet from stemming: "yourselves" passes through PorterStemFilter unchanged while "yours" is stemmed to "your".
  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    set.add("yourselves");
    Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
    TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));  
    assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
  }
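
One more hedged sketch to round out the modes used above (hypothetical method name; assumes this test-framework version also provides the MockTokenizer.KEYWORD automaton): KEYWORD emits the entire input as a single token, which is convenient when a filter should be tested in isolation.

  public void testKeywordModeSketch() throws IOException {
    Tokenizer tokenizer = new MockTokenizer(new StringReader("Now is The Time"),
                                            MockTokenizer.KEYWORD, false);
    assertTokenStreamContents(tokenizer, new String[] { "Now is The Time" });
  }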