Examples of LetterTokenizer


Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
  public void testCrossPlaneNormalization() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), reader) {
          @Override
          protected int normalize(int c) {
            if (c > 0xffff) {
              return 'δ';
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize BMP->SMP and check that offsets are correct
  public void testCrossPlaneNormalization2() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), reader) {
          @Override
          protected int normalize(int c) {
            if (c <= 0xffff) {
              return 0x1043C;
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
  }

  public void testGraphs() throws IOException {
    TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
    tk = new ShingleFilter(tk);
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
    assertTokenStreamContents(tk,
        new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" },
        new int[]    { 6,11,11,14 },
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
  public void testCrossPlaneNormalization() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) {
          @Override
          protected int normalize(int c) {
            if (c > 0xffff) {
              return 'δ';
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize BMP->SMP and check that offsets are correct
  public void testCrossPlaneNormalization2() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) {
          @Override
          protected int normalize(int c) {
            if (c <= 0xffff) {
              return 0x1043C;
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
  }

  public void testGraphs() throws IOException {
    TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
    tk = new ShingleFilter(tk);
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
    tk.reset();
    assertTokenStreamContents(tk,
        new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" },
        new int[]    { 6,11,11,14 },
        new int[]    { 13,19,19,21 },
        new int[]    { 3,1,0,1 },
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
  }

  public void testGraphs() throws IOException {
    TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
    tk = new ShingleFilter(tk);
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
    tk.reset();
    assertTokenStreamContents(tk,
        new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" },
        new int[]    { 6,11,11,14 },
        new int[]    { 13,19,19,21 },
        new int[]    { 3,1,0,1 },
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
  public void testCrossPlaneNormalization() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) {
          @Override
          protected int normalize(int c) {
            if (c > 0xffff) {
              return 'δ';
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize BMP->SMP and check that offsets are correct
  public void testCrossPlaneNormalization2() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) {
          @Override
          protected int normalize(int c) {
            if (c <= 0xffff) {
              return 0x1043C;
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
  }

  public void testGraphs() throws IOException {
    TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
    tk = new ShingleFilter(tk);
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
    assertTokenStreamContents(tk,
        new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" },
        new int[]    { 6,11,11,14 },
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.