Package org.apache.solr.analysis

Examples of org.apache.solr.analysis.TokenizerChain


        return null; // used for map registration
      }
    };
    filterLoader.load( loader, (NodeList)xpath.evaluate("./filter", node, XPathConstants.NODESET) );

    return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
        tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
  };
View Full Code Here


      NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
      namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
      return namedList;
    }

    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();

    if( cfiltfacs != null ){
      String source = value;
      for(CharFilterFactory cfiltfac : cfiltfacs ){
        CharStream reader = CharReader.get(new StringReader(source));
        reader = cfiltfac.create(reader);
        source = writeCharStream(namedList, reader);
      }
    }

    TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);
View Full Code Here

  private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
    SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>();
    aninfo.add("className", analyzer.getClass().getName());
    if (analyzer instanceof TokenizerChain) {

      TokenizerChain tchain = (TokenizerChain)analyzer;

      CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
      SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<Map<String, Object>>();
      for (CharFilterFactory cfiltfac : cfiltfacs) {
        Map<String, Object> tok = new HashMap<String, Object>();
        String className = cfiltfac.getClass().getName();
        tok.put("className", className);
        tok.put("args", cfiltfac.getArgs());
        cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
      }
      if (cfilters.size() > 0) {
        aninfo.add("charFilters", cfilters);
      }
     
      SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>();
      TokenizerFactory tfac = tchain.getTokenizerFactory();
      tokenizer.add("className", tfac.getClass().getName());
      tokenizer.add("args", tfac.getArgs());
      aninfo.add("tokenizer", tokenizer);

      TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
      SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>();
      for (TokenFilterFactory filtfac : filtfacs) {
        Map<String, Object> tok = new HashMap<String, Object>();
        String className = filtfac.getClass().getName();
        tok.put("className", className);
View Full Code Here

        return null; // used for map registration
      }
    };
    filterLoader.load(loader, tokenFilterNodes);

    return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
        tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
  }
View Full Code Here

    if (!(analyzer instanceof TokenizerChain)) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    // copy the existing list of token filters
    final TokenFilterFactory[] old = chain.getTokenFilterFactories();
    final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3];
    System.arraycopy(old, 0, filterFactories, 0, old.length);
    // append the datatype analyzer filter factory
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(luceneDefaultVersion);
    datatypeFactory.register(datatypes);
    filterFactories[old.length] = datatypeFactory;
    // append the position attribute filter factory
    filterFactories[old.length + 1] = new PositionAttributeFilterFactory();
    // append the siren payload filter factory
    filterFactories[old.length + 2] = new SirenPayloadFilterFactory();
    // create a new tokenizer chain with the updated list of filter factories
    return new TokenizerChain(chain.getCharFilterFactories(),
      chain.getTokenizerFactory(), filterFactories);
  }
View Full Code Here

    final IndexSchema schema = h.getCore().getSchema();
    final SchemaField ntriple = schema.getField(JSON_FIELD);
    final FieldType tmp = ntriple.getType();

    assertTrue(tmp.getAnalyzer() instanceof TokenizerChain);
    final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
    assertNotNull(ts.getTokenizerFactory());
    assertTrue(ts.getTokenizerFactory() instanceof JsonTokenizerFactory);

    // 3 filters for index analyzer
    assertNotNull(ts.getTokenFilterFactories());
    assertEquals(3, ts.getTokenFilterFactories().length);
    assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory);
    assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory);

    // no query analyzer
    assertNull(tmp.getQueryAnalyzer());
  }
View Full Code Here

  public void testSirenFieldDatatypeAnalyzer() throws Exception {
    final IndexSchema schema = h.getCore().getSchema();
    final SchemaField ntriple = schema.getField(JSON_FIELD);
    final FieldType tmp = ntriple.getType();

    TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();

    assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[0];
    assertNotNull(f.getDatatypeAnalyzers());
    assertEquals(9, f.getDatatypeAnalyzers().size());

    assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field"));
    ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field");
    assertNotNull(ts.getTokenizerFactory());
    assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

    assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"));
    ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string");
    assertNotNull(ts.getTokenizerFactory());
    assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

    assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"));
    assertTrue(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer);
    final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int");
    assertEquals(8, a.getPrecisionStep());
View Full Code Here

          charFilterDef.params(), luceneMatchVersion
      );
      charFilters[index].init( mapOfParameters );
      injectResourceLoader( charFilters[index], defaultResourceLoader, mapOfParameters );
    }
    return new TokenizerChain( charFilters, tokenFactory, filters );
  }
View Full Code Here

          charFilterDef.params(), luceneMatchVersion
      );
      charFilters[index].init( mapOfParameters );
      injectResourceLoader( charFilters[index], defaultResourceLoader, mapOfParameters );
    }
    return new TokenizerChain( charFilters, tokenFactory, filters );
  }
View Full Code Here

          charFilterDef.params(), luceneMatchVersion
      );
      charFilters[index].init( mapOfParameters );
      injectResourceLoader( charFilters[index], defaultResourceLoader, mapOfParameters );
    }
    return new TokenizerChain( charFilters, tokenFactory, filters );
  }
View Full Code Here

TOP

Related Classes of org.apache.solr.analysis.TokenizerChain

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.