Examples of TokenFilter


Examples of edu.ucla.sspace.text.TokenFilter

     * with optional configuration file, if it is not {@code null}, and any
     * {@link TokenFilter}s or {@link Stemmer}s that have been specified by the
     * command line.
     */
    protected void setupDependencyExtractor() {
        TokenFilter filter = (argOptions.hasOption("tokenFilter"))
            ? TokenFilter.loadFromSpecification(argOptions.getStringOption('F'))
            : null;

        Stemmer stemmer = argOptions.getObjectOption("stemmingAlgorithm", null);
        String format = argOptions.getStringOption(
View Full Code Here

Examples of edu.ucla.sspace.text.TokenFilter

        // Extract key arguments.
        boolean doLowerCasing = options.hasOption("lowerCase");
        boolean doPos = options.hasOption("partOfSpeech");
        boolean discardHeader = options.hasOption('H');

        TokenFilter filter = (options.hasOption("tokenFilter"))
            ? TokenFilter.loadFromSpecification(options.getStringOption('F'))
            : null;

        Stemmer stemmer = options.getObjectOption("stemmingAlgorithm", null);
View Full Code Here

Examples of edu.ucla.sspace.text.TokenFilter

        // Setup logging.
        if (options.hasOption("verbose"))
            LoggerUtil.setLevel(Level.FINE);

        TokenFilter filter = (options.hasOption("tokenFilter"))
            ? TokenFilter.loadFromSpecification(options.getStringOption('F'))
            : null;

        // setup the dependency extractor.
        DependencyExtractor e = new CoNLLDependencyExtractor(filter, null);
View Full Code Here

Examples of edu.ucla.sspace.text.TokenFilter

        // Setup logging.
        if (options.hasOption("verbose"))
            LoggerUtil.setLevel(Level.FINE);

        TokenFilter filter = (options.hasOption("tokenFilter"))
            ? TokenFilter.loadFromSpecification(options.getStringOption('F'))
            : null;

        // setup the dependency extractor.
        DependencyExtractor e = new CoNLLDependencyExtractor(filter, null);
View Full Code Here

Examples of org.apache.lucene.analysis.TokenFilter

  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)) {
          boolean first=true;
          AttributeSource.State state;

          @Override
          public boolean incrementToken() throws IOException {
View Full Code Here

Examples of org.apache.lucene.analysis.TokenFilter

      throw new IllegalArgumentException("logStream must not be null");

    return new Analyzer() {
      @Override
      public TokenStream tokenStream(final String fieldName, Reader reader) {
        return new TokenFilter(child.tokenStream(fieldName, reader)) {
          private int position = -1;
          private TermAttribute termAtt = addAttribute(TermAttribute.class);
          private PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
          private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
          private TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
View Full Code Here

Examples of org.apache.lucene.analysis.TokenFilter

      return child; // no need to wrap
 
    return new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(child.tokenStream(fieldName, reader)) {
          private int todo = maxTokens;
         
          @Override
          public boolean incrementToken() throws IOException {
            return --todo >= 0 ? input.incrementToken() : false;
View Full Code Here

Examples of org.apache.lucene.analysis.TokenFilter

      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        final ArrayList<AttributeSource.State> tokens = cache.get(fieldName);
        if (tokens == null) { // not yet cached
          final ArrayList<AttributeSource.State> tokens2 = new ArrayList<AttributeSource.State>();
          TokenStream tokenStream = new TokenFilter(child.tokenStream(fieldName, reader)) {

            @Override
            public boolean incrementToken() throws IOException {
              boolean hasNext = input.incrementToken();
              if (hasNext) tokens2.add(captureState());
View Full Code Here

Examples of org.apache.lucene.analysis.TokenFilter

  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(reader)) {
          boolean first=true;
          AttributeSource.State state;

          @Override
          public boolean incrementToken() throws IOException {
View Full Code Here

Examples of org.apache.lucene.analysis.TokenFilter

  public void testElision() throws Exception {
    String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
    TokenFilter filter = new ElisionFilter(tokenizer, articles);
    List<String> tas = filter(filter);
    assertEquals("embrouille", tas.get(4));
    assertEquals("O'brian", tas.get(6));
    assertEquals("enfin", tas.get(7));
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.