// Source code of org.modeshape.jcr.CndTokenizerTest

/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.jcr;


import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import java.util.LinkedList;
import org.junit.Before;
import org.junit.Test;
import org.modeshape.common.text.ParsingException;
import org.modeshape.common.text.Position;
import org.modeshape.common.text.TokenStream.CharacterArrayStream;
import org.modeshape.common.text.TokenStream.Tokens;


public class CndTokenizerTest {


    private CndTokenizer tokenizer;
    private Tokens tokenFactory;
    private LinkedList<int[]> tokenValues;


    @Before
    public void beforeEach() {
        tokenizer = new CndTokenizer(true, false);
        final LinkedList<int[]> tokenValues = new LinkedList<int[]>();
        tokenFactory = new Tokens() {
            @Override
            public void addToken( Position position,
                                  int index ) {
                int[] token = new int[] {index, index + 1, 0};
                tokenValues.add(token);
            }


            @Override
            public void addToken( Position position,
                                  int startIndex,
                                  int endIndex ) {
                int[] token = new int[] {startIndex, endIndex, 0};
                tokenValues.add(token);
            }


            @Override
            public void addToken( Position position,
                                  int startIndex,
                                  int endIndex,
                                  int type ) {
                int[] token = new int[] {startIndex, endIndex, type};
                tokenValues.add(token);
            }
        };
        this.tokenValues = tokenValues;
    }


    protected void tokenize( String input ) {
        tokenizer.tokenize(new CharacterArrayStream(input.toCharArray()), tokenFactory);
    }


    protected void assertNextTokenIs( int startIndex,
                                      int endIndex,
                                      int type ) {
        int[] token = tokenValues.removeFirst();
        assertThat(token[0], is(startIndex));
        assertThat(token[1], is(endIndex));
        assertThat(token[2], is(type));
    }


    protected void assertNoMoreTokens() {
        assertThat(tokenValues.isEmpty(), is(true));
    }


    @Test
    public void shouldCreateNoTokensForEmptyContent() {
        tokenize("");
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateNoTokensForContentWithOnlyWhitespace() {
        tokenize("  \t   \n   \r\n  \r  ");
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForEachSymbolCharacter() {
        String content = "[]<>=-+(),";
        int numSymbols = content.length();
        tokenize(content);
        for (int i = 0; i != numSymbols; ++i) {
            assertNextTokenIs(i, i + 1, CndTokenizer.SYMBOL);
        }
        assertNoMoreTokens();
    }


    @Test
    public void shouldNotIncludeColonInListOfSymbolsSinceTheyCanAppearInNames() {
        tokenizer = new CndTokenizer(true, true);
        String content = "dna:someName";
        tokenize(content);
        assertNextTokenIs(0, content.length(), CndTokenizer.WORD);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateVendorExtensionToken() {
        tokenizer = new CndTokenizer(true, true);
        String content = "{vendor extension}";
        tokenize(content);
        assertNextTokenIs(0, content.length(), CndTokenizer.VENDOR_EXTENSION);
        assertNoMoreTokens();
    }


    @Test
    public void shouldNotCreateVendorExtensionTokenIfTokenizerIsNotUsingThem() {
        tokenizer = new CndTokenizer(true, false);
        String content = "{vendor extension}";
        tokenize(content);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForEndOfLineComment() {
        String content = "--//this is a comment\n";
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length() - 1, CndTokenizer.COMMENT); // -1 because '\n' is not included
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForEndOfLineCommentThatEndsWithEndOfString() {
        String content = "--//this is a comment";
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForMultiLineComment() {
        String content = "--/*this is a comment*/-";
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length() - 1, CndTokenizer.COMMENT);
        assertNextTokenIs(content.length() - 1, content.length(), CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForMultiLineCommentAtEndOfContent() {
        String content = "--/*this is a comment*/";
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForMultiLineCommentWithoutTerminatingCharacters() {
        String content = "--/*this is a comment";
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForMultiLineCommentWithoutAllTerminatingCharacters() {
        String content = "--/*this is a comment*";
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForSingleQuotedString() {
        String content = "--'this is a single-quoted \n string'-";
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(35), is('\''));
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, CndTokenizer.SINGLE_QUOTED_STRING);
        assertNextTokenIs(36, 37, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForSingleQuotedStringWithEscapedSingleQuoteCharacters() {
        String content = "--'this \"is\" a \\'single-quoted\\' \n string'-";
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(41), is('\''));
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, 42, CndTokenizer.SINGLE_QUOTED_STRING);
        assertNextTokenIs(42, 43, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForSingleQuotedStringAtEndOfContent() {
        String content = "--'this is a single-quoted \n string'";
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(35), is('\''));
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, CndTokenizer.SINGLE_QUOTED_STRING);
        assertNoMoreTokens();
    }


    @Test( expected = ParsingException.class )
    public void shouldCreateTokenForSingleQuotedStringWithoutClosingQuote() {
        String content = "--'this is a single-quoted \n string";
        tokenize(content);
    }


    @Test
    public void shouldCreateTokenForDoubleQuotedString() {
        String content = "--\"this is a double-quoted \n string\"-";
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(35), is('"'));
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, CndTokenizer.DOUBLE_QUOTED_STRING);
        assertNextTokenIs(36, 37, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForDoubleQuotedStringWithEscapedDoubleQuoteCharacters() {
        String content = "--\"this 'is' a \\\"double-quoted\\\" \n string\"-";
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(41), is('"'));
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, 42, CndTokenizer.DOUBLE_QUOTED_STRING);
        assertNextTokenIs(42, 43, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokenForDoubleQuotedStringAtEndOfContent() {
        String content = "--\"this is a double-quoted \n string\"";
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(35), is('"'));
        tokenize(content);
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
        assertNextTokenIs(2, 36, CndTokenizer.DOUBLE_QUOTED_STRING);
        assertNoMoreTokens();
    }


    @Test( expected = ParsingException.class )
    public void shouldCreateTokenForDoubleQuotedStringWithoutClosingQuote() {
        String content = "--\"this is a double-quoted \n string";
        tokenize(content);
    }


    @Test
    public void shouldCreateTokensForWordsWithAlphabeticCharacters() {
        String content = "This is a series of words.";
        tokenize(content);
        assertNextTokenIs(0, 4, CndTokenizer.WORD);
        assertNextTokenIs(5, 7, CndTokenizer.WORD);
        assertNextTokenIs(8, 9, CndTokenizer.WORD);
        assertNextTokenIs(10, 16, CndTokenizer.WORD);
        assertNextTokenIs(17, 19, CndTokenizer.WORD);
        assertNextTokenIs(20, 26, CndTokenizer.WORD);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokensForWordsWithNumericCharacters() {
        String content = "1234 4 5353.324";
        tokenize(content);
        assertNextTokenIs(0, 4, CndTokenizer.WORD);
        assertNextTokenIs(5, 6, CndTokenizer.WORD);
        assertNextTokenIs(7, 15, CndTokenizer.WORD);
        assertNoMoreTokens();
    }


    @Test
    public void shouldCreateTokensForWordsWithAlphaNumericCharacters() {
        String content = "123a 5353.324e100";
        tokenize(content);
        assertNextTokenIs(0, 4, CndTokenizer.WORD);
        assertNextTokenIs(5, 17, CndTokenizer.WORD);
        assertNoMoreTokens();
    }
}
// Source code of org.modeshape.jcr.CndTokenizerTest
//
// Related classes of org.modeshape.jcr.CndTokenizerTest