/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import java.util.LinkedList;
import org.junit.Before;
import org.junit.Test;
import org.modeshape.common.text.ParsingException;
import org.modeshape.common.text.Position;
import org.modeshape.common.text.TokenStream.CharacterArrayStream;
import org.modeshape.common.text.TokenStream.Tokens;
/**
 * Unit tests for {@link CndTokenizer}. Each test tokenizes a literal string through a recording {@link Tokens}
 * implementation and then checks, in order, the start index, end index and type of every token that was reported.
 */
public class CndTokenizerTest {

    private CndTokenizer tokenizer;
    private Tokens tokenFactory;
    /** Tokens reported so far, each stored as <code>{startIndex, endIndex, type}</code>, in the order they were added. */
    private LinkedList<int[]> tokenValues;

    /**
     * Creates a fresh tokenizer and a token factory that records every reported token into {@link #tokenValues}.
     */
    @Before
    public void beforeEach() {
        // NOTE(review): the first flag presumably enables comment tokens (comment tests pass with it set to true);
        // the second appears to control vendor-extension tokens, judging by the vendor-extension tests below.
        // Confirm both against CndTokenizer.
        tokenizer = new CndTokenizer(true, false);
        final LinkedList<int[]> recorded = new LinkedList<int[]>();
        tokenFactory = new Tokens() {
            @Override
            public void addToken( Position position,
                                  int index ) {
                // A single-character token spans [index, index + 1) and is recorded with type 0.
                addToken(position, index, index + 1, 0);
            }

            @Override
            public void addToken( Position position,
                                  int startIndex,
                                  int endIndex ) {
                // No type supplied, so record the token with type 0.
                addToken(position, startIndex, endIndex, 0);
            }

            @Override
            public void addToken( Position position,
                                  int startIndex,
                                  int endIndex,
                                  int type ) {
                recorded.add(new int[] {startIndex, endIndex, type});
            }
        };
        this.tokenValues = recorded;
    }

    /**
     * Runs the current tokenizer over the supplied content, recording the resulting tokens.
     *
     * @param input the content to tokenize
     */
    protected void tokenize( String input ) {
        CharacterArrayStream stream = new CharacterArrayStream(input.toCharArray());
        tokenizer.tokenize(stream, tokenFactory);
    }

    /**
     * Consumes the next recorded token and verifies its indexes and type.
     *
     * @param startIndex the expected start index of the token
     * @param endIndex the expected end index of the token
     * @param type the expected token type
     */
    protected void assertNextTokenIs( int startIndex,
                                      int endIndex,
                                      int type ) {
        // Fail with a readable assertion rather than letting removeFirst() throw NoSuchElementException.
        assertThat("expected another token but none remain", tokenValues.isEmpty(), is(false));
        int[] actual = tokenValues.removeFirst();
        assertThat("token start index", actual[0], is(startIndex));
        assertThat("token end index", actual[1], is(endIndex));
        assertThat("token type", actual[2], is(type));
    }

    /**
     * Verifies that every recorded token has already been consumed.
     */
    protected void assertNoMoreTokens() {
        assertThat(tokenValues.isEmpty(), is(true));
    }

    /**
     * Consumes the two single-character symbol tokens produced by the "--" prefix used by many of the test inputs.
     */
    private void assertLeadingDashes() {
        assertNextTokenIs(0, 1, CndTokenizer.SYMBOL);
        assertNextTokenIs(1, 2, CndTokenizer.SYMBOL);
    }

    @Test
    public void shouldCreateNoTokensForEmptyContent() {
        tokenize("");
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateNoTokensForContentWithOnlyWhitespace() {
        tokenize(" \t \n \r\n \r ");
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForEachSymbolCharacter() {
        String content = "[]<>=-+(),";
        tokenize(content);
        // Every character is expected to become its own one-character SYMBOL token.
        for (int index = 0; index < content.length(); index++) {
            assertNextTokenIs(index, index + 1, CndTokenizer.SYMBOL);
        }
        assertNoMoreTokens();
    }

    @Test
    public void shouldNotIncludeColonInListOfSymbolsSinceTheyCanAppearInNames() {
        tokenizer = new CndTokenizer(true, true);
        String content = "dna:someName";
        tokenize(content);
        assertNextTokenIs(0, content.length(), CndTokenizer.WORD);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateVendorExtensionToken() {
        tokenizer = new CndTokenizer(true, true);
        String content = "{vendor extension}";
        tokenize(content);
        assertNextTokenIs(0, content.length(), CndTokenizer.VENDOR_EXTENSION);
        assertNoMoreTokens();
    }

    @Test
    public void shouldNotCreateVendorExtensionTokenIfTokenizerIsNotUsingThem() {
        tokenizer = new CndTokenizer(true, false);
        String content = "{vendor extension}";
        tokenize(content);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForEndOfLineComment() {
        String content = "--//this is a comment\n";
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, content.length() - 1, CndTokenizer.COMMENT); // -1 because '\n' is not included
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForEndOfLineCommentThatEndsWithEndOfString() {
        String content = "--//this is a comment";
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineComment() {
        String content = "--/*this is a comment*/-";
        int lastIndex = content.length() - 1; // index of the trailing '-' that follows the comment
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, lastIndex, CndTokenizer.COMMENT);
        assertNextTokenIs(lastIndex, content.length(), CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineCommentAtEndOfContent() {
        String content = "--/*this is a comment*/";
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineCommentWithoutTerminatingCharacters() {
        String content = "--/*this is a comment";
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForMultiLineCommentWithoutAllTerminatingCharacters() {
        String content = "--/*this is a comment*";
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, content.length(), CndTokenizer.COMMENT);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSingleQuotedString() {
        String content = "--'this is a single-quoted \n string'-";
        // Sanity-check the hand-counted quote positions before relying on them below.
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(35), is('\''));
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, 36, CndTokenizer.SINGLE_QUOTED_STRING);
        assertNextTokenIs(36, 37, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSingleQuotedStringWithEscapedSingleQuoteCharacters() {
        String content = "--'this \"is\" a \\'single-quoted\\' \n string'-";
        // Sanity-check the hand-counted quote positions before relying on them below.
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(41), is('\''));
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, 42, CndTokenizer.SINGLE_QUOTED_STRING);
        assertNextTokenIs(42, 43, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForSingleQuotedStringAtEndOfContent() {
        String content = "--'this is a single-quoted \n string'";
        // Sanity-check the hand-counted quote positions before relying on them below.
        assertThat(content.charAt(2), is('\''));
        assertThat(content.charAt(35), is('\''));
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, 36, CndTokenizer.SINGLE_QUOTED_STRING);
        assertNoMoreTokens();
    }

    // Despite the method name, the expectation here is that tokenizing an unterminated string fails.
    @Test( expected = ParsingException.class )
    public void shouldCreateTokenForSingleQuotedStringWithoutClosingQuote() {
        String content = "--'this is a single-quoted \n string";
        tokenize(content);
    }

    @Test
    public void shouldCreateTokenForDoubleQuotedString() {
        String content = "--\"this is a double-quoted \n string\"-";
        // Sanity-check the hand-counted quote positions before relying on them below.
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(35), is('"'));
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, 36, CndTokenizer.DOUBLE_QUOTED_STRING);
        assertNextTokenIs(36, 37, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForDoubleQuotedStringWithEscapedDoubleQuoteCharacters() {
        String content = "--\"this 'is' a \\\"double-quoted\\\" \n string\"-";
        // Sanity-check the hand-counted quote positions before relying on them below.
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(41), is('"'));
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, 42, CndTokenizer.DOUBLE_QUOTED_STRING);
        assertNextTokenIs(42, 43, CndTokenizer.SYMBOL);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokenForDoubleQuotedStringAtEndOfContent() {
        String content = "--\"this is a double-quoted \n string\"";
        // Sanity-check the hand-counted quote positions before relying on them below.
        assertThat(content.charAt(2), is('"'));
        assertThat(content.charAt(35), is('"'));
        tokenize(content);
        assertLeadingDashes();
        assertNextTokenIs(2, 36, CndTokenizer.DOUBLE_QUOTED_STRING);
        assertNoMoreTokens();
    }

    // Despite the method name, the expectation here is that tokenizing an unterminated string fails.
    @Test( expected = ParsingException.class )
    public void shouldCreateTokenForDoubleQuotedStringWithoutClosingQuote() {
        String content = "--\"this is a double-quoted \n string";
        tokenize(content);
    }

    @Test
    public void shouldCreateTokensForWordsWithAlphabeticCharacters() {
        String content = "This is a series of words.";
        tokenize(content);
        assertNextTokenIs(0, 4, CndTokenizer.WORD);
        assertNextTokenIs(5, 7, CndTokenizer.WORD);
        assertNextTokenIs(8, 9, CndTokenizer.WORD);
        assertNextTokenIs(10, 16, CndTokenizer.WORD);
        assertNextTokenIs(17, 19, CndTokenizer.WORD);
        assertNextTokenIs(20, 26, CndTokenizer.WORD); // the trailing '.' is part of the last word
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokensForWordsWithNumericCharacters() {
        String content = "1234 4 5353.324";
        tokenize(content);
        assertNextTokenIs(0, 4, CndTokenizer.WORD);
        assertNextTokenIs(5, 6, CndTokenizer.WORD);
        assertNextTokenIs(7, 15, CndTokenizer.WORD);
        assertNoMoreTokens();
    }

    @Test
    public void shouldCreateTokensForWordsWithAlphaNumericCharacters() {
        String content = "123a 5353.324e100";
        tokenize(content);
        assertNextTokenIs(0, 4, CndTokenizer.WORD);
        assertNextTokenIs(5, 17, CndTokenizer.WORD);
        assertNoMoreTokens();
    }
}