String in_tag = null;
Pattern plain_string_content = null;
while (source.hasMore())
{
MatchResult m = null;
if (state != State.in_special_tag && (m = source.scan(SPACE)) != null)
{
encoder.textToken(m.group(), TokenType.space);
}
else
{
switch (state)
{
case initial:
// State "initial": the patterns below are tried in this fixed order; the first
// one that matches is emitted as a token and ends this pass through the switch.
if ((m = source.scan(CDATA_START)) != null)
{
// The CDATA opener is emitted as an inline delimiter, then the body is scanned
// immediately within the same iteration.
encoder.textToken(m.group(), TokenType.inline_delimiter);
if ((m = source.scan(CDATA_END)) != null)
{
// Emit the match minus its last three characters as plain text, then the literal
// "]]>" as the closing delimiter.
// NOTE(review): presumably CDATA_END matches up to and including "]]>" - confirm
// against the pattern definition.
encoder.textToken(m.group().substring(0, m.group().length() - 3), TokenType.plain);
encoder.textToken("]]>", TokenType.inline_delimiter);
}
else if ((m = source.scan(CDATA_ERROR)) != null)
{
// No CDATA_END match: whatever CDATA_ERROR matches is reported as an error token.
encoder.textToken(m.group(), TokenType.error);
}
// If neither pattern matches, nothing more is emitted for this iteration.
}
else if ((m = source.scan(COMMENT)) != null)
{
encoder.textToken(m.group(), TokenType.comment);
}
else if ((m = source.scan(DOCTYPE)) != null)
{
encoder.textToken(m.group(), TokenType.doctype);
}
else if ((m = source.scan(PRE_PROCESSOR)) != null)
{
encoder.textToken(m.group(), TokenType.preprocessor);
}
else if ((m = source.scan(COMMENT2)) != null)
{
// A second comment pattern, also emitted with the comment token type.
encoder.textToken(m.group(), TokenType.comment);
}
else if ((m = source.scan(TAG)) != null)
{
// A TAG match clears any remembered special tag name and is emitted whole;
// the state stays "initial".
// NOTE(review): presumably TAG matches closing tags - confirm against the pattern.
in_tag = null;
encoder.textToken(m.group(), TokenType.tag);
}
else if ((m = source.scan(SPECIAL_TAG)) != null)
{
encoder.textToken(m.group(), TokenType.tag);
// Group 1 becomes the remembered tag name; it is null when the group did not
// participate in the match.
// NOTE(review): presumably group 1 captures only "special" names such as
// script/style and group 2 an immediately following '>' - confirm.
in_tag = m.group(1);
if (m.group(2) != null)
{
// Group 2 matched: switch to in_special_tag only when a tag name was captured;
// otherwise the state is left unchanged.
if (in_tag != null)
{
state = State.in_special_tag;
}
}
else
{
// Group 2 did not match: continue by scanning attributes.
state = State.attribute;
}
}
else if ((m = source.scan(PLAIN)) != null)
{
encoder.textToken(m.group(), TokenType.plain);
}
else if ((m = source.scan(ENTITY)) != null)
{
encoder.textToken(m.group(), TokenType.entity);
}
else if ((m = source.scan(ERROR)) != null)
{
// An ERROR match also forgets any remembered special tag name.
in_tag = null;
encoder.textToken(m.group(), TokenType.error);
}
else
{
// None of the patterns matched: treated as an internal scanner bug.
throw new RuntimeException("[BUG] else-case reached with state " + state + " in " + getClass());
}
break;
case attribute:
// State "attribute": expects either the end of the tag or an attribute name.
if ((m = source.scan(TAG_END)) != null)
{
encoder.textToken(m.group(), TokenType.tag);
in_attribute = null;
// A remembered special tag name (set in the "initial" state) sends the scanner
// into in_special_tag; otherwise it returns to "initial".
if (in_tag != null)
{
state = State.in_special_tag;
}
else
{
state = State.initial;
}
}
else if ((m = source.scan(ATTR_NAME)) != null)
{
// Record the result of looking the name up in IN_ATTRIBUTE; the attribute_value
// state compares it against EmbeddedType.script / EmbeddedType.style.
in_attribute = IN_ATTRIBUTE.lookup(m.group());
encoder.textToken(m.group(), TokenType.attribute_name);
state = State.attribute_equal;
}
else
{
// Neither matched: forget the special tag name and emit whatever source.next()
// returns as an error token; the state stays "attribute".
in_tag = null;
encoder.textToken(source.next(), TokenType.error);
}
break;
case attribute_equal:
// State "attribute_equal": entered right after an attribute name was emitted.
// An EQUAL match is emitted as an operator and a value is expected next;
// otherwise nothing is consumed and the input is re-examined in the
// "attribute" state on the next loop iteration.
if ((m = source.scan(EQUAL)) != null)
{
encoder.textToken(m.group(), TokenType.operator);
state = State.attribute_value;
}
else
{
state = State.attribute;
}
// Always leave the switch here. The value must be scanned on the next loop
// iteration so that the loop's hasMore() check and its leading SPACE scan run
// first; falling through into attribute_value would bypass both, letting
// whitespace after the operator reach that state's error branch and calling
// source.next() even when the input is exhausted.
break;
case attribute_value:
// State "attribute_value": expects an unquoted value, a quoted value, or the end
// of the tag.
if ((m = source.scan(ATTR_NAME)) != null)
{
// Unquoted value: the ATTR_NAME pattern is reused to match it.
encoder.textToken(m.group(), TokenType.attribute_value);
state = State.attribute;
}
else if ((m = source.scan(QUOTE)) != null)
{
if (EmbeddedType.script == in_attribute || EmbeddedType.style == in_attribute)
{
// The attribute was classified as embedded script or style: the whole quoted
// value is handled here, inside a single string group.
encoder.beginGroup(TokenType.string);
encoder.textToken(m.group(), TokenType.delimiter);
// Remember the opening quote; it is used below to find where the value ends.
String groupStart = m.group();
if ((m = source.scan(JAVASCRIPT_INLINE)) != null)
{
// An optional JAVASCRIPT_INLINE prefix is emitted as a comment token (this is
// attempted for both script and style attributes).
encoder.textToken(m.group(), TokenType.comment);
}
// Consume up to, but not including, the next occurrence of the opening quote,
// or up to the end of input if there is none (zero-width lookahead).
String code = source.scanUntil(Pattern.compile("(?=" + groupStart + "|\\z)")).group();
if (EmbeddedType.script == in_attribute)
{
// Delegate the value to the JavaScript scanner, writing into the same encoder.
Syntax.Builder.create()
.scannerType(JavaScriptScanner.TYPE.getName())
.encoder(encoder)
.execute(code);
}
else
{
// Delegate the value to the CSS scanner, configured to start in its "block"
// state, writing into the same encoder.
Syntax.Builder.create()
.scannerType(CSSScanner.TYPE.getName())
.encoder(encoder)
.scannerOptions(
Options.create()
.add(CSSScanner.OPTION_START_STATE, CSSScanner.State.block))
.execute(code);
}
// Closing quote; absent when the value ran to the end of input.
m = source.scan(QUOTE);
if (m != null)
{
encoder.textToken(m.group(), TokenType.delimiter);
}
encoder.endGroup(TokenType.string);
state = State.attribute;
in_attribute = null;
}
else
{
// Ordinary quoted value: open a string group and let attribute_value_string
// consume the content. The content pattern is selected by the quote character
// that opened the string.
encoder.beginGroup(TokenType.string);
state = State.attribute_value_string;
plain_string_content = PLAIN_STRING_CONTENT.get(m.group());
encoder.textToken(m.group(), TokenType.delimiter);
}
}
else if ((m = source.scan(TAG_END)) != null)
{
// The tag ended where a value was expected: emit it and return to "initial"
// (in_tag is not consulted on this path).
encoder.textToken(m.group(), TokenType.tag);
state = State.initial;
}
else
{
// Nothing matched: emit whatever source.next() returns as an error token and
// stay in this state.
encoder.textToken(source.next(), TokenType.error);
}
break;
case attribute_value_string:
// State "attribute_value_string": inside a quoted attribute value whose string
// group was opened by the attribute_value state.
if ((m = source.scan(plain_string_content)) != null)
{
// Content matched by the pattern chosen when the string was opened.
encoder.textToken(m.group(), TokenType.content);
}
else if ((m = source.scan(QUOTE)) != null)
{
// A quote closes the string group and returns to attribute scanning.
// NOTE(review): any QUOTE match closes the string here; presumably
// plain_string_content consumes the non-matching quote character - confirm.
encoder.textToken(m.group(), TokenType.delimiter);
encoder.endGroup(TokenType.string);
state = State.attribute;
}
else if ((m = source.scan(ENTITY)) != null)
{
encoder.textToken(m.group(), TokenType.entity);
}
else if ((m = source.scan(AMP)) != null)
{
// An AMP match that was not part of an ENTITY is emitted as ordinary content.
encoder.textToken(m.group(), TokenType.content);
}
else if ((m = source.scan(END)) != null)
{
// An END match terminates the string without a closing quote: the group is
// closed first, the state returns to "initial", and the matched text is
// emitted as an error token outside the group.
encoder.endGroup(TokenType.string);
state = State.initial;
encoder.textToken(m.group(), TokenType.error);
}
// There is no final else: if none of the patterns match, nothing is consumed in
// this iteration.
// NOTE(review): this relies on the patterns above covering every possible next
// character; otherwise the loop would not make progress - confirm.
break;
case in_special_tag:
if ("script".equalsIgnoreCase(in_tag) || "style".equalsIgnoreCase(in_tag))
{
String code = null;
String closing = null;
if ((m = source.scan(SPECIAL_SPACE)) != null)
{
encoder.textToken(m.group(), TokenType.space);
}
if ((m = source.scan(SPECIAL_COMMENT)) != null)
{
code = m.group(2);
if (code == null)
{
code = m.group(4);
}
closing = m.group(3);
encoder.textToken(m.group(1), TokenType.comment);
}
else
{
code = source.scanUntil("(?=(?:\\n\\s*)?<\\/" + in_tag + ">)|\\z").group();
closing = null;