/*
* (c) Copyright 2005 Hewlett-Packard Development Company, LP
* [See end of file]
*/
package com.hp.hpl.jena.iri.impl;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.hp.hpl.jena.iri.ViolationCodes;
/**
 * Translates a component pattern into a {@link Pattern}.
 * <p>
 * The input is ordinary regular-expression syntax extended with three macros:
 * "@{host}", "@{shouldLowerCase(...)}" and "@{mustLowerCase(...)}". Each macro
 * becomes exactly one capturing group in the generated expression, and the
 * parser remembers (as bit masks indexed by group number) which groups came
 * from which macro so that {@link #actions()} can attach the matching
 * {@link GroupAction} to each group.
 * <p>
 * A lowercase macro <code>(body)</code> is expanded to
 * <code>(?:(?:body)|(bodyAnyCase))</code>: the capturing group only
 * participates in a match when the text fits the body apart from letter case.
 */
public class ComponentPatternParser implements ViolationCodes {

    /**
     * Alternation of the tokens the parser must see on their own. Every
     * alternative is its own capturing group, and the group number is the
     * token type returned by {@link #classify(String)}, so the order here
     * must match the constants below.
     */
    static String separators = "([(](?![?]))|([(][?])|([)])|(\\[)|(\\])|([@][{])|([}])|([a-z]-[a-z])";

    static final int OPEN_PAREN = 1;
    static final int OPEN_NON_CAPTURING_PAREN = 2;
    static final int CLOSE_PAREN = 3;
    static final int OPEN_SQ = 4;
    static final int CLOSE_SQ = 5;
    static final int OPEN_VAR = 6;
    static final int CLOSE_BRACE = 7;
    static final int LOWER_CASE_RANGE = 8;
    /** Any text between two tokens. */
    static final int OTHER = -1;

    static final Pattern keyword = Pattern.compile(separators);

    /*
     * The tokens used to be obtained with a lookaround based Pattern.split().
     * The .NET port does not like that, so mySplit() does the work by hand.
     */

    /** Highest group number representable in the int bit masks below. */
    private static final int MAX_MASKED_GROUP = 31;

    // working data

    /** The pattern cut into tokens and the plain text between them. */
    final String split[];

    /** Token type of each entry of {@link #split}. */
    final int classify[];

    /** Index of the next entry of {@link #split} to consume. */
    int field;

    /** Number of capturing groups opened so far in the generated expression. */
    int groupCount;

    // result data

    /** The generated regular expression, built up token by token. */
    final StringBuffer rslt = new StringBuffer();

    /** Bit n is set when group n came from a shouldLowerCase macro. */
    int shouldLowerCase;

    /** Bit n is set when group n came from a mustLowerCase macro. */
    int mustLowerCase;

    /** Bit n is set when group n came from a host macro. */
    int hostNames;

    /** The compiled form of {@link #rslt}. */
    final Pattern pattern;

    // end result data

    static final String emptyStringArray[] = new String[0];

    /**
     * Cuts a pattern into separator tokens and the text between them, in
     * order of appearance.
     * <p>
     * Empty strings between adjacent tokens are not emitted, except that a
     * pattern starting with a token yields a leading empty string.
     *
     * @param p the pattern to tokenise
     * @return the tokens and intervening text
     */
    static private String[] mySplit(String p) {
        Matcher m = keyword.matcher(p);
        ArrayList pieces = new ArrayList();
        int pos = 0;
        while (m.find()) {
            // Text before the token; on the very first token it is kept even
            // when empty.
            if (m.start() > pos || pos == 0) {
                pieces.add(p.substring(pos, m.start()));
            }
            pieces.add(p.substring(m.start(), m.end()));
            pos = m.end();
        }
        if (pos < p.length()) {
            pieces.add(p.substring(pos));
        }
        return (String[]) pieces.toArray(emptyStringArray);
    }

    /**
     * Parses and compiles a component pattern.
     *
     * @param p the pattern, possibly containing macros
     * @throws IllegalArgumentException if a macro or character class is
     *         malformed, or the generated expression does not compile
     * @throws IllegalStateException if the pattern uses a construct this
     *         parser does not support
     */
    public ComponentPatternParser(String p) {
        split = mySplit(p);
        field = 0;
        classify = new int[split.length];
        for (int i = 0; i < split.length; i++) {
            classify[i] = classify(split[i]);
        }
        while (field < split.length) {
            next();
        }
        pattern = Pattern.compile(rslt.toString());
    }

    /**
     * @return the compiled pattern
     */
    public Pattern get() {
        return pattern;
    }

    /**
     * Builds the per-group actions for the compiled pattern.
     *
     * @return an array indexed by group number; entry 0 is left null, every
     *         other entry is the action for that capturing group
     */
    GroupAction[] actions() {
        int gCount = pattern.matcher("").groupCount() + 1;
        GroupAction result[] = new GroupAction[gCount];
        for (int i = 1; i < gCount; i++) {
            // Groups beyond the mask width can never have been flagged;
            // shifting by 32 or more would wrap round and alias a low group.
            int g = i <= MAX_MASKED_GROUP ? 1 << i : 0;
            if ((mustLowerCase & g) != 0) {
                result[i] = new ErrorAction(SCHEME_REQUIRES_LOWERCASE);
            } else if ((shouldLowerCase & g) != 0) {
                result[i] = new ErrorAction(SCHEME_PREFERS_LOWERCASE);
            } else if ((hostNames & g) != 0) {
                result[i] = new HostAction(i);
            } else {
                result[i] = GroupAction.NoAction;
            }
        }
        return result;
    }

    /**
     * Determines the token type of one entry of {@link #split}.
     *
     * @param string the entry
     * @return the number of the group of {@link #separators} that matches
     *         the whole entry, or {@link #OTHER}
     */
    private int classify(String string) {
        Matcher m = keyword.matcher(string);
        if (!m.matches()) {
            return OTHER;
        }
        for (int i = 1; i <= m.groupCount(); i++) {
            if (m.start(i) != -1) {
                return i;
            }
        }
        throw new IllegalStateException(
                "IRI code internal error: no group matched.");
    }

    /**
     * Copies entries verbatim up to and including the next closing square
     * bracket. Called just after an opening bracket has been copied.
     */
    private void untilCloseSq() {
        while (classify[field - 1] != CLOSE_SQ) {
            if (field >= split.length) {
                throw new IllegalArgumentException(
                        "Internal IRI code error. Did not find CLOSE_SQ in until().");
            }
            add();
        }
    }

    /**
     * Consumes the entry at {@link #field} (and, for macros and character
     * classes, the entries that belong to it) and appends its translation.
     */
    private void next() {
        switch (classify[field]) {
        case CLOSE_SQ:
            throw new IllegalArgumentException(
                    "Found unexpected ], either pattern syntax error, or limitation of IRI code.");
        case OPEN_SQ:
            add();
            untilCloseSq();
            break;
        case OPEN_VAR:
            field++;
            if (field >= split.length) {
                throw new IllegalArgumentException("No macro: ");
            }
            // Every macro becomes one capturing group.
            rslt.append("(");
            groupCount++;
            if (split[field].equals("host")) {
                addHost();
            } else {
                if (split[field].equals("shouldLowerCase")) {
                    shouldLowerCase |= groupBit();
                } else if (split[field].equals("mustLowerCase")) {
                    mustLowerCase |= groupBit();
                } else {
                    throw new IllegalArgumentException("No macro: "
                            + split[field]);
                }
                addLowerCase();
            }
            break;
        case OPEN_PAREN:
            groupCount++;
            // fall through
        case OPEN_NON_CAPTURING_PAREN:
        case CLOSE_PAREN:
        case CLOSE_BRACE:
        case LOWER_CASE_RANGE:
        case OTHER:
            add();
            return;
        default:
            throw new IllegalStateException("IRI code internal error.");
        }
    }

    /**
     * @return the mask bit for the group that was opened last
     * @throws IllegalArgumentException if that group number does not fit in
     *         the int masks
     */
    private int groupBit() {
        if (groupCount > MAX_MASKED_GROUP) {
            throw new IllegalArgumentException(
                    "Macro in capturing group " + groupCount
                            + "; only groups up to " + MAX_MASKED_GROUP
                            + " are supported");
        }
        return 1 << groupCount;
    }

    /**
     * Expands the body of a shouldLowerCase / mustLowerCase macro.
     * <p>
     * On entry {@link #field} is at the macro name and "(" has already been
     * appended to the output. The body is copied as a non-capturing group and
     * followed by a capturing alternative that accepts the same text in
     * either letter case. Non-capturing groups may be nested in the body;
     * capturing groups may not.
     */
    private void addLowerCase() {
        int sqCount = 0;
        // The macro's own parenthesis counts as depth 1; the body ends when
        // it is closed.
        int parenDepth = 1;
        field++;
        if (field >= split.length || classify[field] != OPEN_PAREN) {
            throw new IllegalArgumentException(split[field - 1] + " macro syntax error");
        }
        field++;
        rslt.append("?:(?:"); // make group non-capturing.
        StringBuffer caseInsensitiveEx = new StringBuffer();
        while (parenDepth > 0) {
            if (field >= split.length) {
                throw new IllegalArgumentException(
                        "Internal IRI code error. Did not find CLOSE_PAREN in addLowerCase().");
            }
            switch (classify[field]) {
            case OPEN_SQ:
                sqCount++;
                caseInsensitiveEx.append('[');
                break;
            case CLOSE_SQ:
                sqCount--;
                caseInsensitiveEx.append(']');
                break;
            case LOWER_CASE_RANGE:
                if (sqCount == 0) {
                    // Not a range at all, just letters around a hyphen.
                    makeCaseInsensitive(caseInsensitiveEx, false);
                } else {
                    // x-y inside a class becomes x-yX-Y.
                    caseInsensitiveEx.append(split[field]);
                    caseInsensitiveEx.append((char) (split[field].charAt(0) - 'a' + 'A'));
                    caseInsensitiveEx.append('-');
                    caseInsensitiveEx.append((char) (split[field].charAt(2) - 'a' + 'A'));
                }
                break;
            case OPEN_PAREN:
                if (sqCount == 0) {
                    throw new IllegalStateException("IRI code internal error: capturing group not supported inside lowercase.");
                }
                caseInsensitiveEx.append(split[field]);
                break;
            case OPEN_NON_CAPTURING_PAREN:
                // Parentheses inside a character class are literals.
                if (sqCount <= 0) {
                    parenDepth++;
                }
                caseInsensitiveEx.append(split[field]);
                break;
            case CLOSE_PAREN:
                if (sqCount <= 0) {
                    parenDepth--;
                }
                caseInsensitiveEx.append(split[field]);
                break;
            case CLOSE_BRACE:
                caseInsensitiveEx.append(split[field]);
                break;
            case OTHER:
                makeCaseInsensitive(caseInsensitiveEx, sqCount > 0);
                break;
            default:
                throw new IllegalStateException("IRI code internal error.");
            }
            add();
        }
        if (field >= split.length || classify[field] != CLOSE_BRACE) {
            throw new IllegalArgumentException("case macro syntax error");
        }
        field++;
        rslt.append("|("); // start capturing group
        rslt.append(caseInsensitiveEx);
        rslt.append(")");
    }

    /**
     * Appends the current entry to <code>caseInsensitiveEx</code> with every
     * letter a-z replaced by something matching either case of that letter.
     * All other characters are copied unchanged, and a backslash escape is
     * copied as a unit so that its letter is not rewritten.
     *
     * @param caseInsensitiveEx the expression being built
     * @param insideCharClass whether the entry sits inside square brackets,
     *        where both cases are simply listed instead of being wrapped in
     *        a class of their own
     */
    private void makeCaseInsensitive(StringBuffer caseInsensitiveEx, boolean insideCharClass) {
        String chunk = split[field];
        for (int i = 0; i < chunk.length(); i++) {
            char c = chunk.charAt(i);
            if (c == '\\' && i + 1 < chunk.length()) {
                caseInsensitiveEx.append(c);
                i++;
                caseInsensitiveEx.append(chunk.charAt(i));
            } else if (c >= 'a' && c <= 'z') {
                char upper = (char) (c - 'a' + 'A');
                if (insideCharClass) {
                    caseInsensitiveEx.append(c);
                    caseInsensitiveEx.append(upper);
                } else {
                    caseInsensitiveEx.append('[');
                    caseInsensitiveEx.append(c);
                    caseInsensitiveEx.append(upper);
                    caseInsensitiveEx.append(']');
                }
            } else {
                caseInsensitiveEx.append(c);
            }
        }
    }

    /**
     * Expands a host macro. On entry {@link #field} is at the macro name and
     * "(" has already been appended to the output.
     */
    private void addHost() {
        hostNames |= groupBit();
        field++;
        if (field >= split.length || classify[field] != CLOSE_BRACE) {
            throw new IllegalArgumentException("host macro syntax error");
        }
        // pattern for host name. A sequence of chars that are not reserved.
        // or an IP v6 or future address which starts and ends with [ ] and may
        // include :.
        rslt.append("[^\\[\\]:/?#@!$&'()*+,;=]*|\\[[^\\[\\]/?#@!$&'()*+,;=]*\\])");
        field++;
    }

    /** Copies the current entry to the output unchanged and moves on. */
    private void add() {
        rslt.append(split[field]);
        field++;
    }

    /**
     * @return the text of the generated regular expression
     */
    public String toString() {
        return pattern.pattern();
    }
}
/*
* (c) Copyright 2005 Hewlett-Packard Development Company, LP All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. 2. Redistributions in
* binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other
* materials provided with the distribution. 3. The name of the author may not
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/