/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package org.fcrepo.common;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import org.fcrepo.common.rdf.SimpleURIReference;
/**
 * A persistent identifier for Fedora digital objects.
 *
 * <p>The following describes the syntactic constraints for PIDs in normalized
 * form. The only differences with non-normalized PIDs are that the colon
 * delimiter may be encoded as "%3a" or "%3A", and hex-digits may use lowercase
 * [a-f].
 *
 * <pre>
 * PID:
 *   Length : maximum 64
 *   Syntax : namespace-id ":" object-id
 *
 * namespace-id:
 *   Syntax : ( [A-Z] / [a-z] / [0-9] / "-" / "." ) 1+
 *
 * object-id:
 *   Syntax : ( [A-Z] / [a-z] / [0-9] / "-" / "." / "~" / "_" / escaped-octet ) 1+
 *
 * escaped-octet:
 *   Syntax : "%" hex-digit hex-digit
 *
 * hex-digit:
 *   Syntax : [0-9] / [A-F]
 * </pre>
 *
 * <p>Instances compare equal (and hash) by their normalized string form.
 *
 * @author Chris Wilper
 */
public class PID {

    /** The maximum length of a PID is 64. */
    public static final int MAX_LENGTH = 64;

    /** The reserved handle namespace id. */
    public static final String NS_HANDLE = "hdl";

    /** Normalized form, "namespace-id:object-id". */
    private final String m_normalized;

    /** The part of the normalized form before the colon. */
    private final String m_namespaceId;

    /** The part of the normalized form after the colon. */
    private final String m_objectId;

    /** Cached result of {@link #toFilename()}; null until first requested. */
    private String m_filename;

    /**
     * Construct a PID from a string, throwing a MalformedPIDException if it's
     * not well-formed.
     *
     * <p>If the string starts with the Fedora URI prefix
     * ({@code Constants.FEDORA.uri}), that prefix is stripped before the
     * remainder is normalized.
     *
     * @param pidString the PID, optionally in URI form
     * @throws MalformedPIDException if the string is null or is not a
     *         syntactically valid PID
     */
    public PID(String pidString)
            throws MalformedPIDException {
        // A null value is left for normalize() to reject, so callers get a
        // MalformedPIDException rather than a NullPointerException.
        if (pidString != null && pidString.startsWith(Constants.FEDORA.uri)) {
            pidString = pidString.substring(Constants.FEDORA.uri.length());
        }
        m_normalized = normalize(pidString);
        // normalize() guarantees exactly one ':' with a non-empty part on
        // each side of it.
        int delimiter = m_normalized.indexOf(':');
        m_namespaceId = m_normalized.substring(0, delimiter);
        m_objectId = m_normalized.substring(delimiter + 1);
    }

    /**
     * Alternate constructor that throws an unchecked exception if it's not
     * well-formed.
     *
     * @param pidString the PID, optionally in URI form
     * @return a new PID instance
     * @throws FaultException wrapping the MalformedPIDException if the string
     *         is not a syntactically valid PID
     */
    public static PID getInstance(String pidString) {
        try {
            return new PID(pidString);
        } catch (MalformedPIDException e) {
            throw new FaultException("Malformed PID: " + e.getMessage(), e);
        }
    }

    /**
     * Construct a PID given a filename of the form produced by toFilename(),
     * throwing a MalformedPIDException if it's not well-formed.
     *
     * @param filenameString a filename as produced by {@link #toFilename()}
     * @return the PID the filename stands for
     * @throws MalformedPIDException if the filename is null or does not
     *         decode to a syntactically valid PID
     */
    public static PID fromFilename(String filenameString)
            throws MalformedPIDException {
        if (filenameString == null) {
            throw new MalformedPIDException("PID filename is null.");
        }
        // Reverse of toFilename(): the first underscore stands for the colon
        // (a namespace-id cannot contain '_'), and a trailing '%' stands for
        // a trailing dot.
        String decoded = filenameString.replaceFirst("_", ":");
        if (decoded.endsWith("%")) {
            decoded = decoded.substring(0, decoded.length() - 1) + ".";
        }
        return new PID(decoded);
    }

    /**
     * Return the normalized form of the given pid string, or throw a
     * MalformedPIDException.
     *
     * <p>Normalization decodes an encoded delimiter ("%3a" / "%3A") to ":"
     * and upper-cases the hex digits of escaped octets in the object-id.
     *
     * @param pidString the PID string to check and normalize
     * @return the normalized PID string
     * @throws MalformedPIDException if the string is null, contains an
     *         illegal character, lacks the delimiter, has an empty
     *         namespace-id or object-id, or is longer than
     *         {@link #MAX_LENGTH} once normalized
     */
    public static String normalize(String pidString)
            throws MalformedPIDException {
        if (pidString == null) {
            throw new MalformedPIDException("PID is null.");
        }

        // Normalize while checking syntax.
        StringBuilder out = new StringBuilder(pidString.length());
        boolean inObjectID = false;
        for (int i = 0; i < pidString.length(); i++) {
            char c = pidString.charAt(i);
            if (!inObjectID) {
                if (c == ':') {
                    out.append(':');
                    inObjectID = true;
                } else if (c == '%') {
                    // next 2 chars MUST be 3[aA] (an encoded colon)
                    if (pidString.length() < i + 3) {
                        throw new MalformedPIDException("Error in PID after first '%': expected '3a' or '3A', but saw '"
                                + pidString.substring(i + 1) + "'.");
                    }
                    if (pidString.charAt(i + 1) != '3') {
                        throw new MalformedPIDException("Error in PID after first '%': expected '3a' or '3A', but saw '"
                                + pidString.substring(i + 1, i + 3) + "'.");
                    }
                    char second = pidString.charAt(i + 2);
                    if (second != 'a' && second != 'A') {
                        throw new MalformedPIDException("Error in PID after first '%': expected '3a' or '3A', but saw '3"
                                + second + "'.");
                    }
                    out.append(':');
                    inObjectID = true;
                    i += 2; // skip the two characters just consumed
                } else if (isAlphaNum(c) || c == '-' || c == '.') {
                    out.append(c);
                } else {
                    // invalid char for namespace-id
                    throw new MalformedPIDException("PID namespace-id cannot contain '"
                            + c + "' character.");
                }
            } else if (isAlphaNum(c) || c == '-' || c == '.' || c == '~'
                    || c == '_') {
                out.append(c);
            } else if (c == '%') {
                // next 2 chars MUST be hex digits: [0-9], [a-f] or [A-F]
                if (pidString.length() < i + 3) {
                    throw new MalformedPIDException("PID object-id ended early: need at least 2 chars after '%'.");
                }
                char h1 = getNormalizedHexChar(pidString.charAt(++i));
                char h2 = getNormalizedHexChar(pidString.charAt(++i));
                out.append('%').append(h1).append(h2);
            } else {
                throw new MalformedPIDException("PID object-id cannot contain '"
                        + c + "' character.");
            }
        }

        if (!inObjectID) {
            throw new MalformedPIDException("PID delimiter (:) is missing.");
        }
        String outString = out.toString();
        if (outString.startsWith(":")) {
            throw new MalformedPIDException("PID namespace-id cannot be empty.");
        }
        // The object-id cannot itself contain ':', so a trailing colon can
        // only be the delimiter with nothing after it. (A bare length check
        // would miss namespaces longer than one character, e.g. "demo:".)
        if (outString.endsWith(":")) {
            throw new MalformedPIDException("PID object-id cannot be empty.");
        }
        if (outString.length() > MAX_LENGTH) {
            throw new MalformedPIDException("PID length exceeds " + MAX_LENGTH
                    + ".");
        }

        // If we got here, it's well-formed, so return it.
        return outString;
    }

    /**
     * Tell whether the character is an ASCII letter or digit.
     */
    private static boolean isAlphaNum(char c) {
        return (c >= '0' && c <= '9')
                || (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z');
    }

    /**
     * Return the given hex digit in normalized (upper-case) form.
     *
     * @throws MalformedPIDException if the character is not a hex digit
     */
    private static char getNormalizedHexChar(char c)
            throws MalformedPIDException {
        if (c >= '0' && c <= '9') {
            return c;
        }
        char upper = Character.toUpperCase(c);
        if (upper >= 'A' && upper <= 'F') {
            return upper;
        }
        throw new MalformedPIDException("Bad hex-digit in PID object-id: " + upper);
    }

    /**
     * Return the normalized form of this PID.
     */
    @Override
    public String toString() {
        return m_normalized;
    }

    /**
     * Return the URI form of this PID. This is just the PID, prepended with
     * "info:fedora/".
     */
    public String toURI() {
        return Constants.FEDORA.uri + m_normalized;
    }

    /**
     * Return the URI form of some PID string, assuming it is well-formed.
     * The string is neither validated nor normalized.
     *
     * @param pidString a well-formed PID string
     * @return the PID string prepended with the Fedora URI prefix
     */
    public static String toURI(String pidString) {
        return Constants.FEDORA.uri + pidString;
    }

    /**
     * Return a URIReference of some PID string, assuming it is well-formed.
     *
     * @param pidString a well-formed PID string
     * @return a reference to the URI form of the PID
     * @throws Error if the resulting URI is not syntactically valid, which
     *         cannot happen for a well-formed PID
     */
    public static SimpleURIReference toURIReference(String pidString) {
        try {
            return new SimpleURIReference(new URI(toURI(pidString)));
        } catch (URISyntaxException e) {
            // assumes pid is well-formed
            throw new Error(e);
        }
    }

    /**
     * Return a string representing this PID that can be safely used as a
     * filename on any OS.
     * <ul>
     * <li> The colon (:) is replaced with an underscore (_).</li>
     * <li> A trailing dot is encoded as a percent (%).</li>
     * </ul>
     *
     * @return the filename form of this PID
     */
    public String toFilename() {
        if (m_filename == null) { // lazily convert, since not always needed
            String filename = m_normalized.replace(':', '_');
            if (filename.endsWith(".")) {
                filename = filename.substring(0, filename.length() - 1) + "%";
            }
            m_filename = filename;
        }
        return m_filename;
    }

    /**
     * Return the namespace-id, the part of the normalized PID before the
     * colon.
     */
    public String getNamespaceId() {
        return m_namespaceId;
    }

    /**
     * Return the object-id, the part of the normalized PID after the colon.
     */
    public String getObjectId() {
        return m_objectId;
    }

    /**
     * Two PIDs are equal when their normalized string forms are equal.
     */
    @Override
    public boolean equals(Object o) {
        return o instanceof PID && m_normalized.equals(((PID) o).toString());
    }

    /**
     * Hash of the normalized string form, consistent with
     * {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return m_normalized.hashCode();
    }

    /**
     * Command-line interactive tester. If one arg given, prints normalized form
     * of that PID and exits. If no args, enters interactive mode, which ends
     * on an empty line or at end of input.
     */
    public static void main(String[] args) throws Exception {
        if (args.length > 0) {
            printForms(new PID(args[0]));
            return;
        }
        System.out.println("--------------------------------------");
        System.out.println("PID Syntax Checker - Interactive mode");
        System.out.println("--------------------------------------");
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(System.in));
        boolean done = false;
        while (!done) {
            try {
                System.out.print("Enter a PID (ENTER to exit): ");
                String line = reader.readLine();
                // readLine() returns null at end of input; treat it like an
                // empty line instead of failing on it.
                if (line == null || line.equals("")) {
                    done = true;
                } else {
                    printForms(new PID(line));
                }
            } catch (MalformedPIDException e) {
                System.out.println("ERROR: " + e.getMessage());
            }
        }
    }

    /**
     * Print the normalized form, the filename form, and the filename form
     * converted back to a PID.
     */
    private static void printForms(PID p) throws MalformedPIDException {
        System.out.println("Normalized : " + p.toString());
        System.out.println("To filename : " + p.toFilename());
        System.out.println("From filename : "
                + PID.fromFilename(p.toFilename()).toString());
    }
}