/*
* LCNameAuthority.java
*
* Version: $Revision: 3705 $
*
* Date: $Date: 2009-04-11 13:02:24 -0400 (Sat, 11 Apr 2009) $
*
* Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.content.authority;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.FileReader;
import java.io.BufferedReader;
import java.util.Enumeration;
import java.util.List;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXParseException;
import org.apache.log4j.Logger;
import org.dspace.core.ConfigurationManager;
import org.dspace.content.DCPersonName;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.util.EncodingUtil;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.HttpException;
/**
 * Sample personal name authority based on Library of Congress Name Authority.
 * Also serves as an example of an SRU client as authority.
 *
 * This is tuned for the data in the LC Name Authority test instance, see
 * http://alcme.oclc.org/srw/search/lcnaf
 *
 * WARNING: This is just a proof-of-concept implementation.  It would need
 * WARNING: lots of refinement to be used in production, because it is very
 * WARNING: sloppy about digging through the MARC/XML results.  No doubt
 * WARNING: it is losing a lot of valid results and information.
 * WARNING: Could also do a better job including more info (title, life dates
 * WARNING: etc) in the label instead of just the name.
 *
 * Reads these DSpace Config properties:
 *
 *   lcname.url = http://alcme.oclc.org/srw/search/lcnaf
 *
 * TODO: make # of results to ask for (and return) configurable.
 *
 * @author Larry Stone
 * @version $Revision $
 */
public class LCNameAuthority implements ChoiceAuthority
{
    private static final Logger log = Logger.getLogger(LCNameAuthority.class);

    // Base URL of the SRU service; read from configuration by the constructor.
    private static String url = null;

    // NS URI for SRU responses
    private static final String NS_SRU = "http://www.loc.gov/zing/srw/";

    // NS URI for MARC/XML
    private static final String NS_MX = "http://www.loc.gov/MARC21/slim";

    // XXX arbitrary default limit - should be configurable?
    // Number of records requested when the caller passes limit == 0.
    private static final int DEFAULT_LIMIT = 50;

    /**
     * Creates the authority; the first instance created also performs the
     * static initialization of the service URL from the "lcname.url"
     * configuration property.
     *
     * @throws IllegalStateException if "lcname.url" is not configured.
     */
    public LCNameAuthority()
    {
        if (url == null)
        {
            url = ConfigurationManager.getProperty("lcname.url");

            // sanity check
            if (url == null)
            {
                throw new IllegalStateException("Missing DSpace configuration keys for LCName Query");
            }
        }
    }

    /**
     * Punt! This is a poor implementation: it simply asks getMatches()
     * for the first two matches of the given text.
     *
     * @param text       proposed name value
     * @param collection collection ID, passed through to getMatches()
     * @param locale     locale, passed through to getMatches()
     * @return the result of getMatches(text, collection, 0, 2, locale)
     */
    public Choices getBestMatch(String text, int collection, String locale)
    {
        return getMatches(text, collection, 0, 2, locale);
    }

    /**
     * Match a proposed value against name authority records.
     * Value is assumed to be in "Lastname, Firstname" format.
     *
     * @param text       proposed name value
     * @param collection collection ID (not used by this implementation)
     * @param start      zero-based index of the first match to return
     * @param limit      maximum number of matches to return; 0 selects the
     *                   built-in default
     * @param locale     locale (not used by this implementation)
     * @return the matches found, or an error Choices when the lookup failed;
     *         never null.
     */
    public Choices getMatches(String text, int collection, int start, int limit, String locale)
    {
        Choices result = queryPerson(text, start, limit);
        if (result == null)
        {
            result = new Choices(true);
        }
        return result;
    }

    /**
     * Punt; supposed to get the canonical display form of a metadata
     * authority key.
     * XXX FIXME implement this with a query on the authority key, cache results
     *
     * @param key    authority key
     * @param locale locale (not used)
     * @return the key itself, unchanged.
     */
    public String getLabel(String key, String locale)
    {
        return key;
    }

    /**
     * Guts of the implementation: builds the CQL query, sends it to the SRU
     * service and parses the MARC/XML answer.
     *
     * @param text  name to look up
     * @param start zero-based index of the first record wanted
     * @param limit maximum number of records wanted, 0 for the default
     * @return a complete Choices result, or an error Choices (built with
     *         new Choices(true)) when the text is blank, the HTTP request
     *         fails, or the response cannot be parsed.
     */
    private Choices queryPerson(String text, int start, int limit)
    {
        // punt if there is no query text
        if (text == null || text.trim().length() == 0)
        {
            return new Choices(true);
        }

        // 1. build CQL query; the name parts come from user input, so they
        //    are escaped before being placed inside the quoted terms.
        DCPersonName pn = new DCPersonName(text);
        StringBuilder query = new StringBuilder();
        query.append("local.FirstName = \"").append(escapeCql(pn.getFirstNames())).
              append("\" and local.FamilyName = \"").append(escapeCql(pn.getLastName())).
              append("\"");

        if (limit == 0)
        {
            limit = DEFAULT_LIMIT;
        }

        NameValuePair args[] = new NameValuePair[6];
        args[0] = new NameValuePair("operation", "searchRetrieve");
        args[1] = new NameValuePair("version", "1.1");
        args[2] = new NameValuePair("recordSchema", "info:srw/schema/1/marcxml-v1.1");
        args[3] = new NameValuePair("query", query.toString());
        args[4] = new NameValuePair("maximumRecords", String.valueOf(limit));
        // SRU record positions are 1-based, our 'start' is 0-based.
        args[5] = new NameValuePair("startRecord", String.valueOf(start + 1));

        HttpClient hc = new HttpClient();
        String srUrl = url + "?" + EncodingUtil.formUrlEncode(args, "UTF-8");
        GetMethod get = new GetMethod(srUrl);

        log.debug("Trying SRU query, URL="+srUrl);

        // 2. web request
        try
        {
            int status = hc.executeMethod(get);
            if (status != 200)
            {
                log.warn("SRU query returned unexpected HTTP status="+status+", URL="+srUrl);
                return new Choices(true);
            }

            InputStream body = get.getResponseBodyAsStream();
            if (body == null)
            {
                log.warn("SRU query returned no response body, URL="+srUrl);
                return new Choices(true);
            }

            SAXParserFactory spf = SAXParserFactory.newInstance();
            SAXParser sp = spf.newSAXParser();
            XMLReader xr = sp.getXMLReader();
            SRUHandler handler = new SRUHandler();

            // XXX FIXME: should turn off validation here explicitly, but
            //  it seems to be off by default.
            xr.setFeature("http://xml.org/sax/features/namespaces", true);

            // The document comes from a remote server: do not let it make
            // the parser fetch external entities or DTDs (XXE / SSRF).
            disableFeature(xr, "http://xml.org/sax/features/external-general-entities");
            disableFeature(xr, "http://xml.org/sax/features/external-parameter-entities");
            disableFeature(xr, "http://apache.org/xml/features/nonvalidating/load-external-dtd");

            xr.setContentHandler(handler);
            xr.setErrorHandler(handler);
            xr.parse(new InputSource(body));

            int found = handler.result.size();
            int total = handler.hits;
            if (total < 0)
            {
                // The response carried no numberOfRecords element; fall back
                // to what was actually seen instead of reporting -1 records.
                log.warn("SRU response did not report numberOfRecords, result.length="+found);
                total = start + found;
            }
            else if (total != found)
            {
                // this probably just means more results available..
                log.warn("Discrepency in results, result.length="+found+
                         ", yet expected results="+total);
            }
            boolean more = total > (start + found);

            // XXX add non-auth option; perhaps the UI should do this?
            // XXX it's really a policy matter if they allow unauth result.
            // XXX good, stop it.
            // handler.result.add(new Choice("", text, "Non-Authority: \""+text+"\""));

            int confidence;
            if (total == 0)
            {
                confidence = Choices.CF_NOTFOUND;
            }
            else if (total == 1)
            {
                confidence = Choices.CF_UNCERTAIN;
            }
            else
            {
                confidence = Choices.CF_AMBIGUOUS;
            }
            return new Choices(handler.result.toArray(new Choice[found]),
                               start, total, confidence, more);
        }
        catch (HttpException e)
        {
            log.error("SRU query failed: ", e);
            return new Choices(true);
        }
        catch (IOException e)
        {
            log.error("SRU query failed: ", e);
            return new Choices(true);
        }
        catch (ParserConfigurationException e)
        {
            log.warn("Failed parsing SRU result: ", e);
            return new Choices(true);
        }
        catch (SAXException e)
        {
            log.warn("Failed parsing SRU result: ", e);
            return new Choices(true);
        }
        finally
        {
            // always hand the connection back, on success and on failure
            get.releaseConnection();
        }
    }

    /**
     * Escapes a value for use inside a double-quoted CQL term by putting a
     * backslash in front of every double quote and backslash.
     *
     * @param term raw value; null is treated as the empty string so that it
     *             is not rendered as the literal text "null" in the query
     * @return the escaped value, never null.
     */
    private static String escapeCql(String term)
    {
        if (term == null)
        {
            return "";
        }
        StringBuilder escaped = new StringBuilder(term.length() + 8);
        for (int i = 0; i < term.length(); i++)
        {
            char c = term.charAt(i);
            if (c == '"' || c == '\\')
            {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /**
     * Best-effort attempt to switch a parser feature off.  A parser that
     * does not know or support the feature is left as it is, and the fact
     * is logged, so the query still goes ahead.
     *
     * @param xr      reader to configure
     * @param feature feature URI to set to false
     */
    private static void disableFeature(XMLReader xr, String feature)
    {
        try
        {
            xr.setFeature(feature, false);
        }
        catch (SAXException e)
        {
            // covers both "not recognized" and "not supported"
            log.warn("XML parser would not disable feature "+feature+": "+e.getMessage());
        }
    }

    /**
     * XXX FIXME TODO: Very sloppy MARC/XML parser.
     * This only reads subfields 010.a (for LCCN, to use as key)
     * and 100.a (for "established personal name"), appending 100.d to the
     * name when it follows.
     * Maybe look at Indicator on 100 too.
     * Should probably read other 100 subfields to build a more detailed label.
     */
    private static class SRUHandler
        extends DefaultHandler
    {
        // one Choice per record that had both an LCCN and a name
        private List<Choice> result = new ArrayList<Choice>();

        // value of the SRU numberOfRecords element, -1 until it is seen
        private int hits = -1;

        // character data collected since the last start tag, null if none
        private StringBuilder textValue = null;

        // name and LCCN of the record currently being read
        private String name = null;
        private String lccn = null;

        // tag of the enclosing datafield and code of the current subfield
        private String lastTag = null;
        private String lastCode = null;

        // NOTE:  text value MAY be presented in multiple calls, even if
        // it all one word, so be ready to splice it together.
        @Override
        public void characters(char[] ch, int start, int length)
            throws SAXException
        {
            if (length > 0)
            {
                if (textValue == null)
                {
                    textValue = new StringBuilder();
                }
                textValue.append(ch, start, length);
            }
        }

        @Override
        public void endElement(String namespaceURI, String localName,
                                 String qName)
            throws SAXException
        {
            if (localName.equals("numberOfRecords") &&
                namespaceURI.equals(NS_SRU))
            {
                String raw = (textValue == null) ? "" : textValue.toString().trim();
                try
                {
                    hits = Integer.parseInt(raw);
                }
                catch (NumberFormatException e)
                {
                    // report as a parse failure so the caller's SAXException
                    // handling turns it into an error result
                    throw new SAXException("Malformed numberOfRecords in SRU response: \""+raw+"\"", e);
                }
                if (hits > 0)
                {
                    name = null;
                    lccn = null;
                    log.debug("Expecting "+hits+" records in results.");
                }
            }

            // after record get next hit ready
            else if (localName.equals("record") &&
                     namespaceURI.equals(NS_SRU))
            {
                if (name != null && lccn != null)
                {
                    // HACK: many LC name entries end with ',' ...trim it.
                    if (name.endsWith(","))
                    {
                        name = name.substring(0, name.length()-1);
                    }

                    // XXX DEBUG
                    // log.debug("Got result, name="+name+", lccn="+lccn);
                    result.add(new Choice(lccn, name, name));
                }
                else
                {
                    log.warn("Got anomalous result, at least one of these null: lccn="+lccn+", name="+name);
                }
                name = null;
                lccn = null;
            }

            else if (localName.equals("subfield") &&
                     namespaceURI.equals(NS_MX))
            {
                // An empty subfield carries nothing usable; skipping it keeps
                // the literal text "null" out of keys and labels.
                if (lastTag != null && lastCode != null && textValue != null)
                {
                    String value = textValue.toString();

                    // 010.a is lccn, "authority code"
                    if (lastTag.equals("010") && lastCode.equals("a"))
                    {
                        lccn = value;
                    }
                    // 100.a is the personal name
                    else if (lastTag.equals("100") && lastCode.equals("a"))
                    {
                        name = value;
                    }
                    // 100.d is appended to a name that was already read
                    else if (lastTag.equals("100") && lastCode.equals("d") && (name != null))
                    {
                        name = name+" "+value;
                    }
                }
            }
        }

        // subclass overriding this MUST call it with super(), because it
        // is what discards the text collected for the previous element.
        @Override
        public void startElement(String namespaceURI, String localName,
                                 String qName, Attributes atts)
            throws SAXException
        {
            textValue = null;

            if (localName.equals("datafield") &&
                namespaceURI.equals(NS_MX))
            {
                lastTag = atts.getValue("tag");
                if (lastTag == null)
                {
                    log.warn("MARC datafield without tag attribute!");
                }
            }
            else if (localName.equals("subfield") &&
                     namespaceURI.equals(NS_MX))
            {
                lastCode = atts.getValue("code");
                if (lastCode == null)
                {
                    log.warn("MARC subfield without code attribute!");
                }
            }
        }

        // recoverable parser errors are escalated so the query fails cleanly
        @Override
        public void error(SAXParseException exception)
            throws SAXException
        {
            throw new SAXException(exception);
        }

        @Override
        public void fatalError(SAXParseException exception)
            throws SAXException
        {
            throw new SAXException(exception);
        }
    }
}