Package org.dspace.content.authority

Source Code of org.dspace.content.authority.LCNameAuthority

/*
* LCNameAuthority.java
*
* Version: $Revision: 3705 $
*
* Date: $Date: 2009-04-11 13:02:24 -0400 (Sat, 11 Apr 2009) $
*
* Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.content.authority;

import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.FileReader;
import java.io.BufferedReader;
import java.util.Enumeration;
import java.util.List;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXParseException;

import org.apache.log4j.Logger;

import org.dspace.core.ConfigurationManager;
import org.dspace.content.DCPersonName;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.util.EncodingUtil;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.HttpException;

/**
* Sample personal name authority based on the Library of Congress Name
* Authority file.  Also serves as an example of using an SRU client as an
* authority.
*
* This is tuned for the data in the LC Name Authority test instance, see
* http://alcme.oclc.org/srw/search/lcnaf
*
* WARNING: This is just a proof-of-concept implementation.  It would need
* WARNING: lots of refinement to be used in production, because it is very
* WARNING: sloppy about digging through the MARC/XML results.  No doubt
* WARNING: it is losing a lot of valid results and information.
* WARNING: Could also do a better job including more info (title, life dates
* WARNING: etc) in the label instead of just the name.
*
* Reads these DSpace Config properties:
*
*      lcname.url = http://alcme.oclc.org/srw/search/lcnaf
*
*  TODO: make # of results to ask for (and return) configurable.
*
* @author Larry Stone
* @version $Revision $
*/
public class LCNameAuthority implements ChoiceAuthority
{
    private static Logger log = Logger.getLogger(LCNameAuthority.class);

    // get these from configuration
    private static String url = null;

    // NS URI for SRU respones
    private static final String NS_SRU = "http://www.loc.gov/zing/srw/";

    // NS URI for MARC/XML
    private static final String NS_MX = "http://www.loc.gov/MARC21/slim";

    // constructor does static init too..
    public LCNameAuthority()
    {
        if (url == null)
        {
            url = ConfigurationManager.getProperty("lcname.url");

            // sanity check
            if (url == null)
                throw new IllegalStateException("Missing DSpace configuration keys for LCName Query");
        }
    }

    // punt!  this is a poor implementation..
    public Choices getBestMatch(String text, int collection, String locale)
    {
        return getMatches(text, collection, 0, 2, locale);
    }

    /**
     * Match a proposed value against name authority records
     * Value is assumed to be in "Lastname, Firstname" format.
     */
    public Choices getMatches(String text, int collection, int start, int limit, String locale)
    {
        boolean error = false;
        Choices result = queryPerson(text, start, limit);
        if (result == null)
            result = new Choices(true);
        return result;
    }

    // punt; supposed to get the canonical display form of a metadata authority key
    // XXX FIXME implement this with a query on the authority key, cache results
    public String getLabel(String key, String locale)
    {
        return key;
    }

    /**
     * Guts of the implementation, returns a complete Choices result, or
     * null for a failure.
     */
    private Choices queryPerson(String text, int start, int limit)
    {
        // punt if there is no query text
        if (text == null || text.trim().length() == 0)
            return new Choices(true);

        // 1. build CQL query
        DCPersonName pn = new DCPersonName(text);
        StringBuilder query = new StringBuilder();
        query.append("local.FirstName = \"").append(pn.getFirstNames()).
          append("\" and local.FamilyName = \"").append(pn.getLastName()).
          append("\"");

        // XXX arbitrary default limit - should be configurable?
        if (limit == 0)
            limit = 50;

        NameValuePair args[] = new NameValuePair[6];
        args[0] = new NameValuePair("operation", "searchRetrieve");
        args[1] = new NameValuePair("version", "1.1");
        args[2] = new NameValuePair("recordSchema", "info:srw/schema/1/marcxml-v1.1");
        args[3] = new NameValuePair("query", query.toString());
        args[4] = new NameValuePair("maximumRecords", String.valueOf(limit));
        args[5] = new NameValuePair("startRecord", String.valueOf(start+1));
        HttpClient hc = new HttpClient();
        String srUrl = url + "?" + EncodingUtil.formUrlEncode(args, "UTF8");
        GetMethod get = new GetMethod(srUrl);

        log.debug("Trying SRU query, URL="+srUrl);

        // 2. web request
        try
        {
            int status = hc.executeMethod(get);
            if (status == 200)
            {
                SAXParserFactory spf = SAXParserFactory.newInstance();
                SAXParser sp = spf.newSAXParser();
                XMLReader xr = sp.getXMLReader();
                SRUHandler handler = new SRUHandler();

                // XXX FIXME: should turn off validation here explicitly, but
                //  it seems to be off by default.
                xr.setFeature("http://xml.org/sax/features/namespaces", true);
                xr.setContentHandler(handler);
                xr.setErrorHandler(handler);
                xr.parse(new InputSource(get.getResponseBodyAsStream()));

                // this probably just means more results available..
                if (handler.hits != handler.result.size())
                    log.warn("Discrepency in results, result.length="+handler.result.size()+
                          ", yet expected results="+handler.hits);
                boolean more = handler.hits > (start + handler.result.size());

                // XXX add non-auth option; perhaps the UI should do this?
                // XXX it's really a policy matter if they allow unauth result.
                   // XXX good, stop it.
                // handler.result.add(new Choice("", text, "Non-Authority: \""+text+"\""));

                int confidence;
                if (handler.hits == 0)
                    confidence = Choices.CF_NOTFOUND;
                else if (handler.hits == 1)
                    confidence = Choices.CF_UNCERTAIN;
                else
                    confidence = Choices.CF_AMBIGUOUS;
                return new Choices(handler.result.toArray(new Choice[handler.result.size()]),
                                   start, handler.hits, confidence, more);
            }
        }
        catch (HttpException e)
        {
            log.error("SRU query failed: ", e);
            return new Choices(true);
        }
        catch (IOException e)
        {
            log.error("SRU query failed: ", e);
            return new Choices(true);
        }
        catch (ParserConfigurationException  e)
        {
            log.warn("Failed parsing SRU result: ", e);
            return new Choices(true);
        }
        catch (SAXException  e)
        {
            log.warn("Failed parsing SRU result: ", e);
            return new Choices(true);
        }
        finally
        {
            get.releaseConnection();
        }
        return new Choices(true);
    }

    /**
     * XXX FIXME TODO: Very sloppy MARC/XML parser.
     * This only reads subfields 010.a (for LCCN, to use as key)
     * and 100.a (for "established personal name")
     * Maybe look at Indicator on 100 too.
     * Should probably read other 100 subfields to build a more detailed label.
     */
    private static class SRUHandler
        extends DefaultHandler
    {
        private List<Choice> result = new ArrayList<Choice>();
        private int hits = -1;
        private String textValue = null;
        private String name = null;
        private String oname = null;
        private String bname = null;
        private String lccn = null;
        private String lastTag = null;
        private String lastCode = null;

        // NOTE:  text value MAY be presented in multiple calls, even if
        // it all one word, so be ready to splice it together.
        // BEWARE:  subclass's startElement method should call super()
        // to null out 'value'.  (Don't you miss the method combination
        // options of a real object system like CLOS?)
        public void characters(char[] ch, int start, int length)
            throws SAXException
        {
            String newValue = new String(ch, start, length);
            if (newValue.length() > 0)
            {
                if (textValue == null)
                    textValue = newValue;
                else
                    textValue += newValue;
            }
        }

        public void endElement(String namespaceURI, String localName,
                                 String qName)
            throws SAXException
        {
            if (localName.equals("numberOfRecords") &&
                     namespaceURI.equals(NS_SRU))
            {
                hits = Integer.parseInt(textValue.trim());
                if (hits > 0)
                {
                    name = null;
                    lccn = null;
                    log.debug("Expecting "+hits+" records in results.");
                }
            }

            // after record get next hit ready
            else if (localName.equals("record") &&
                     namespaceURI.equals(NS_SRU))
            {
                if (name != null && lccn != null)
                {
                    // HACK: many LC name entries end with ',' ...trim it.
                    if (name.endsWith(","))
                        name = name.substring(0, name.length()-1);

                    // XXX DEBUG
                    // log.debug("Got result, name="+name+", lccn="+lccn);
                    result.add(new Choice(lccn, name, name));
                }
                else
                    log.warn("Got anomalous result, at least one of these null: lccn="+lccn+", name="+name);
                name = null;
                lccn = null;
            }

            else if (localName.equals("subfield") &&
                     namespaceURI.equals(NS_MX))
            {
                if (lastTag != null && lastCode != null)
                {
                    // 010.a is lccn, "authority code"
                    if (lastTag.equals("010") && lastCode.equals("a"))
                        lccn = textValue;
                    
                    // 100.a is the personal name
                    else if (lastTag.equals("100") && lastCode.equals("a"))
                        name = textValue;

                        if (lastTag.equals("100") && lastCode.equals("d") && (name != null))
                        name = name+"  "+textValue;
                        
                }
            }
        }

        // subclass overriding this MUST call it with super()
        public void startElement(String namespaceURI, String localName,
                                 String qName, Attributes atts)
            throws SAXException
        {
            textValue = null;

            if (localName.equals("datafield") &&
                     namespaceURI.equals(NS_MX))
            {
                lastTag = atts.getValue("tag");
                if (lastTag == null)
                    log.warn("MARC datafield without tag attribute!");
            }
            else if (localName.equals("subfield") &&
                     namespaceURI.equals(NS_MX))
            {
                lastCode = atts.getValue("code");
                if (lastCode == null)
                    log.warn("MARC subfield without code attribute!");
            }
        }

        public void error(SAXParseException exception)
            throws SAXException
        {
            throw new SAXException(exception);
        }

        public void fatalError(SAXParseException exception)
            throws SAXException
        {
            throw new SAXException(exception);
        }
    }
}
TOP

Related Classes of org.dspace.content.authority.LCNameAuthority

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.