/*
* PREMISCrosswalk.java
*
* Version: $Revision: 3761 $
*
* Date: $Date: 2009-05-07 04:18:02 +0000 (Thu, 07 May 2009) $
*
* Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.content.crosswalk;
import java.io.IOException;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.FormatIdentifier;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.jdom.Element;
import org.jdom.Namespace;
/**
 * PREMIS Crosswalk
 * <p>
 * Translate between DSpace Bitstream properties and PREMIS metadata format
 * (see <a href="http://www.oclc.org/research/projects/pmwg/">
 * http://www.oclc.org/research/projects/pmwg/</a> for details).
 * This is intended to implement the requirements of the DSpace METS SIP
 * specification for both ingest and dissemination.
 * <p>
 * Both directions only operate on Bitstream objects; any other kind of
 * DSpaceObject is rejected with {@link CrosswalkObjectNotSupported}.
 *
 * @author Larry Stone
 * @version $Revision: 3761 $
 */
public class PREMISCrosswalk
    implements IngestionCrosswalk, DisseminationCrosswalk
{
    /** log4j category */
    private static final Logger log = Logger.getLogger(PREMISCrosswalk.class);

    private static final Namespace PREMIS_NS =
        Namespace.getNamespace("premis", "http://www.loc.gov/standards/premis");

    // XML schemaLocation fragment for this crosswalk (fixed value).
    private static final String SCHEMA_LOCATION =
        PREMIS_NS.getURI()+" http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd";

    private static final Namespace XLINK_NS =
        Namespace.getNamespace("xlink", "http://www.w3.org/TR/xlink");

    private static final Namespace namespaces[] = { PREMIS_NS };

    /*----------- Submission functions -------------------*/

    /**
     * Ingest a PREMIS document whose root element is named "premis" by
     * crosswalking the root's children onto the target object.
     *
     * @param context DSpace context, passed through to the list variant.
     * @param dso target object; must be a Bitstream.
     * @param root root element of the PREMIS document.
     * @throws MetadataValidationException if the root element is not named
     *         "premis", or the metadata contradicts the bitstream.
     * @throws CrosswalkObjectNotSupported if dso is not a Bitstream.
     */
    public void ingest(Context context, DSpaceObject dso, Element root)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        if (!(root.getName().equals("premis")))
        {
            throw new MetadataValidationException("Wrong root element for PREMIS: "+root.toString());
        }
        ingest(context, dso, root.getChildren());
    }

    /**
     * Ingest a list of PREMIS elements into a Bitstream.
     * <p>
     * For each "object" element: the originalName (if any) becomes the
     * bitstream name, the declared size and fixity are checked against the
     * bitstream, and the bitstream format is set from the declared
     * formatName (treated as a MIME type) or, failing that, guessed by
     * {@link FormatIdentifier}. A nested "premis" element is processed
     * recursively; all other elements are skipped. The bitstream is
     * updated once the list has been processed.
     *
     * @param context DSpace context used for format lookups.
     * @param dso target object; must be a Bitstream.
     * @param ml list of JDOM Elements to crosswalk.
     * @throws CrosswalkObjectNotSupported if dso is not a Bitstream.
     * @throws MetadataValidationException if the PREMIS size is not a
     *         number, or size or checksum disagree with the bitstream.
     */
    public void ingest(Context context, DSpaceObject dso, List ml)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        // we only understand how to crosswalk PREMIS to a Bitstream.
        if (dso.getType() != Constants.BITSTREAM)
        {
            throw new CrosswalkObjectNotSupported("Wrong target object type, PREMISCrosswalk can only crosswalk to a Bitstream.");
        }

        Bitstream bitstream = (Bitstream)dso;
        // Both values deliberately persist across multiple "object" elements.
        String MIMEType = null;
        String bsName = null;

        Iterator mi = ml.iterator();
        while (mi.hasNext())
        {
            Element me = (Element)mi.next();
            String elementName = me.getName();

            // if we're fed a <premis> wrapper object, recurse on its guts:
            if (elementName.equals("premis"))
            {
                ingest(context, dso, me.getChildren());
            }
            // "object" section:
            else if (elementName.equals("object"))
            {
                // originalName becomes the new bitstream name
                Element on = me.getChild("originalName", PREMIS_NS);
                if (on != null)
                {
                    bsName = on.getTextTrim();
                }

                // Reconcile technical metadata with bitstream content;
                // check that length and message digest (checksum) match.
                // XXX FIXME: wait for Checksum Checker code to add better test.
                Element oc = me.getChild("objectCharacteristics", PREMIS_NS);
                if (oc != null)
                {
                    String ssize = oc.getChildTextTrim("size", PREMIS_NS);
                    if (ssize != null)
                    {
                        checkSize(bitstream, ssize);
                    }

                    Element fixity = oc.getChild("fixity", PREMIS_NS);
                    if (fixity != null)
                    {
                        checkFixity(bitstream, fixity);
                    }

                    // Look for formatDesignation/formatName, which is
                    // MIME Type.  Match with DSpace bitstream format.
                    Element format = oc.getChild("format", PREMIS_NS);
                    if (format != null)
                    {
                        Element fd = format.getChild("formatDesignation", PREMIS_NS);
                        if (fd != null)
                        {
                            MIMEType = fd.getChildTextTrim("formatName", PREMIS_NS);
                        }
                    }
                }

                // Apply new bitstream name if we found it.
                if (bsName != null)
                {
                    bitstream.setName(bsName);
                    log.debug("Changing bitstream id="+String.valueOf(bitstream.getID())+" name to: "+bsName);
                }

                // reconcile bitstream format; if there's a MIMEtype,
                // get it from that, otherwise try to divine from file extension
                // (guessFormat() looks at bitstream Name, which we just set)
                BitstreamFormat bf = (MIMEType == null) ? null :
                        BitstreamFormat.findByMIMEType(context, MIMEType);
                if (bf == null)
                {
                    bf = FormatIdentifier.guessFormat(context, bitstream);
                }
                if (bf != null)
                {
                    bitstream.setFormat(bf);
                }
            }
            else
            {
                log.debug("Skipping element: "+me.toString());
            }
        }
        bitstream.update();
    }

    /**
     * Verify that the size declared in PREMIS equals the bitstream's size.
     * The value is parsed as a long so that sizes beyond the int range
     * (files of 2 GiB and larger) are not rejected as malformed.
     *
     * @param bitstream bitstream being ingested.
     * @param ssize trimmed text of objectCharacteristics/size; not null.
     * @throws MetadataValidationException if ssize is not a number or does
     *         not equal the bitstream size.
     */
    private static void checkSize(Bitstream bitstream, String ssize)
        throws MetadataValidationException
    {
        long size;
        try
        {
            size = Long.parseLong(ssize);
        }
        catch (NumberFormatException ne)
        {
            throw new MetadataValidationException("Bad number value in PREMIS object/objectCharacteristics/size: "+ssize, ne);
        }

        if (bitstream.getSize() != size)
        {
            throw new MetadataValidationException(
                "Bitstream size ("+String.valueOf(bitstream.getSize())+
                ") does not match size in PREMIS ("+ssize+"), rejecting it.");
        }
    }

    /**
     * Compare the message digest in a PREMIS fixity element against the
     * bitstream's checksum. The comparison is only made when both sides
     * supply an algorithm and a digest and the algorithm names are equal;
     * otherwise a warning is logged and the bitstream is accepted.
     *
     * @param bitstream bitstream being ingested.
     * @param fixity the objectCharacteristics/fixity element; not null.
     * @throws MetadataValidationException if the algorithms match but the
     *         digests differ.
     */
    private static void checkFixity(Bitstream bitstream, Element fixity)
        throws MetadataValidationException
    {
        String alg = fixity.getChildTextTrim("messageDigestAlgorithm", PREMIS_NS);
        String md = fixity.getChildTextTrim("messageDigest", PREMIS_NS);
        String b_alg = bitstream.getChecksumAlgorithm();
        String b_md = bitstream.getChecksum();

        boolean comparable = alg != null && md != null &&
                             b_alg != null && b_md != null &&
                             alg.equals(b_alg);
        if (!comparable)
        {
            log.warn("Cannot test checksum on bitstream="+bitstream.getName()+
                     ", algorithm in PREMIS is different: "+alg);
            return;
        }

        if (!md.equals(b_md))
        {
            throw new MetadataValidationException("Bitstream "+alg+" Checksum does not match value in PREMIS ("+b_md+" != "+md+"), for bitstream: "+bitstream.getName());
        }
        log.debug("Bitstream checksum agrees with PREMIS: "+bitstream.getName());
    }

    /*----------- Dissemination functions -------------------*/

    /**
     * @return the XML namespaces used by this crosswalk (PREMIS only).
     */
    public Namespace[] getNamespaces()
    {
        return namespaces;
    }

    /**
     * @return the schemaLocation fragment: the PREMIS namespace URI
     *         followed by the location of the PREMIS v1.0 schema.
     */
    public String getSchemaLocation()
    {
        return SCHEMA_LOCATION;
    }

    /**
     * Report whether an object can be disseminated by this crosswalk.
     *
     * @param dso object to test.
     * @return true only if dso is a Bitstream.
     */
    public boolean canDisseminate(DSpaceObject dso)
    {
        //PREMISCrosswalk can only crosswalk a Bitstream
        return dso.getType() == Constants.BITSTREAM;
    }

    /**
     * Build a PREMIS "premis" element describing a Bitstream. The result
     * contains one "object" with an objectIdentifier of type "URL", the
     * fixed objectCategory "File", objectCharacteristics (fixity when a
     * checksum and algorithm are available, size, and the MIME type as
     * formatName), and originalName when the bitstream has a name or
     * source.
     *
     * @param dso object to disseminate; must be a Bitstream.
     * @return the root "premis" element.
     * @throws CrosswalkObjectNotSupported if dso is not a Bitstream.
     */
    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        if (dso.getType() != Constants.BITSTREAM)
        {
            throw new CrosswalkObjectNotSupported("PREMISCrosswalk can only crosswalk a Bitstream.");
        }
        Bitstream bitstream = (Bitstream)dso;

        Element premis = new Element("premis", PREMIS_NS);
        Element object = new Element("object", PREMIS_NS);
        premis.addContent(object);

        // objectIdentifier is required
        Element oid = new Element("objectIdentifier", PREMIS_NS);
        Element oit = new Element("objectIdentifierType", PREMIS_NS);
        oit.setText("URL");
        oid.addContent(oit);
        Element oiv = new Element("objectIdentifierValue", PREMIS_NS);
        oiv.setText(makeObjectIdentifierValue(bitstream));
        oid.addContent(oiv);
        object.addContent(oid);

        // objectCategory is fixed value, "File".
        Element oc = new Element("objectCategory", PREMIS_NS);
        oc.setText("File");
        object.addContent(oc);

        Element ochar = new Element("objectCharacteristics", PREMIS_NS);
        object.addContent(ochar);

        // checksum if available
        String cks = bitstream.getChecksum();
        String cka = bitstream.getChecksumAlgorithm();
        if (cks != null && cka != null)
        {
            Element fixity = new Element("fixity", PREMIS_NS);
            Element mda = new Element("messageDigestAlgorithm", PREMIS_NS);
            mda.setText(cka);
            fixity.addContent(mda);
            Element md = new Element("messageDigest", PREMIS_NS);
            md.setText(cks);
            fixity.addContent(md);
            ochar.addContent(fixity);
        }

        // size
        Element size = new Element("size", PREMIS_NS);
        size.setText(String.valueOf(bitstream.getSize()));
        ochar.addContent(size);

        //  Punt and set formatName to the MIME type; the best we can
        //  do for now in the absence of any usable global format registries.
        //  objectCharacteristics/format/formatDesignation/
        //     formatName <- MIME Type
        //
        Element format = new Element("format", PREMIS_NS);
        Element formatDes = new Element("formatDesignation", PREMIS_NS);
        Element formatName = new Element("formatName", PREMIS_NS);
        formatName.setText(bitstream.getFormat().getMIMEType());
        formatDes.addContent(formatName);
        format.addContent(formatDes);
        ochar.addContent(format);

        // originalName <- name (or source if none)
        String oname = bitstream.getName();
        if (oname == null)
        {
            oname = bitstream.getSource();
        }
        if (oname != null)
        {
            Element on = new Element("originalName", PREMIS_NS);
            on.setText(oname);
            object.addContent(on);
        }
        return premis;
    }

    /**
     * Compute the objectIdentifierValue for a bitstream. By preference:
     * <ol>
     *  <li>a URL built from the "dspace.url" property, the handle of the
     *      first Item of the bitstream's first Bundle, the sequence ID
     *      and the bitstream name, when both URL and handle exist;</li>
     *  <li>otherwise the URL-encoded bitstream name.</li>
     * </ol>
     * When the bitstream has no name, one is made up from the sequence ID
     * and the first extension of its format ("bitstream_SEQ.EXT").
     *
     * @param bitstream bitstream to identify.
     * @return text for the objectIdentifierValue element.
     */
    private static String makeObjectIdentifierValue(Bitstream bitstream)
        throws IOException, SQLException
    {
        String sid = String.valueOf(bitstream.getSequenceID());
        String baseUrl = ConfigurationManager.getProperty("dspace.url");
        String handle = null;

        // get handle of parent Item of this bitstream, if there is one:
        Bundle[] bn = bitstream.getBundles();
        if (bn.length > 0)
        {
            Item bi[] = bn[0].getItems();
            if (bi.length > 0)
            {
                handle = bi[0].getHandle();
            }
        }

        // get or make up name for bitstream:
        String bsName = bitstream.getName();
        if (bsName == null)
        {
            String ext[] = bitstream.getFormat().getExtensions();
            String suffix = "";
            if (ext.length > 0)
            {
                // Separate the extension from the base name with a dot,
                // unless the extension already carries one.
                suffix = ext[0].startsWith(".") ? ext[0] : "." + ext[0];
            }
            bsName = "bitstream_"+sid+suffix;
        }

        if (handle != null && baseUrl != null)
        {
            // NOTE(review): encoding the whole handle also encodes its '/'
            // separator; confirm this is what consumers of the URL expect.
            return baseUrl
                   + "/bitstream/"
                   + URLEncoder.encode(handle, "UTF-8")
                   + "/"
                   + sid
                   + "/"
                   + URLEncoder.encode(bsName, "UTF-8");
        }
        return URLEncoder.encode(bsName, "UTF-8");
    }

    /**
     * Disseminate as a list containing the single element produced by
     * {@link #disseminateElement(DSpaceObject)}.
     *
     * @param dso object to disseminate; must be a Bitstream.
     * @return a one-element list holding the "premis" element.
     */
    public List disseminateList(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        List result = new ArrayList(1);
        result.add(disseminateElement(dso));
        return result;
    }

    /**
     * @return false, always.
     */
    public boolean preferList()
    {
        return false;
    }
}