throws PackageValidationException, CrosswalkException,
AuthorizeException, SQLException, IOException
{
ZipInputStream zip = new ZipInputStream(pkg);
HashMap fileIdToBitstream = new HashMap();
WorkspaceItem wi = null;
boolean success = false;
HashSet packageFiles = new HashSet();
boolean validate = params.getBooleanProperty("validate", true);
try
{
/* 1. Read all the files in the Zip into bitstreams first,
* because we only get to take one pass through a Zip input
* stream. Give them temporary bitstream names corresponding
* to the same names they had in the Zip, since those MUST
* match the URL references in <FLocat> and <mdRef> elements.
*/
METSManifest manifest = null;
wi = WorkspaceItem.create(context, collection, useTemplate);
Item item = wi.getItem();
Bundle contentBundle = null;
Bundle mdBundle = null;
ZipEntry ze;
while ((ze = zip.getNextEntry()) != null)
{
if (ze.isDirectory())
continue;
Bitstream bs = null;
String fname = ze.getName();
if (fname.equals(MANIFEST_FILE))
{
if (preserveManifest)
{
mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME);
bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip));
bs.setName(fname);
bs.setSource(fname);
// Get magic bitstream format to identify manifest.
BitstreamFormat manifestFormat = null;
manifestFormat = PackageUtils.findOrCreateBitstreamFormat(context,
MANIFEST_BITSTREAM_FORMAT, "application/xml",
MANIFEST_BITSTREAM_FORMAT+" package manifest");
bs.setFormat(manifestFormat);
manifest = METSManifest.create(bs.retrieve(), validate);
}
else
{
manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate);
continue;
}
}
else
{
// we need to create the bundle only the first time
if (contentBundle == null)
{
contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME);
}
bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip));
bs.setSource(fname);
bs.setName(fname);
}
packageFiles.add(fname);
bs.setSource(fname);
bs.update();
}
zip.close();
if (manifest == null)
throw new PackageValidationException("No METS Manifest found (filename="+MANIFEST_FILE+"). Package is unacceptable.");
// initial sanity checks on manifest (in subclass)
checkManifest(manifest);
/* 2. Grovel a file list out of METS Manifest and compare
* it to the files in package, as an integrity test.
*/
List manifestContentFiles = manifest.getContentFiles();
// Compare manifest files with the ones found in package:
// a. Start with content files (mentioned in <fileGrp>s)
HashSet missingFiles = new HashSet();
for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); )
{
// First locate corresponding Bitstream and make
// map of Bitstream to <file> ID.
Element mfile = (Element)mi.next();
String mfileId = mfile.getAttributeValue("ID");
if (mfileId == null)
throw new PackageValidationException("Invalid METS Manifest: file element without ID attribute.");
String path = METSManifest.getFileName(mfile);
Bitstream bs = contentBundle.getBitstreamByName(path);
if (bs == null)
{
log.warn("Cannot find bitstream for filename=\""+path+
"\", skipping it..may cause problems later.");
missingFiles.add(path);
}
else
{
fileIdToBitstream.put(mfileId, bs);
// Now that we're done using Name to match to <file>,
// set default bitstream Name to last path element;
// Zip entries all have '/' pathname separators
// NOTE: set default here, hopefully crosswalk of
// a bitstream techMD section will override it.
String fname = bs.getName();
int lastSlash = fname.lastIndexOf('/');
if (lastSlash >= 0 && lastSlash+1 < fname.length())
bs.setName(fname.substring(lastSlash+1));
// Set Default bitstream format:
// 1. attempt to guess from MIME type
// 2. if that fails, guess from "name" extension.
String mimeType = mfile.getAttributeValue("MIMETYPE");
BitstreamFormat bf = (mimeType == null) ? null :
BitstreamFormat.findByMIMEType(context, mimeType);
if (bf == null)
bf = FormatIdentifier.guessFormat(context, bs);
bs.setFormat(bf);
// if this bitstream belongs in another Bundle, move it:
String bundleName = manifest.getBundleName(mfile);
if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME))
{
Bundle bn;
Bundle bns[] = item.getBundles(bundleName);
if (bns != null && bns.length > 0)
bn = bns[0];
else
bn = item.createBundle(bundleName);
bn.addBitstream(bs);
contentBundle.removeBitstream(bs);
}
// finally, build compare lists by deleting matches.
if (packageFiles.contains(path))
packageFiles.remove(path);
else
missingFiles.add(path);
}
}
// b. Process files mentioned in <mdRef>s - check and move
// to METADATA bundle.
for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); )
{
Element mdref = (Element)mi.next();
String path = METSManifest.getFileName(mdref);
// finally, build compare lists by deleting matches.
if (packageFiles.contains(path))
packageFiles.remove(path);
else
missingFiles.add(path);
// if there is a bitstream with that name in Content, move
// it to the Metadata bundle:
Bitstream mdbs = contentBundle.getBitstreamByName(path);
if (mdbs != null)
{
if (mdBundle == null)
mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME);
mdBundle.addBitstream(mdbs);
contentBundle.removeBitstream(mdbs);
}
}
// KLUDGE: make sure Manifest file doesn't get flagged as missing
// or extra, since it won't be mentioned in the manifest.
if (packageFiles.contains(MANIFEST_FILE))
packageFiles.remove(MANIFEST_FILE);
// Give subclass a chance to refine the lists of in-package
// and missing files, delete extraneous files, etc.
checkPackageFiles(packageFiles, missingFiles, manifest);
// Any discrepancy in file lists is a fatal error:
if (!(packageFiles.isEmpty() && missingFiles.isEmpty()))
{
StringBuffer msg = new StringBuffer("Package is unacceptable: contents do not match manifest.");
if (!missingFiles.isEmpty())
{
msg.append("\nPackage is missing these files listed in Manifest:");
for (Iterator mi = missingFiles.iterator(); mi.hasNext(); )
msg.append("\n\t"+(String)mi.next());
}
if (!packageFiles.isEmpty())
{
msg.append("\nPackage contains extra files NOT in manifest:");
for (Iterator mi = packageFiles.iterator(); mi.hasNext(); )
msg.append("\n\t"+(String)mi.next());
}
throw new PackageValidationException(msg.toString());
}
/* 3. crosswalk the metadata
*/
// get mdref'd streams from "callback" object.
MdrefManager callback = new MdrefManager(mdBundle);
chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds(), params);
// crosswalk content bitstreams too.
for (Iterator ei = fileIdToBitstream.entrySet().iterator();
ei.hasNext();)
{
Map.Entry ee = (Map.Entry)ei.next();
manifest.crosswalkBitstream(context, (Bitstream)ee.getValue(),
(String)ee.getKey(), callback);
}
// Take a second pass over files to correct names of derived files
// (e.g. thumbnails, extracted text) to what DSpace expects:
for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); )
{
Element mfile = (Element)mi.next();
String bundleName = manifest.getBundleName(mfile);
if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME))
{
Element origFile = manifest.getOriginalFile(mfile);
if (origFile != null)
{
String ofileId = origFile.getAttributeValue("ID");
Bitstream obs = (Bitstream)fileIdToBitstream.get(ofileId);
String newName = makeDerivedFilename(bundleName, obs.getName());
if (newName != null)
{
String mfileId = mfile.getAttributeValue("ID");
Bitstream bs = (Bitstream)fileIdToBitstream.get(mfileId);
bs.setName(newName);
bs.update();
}
}
}
}
// Sanity-check the resulting metadata on the Item:
PackageUtils.checkMetadata(item);
/* 4. Set primary bitstream; same Bundle
*/
Element pbsFile = manifest.getPrimaryBitstream();
if (pbsFile != null)
{
Bitstream pbs = (Bitstream)fileIdToBitstream.get(pbsFile.getAttributeValue("ID"));
if (pbs == null)
log.error("Got Primary Bitstream file ID="+pbsFile.getAttributeValue("ID")+
", but found no corresponding bitstream.");
else
{
Bundle bn[] = pbs.getBundles();
if (bn.length > 0)
bn[0].setPrimaryBitstreamID(pbs.getID());
else
log.error("Sanity check, got primary bitstream without any parent bundle.");
}
}
// have subclass manage license since it may be extra package file.
addLicense(context, collection, item, manifest, callback, license );
// subclass hook for final checks and rearrangements
finishItem(context, item);
// commit any changes to bundles
Bundle allBn[] = item.getBundles();
for (int i = 0; i < allBn.length; ++i)
{
allBn[i].update();
}
wi.update();
success = true;
log.info(LogManager.getHeader(context, "ingest",
"Created new Item, db ID="+String.valueOf(item.getID())+
", WorkspaceItem ID="+String.valueOf(wi.getID())));
return wi;
}
catch (SQLException se)
{
// disable attempt to delete the workspace object, since
// database may have suffered a fatal error and the
// transaction rollback will get rid of it anyway.
wi = null;
// Pass this exception on to the next handler.
throw se;
}
finally
{
// kill item (which also deletes bundles, bitstreams) if ingest fails
if (!success && wi != null)
wi.deleteAll();
}
}