//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
//=== This program is free software; you can redistribute it and/or modify
//=== it under the terms of the GNU General Public License as published by
//=== the Free Software Foundation; either version 2 of the License, or (at
//=== your option) any later version.
//===
//=== This program is distributed in the hope that it will be useful, but
//=== WITHOUT ANY WARRANTY; without even the implied warranty of
//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//=== General Public License for more details.
//===
//=== You should have received a copy of the GNU General Public License
//=== along with this program; if not, write to the Free Software
//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//=== Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================
package org.fao.geonet.kernel.harvest.harvester.csw;
import jeeves.server.context.ServiceContext;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.csw.common.CswOperation;
import org.fao.geonet.csw.common.CswServer;
import org.fao.geonet.csw.common.ElementSetName;
import org.fao.geonet.csw.common.requests.GetRecordByIdRequest;
import org.fao.geonet.domain.Metadata;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.domain.OperationAllowedId_;
import org.fao.geonet.domain.Pair;
import org.fao.geonet.exceptions.OperationAbortedEx;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.*;
import org.fao.geonet.kernel.search.LuceneSearcher;
import org.fao.geonet.repository.MetadataRepository;
import org.fao.geonet.repository.OperationAllowedRepository;
import org.fao.geonet.repository.Updater;
import org.fao.geonet.utils.Xml;
import org.fao.geonet.repository.Updater;
import org.jdom.Element;
import org.jdom.xpath.XPath;
import javax.annotation.Nonnull;
import java.util.*;
import static org.fao.geonet.utils.AbstractHttpRequest.Method.GET;
import javax.annotation.Nonnull;
import static org.fao.geonet.utils.AbstractHttpRequest.Method.POST;
//=============================================================================
/**
 * Aligns the local catalog with the set of records advertised by a remote
 * CSW server for one harvesting node.
 *
 * Local records of the node that are no longer present remotely are deleted;
 * every remote record is then fetched with a GetRecordById request and either
 * inserted or, when its change date is more recent than the local one, updated.
 * Counters describing the outcome are accumulated in a {@link HarvestResult}.
 */
public class Aligner extends BaseAligner {
    //--------------------------------------------------------------------------
    //---
    //--- Variables
    //---
    //--------------------------------------------------------------------------

    private final Logger log;
    private final ServiceContext context;
    private final CswParams params;
    private final DataManager dataMan;
    private final HarvestResult result;
    private final GetRecordByIdRequest request;

    //--- (re)initialised at the start of every align() call
    private CategoryMapper localCateg;
    private GroupMapper localGroups;
    private UUIDMapper localUuids;
    private String processName;
    private Map<String, Object> processParams = new HashMap<String, Object>();

    //--------------------------------------------------------------------------
    //---
    //--- Constructor
    //---
    //--------------------------------------------------------------------------

    /**
     * Prepares the GetRecordById request that is reused for every record.
     *
     * @param log    logger used for all harvesting messages
     * @param sc     service context giving access to the application beans
     * @param server description of the remote CSW server
     * @param params harvesting node parameters
     * @throws OperationAbortedEx if the server does not offer GetRecordById, or
     *                            offers it with neither a GET nor a POST URL
     */
    public Aligner(Logger log, ServiceContext sc, CswServer server, CswParams params) throws OperationAbortedEx {
        this.log = log;
        this.context = sc;
        this.params = params;

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        this.dataMan = gc.getBean(DataManager.class);
        this.result = new HarvestResult();

        //--- setup get-record-by-id request
        this.request = new GetRecordByIdRequest(sc);
        request.setElementSetName(ElementSetName.FULL);

        CswOperation oper = server.getOperation(CswServer.GET_RECORD_BY_ID);
        // Guard against a server that does not advertise the operation at all:
        // fail with the declared exception instead of a NullPointerException.
        if (oper == null) {
            throw new OperationAbortedEx("GetRecordById operation not available in this service.");
        }

        boolean hasGet = oper.getGetUrl() != null;
        boolean hasPost = oper.getPostUrl() != null;
        if (!hasGet && !hasPost) {
            throw new OperationAbortedEx("No GET or POST DCP available in this service.");
        }

        // Use the preferred HTTP method when the server offers it; otherwise
        // fall back to whatever is available, GET first.
        boolean usePost = hasPost && (!hasGet || "POST".equals(Harvester.PREFERRED_HTTP_METHOD));
        if (usePost) {
            request.setUrl(oper.getPostUrl());
            request.setMethod(POST);
        } else {
            request.setUrl(oper.getGetUrl());
            request.setMethod(GET);
        }

        if (oper.getPreferredOutputSchema() != null) {
            request.setOutputSchema(oper.getPreferredOutputSchema());
        }
        if (oper.getPreferredServerVersion() != null) {
            request.setServerVersion(oper.getPreferredServerVersion());
        }
        if (params.useAccount) {
            request.setCredentials(params.username, params.password);
        }
    }

    //--------------------------------------------------------------------------
    //---
    //--- Alignment method
    //---
    //--------------------------------------------------------------------------

    /**
     * Runs the alignment: removes local records that disappeared from the
     * remote node, then inserts or updates every remote record.
     *
     * A failure while processing a single record is appended to
     * <code>errors</code> and logged; the remaining records are still processed.
     *
     * @param records records currently advertised by the remote node
     * @param errors  list receiving one entry per record that could not be processed
     * @return the counters accumulated during this alignment
     * @throws Exception if the set-up or the removal of old metadata fails
     */
    public HarvestResult align(Set<RecordInfo> records, List<HarvestError> errors) throws Exception {
        log.info("Start of alignment for : " + params.name);

        //-----------------------------------------------------------------------
        //--- retrieve all local categories and groups
        //--- retrieve harvested uuids for given harvesting node
        localCateg = new CategoryMapper(context);
        localGroups = new GroupMapper(context);
        localUuids = new UUIDMapper(context.getBean(MetadataRepository.class), params.uuid);
        dataMan.flush();

        Pair<String, Map<String, Object>> filter = HarvesterUtil.parseXSLFilter(params.xslfilter, log);
        processName = filter.one();
        processParams = filter.two();

        //-----------------------------------------------------------------------
        //--- remove old metadata
        removeOldMetadata(records);

        //-----------------------------------------------------------------------
        //--- insert/update new metadata
        for (RecordInfo ri : records) {
            try {
                String id = dataMan.getMetadataId(ri.uuid);
                if (id == null) {
                    addMetadata(ri);
                } else {
                    updateMetadata(ri, id);
                }
                result.totalMetadata++;
            } catch (Throwable t) {
                // Deliberately broad: one broken record must not abort the harvest.
                errors.add(new HarvestError(t, log));
                log.error("Unable to process record from csw (" + this.params.name + ")");
                log.error("   Record failed: " + ri.uuid + ". Error is: " + t.getMessage());
            } finally {
                result.originalMetadata++;
            }
        }

        log.info("End of alignment for : " + params.name);
        return result;
    }

    //--------------------------------------------------------------------------
    //---
    //--- Private methods : removeOldMetadata
    //---
    //--------------------------------------------------------------------------

    /**
     * Deletes every local record of this harvesting node whose uuid is not
     * among the remote records.
     *
     * The remote uuids are collected once into a hash set so that each local
     * uuid is checked with a single lookup instead of a scan of all records.
     */
    private void removeOldMetadata(Set<RecordInfo> records) throws Exception {
        Set<String> remoteUuids = new HashSet<String>();
        for (RecordInfo ri : records) {
            remoteUuids.add(ri.uuid);
        }

        for (String uuid : localUuids.getUUIDs()) {
            if (remoteUuids.contains(uuid)) {
                continue;
            }
            String id = localUuids.getID(uuid);
            if (log.isDebugEnabled()) {
                log.debug("  - Removing old metadata with local id:" + id);
            }
            dataMan.deleteMetadata(context, id);
            dataMan.flush();
            result.locallyRemoved++;
        }
    }

    //--------------------------------------------------------------------------
    //---
    //--- Private methods : addMetadata
    //---
    //--------------------------------------------------------------------------

    /**
     * Fetches a remote record and inserts it as a new harvested metadata.
     *
     * Nothing is inserted when the record cannot be retrieved (see
     * {@link #retrieveMetadata(String)}) or when its schema cannot be detected.
     */
    private void addMetadata(RecordInfo ri) throws Exception {
        Element md = retrieveMetadata(ri.uuid);
        if (md == null) {
            return;
        }

        String schema = dataMan.autodetectSchema(md, null);
        if (schema == null) {
            if (log.isDebugEnabled()) {
                log.debug("  - Metadata skipped due to unknown schema. uuid:" + ri.uuid);
            }
            result.unknownSchema++;
            return;
        }

        if (log.isDebugEnabled()) {
            log.debug("  - Adding metadata with remote uuid:" + ri.uuid + " schema:" + schema);
        }

        if (hasXslFilter()) {
            md = HarvesterUtil.processMetadata(dataMan.getSchema(schema),
                md, processName, processParams, log);
        }

        //
        // insert metadata
        //
        // Typed null locals keep the long insertMetadata call readable.
        String group = null, isTemplate = null, docType = null, category = null;
        boolean ufo = false, indexImmediate = false;
        int ownerId = resolveOwnerId();

        String id = dataMan.insertMetadata(context, schema, md, ri.uuid,
            ownerId, group, params.uuid,
            isTemplate, docType, category, ri.changeDate, ri.changeDate, ufo, indexImmediate);

        int iId = Integer.parseInt(id);
        dataMan.setTemplateExt(iId, MetadataType.METADATA);
        dataMan.setHarvestedExt(iId, params.uuid);

        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);

        context.getBean(MetadataRepository.class).update(iId, new Updater<Metadata>() {
            @Override
            public void apply(@Nonnull Metadata entity) {
                addCategories(entity, params.getCategories(), localCateg, context, log, null);
            }
        });

        dataMan.flush();
        dataMan.indexMetadata(id, false);
        result.addedMetadata++;
    }

    /**
     * Returns the owner for inserted records: the configured owner when set,
     * otherwise the user of the current session, otherwise user id 1.
     */
    private int resolveOwnerId() {
        if (params.ownerId != null) {
            return Integer.parseInt(params.ownerId);
        }
        if (context.getUserSession() != null) {
            return context.getUserSession().getUserIdAsInt();
        }
        return 1;
    }

    /**
     * Tells whether an XSL filter is configured; null-safe so that a missing
     * filter is treated like an empty one.
     */
    private boolean hasXslFilter() {
        return params.xslfilter != null && !params.xslfilter.equals("");
    }

    //--------------------------------------------------------------------------
    //---
    //--- Private methods : updateMetadata
    //---
    //--------------------------------------------------------------------------

    /**
     * Updates the local copy of a remote record when the remote one is more
     * recent.
     *
     * The record is left untouched when it has no change date in this node's
     * uuid mapper (logged as managed by another harvesting node), when it has
     * not changed, when it cannot be retrieved, or when its schema cannot be
     * detected.
     *
     * @param ri remote record description
     * @param id local metadata id of the record
     */
    private void updateMetadata(RecordInfo ri, String id) throws Exception {
        String date = localUuids.getChangeDate(ri.uuid);
        if (date == null) {
            if (log.isDebugEnabled()) {
                log.debug("  - Skipped metadata managed by another harvesting node. uuid:" + ri.uuid + ", name:" + params.name);
            }
            return;
        }

        if (!ri.isMoreRecentThan(date)) {
            if (log.isDebugEnabled()) {
                log.debug("  - Metadata XML not changed for uuid:" + ri.uuid);
            }
            result.unchangedMetadata++;
            return;
        }

        if (log.isDebugEnabled()) {
            log.debug("  - Updating local metadata for uuid:" + ri.uuid);
        }

        Element md = retrieveMetadata(ri.uuid);
        if (md == null) {
            return;
        }

        // Same guard as addMetadata: content whose schema cannot be identified
        // is skipped instead of being passed on with a null schema.
        String schema = dataMan.autodetectSchema(md, null);
        if (schema == null) {
            if (log.isDebugEnabled()) {
                log.debug("  - Metadata skipped due to unknown schema. uuid:" + ri.uuid);
            }
            result.unknownSchema++;
            return;
        }

        if (hasXslFilter()) {
            md = HarvesterUtil.processMetadata(dataMan.getSchema(schema),
                md, processName, processParams, log);
        }

        //
        // update metadata
        //
        boolean validate = false;
        boolean ufo = false;
        boolean index = false;
        String language = context.getLanguage();
        final Metadata metadata = dataMan.updateMetadata(context, id, md, validate, ufo, index, language, ri.changeDate, true);

        // Privileges and categories are reset to what the harvesting node defines.
        OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class);
        repository.deleteAllByIdAttribute(OperationAllowedId_.metadataId, Integer.parseInt(id));
        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);

        metadata.getCategories().clear();
        addCategories(metadata, params.getCategories(), localCateg, context, log, null);

        dataMan.flush();
        dataMan.indexMetadata(id, false);
        result.updatedMetadata++;
    }

    //--------------------------------------------------------------------------
    //---
    //--- Private methods
    //---
    //--------------------------------------------------------------------------

    /**
     * Does CSW GetRecordById request.
     *
     * <code>null</code> is returned, and the matching counter of the harvest
     * result is incremented where one exists, when:
     * the response has no child element (the record may have been removed),
     * validation is requested and the metadata does not validate,
     * duplicate rejection is requested and a duplicate resource is found,
     * or any exception is raised while fetching the record.
     *
     * @param uuid uuid of metadata to request
     * @return the metadata, detached from the response, or <code>null</code>
     */
    private Element retrieveMetadata(String uuid) {
        request.clearIds();
        request.addId(uuid);

        try {
            if (log.isDebugEnabled()) {
                log.debug("Getting record from : " + request.getHost() + " (uuid:" + uuid + ")");
            }

            Element response = request.execute();

            if (log.isDebugEnabled()) {
                log.debug("Record got:\n" + Xml.getString(response));
            }

            @SuppressWarnings("unchecked")
            List<Element> list = response.getChildren();

            //--- maybe the metadata has been removed
            if (list.isEmpty()) {
                return null;
            }

            Element record = (Element) list.get(0).detach();

            // validate it here if requested
            if (params.validate && !dataMan.validate(record)) {
                log.info("Ignoring invalid metadata with uuid " + uuid);
                result.doesNotValidate++;
                return null;
            }

            if (params.rejectDuplicateResource && foundDuplicateForResource(uuid, record)) {
                return null;
            }

            return record;
        } catch (Exception e) {
            //--- we don't raise any exception here. Just try to go on
            log.warning("Raised exception while getting record : " + e);
            log.warning(stackTraceOf(e));
            result.unretrievable++;
            return null;
        }
    }

    /**
     * Check for metadata in the catalog having the same resource identifier as the
     * harvested record.
     *
     * If one dataset (same MD_metadata/../identificationInfo/../identifier/../code)
     * (eg. a NMA layer for roads) is described in 2 or more catalogs with different
     * metadata uuids. The metadata may be slightly different depending on the author,
     * but the resource is the same. When harvesting, some users would like to have
     * the capability to exclude "duplicate" description of the same dataset.
     *
     * The check is only made for records whose detected schema starts with
     * "iso19139", searching the identifier field in the index using
     * {@link org.fao.geonet.kernel.search.LuceneSearcher#getAllMetadataFromIndexFor(String, String, String, java.util.Set, boolean)}.
     * Any error raised during the check is logged and treated as "no duplicate".
     *
     * @param uuid     the metadata unique identifier
     * @param response the XML document to check
     * @return true if a record with same resource identifier is found. false otherwise.
     */
    private boolean foundDuplicateForResource(String uuid, Element response) {
        String schema = dataMan.autodetectSchema(response);
        if (schema == null || !schema.startsWith("iso19139")) {
            return false;
        }

        String resourceIdentifierXPath = "gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:identifier/*/gmd:code/gco:CharacterString";
        String resourceIdentifierLuceneIndexField = "identifier";
        String defaultLanguage = "eng";

        try {
            // Extract resource identifier
            XPath xp = XPath.newInstance(resourceIdentifierXPath);
            xp.addNamespace("gmd", "http://www.isotc211.org/2005/gmd");
            xp.addNamespace("gco", "http://www.isotc211.org/2005/gco");
            @SuppressWarnings("unchecked")
            List<Element> resourceIdentifiers = xp.selectNodes(response);
            if (resourceIdentifiers.isEmpty()) {
                return false;
            }

            // Check if the metadata to import has a resource identifier
            // existing in current catalog for a record with a different UUID
            if (log.isDebugEnabled()) {
                log.debug("  - Resource identifiers found : " + resourceIdentifiers.size());
            }

            for (Element identifierNode : resourceIdentifiers) {
                String identifier = identifierNode.getTextTrim();
                if (log.isDebugEnabled()) {
                    log.debug("    - Searching for duplicates for resource identifier: " + identifier);
                }

                Map<String, Map<String, String>> values = LuceneSearcher.getAllMetadataFromIndexFor(defaultLanguage, resourceIdentifierLuceneIndexField,
                    identifier, Collections.singleton("_uuid"), true);
                if (log.isDebugEnabled()) {
                    log.debug("    - Number of resources with same identifier: " + values.size());
                }

                for (Map<String, String> recordFieldValues : values.values()) {
                    String indexRecordUuid = recordFieldValues.get("_uuid");
                    // An index hit without a _uuid value cannot be compared; ignore
                    // it rather than letting a NullPointerException end the check.
                    if (indexRecordUuid != null && !indexRecordUuid.equals(uuid)) {
                        if (log.isDebugEnabled()) {
                            log.debug("      - UUID " + indexRecordUuid + " in index does not match harvested record UUID " + uuid);
                        }
                        log.warning("      - Duplicates found. Skipping record with UUID " + uuid + " and resource identifier " + identifier);
                        result.duplicatedResource++;
                        return true;
                    }
                }
            }
        } catch (Throwable e) {
            // Best effort: a failing duplicate check must not prevent the harvest.
            log.warning("      - Error when searching for resource duplicate " + uuid + ". Error is: " + e.getMessage());
            log.warning(stackTraceOf(e));
        }
        return false;
    }

    /**
     * Renders the stack trace of a throwable as text so it can be written to
     * the harvester log instead of the standard error stream.
     */
    private static String stackTraceOf(Throwable t) {
        java.io.StringWriter buffer = new java.io.StringWriter();
        t.printStackTrace(new java.io.PrintWriter(buffer, true));
        return buffer.toString();
    }
}