Package org.fao.geonet.kernel.harvest.harvester.csw

Source Code of org.fao.geonet.kernel.harvest.harvester.csw.Aligner

//=============================================================================
//===  Copyright (C) 2001-2007 Food and Agriculture Organization of the
//===  United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===  and United Nations Environment Programme (UNEP)
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.csw;

import jeeves.server.context.ServiceContext;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.csw.common.CswOperation;
import org.fao.geonet.csw.common.CswServer;
import org.fao.geonet.csw.common.ElementSetName;
import org.fao.geonet.csw.common.requests.GetRecordByIdRequest;
import org.fao.geonet.domain.Metadata;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.domain.OperationAllowedId_;
import org.fao.geonet.domain.Pair;
import org.fao.geonet.exceptions.OperationAbortedEx;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.*;
import org.fao.geonet.kernel.search.LuceneSearcher;
import org.fao.geonet.repository.MetadataRepository;
import org.fao.geonet.repository.OperationAllowedRepository;
import org.fao.geonet.repository.Updater;
import org.fao.geonet.utils.Xml;
import org.jdom.Element;
import org.jdom.xpath.XPath;

import javax.annotation.Nonnull;

import java.util.*;

import static org.fao.geonet.utils.AbstractHttpRequest.Method.GET;
import static org.fao.geonet.utils.AbstractHttpRequest.Method.POST;

//=============================================================================

public class Aligner extends BaseAligner
{
  //--------------------------------------------------------------------------
  //---
  //--- Constructor
  //---
  //--------------------------------------------------------------------------

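  /**
   * Builds the aligner and prepares the GetRecordById request used to retrieve
   * each record: it picks the GET or POST DCP URL advertised by the remote
   * server, applies the preferred output schema and server version, and sets
   * credentials when the harvester is configured to use an account.
   */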
  public Aligner(Logger log, ServiceContext sc, CswServer server, CswParams params) throws OperationAbortedEx
  {
    this.log        = log;
    this.context    = sc;
    this.params     = params;

    GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
    dataMan = gc.getBean(DataManager.class);
    result  = new HarvestResult();

    //--- setup get-record-by-id request

    request = new GetRecordByIdRequest(sc);
    request.setElementSetName(ElementSetName.FULL);

    CswOperation oper = server.getOperation(CswServer.GET_RECORD_BY_ID);

    // Use the preferred HTTP method if its DCP URL is advertised; otherwise fall back to whichever one exists.
    if (oper.getGetUrl() != null && Harvester.PREFERRED_HTTP_METHOD.equals("GET")) {
      request.setUrl(oper.getGetUrl());
      request.setMethod(GET);
    } else if (oper.getPostUrl() != null && Harvester.PREFERRED_HTTP_METHOD.equals("POST")) {
      request.setUrl(oper.getPostUrl());
      request.setMethod(POST);
    } else {
      if (oper.getGetUrl() != null) {
        request.setUrl(oper.getGetUrl());
        request.setMethod(GET);
      } else if (oper.getPostUrl() != null) {
        request.setUrl(oper.getPostUrl());
        request.setMethod(POST);
      } else {
        throw new OperationAbortedEx("No GET or POST DCP available in this service.");
      }
    }

    if (oper.getPreferredOutputSchema() != null) {
      request.setOutputSchema(oper.getPreferredOutputSchema());
    }

    if (oper.getPreferredServerVersion() != null) {
      request.setServerVersion(oper.getPreferredServerVersion());
    }

    if (params.useAccount) {
      request.setCredentials(params.username, params.password);
    }
  }

  //--------------------------------------------------------------------------
  //---
  //--- Alignment method
  //---
  //--------------------------------------------------------------------------

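  /**
   * Aligns the local catalog with the set of records harvested from the remote
   * CSW node: records no longer present remotely are deleted locally, new
   * records are added, and existing ones are updated when the remote copy is
   * more recent.
   *
   * @param records record descriptors returned by the remote GetRecords search
   * @param errors  collector for per-record harvest errors
   * @return counters describing what was added, updated, removed or skipped
   */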
  public HarvestResult align(Set<RecordInfo> records, List<HarvestError> errors) throws Exception
  {
    log.info("Start of alignment for : "+ params.name);

    //-----------------------------------------------------------------------
    //--- retrieve all local categories and groups
    //--- retrieve harvested uuids for given harvesting node

    localCateg = new CategoryMapper(context);
    localGroups= new GroupMapper(context);
    localUuids = new UUIDMapper(context.getBean(MetadataRepository.class), params.uuid);

    dataMan.flush();

    Pair<String, Map<String, Object>> filter =
        HarvesterUtil.parseXSLFilter(params.xslfilter, log);
    processName   = filter.one();
    processParams = filter.two();

    //-----------------------------------------------------------------------
    //--- remove old metadata

    for (String uuid : localUuids.getUUIDs()) {
      if (!exists(records, uuid)) {
        String id = localUuids.getID(uuid);

        if (log.isDebugEnabled()) {
          log.debug("  - Removing old metadata with local id:" + id);
        }
        dataMan.deleteMetadata(context, id);

        dataMan.flush();

        result.locallyRemoved++;
      }
    }

    //-----------------------------------------------------------------------
    //--- insert/update new metadata

    for (RecordInfo ri : records) {
      try {
        String id = dataMan.getMetadataId(ri.uuid);

        if (id == null) addMetadata(ri);
        else            updateMetadata(ri, id);

        result.totalMetadata++;
      } catch (Throwable t) {
        errors.add(new HarvestError(t, log));
        log.error("Unable to process record from csw (" + this.params.name + ")");
        log.error("   Record failed: " + ri.uuid + ". Error is: " + t.getMessage());
      } finally {
        result.originalMetadata++;
      }
    }

    log.info("End of alignment for : "+ params.name);

    return result;
  }

  //--------------------------------------------------------------------------
  //---
  //--- Private methods : addMetadata
  //---
  //--------------------------------------------------------------------------

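  /**
   * Retrieves a remote record by uuid, optionally applies the configured XSL
   * filter, inserts it as a new harvested metadata record and assigns the
   * configured privileges and categories.
   */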
  private void addMetadata(RecordInfo ri) throws Exception
  {
    Element md = retrieveMetadata(ri.uuid);

    if (md == null) {
      return;
    }

    String schema = dataMan.autodetectSchema(md, null);

    if (schema == null) {
            if(log.isDebugEnabled()) {
                log.debug("  - Metadata skipped due to unknown schema. uuid:" + ri.uuid);
            }
      result.unknownSchema++;

      return;
    }

        if (log.isDebugEnabled()) {
            log.debug("  - Adding metadata with remote uuid:" + ri.uuid + " schema:" + schema);
        }

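        // Apply the configured XSL filter, if any, before inserting the record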
        if (!params.xslfilter.equals("")) {
            md = HarvesterUtil.processMetadata(dataMan.getSchema(schema),
                    md, processName, processParams, log);
        }
        //
        // insert metadata
        //
        String group = null, isTemplate = null, docType = null, title = null, category = null;
        boolean ufo = false, indexImmediate = false;
        final int ownerId;
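        // Resolve the record owner: configured harvester owner if set, else the
        // current session user, else user id 1 (typically the default administrator account)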
        if (params.ownerId == null) {
            if (context.getUserSession() != null) {
                ownerId = context.getUserSession().getUserIdAsInt();
            } else {
                ownerId = 1;
            }
        } else {
            ownerId = Integer.parseInt(params.ownerId);
        }
        String id = dataMan.insertMetadata(context, schema, md, ri.uuid,
                ownerId, group, params.uuid,
                isTemplate, docType, category, ri.changeDate, ri.changeDate, ufo, indexImmediate);

    int iId = Integer.parseInt(id);

    dataMan.setTemplateExt(iId, MetadataType.METADATA);
    dataMan.setHarvestedExt(iId, params.uuid);

        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);
        context.getBean(MetadataRepository.class).update(Integer.parseInt(id), new Updater<Metadata>() {
            @Override
            public void apply(@Nonnull Metadata entity) {
                addCategories(entity, params.getCategories(), localCateg, context, log, null);
            }
        });

        dataMan.flush();

        dataMan.indexMetadata(id, false);
    result.addedMetadata++;
  }

  //--------------------------------------------------------------------------
  //---
  //--- Private methods : updateMetadata
  //---
  //--------------------------------------------------------------------------
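  /**
   * Updates an existing local copy of a harvested record. Records managed by a
   * different harvesting node are skipped; otherwise the record is refreshed
   * only when the remote change date is more recent than the local one, and its
   * privileges and categories are reset to the harvester configuration.
   */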
  private void updateMetadata(RecordInfo ri, String id) throws Exception
  {
    String date = localUuids.getChangeDate(ri.uuid);

    if (date == null) {
            if(log.isDebugEnabled()) {
                log.debug("  - Skipped metadata managed by another harvesting node. uuid:"+ ri.uuid +", name:"+ params.name);
            }
    } else {
      if (!ri.isMoreRecentThan(date)) {
                if(log.isDebugEnabled()) {
                    log.debug("  - Metadata XML not changed for uuid:"+ ri.uuid);
                }
        result.unchangedMetadata++;
      } else {
                if(log.isDebugEnabled()) {
                    log.debug("  - Updating local metadata for uuid:"+ ri.uuid);
                }
        Element md = retrieveMetadata(ri.uuid);

        if (md == null) {
          return;
        }
                String schema = dataMan.autodetectSchema(md, null);
                if (!params.xslfilter.equals("")) {
                    md = HarvesterUtil.processMetadata(dataMan.getSchema(schema),
                            md, processName, processParams, log);
                }

                //
                // update metadata
                //
                boolean validate = false;
                boolean ufo = false;
                boolean index = false;
                String language = context.getLanguage();
                final Metadata metadata = dataMan.updateMetadata(context, id, md, validate, ufo, index, language, ri.changeDate, true);

                OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class);
        repository.deleteAllByIdAttribute(OperationAllowedId_.metadataId, Integer.parseInt(id));

                addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);

                metadata.getCategories().clear();
                addCategories(metadata, params.getCategories(), localCateg, context, log, null);

                dataMan.flush();

                dataMan.indexMetadata(id, false);
        result.updatedMetadata++;
      }
    }
  }

  //--------------------------------------------------------------------------
  //---
  //--- Private methods
  //---
  //--------------------------------------------------------------------------

    /**
     * Returns true if the given uuid is present in the set of remote records.
     *
     * @param records the records harvested from the remote node
     * @param uuid    the uuid to look for
     * @return true if the uuid is found, false otherwise
     */
  private boolean exists(Set<RecordInfo> records, String uuid)
  {
    for(RecordInfo ri : records)
      if (uuid.equals(ri.uuid))
        return true;

    return false;
  }

  //--------------------------------------------------------------------------

  /**
   * Performs a CSW GetRecordById request for the given uuid. If validation is
   * requested and the metadata does not validate, null is returned.
   *
   * @param uuid uuid of the metadata to request
   * @return the metadata element, or null if it could not be retrieved or was rejected
   */
  private Element retrieveMetadata(String uuid)
  {
    request.clearIds();
    request.addId(uuid);

    try
    {
            if(log.isDebugEnabled()) {
                log.debug("Getting record from : " + request.getHost() + " (uuid:" + uuid + ")");
            }
      Element response = request.execute();
            if(log.isDebugEnabled()) {
                log.debug("Record got:\n" + Xml.getString(response));
            }

      @SuppressWarnings("unchecked")
            List<Element> list = response.getChildren();

      //--- maybe the metadata has been removed

      if (list.size() == 0) {
                return null;
            }

      response = list.get(0);
      response = (Element) response.detach();

            // validate it here if requested
            if (params.validate) {
                if(!dataMan.validate(response))  {
                    log.info("Ignoring invalid metadata with uuid " + uuid);
                    result.doesNotValidate++;
                    return null;
                }
            }
           
            if(params.rejectDuplicateResource) {
                if (foundDuplicateForResource(uuid, response)) {
                    return null;
                }
            }
           
            return response;
    }
    catch(Exception e)
    {
      log.warning("Raised exception while getting record : "+ e);
      e.printStackTrace();
      result.unretrievable++;

      //--- we don't raise any exception here. Just try to go on
      return null;
    }
  }

    /**
     * Checks whether the catalog already contains metadata with the same resource
     * identifier as the harvested record.
     *
     * One dataset (i.e. the same MD_Metadata/../identificationInfo/../identifier/../code,
     * e.g. an NMA layer for roads) may be described in two or more catalogs under
     * different metadata uuids. The metadata may differ slightly depending on the
     * author, but the resource is the same. When harvesting, some users want the
     * ability to exclude such "duplicate" descriptions of the same dataset.
     *
     * The check is made by searching the identifier field in the index using
     * {@link org.fao.geonet.kernel.search.LuceneSearcher#getAllMetadataFromIndexFor(String, String, String, java.util.Set, boolean)}.
     *
     * @param uuid the metadata unique identifier
     * @param response the XML document to check
     * @return true if a record with the same resource identifier is found, false otherwise
     */
    private boolean foundDuplicateForResource(String uuid, Element response) {
        String schema = dataMan.autodetectSchema(response);
       
        if(schema != null && schema.startsWith("iso19139")) {
            String resourceIdentifierXPath = "gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:identifier/*/gmd:code/gco:CharacterString";
            String resourceIdentifierLuceneIndexField = "identifier";
            String defaultLanguage = "eng";
           
            try {
                // Extract resource identifier
                XPath xp = XPath.newInstance (resourceIdentifierXPath);
                xp.addNamespace("gmd", "http://www.isotc211.org/2005/gmd");
                xp.addNamespace("gco", "http://www.isotc211.org/2005/gco");
                @SuppressWarnings("unchecked")
                List<Element> resourceIdentifiers = xp.selectNodes(response);
                if (resourceIdentifiers.size() > 0) {
                    // Check if the metadata to import has a resource identifier
                    // existing in current catalog for a record with a different UUID
                   
                    log.debug("  - Resource identifiers found : " + resourceIdentifiers.size());
                   
                    for (Element identifierNode : resourceIdentifiers) {
                        String identifier = identifierNode.getTextTrim();
                        log.debug("    - Searching for duplicates for resource identifier: " + identifier);
                       
                        Map<String, Map<String,String>> values = LuceneSearcher.getAllMetadataFromIndexFor(defaultLanguage, resourceIdentifierLuceneIndexField,
                                identifier, Collections.singleton("_uuid"), true);
                        log.debug("    - Number of resources with same identifier: " + values.size());
                        for (Map<String, String> recordFieldValues : values.values()) {
                            String indexRecordUuid = recordFieldValues.get("_uuid");
                            if (!indexRecordUuid.equals(uuid)) {
                                log.debug("      - UUID " + indexRecordUuid + " in index does not match harvested record UUID " + uuid);
                                log.warning("      - Duplicates found. Skipping record with UUID " + uuid + " and resource identifier " + identifier);
                               
                                result.duplicatedResource ++;
                                return true;
                            }
                        }
                    }
                }
            } catch (Throwable e) {
                log.warning("      - Error when searching for resource duplicate " + uuid + ". Error is: " + e.getMessage());
                e.printStackTrace();
            }
        }
        return false;
    }

  //--------------------------------------------------------------------------
  //---
  //--- Variables
  //---
  //--------------------------------------------------------------------------

    private Logger         log;
    private ServiceContext context;
    private CswParams      params;
    private DataManager    dataMan;
    private CategoryMapper localCateg;
    private GroupMapper    localGroups;
    private UUIDMapper     localUuids;
    private HarvestResult  result;
    private GetRecordByIdRequest request;

    private String processName;
    private Map<String, Object> processParams = new HashMap<String, Object>();
}