//=============================================================================
//===  Copyright (C) 2001-2007 Food and Agriculture Organization of the
//===  United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===  and United Nations Environment Programme (UNEP)
//===
//===  Copyright (C) 2008-2011 CSIRO Marine and Atmospheric Research,
//=== Australia
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.thredds;

import com.google.common.base.Optional;
import jeeves.server.context.ServiceContext;
import jeeves.xlink.Processor;
import org.apache.commons.io.IOUtils;
import org.fao.geonet.Constants;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.domain.Metadata;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.exceptions.BadServerCertificateEx;
import org.fao.geonet.exceptions.BadXmlResponseEx;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.SchemaManager;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.*;
import org.fao.geonet.kernel.harvest.harvester.fragment.FragmentHarvester;
import org.fao.geonet.kernel.harvest.harvester.fragment.FragmentHarvester.FragmentParams;
import org.fao.geonet.kernel.harvest.harvester.fragment.FragmentHarvester.HarvestSummary;
import org.fao.geonet.kernel.setting.SettingInfo;
import org.fao.geonet.lib.Lib;
import org.fao.geonet.repository.MetadataRepository;
import org.fao.geonet.repository.Updater;
import org.fao.geonet.utils.GeonetHttpRequestFactory;
import org.fao.geonet.util.Sha1Encoder;
import org.fao.geonet.utils.Xml;
import org.fao.geonet.utils.XmlRequest;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import thredds.catalog.*;
import thredds.catalog.dl.DIFWriter;
import ucar.nc2.Attribute;
import ucar.nc2.dataset.NetcdfDataset;
import ucar.nc2.dataset.NetcdfDatasetInfo;
import ucar.nc2.ncml.NcMLWriter;
import ucar.nc2.units.DateType;
import ucar.unidata.util.StringUtil;

import javax.annotation.Nonnull;
import javax.net.ssl.SSLHandshakeException;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;

//=============================================================================
/**
 * A ThreddsHarvester is able to generate metadata for datasets and services
 * from a Thredds catalogue. Metadata for datasets are generated
 * using dataset information contained in the thredds catalogue document or
 * from opening the dataset and retrieving variables, coordinate systems
 * and/or global attributes.
 *
 * Metadata produced are:
* <ul>
*   <li>ISO19119 for service metadata (all services in the catalog)</li>
*   <li>ISO19139 (or profile) metadata for datasets in catalog</li>
* </ul>
*
* <pre> 
 * <nodes>
 *  <node type="thredds" id="114">
*    <site>
*      <name>TEST</name>
*      <uuid>c1da2928-c866-49fd-adde-466fe36d3508</uuid>
*      <account>
*        <use>true</use>
*        <username />
*        <password />
*      </account>
*      <url>http://localhost:5556/thredds/catalog.xml</url>
*      <icon>default.gif</icon>
*    </site>
*    <options>
*      <every>90</every>
*      <oneRunOnly>false</oneRunOnly>
*      <status>active</status>
*      <lang>eng</lang>
*      <createThumbnails>false</createThumbnails>
*      <createServiceMd>false</createServiceMd>
*      <createCollectionDatasetMd>true</createCollectionDatasetMd>
*      <createAtomicDatasetMd>false</createAtomicDatasetMd>
*      <ignoreHarvestOnCollections>true</ignoreHarvestOnCollections>
* Choice of {
*      <outputSchemaOnCollectionsDIF>iso19139</outputSchemaOnCollectionsDIF>
* } OR {
*      <outputSchemaOnCollectionsFragments>iso19139</outputSchemaOnCollectionsFragments>
*      <collectionFragmentStylesheet>collection_fragments.xsl</collectionFragmentStylesheet>
*      <collectionMetadataTemplate>My template</collectionMetadataTemplate>
*      <createCollectionSubtemplates>false</createCollectionSubtemplates>
* }
*      <ignoreHarvestOnAtomics>true</ignoreHarvestOnAtomics>
* Choice of {
*      <outputSchemaOnAtomicsDIF>iso19139.mcp</outputSchemaOnAtomicsDIF>
* } OR {
*      <outputSchemaOnAtomicsFragments>iso19139</outputSchemaOnAtomicsFragments>
*      <atomicFragmentStylesheet>atomic_fragments.xsl</atomicFragmentStylesheet>
*      <atomicMetadataTemplate>My template</atomicMetadataTemplate>
*      <createAtomicSubtemplates>false</createAtomicSubtemplates>
* }
*      <modifiedOnly>true</modifiedOnly>
*      <datasetCategory></datasetCategory>
*    </options>
*    <privileges>
*      <group id="1">
*        <operation name="view" />
*      </group>
*    </privileges>
*    <categories>
*      <category id="3" />
*    </categories>
*    <info>
*      <lastRun>2007-12-05T16:17:20</lastRun>
*      <running>false</running>
*    </info>
 *  </node>
* </nodes>
* </pre>
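 *
 * A minimal usage sketch (assuming a configured Jeeves ServiceContext and a
 * populated ThreddsParams, both of which are normally supplied by the
 * harvest manager):
 *
 * <pre>
 * {@code
 * Harvester harvester = new Harvester(log, context, threddsParams);
 * HarvestResult result = harvester.harvest(log);
 * }
 * </pre>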
*
* @author Simon Pigot
*  
*/
class Harvester extends BaseAligner implements IHarvester<HarvestResult>
{
 
 
  //---------------------------------------------------------------------------
  /**
   * Constructor
   *
   * @param log      Logger for harvest messages
   * @param context  Jeeves context
   * @param params   Information about harvesting configuration for the node
   **/
 
  public Harvester(Logger log, ServiceContext context, ThreddsParams params) {
    this.log    = log;
    this.context= context;
    this.params = params;

    result = new HarvestResult ();
   
    GeonetContext gc = (GeonetContext) context.getHandlerContext (Geonet.CONTEXT_NAME);
    dataMan = gc.getBean(DataManager.class);
    schemaMan = gc.getBean(SchemaManager.class);

    SettingInfo si = context.getBean(SettingInfo.class);
    String siteUrl = si.getSiteUrl() + context.getBaseUrl();
    metadataGetService = siteUrl + "/srv/en/xml.metadata.get";

   
    //--- Create fragment harvester for atomic datasets if required
    if (params.createAtomicDatasetMd && params.atomicMetadataGeneration.equals(ThreddsParams.FRAGMENTS)) {
      atomicFragmentHarvester = new FragmentHarvester(log, context, getAtomicFragmentParams());
    }
   
    //--- Create fragment harvester for collection datasets if required
    if (params.createCollectionDatasetMd && params.collectionMetadataGeneration.equals(ThreddsParams.FRAGMENTS)) {
      collectionFragmentHarvester = new FragmentHarvester(log, context, getCollectionFragmentParams());
    }
  }

  //---------------------------------------------------------------------------
  //---
  //--- API methods
  //---
  //---------------------------------------------------------------------------

  /**
     * Start the harvesting of a thredds catalog
     **/
 
  public HarvestResult harvest(Logger log) throws Exception {
    this.log = log;
   
    Element xml = null;
    log.info("Retrieving remote metadata information for : " + params.name);
       
    //--- Get uuid's and change dates of metadata records previously
    //--- harvested by this harvester grouping by harvest uri
    localUris = new UriMapper(context, params.uuid);

    //--- Try to load thredds catalog document
    String url = params.url;
    try {
      XmlRequest req = context.getBean(GeonetHttpRequestFactory.class).createXmlRequest();
      req.setUrl(new URL(url));
      req.setMethod(XmlRequest.Method.GET);
      Lib.net.setupProxy(context, req);

      xml = req.execute();
    } catch (SSLHandshakeException e) {
      throw new BadServerCertificateEx(
        "Most likely cause: The thredds catalog "+url+" does not have a "+
        "valid certificate. If you feel this is because the server may be "+
        "using a test certificate rather than a certificate from a well "+
        "known certification authority, then you can add this certificate "+
        "to the GeoNetwork keystore using bin/installCert");
    }
   
    //--- Traverse catalog to create services and dataset metadata as required
    harvestCatalog(xml);
         
    //--- Remove previously harvested metadata for uris that no longer exist on the remote site
    for (String localUri : localUris.getUris()) {
      if (!harvestUris.contains(localUri)) {
        for (RecordInfo record: localUris.getRecords(localUri)) {
                    if(log.isDebugEnabled()) log.debug ("  - Removing deleted metadata with id: " + record.id);
          dataMan.deleteMetadata (context, record.id);
   
          if (record.isTemplate.equals("s")) {
            //--- Uncache xlinks if a subtemplate
            Processor.uncacheXLinkUri(metadataGetService+"?uuid=" + record.uuid);
            result.subtemplatesRemoved++;
          } else {
            result.locallyRemoved++;
          }
        }
      }
    }

        dataMan.flush();

        result.totalMetadata = result.serviceRecords + result.collectionDatasetRecords + result.atomicDatasetRecords;
    return result;
  }

  //---------------------------------------------------------------------------
  //---
  //--- Private methods
  //---
  //---------------------------------------------------------------------------

  //---------------------------------------------------------------------------
  /**
     * Add metadata to GN for the services and datasets in a thredds
   * catalog
     * 
   * 1. Open Catalog Document
   * 2. Crawl the catalog processing datasets as ISO19139 records
   * and recording services (attach dataset ids to the services that deliver
   * them)
   * 3. Process services found as ISO19119 records
   * 4. Create a service record for the thredds catalog service provided and
   * list service records as something that the thredds catalog provides
   * 5. Save all
     * 
     * @param cata      Catalog document
     *                  
     **/
 
   private void harvestCatalog (Element cata) throws Exception {

    if (cata == null)
      return;

    //--- loading categories and groups
    localCateg   = new CategoryMapper (context);
    localGroups = new GroupMapper (context);

    //--- Setup proxy authentication 
    Lib.net.setupProxy(context);
   
    //--- load catalog
    InvCatalogFactory factory = new InvCatalogFactory("default", true);
    catalog = (InvCatalogImpl) factory.readXML(params.url);
    StringBuilder buff = new StringBuilder();
    if (!catalog.check(buff, true)) {
      throw new BadXmlResponseEx("Invalid catalog "+ params.url+"\n"+buff.toString());
    }

    //--- display catalog read in log file
    log.info("Catalog read from "+params.url+" is \n"+factory.writeXML(catalog));
    String serviceStyleSheet = context.getAppPath() + Geonet.Path.IMPORT_STYLESHEETS + "/ThreddsCatalog-to-ISO19119_ISO19139.xsl";

    //--- Get base host url
      URL url = new URL(params.url);
    hostUrl = url.getProtocol()+"://"+url.getHost();
    if (url.getPort() != -1) hostUrl += ":"+url.getPort();
   
    //--- Crawl all datasets in the thredds catalogue
    log.info("Crawling the datasets in the catalog....");
    List<InvDataset> dsets = catalog.getDatasets();
    for (InvDataset ds : dsets) {
      crawlDatasets(ds);
    }

    //--- show how many datasets have been processed
    int totalDs =  result.collectionDatasetRecords + result.atomicDatasetRecords;
    log.info("Processed "+totalDs+" datasets.");

    if (params.createServiceMd) {
      //--- process services found by crawling the catalog
      log.info("Processing "+services.size()+" services...");
      processServices(cata, serviceStyleSheet);
 
      //--- finally create a service record for the thredds catalog itself and
      //--- add uuids of services that it provides to operatesOn element 
      //--- (not sure that this is what we should do here really - the catalog
      //--- is a dataset and a service??)
      log.info("Creating service metadata for thredds catalog...");
      Map<String, Object> param = new HashMap<String, Object>();
      param.put("lang",      params.lang);
      param.put("topic",    params.topic);
      param.put("uuid",      params.uuid);
      param.put("url",      params.url);
      param.put("name",      catalog.getName());
      param.put("type",      "Thredds Data Service Catalog "+catalog.getVersion());
      param.put("version",  catalog.getVersion());
      param.put("desc",      Xml.getString(cata));
      param.put("props",    catalog.getProperties().toString());
      param.put("serverops",    "");

            if(log.isDebugEnabled()) log.debug ("  - XSLT transformation using "+serviceStyleSheet);
      Element md = Xml.transform (cata, serviceStyleSheet, param);
 
      //--- TODO: Add links to services provided by the thredds catalog - but
      //--- where do we do this in ISO19119?
      saveMetadata(md, Sha1Encoder.encodeString (params.url), params.url);
     
      harvestUris.add(params.url);
     
      result.serviceRecords ++;
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Crawl all datasets in the catalog recursively
   *
     * @param  catalogDs    the dataset being processed
   * @throws  Exception
   **/
  
  private void crawlDatasets(InvDataset catalogDs) throws Exception {
    log.info("Crawling through "+catalogDs.getName());
   
    // HACK!! Get real dataset hidden by netcdf library when catalog ref name
    // equals top dataset name in referenced catalog
    InvDataset realDs = catalogDs;
    if (catalogDs instanceof InvCatalogRef) {
      InvDataset proxyDataset = ((InvCatalogRef)catalogDs).getProxyDataset();
      realDs = proxyDataset.getName().equals(catalogDs.getName())?proxyDataset:catalogDs;
    }
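
    //--- recurse into any nested datasets before harvesting this one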

    if (realDs.hasNestedDatasets()) {
      List<InvDataset> dsets = realDs.getDatasets();
      for (InvDataset ds : dsets) {
        crawlDatasets(ds);
      }
    }
   
    if (harvestMetadata(realDs)) {
        log.info("Harvesting dataset: " + realDs.getName());
      harvest(realDs);
    } else {
        log.info("Skipping dataset: " + realDs.getName());
    }
   
    // Release resources allocated when crawling catalog references
    if (catalogDs instanceof InvCatalogRef) {
      ((InvCatalogRef)catalogDs).release();
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Save the metadata to GeoNetwork's database
   *
     * @param md    the metadata being saved
     * @param uuid    the uuid of the metadata being saved
     * @param uri    the uri from which the metadata has been harvested
   **/
 
  private void saveMetadata(Element md, String uuid, String uri) throws Exception {

    //--- strip the catalog namespace as it is not required
    md.removeNamespaceDeclaration(invCatalogNS);

    String schema = dataMan.autodetectSchema(md, null); // should be iso19139
    if (schema == null) {
      log.warning("Skipping metadata with unknown schema.");
      result.unknownSchema ++;
      return;
    }

    log.info("  - Adding metadata with " + uuid + " schema is set to " + schema + "\n XML is "+ Xml.getString(md));
    DateFormat df = new SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss");
    Date date = new Date();
   
    deleteExistingMetadata(uri);

    //
        // insert metadata
        //
        String group = null, isTemplate = null, docType = null, title = null, category = null;
        boolean ufo = false, indexImmediate = false;
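        //--- defaults: no group owner, not a template, no doc type or category;
        //--- update-fixed-info (ufo) and immediate indexing are disabled here
        //--- because the record is indexed explicitly below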
        String id = dataMan.insertMetadata(context, schema, md, uuid, Integer.parseInt(params.ownerId), group, params.uuid,
                     isTemplate, docType, category, df.format(date), df.format(date), ufo, indexImmediate);

    int iId = Integer.parseInt(id);
        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);
        context.getBean(MetadataRepository.class).update(iId, new Updater<Metadata>() {
            @Override
            public void apply(@Nonnull Metadata entity) {
                addCategories(entity, params.getCategories(), localCateg, context, log, null);
            }
        });

    dataMan.setTemplateExt(iId, MetadataType.METADATA);
    dataMan.setHarvestedExt(iId, params.uuid, Optional.of(uri));

        dataMan.indexMetadata(id, false);

        dataMan.flush();
    }

  //---------------------------------------------------------------------------
  /**
   * Process one dataset generating metadata as per harvesting node settings
   *
     * @param   ds  the dataset to be processed
   * @throws  Exception
   **/
 
  private void harvest(InvDataset ds) throws Exception {
    //--- harvest metadata only if the dataset has changed
    if (!params.modifiedOnly || datasetChanged(ds)) {
      if (harvestMetadataUsingFragments(ds))  {
        createMetadataUsingFragments(ds);
      } else {
        createDIFMetadata(ds);
      }
    }
   
    //--- Add dataset uri to list of harvested uri's
    harvestUris.add(getUri(ds));

    //--- Record uuid of dataset against services that deliver it for 
    //--- inclusion in operatesOn element in 19119 service record
    List<InvAccess> accesses = ds.getAccess();
    for (InvAccess access : accesses) {
      processService(access.getService(), getUuid(ds), ds);
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Get dataset uri
   *
     * @param   ds  the dataset to be processed
     * 
   **/
 
  private String getUri(InvDataset ds) {
    if (ds.getID() == null) {
      return ds.getParentCatalog().getUriString() + "#" + ds.getName();
    } else {
        return getSubsetUrl(ds);
    }
    }

  //---------------------------------------------------------------------------
  /**
   * Has the dataset been modified since its metadata was last
   * harvested?
   *
     * @param   ds  the dataset to be processed
     * 
   **/
 
  private boolean datasetChanged(InvDataset ds) {
    List<RecordInfo> localRecords = localUris.getRecords(getUri(ds));
   
    if (localRecords == null) return true;
   
      Date lastModifiedDate  = null;
   
    List<DateType> dates = ds.getDates();
   
    for (DateType date: dates) {
      if (date.getType().equalsIgnoreCase("modified")) {
        lastModifiedDate = date.getDate();
      }
    }
   
    if (lastModifiedDate == null) return true;

    String datasetModifiedDate = new ISODate(lastModifiedDate.getTime(), false).toString();
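    //--- re-harvest if any local record predates the dataset's modification date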
   
    for (RecordInfo localRecord: localRecords) {
      if (localRecord.isOlderThan(datasetModifiedDate)) return true;
    }
   
    return false;
    }

  //---------------------------------------------------------------------------
  /**
   * Delete all metadata previously harvested for a particular uri
   *
     * @param   uri    uri for which previously harvested metadata should be deleted
     * 
   **/
 
  private void deleteExistingMetadata(String uri) throws Exception {
    List<RecordInfo> localRecords = localUris.getRecords(uri);
   
    if (localRecords == null) return;

    for (RecordInfo record: localRecords) {
      dataMan.deleteMetadata (context, record.id);

      if (record.isTemplate.equals("s")) {
        //--- Uncache xlinks if a subtemplate
        Processor.uncacheXLinkUri(metadataGetService+"?uuid=" + record.uuid);
      }
    }
    }

  //---------------------------------------------------------------------------
  /**
     * Create metadata using fragments
     *
     * <ul>
     * <li>collect useful metadata for the dataset</li>
     * <li>use supplied stylesheet to convert collected metadata into fragments</li>
     * <li>harvest metadata from fragments as requested</li>
     * </ul>
     *
     * Metadata collected is as follows:
     *
     * <pre>
     * {@code
     * <root>
     *    <catalogUri>http://someserver.com/thredds/catalog.xml</catalogUri>
     *    <uuid>uuid-generated-for-dataset</uuid>
     *    <catalog xmlns="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0.1">
     *       ... subset of catalog containing dataset as the top dataset ...
     *    </catalog>
     *    <netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2" location="example1.nc">
     *       ... ncml generated for netcdf dataset ...
     *       ... atomic datasets only ...
     *    </netcdf>
     * </root>
     * }
     * </pre>
     **/
 
  private void createMetadataUsingFragments(InvDataset ds) {
    try {
      log.info("Retrieving thredds/netcdf metadata...");

      //--- Create root element to collect dataset metadata to be passed to xsl transformation
      Element dsMetadata = new Element("root");
   
      //--- Add catalog uri (url) to allow relative urls to be resolved
      dsMetadata.addContent(new Element("catalogUri").setText(ds.getParentCatalog().getUriString()));

      //--- Add suggested uuid for dataset
      dsMetadata.addContent(new Element("uuid").setText(getUuid(ds)));
     
      //--- Add fullName of dataset
      dsMetadata.addContent(new Element("fullName").setText(ds.getFullName()));

      //--- Add dataset subset catalog information to metadata
      dsMetadata.addContent(getDatasetSubset(ds));
     
      //--- For atomic dataset's add ncml for dataset to metadata
      if (!ds.hasNestedDatasets()) {
        NetcdfDataset ncD = NetcdfDataset.openDataset("thredds:"+ds.getCatalogUrl());
        NcMLWriter ncmlWriter = new NcMLWriter();
        Element ncml = Xml.loadString(ncmlWriter.writeXML(ncD),false);
        dsMetadata.addContent(ncml);
      }

            if(log.isDebugEnabled()) log.debug("Thredds metadata and ncml is:"+Xml.getString(dsMetadata));

      //--- Create fragments using provided stylesheet

      String schema = ds.hasNestedDatasets() ? params.outputSchemaOnCollectionsFragments : params.outputSchemaOnAtomicsFragments;
      fragmentStylesheetDirectory = schemaMan.getSchemaDir(schema) + Geonet.Path.TDS_STYLESHEETS;
      String stylesheet = ds.hasNestedDatasets() ? params.collectionFragmentStylesheet : params.atomicFragmentStylesheet;

      Element fragments = Xml.transform(dsMetadata, fragmentStylesheetDirectory + "/" + stylesheet);
            if(log.isDebugEnabled()) log.debug("Fragments generated for dataset:"+Xml.getString(fragments));
     
      //--- remove any previously harvested metadata/sub-templates
      deleteExistingMetadata(getUri(ds));
     
      //--- Create metadata/subtemplates from fragments
      FragmentHarvester fragmentHarvester = ds.hasNestedDatasets() ? collectionFragmentHarvester : atomicFragmentHarvester;
      HarvestSummary fragmentResult = fragmentHarvester.harvest(fragments, getUri(ds));
     
      //--- Include fragment results in thredds results
      result.fragmentsReturned += fragmentResult.fragmentsReturned;
      result.fragmentsUnknownSchema += fragmentResult.fragmentsUnknownSchema;
      result.subtemplatesAdded += fragmentResult.fragmentsAdded;
      result.fragmentsMatched += fragmentResult.fragmentsMatched;
     
      if (ds.hasNestedDatasets()) {
        result.collectionDatasetRecords += fragmentResult.recordsBuilt;
      } else {
        result.atomicDatasetRecords += fragmentResult.recordsBuilt;
      }
    } catch (Exception e) {
      log.error("Thrown Exception "+e+" during dataset processing");
      e.printStackTrace();
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Return a catalog having the specified dataset as the top dataset
   * resolving inherited metadata and required services
   *
     * @param ds     the dataset to be processed
   */
 
  private Element getDatasetSubset(InvDataset ds) throws Exception {
    String datasetSubsetUrl = getSubsetUrl(ds);
   
    return Xml.loadFile(new URL(datasetSubsetUrl));
  }

  //---------------------------------------------------------------------------
  /**
   * Return url to a catalog having the specified dataset as the top dataset
   *
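   * e.g. http://host/thredds/catalog.xml?dataset=some%2Fdataset%2Fid (illustrative url)
   *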
     * @param ds     the dataset to be processed
   **/
 
  private String getSubsetUrl(InvDataset ds) {
      try {
          return ds.getParentCatalog().getUriString() + "?dataset=" + URLEncoder.encode(ds.getID(), Constants.ENCODING);
        } catch (UnsupportedEncodingException e) {
      log.error("Thrown Exception "+e+" during dataset processing");
          e.printStackTrace();
        }
        return null;
    }
 
  //---------------------------------------------------------------------------
  /**
   * Get uuid for dataset
   *
     * @param ds     the dataset to be processed
   **/
 
  private String getUuid(InvDataset ds) {
    String uuid = ds.getUniqueID();
   
    if (uuid == null) {
      uuid = Sha1Encoder.encodeString (ds.getCatalogUrl()); // sha1 of full dataset url
    } else {
      uuid = StringUtil.allow(uuid, "_-.",'-');
    }
   
    return uuid;
  }

  //---------------------------------------------------------------------------
  /**
   * Process one dataset by extracting its metadata, writing to DIF
   * and using xslt to transform to the required ISO format.
   *
   * @param ds     the dataset to be processed
   */
 
  private void createDIFMetadata(InvDataset ds) {
  try {

      boolean addCoordSys = false; // add coordinate systems if not DIF relaxed

      //--- TODO: Thredds has a metadata converter interface and some other
      //--- methods of handling metadata (including XML of different
      //--- namespaces) in the catalog - this is a place holder for getting
      //--- this info in future
      List <InvMetadata> mds = ds.getMetadata();
      log.info("Dataset has "+mds.size()+" metadata elements");
      for (InvMetadata md : mds) {
        log.info("Found metadata "+md.toString());
      }

      //--- check and see whether this dataset is DIF writeable
      DIFWriter difWriter = new DIFWriter();
      StringBuffer sBuff = new StringBuffer();
      Element dif = null;

      if (difWriter.isDatasetUseable(ds, sBuff)) {
        log.info("Yay! Dataset has DIF compatible metadata "+sBuff.toString());

        dif = difWriter.writeOneEntry(ds, sBuff);

      } else {
        log.info("Dataset does not have DIF compatible metadata so we will write a relaxed DIF entry\n"+sBuff.toString());

        dif = difWriter.writeOneRelaxedEntry(ds, sBuff);
        addCoordSys = true;
      }

      //--- get the UUID assigned to the DIF record
      String uuid = dif.getChild("Entry_ID", difNS).getText();

      boolean isCollection = ds.hasNestedDatasets();
      log.info("Dataset is a collection dataset? "+isCollection);

      //--- now convert DIF entry into an ISO entry using the appropriate
      //--- difToIso converter (only schemas with a DIF converter are
      //--- supplied to the user for choice)
      Element md = null;
      if (isCollection) {
        String difToIsoStyleSheet = schemaMan.getSchemaDir(params.outputSchemaOnCollectionsDIF) + Geonet.Path.DIF_STYLESHEETS + "/DIFToISO.xsl";
        log.info("Transforming collection dataset to "+params.outputSchemaOnCollectionsDIF);
        md = Xml.transform(dif, difToIsoStyleSheet);
      } else {
        String difToIsoStyleSheet = schemaMan.getSchemaDir(params.outputSchemaOnAtomicsDIF) + Geonet.Path.DIF_STYLESHEETS + "/DIFToISO.xsl";
        log.info("Transforming atomic dataset to "+params.outputSchemaOnAtomicsDIF);
        md = Xml.transform(dif, difToIsoStyleSheet);
      }

      //--- if we don't have full set of DIF metadata then
      //--- if atomic dataset then check dataset for global attributes
      //--- and/or dump coordinate systems else
      //--- if collection then check for ThreddsMetadata.Variables and
      //--- create a netcdfInfo for addition to the ISO record
      if (addCoordSys) {
        boolean globalAttributes = false;
        if (!isCollection) { // open up atomic dataset for info
          log.info("Opening dataset to get global attributes");
          //--- if not a dataset collection then
          //--- open and check global attributes for metadata conventions
          try {
            NetcdfDataset ncD = NetcdfDataset.openDataset("thredds:"+ds.getCatalogUrl());
            Attribute mdCon = ncD.findGlobalAttributeIgnoreCase("metadata_conventions");
            if (mdCon != null) {
              List<Attribute> ga = ncD.getGlobalAttributes();
              for (Attribute att : ga ) {
                                if(log.isDebugEnabled()) log.debug("Attribute found "+att.toString());
                //--- TODO: Attach the attributes to the metadata node
                //--- for conversion into the ISO record by an xslt
              }
            } else {
                            if(log.isDebugEnabled()) log.debug("No global attribute with metadata conventions found");
            }
            ncD.close();
          } catch (Exception e) {
            log.info("Exception raised in netcdfDataset ops: "+e);
            e.printStackTrace();
          }
        }

        //--- if no metadata conventions then find the coordinate systems
        //--- and add these to the appropriate place in whatever ISO or ISO
        //--- profile we are using - MCP: mcp:dataParameters & gmd:keywords,
        //--- ISO: gmd:keywords
        boolean foundNetcdfInfo = false;
        if (!globalAttributes && !isCollection) {
          log.info("No global attributes describing metadata so opening dataset to get coordinate systems");
          try {
            NetcdfDatasetInfo ncDI = new NetcdfDatasetInfo("thredds:"+ds.getCatalogUrl());
            log.info("Coordinate systems builder is "+ncDI.getConventionUsed());
            if (!ncDI.getConventionUsed().equals("None")) {
              Document doc = ncDI.makeDocument();
              Element coords = doc.detachRootElement();
              log.info("Coordinate systems of dataset are: \n"+Xml.getString(coords));
              setCoordsStyleSheet(isCollection);
              addKeywordsAndDataParams(coords, md);
              foundNetcdfInfo = true;
            } else {
                            if(log.isDebugEnabled()) log.debug("Coordinate system convention is not recognized");
            }
            ncDI.close();
          } catch (Exception e) {
            log.info("Exception raised in netcdfDatasetInfo ops: "+e);
            e.printStackTrace();
          }
        }

        //--- finally - check and see whether we can extract variables from the
        //--- ThreddsMetadata - we no longer care whether this is a collection
        //--- or atomic
        if (!globalAttributes && !foundNetcdfInfo) {
          //--- get ThreddsMetadata.Variables and create a netcdfDatasetInfo
          //--- document if possible
          List<ThreddsMetadata.Variables> vsL = ds.getVariables();
          if (vsL != null && vsL.size() > 0) {
            for (ThreddsMetadata.Variables vs : vsL) {
              String vHref = vs.getVocabHref();
              URI    vUri  = vs.getVocabUri();
              String vocab = vs.getVocabulary();
              Element coords = new Element("netcdfDatasetInfo");
              for (ThreddsMetadata.Variable v : vs.getVariableList()) {
                Element varX = new Element("variable");
                varX.setAttribute("name",    v.getName());
                varX.setAttribute("decl",    v.getDescription());
                varX.setAttribute("units",  v.getUnits());
                // - these three attributes are new but then there is no
                // - xsd for this so we can add as we want!
                varX.setAttribute("vocab",      vocab);
                varX.setAttribute("vocaburi",   vUri.toString());
                varX.setAttribute("vocabhref",  vHref);
                coords.addContent(varX);
              }
              log.info("Coordinate systems from ThreddsMetadata are: \n"+Xml.getString(coords));
              setCoordsStyleSheet(isCollection);
              addKeywordsAndDataParams(coords, md);
            }
          }
        }
      }

      //--- write metadata
      saveMetadata(md, uuid, getUri(ds));

      //--- update totals
      if (isCollection) {
        result.collectionDatasetRecords ++;
      } else {
        result.atomicDatasetRecords ++;
      }
    } catch (Exception e) {
        log.error("Thrown Exception "+e+" during dataset processing");
        e.printStackTrace();
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Create the coordinate stylesheet names that will be used to add
   * gmd:keywords and mcp:DataParameters if the output schema requires.
   *
   * @param  isCollection true if we are working with a collection dataset
   */
  private void setCoordsStyleSheet(boolean isCollection) {

    String schemaDir;
    if (!isCollection) {
      schemaDir = schemaMan.getSchemaDir(params.outputSchemaOnAtomicsDIF);
    } else {
      schemaDir = schemaMan.getSchemaDir(params.outputSchemaOnCollectionsDIF);
    }

    cdmCoordsToIsoKeywordsStyleSheet = schemaDir + Geonet.Path.DIF_STYLESHEETS + "/CDMCoords-to-ISO19139Keywords.xsl";

    // -- FIXME: This is still schema dependent and needs to be improved
    // -- What we wait upon is finalization of the new coverage data parameters
    // -- metadata elements (inside MD_ContentInformation) in ISO19115/19139
    if (schemaDir.contains("iso19139.mcp")) {
      cdmCoordsToIsoMcpDataParametersStyleSheet = schemaDir + Geonet.Path.DIF_STYLESHEETS + "/CDMCoords-to-ISO19139MCPDataParameters.xsl";
    } else {
      cdmCoordsToIsoMcpDataParametersStyleSheet = null;
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Process a netcdfinfo document - adding variables as keywords and
   * mcp:DataParameters if the output schema requires.
   *
   * @param  coords  the netcdfinfo document with coord systems embedded
   * @param  md    ISO metadata record to add keywords and data params to
   **/
 
  private void addKeywordsAndDataParams(Element coords, Element md) throws Exception {
    Element keywords = Xml.transform(coords, cdmCoordsToIsoKeywordsStyleSheet);
    addKeywords(md, keywords);
    if (cdmCoordsToIsoMcpDataParametersStyleSheet != null) {
      Element dataParameters = Xml.transform(coords, cdmCoordsToIsoMcpDataParametersStyleSheet);
      log.info("mcp:DataParameters are: \n"+Xml.getString(dataParameters));
      addDataParameters(md, dataParameters);
    }
  }

  //---------------------------------------------------------------------------
  /**
   * Process a service reference in a dataset - record details of the
   * service and add the details of a dataset to the list of datasets it
   * serves - Note: compound services are expanded.
   *
   * @param serv     the service to be processed
   * @param uuid     uuid of the dataset that is delivered by this service
   * @param ds        dataset that is being delivered by this service
   **/
 
  private void processService(InvService serv, String uuid, InvDataset ds) {
   
    //--- get service, if compound service then get all nested services
    List<InvService> servs = new ArrayList<InvService>();
    if (serv.getServiceType() == ServiceType.COMPOUND) {
      servs.addAll(serv.getServices());
    } else {
      servs.add(serv);
    }

    //--- add dataset info to the appropriate ThreddsService
    for (InvService s : servs) {
      //Skip resolver services
      if (s.getServiceType().equals(ServiceType.RESOLVER)) continue;
       
      String sUrl = "";
     
      if (!s.isRelativeBase()) {
        sUrl = s.getBase();
      } else {
        sUrl = hostUrl+s.getBase();
      }

      ThreddsService ts = services.get(sUrl);
      if (ts == null) {
        ts = new ThreddsService();
        ts.service = s;
        ts.version = getVersion(serv, ds);
        ts.ops = getServerOperations(serv, ds);
       
        services.put(sUrl,ts);
      }
      ts.datasets.put(uuid,ds.getName());
    }

  }

  //---------------------------------------------------------------------------
  /**
   * Find the version of the service that delivers a particular dataset
   * Handles OPeNDAP and HTTP only at present
   *
   * @param  serv  the service that delivers the dataset
   * @param  ds    the dataset being delivered by the service
   **/
 
  private String getVersion(InvService serv, InvDataset ds) {
    String result = "unknown";
    if (serv.getServiceType() == ServiceType.OPENDAP) {
      InvAccess access = ds.getAccess(ServiceType.OPENDAP);
      if (access != null) {
        String href = access.getStandardUrlName() + ".ver";
        String readResult = getResultFromHttpUrl(href);
        if (readResult != null) result = readResult;
      }
    } else if (serv.getServiceType() == ServiceType.HTTPServer) {
      result = "HTTP/1.1";
    }
    return result;
  }
  //---------------------------------------------------------------------------
  /**
   * Get the server operations
   * Applicable to OPeNDAP only at present
   *
   * @param  serv  the service that delivers the dataset
   * @param  ds    the dataset being delivered by the service
   **/
 
  private String getServerOperations(InvService serv, InvDataset ds) {
    String result = "none";
    if (serv.getServiceType() == ServiceType.OPENDAP) {
      InvAccess access = ds.getAccess(ServiceType.OPENDAP);
      if (access != null) {
        String href = access.getStandardUrlName() + ".help";
        String readResult = getResultFromHttpUrl(href);
        if (readResult != null) result = readResult;
      }
    }
    return result;
  }

  //---------------------------------------------------------------------------
  /**
   * Get a String result from an HTTP URL
   *
   * @param   href    the URL to get the info from
   **/
 
  private String getResultFromHttpUrl(String href) {
    String result = null;
    try {
      //--- get the version from the OPeNDAP server
      URL url = new URL(href);
      HttpURLConnection conn = (HttpURLConnection)url.openConnection();
      Object o = conn.getContent();
            if(log.isDebugEnabled()) log.debug("Opened "+href+" and got class "+o.getClass().getName());
      StringBuffer version = new StringBuffer();
      String inputLine;
      BufferedReader dis = null;
      InputStreamReader isr = null;
      InputStream is = null;
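      //--- read the full response body line by line; streams are closed
      //--- quietly in the finally block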
      try {
          is = conn.getInputStream();
          isr = new InputStreamReader(is, Constants.ENCODING);
                dis = new BufferedReader(isr);
          while ((inputLine = dis.readLine()) != null) {
              version.append(inputLine+"\n");
          }
          result = version.toString();
                if(log.isDebugEnabled()) log.debug("Read from URL:\n"+result);
      } finally {
          IOUtils.closeQuietly(is);
          IOUtils.closeQuietly(isr);
          IOUtils.closeQuietly(dis);
      }
    } catch (Exception e) {
            if(log.isDebugEnabled()) log.debug("Caught exception "+e+" whilst attempting to query URL "+href);
      e.printStackTrace();
    }
    return result;
  }
  //---------------------------------------------------------------------------
  /**
   * Process all services that serve datasets in the thredds catalog
   *
   * @param  cata        the XML of the catalog
   * @param  serviceStyleSheet  name of the stylesheet to produce 19119
   **/
 
  private void processServices(Element cata, String serviceStyleSheet) throws Exception {

    for (String sUrl : services.keySet()) {
   
      ThreddsService ts = services.get(sUrl);
      InvService serv = ts.service;

            if(log.isDebugEnabled()) log.debug("Processing Thredds service: "+serv.toString());

      String sUuid = Sha1Encoder.encodeString (sUrl);
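      //--- (uuid is derived deterministically from the service url)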

      //--- TODO: if service is WCS or WMS then pass the full service url to
      //--- OGCWxS service metadata creator
     
      //---  pass info to stylesheet which will create a 19119 record

            if(log.isDebugEnabled()) log.debug("  - XSLT transformation using "+serviceStyleSheet);

      Map<String, Object> param = new HashMap<String, Object>();
      param.put("lang",    params.lang);
      param.put("topic",  params.topic);
      param.put("uuid",    sUuid);
      param.put("url",    sUrl);
      param.put("name",    serv.getName());
      param.put("type",    serv.getServiceType().toString().toUpperCase());
      param.put("version", ts.version);
      param.put("desc",    serv.toString());
      param.put("props",  serv.getProperties().toString());
      param.put("serverops", ts.ops);

      Element md = Xml.transform (cata, serviceStyleSheet, param);

      String schema = dataMan.autodetectSchema (md, null);
      if (schema == null) {
        log.warning("Skipping metadata with unknown schema.");
        result.unknownSchema ++;
      } else {

        //--- Update ISO19119 for data/service links (ie. operatesOn element)
        md = addOperatesOnUuid (md, ts.datasets);

        //--- Now add to geonetwork
        saveMetadata(md, sUuid, sUrl);

        harvestUris.add(sUrl);
       
        result.serviceRecords ++;
      }
    }      
  }

  //---------------------------------------------------------------------------
  /**
     * Add an element to md's child list immediately after the named child element
     * 
     * @param md          iso19139 metadata
     * @param theNewElem  the new element to be added
     * @param name        the name of the element to search for
     * @param ns          the namespace of the element to search for
     *                  
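     * e.g. {@code addAfter(root, keywords, "descriptiveKeywords", gmd)} inserts
     * the keywords element immediately after the existing gmd:descriptiveKeywords
     * child of root (as used by addKeywords below)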
     **/
 
  boolean addAfter(Element md, Element theNewElem, String name, Namespace ns) throws Exception {
    Element chSet = md.getChild(name, ns);
   
    if (chSet != null) {
      int pos = md.indexOf(chSet);
      md.addContent(pos+1, theNewElem);
      return true;
    }
   
    return false;
   }

  //---------------------------------------------------------------------------
  /**
     * Add keywords generated from CDM coordinate systems to identificationInfo
      
       <gmd:descriptiveKeywords>
         <gmd:MD_Keywords>
           <gmd:keyword>
             <gco:CharacterString>
             </gco:CharacterString>
           </gmd:keyword>
           ...
           ...
           ...
           <gmd:type>
             <gmd:MD_KeywordType codelist...>
           </gmd:type>
           <gmd:thesaurusName>
             <gmd:CI_Citation>
               ....
             </gmd:CI_Citation>
           </gmd:thesaurusName>
         </gmd:MD_Keywords>
       </gmd:descriptiveKeywords>
      
     * @param md        iso19139 metadata
     * @param keywords  gmd:keywords block to be added to metadata
     *                  
     **/
 
   private Element addKeywords (Element md, Element keywords) throws Exception {
    Element root  = (Element)md.getChild("identificationInfo", gmd).getChildren().get(0);
    boolean ok = addAfter(root, keywords, "descriptiveKeywords", gmd);
    if (!ok) {
      throw new BadXmlResponseEx("The metadata did not have a descriptiveKeywords Element");
    }
    return md;
  }
 
  //---------------------------------------------------------------------------
  /**
     * Add mcp:dataParameters created from CDM coordinate systems to
   * identificationInfo (mcp only)
      
       <mcp:dataParameters>
         <mcp:DP_DataParameters>
           ...
           ...
           ...
         </mcp:DP_DataParameters>
       </mcp:dataParameters>
      
     * @param md              iso19139 MCP metadata
     * @param dataParameters  mcp:dataParameters block to be added to metadata
     *                  
     **/
  
   private Element addDataParameters (Element md, Element dataParameters) throws Exception {
    Element root  = (Element)md.getChild("identificationInfo", gmd).getChildren().get(0);
    root.addContent(dataParameters); // this is dependent on the mcp schema
    return md;
  }
 

  //---------------------------------------------------------------------------
  /**
     * Add OperatesOn elements on an ISO19119 metadata
     * 
     *  <srv:operatesOn>
     *    <gmd:MD_DataIdentification uuidref=""/>
     *  </srv:operatesOn>
    
     * @param md        iso19119 metadata
     * @param datasets  HashMap of datasets with uuids to be added
     *                  
     **/
  
   private Element addOperatesOnUuid (Element md, Map<String,String> datasets) {
    Element root   = md.getChild("identificationInfo", gmd).getChild("SV_ServiceIdentification", srv);
//    Element co     = root.getChild("containsOperations", srv);

    if (root != null) {
            if(log.isDebugEnabled()) log.debug("  - add operatesOn with uuid and other attributes");
     
      for (Map.Entry<String, String> entry : datasets.entrySet()) {
          String dsUuid = entry.getKey();
         
        Element op = new Element ("operatesOn", srv);
        op.setAttribute("uuidref", dsUuid);
        op.setAttribute("href", context.getBaseUrl() + "/srv/en/metadata.show?uuid=" + dsUuid, xlink);
        op.setAttribute("title", entry.getValue(), xlink);
        root.addContent(op);
      }
    }
   
    return md;
  }
 
  //---------------------------------------------------------------------------
    /**
     * Determine whether dataset metadata should be harvested 
     *
     * @param ds     the dataset to be checked
     **/
 
    private boolean harvestMetadata(InvDataset ds) {
        if (isCollection(ds)) {
            return params.createCollectionDatasetMd && (params.ignoreHarvestOnCollections || ds.isHarvest());
        } else {
            return params.createAtomicDatasetMd && (params.ignoreHarvestOnAtomics || ds.isHarvest());
        }
    }

    //---------------------------------------------------------------------------
    /**
     * Determine whether dataset metadata should be harvested using fragments 
     *
     * @param ds     the dataset to be checked
     **/
   
    private boolean harvestMetadataUsingFragments(InvDataset ds) {
        if (isCollection(ds)) {
            return params.collectionMetadataGeneration.equals(ThreddsParams.FRAGMENTS);
        } else {
            return params.atomicMetadataGeneration.equals(ThreddsParams.FRAGMENTS);
        }
    }

  //---------------------------------------------------------------------------
  /**
   * Determine whether dataset is a collection i.e. has nested datasets
     *
     * @param ds     the dataset to be checked
     **/
   
  private boolean isCollection(InvDataset ds) {
    return ds.hasNestedDatasets();
  }

  //---------------------------------------------------------------------------
  /**
     * Get fragment harvesting parameters for collection datasets
     *  
     * @return    fragment harvesting parameters for collection datasets
     *
     **/
 
  private FragmentParams getCollectionFragmentParams() {
    FragmentParams collectionParams = new FragmentHarvester.FragmentParams();
    collectionParams.categories = params.getCategories();
    collectionParams.createSubtemplates = params.createCollectionSubtemplates;
    collectionParams.isoCategory = params.datasetCategory;
    collectionParams.privileges = params.getPrivileges();
    collectionParams.templateId = params.collectionMetadataTemplate;
    collectionParams.uuid = params.uuid;
    collectionParams.outputSchema = params.outputSchemaOnCollectionsFragments;
    return collectionParams;
  }

  //---------------------------------------------------------------------------
  /**
     * Get fragment harvesting parameters for atomic datasets
     *  
     * @return    fragment harvesting parameters for atomic datasets
     *
     **/
 
  private FragmentParams getAtomicFragmentParams() {
    FragmentParams atomicParams = new FragmentHarvester.FragmentParams();
    atomicParams.categories = params.getCategories();
    atomicParams.createSubtemplates = params.createAtomicSubtemplates;
    atomicParams.isoCategory = params.datasetCategory;
    atomicParams.privileges = params.getPrivileges();
    atomicParams.templateId = params.atomicMetadataTemplate;
    atomicParams.uuid = params.uuid;
    atomicParams.outputSchema = params.outputSchemaOnAtomicsFragments;
    atomicParams.owner = params.ownerId;
    return atomicParams;
  }

  //---------------------------------------------------------------------------
  //---
  //--- Variables
  //---
  //---------------------------------------------------------------------------

  private Logger         log;
  private ServiceContext context;
  private ThreddsParams  params;
  private DataManager    dataMan;
  private SchemaManager  schemaMan;
  private CategoryMapper localCateg;
  private GroupMapper    localGroups;
  private UriMapper      localUris;
  private HarvestResult  result;
  private String         hostUrl;
  private HashSet<String> harvestUris = new HashSet<String>();
  private String         cdmCoordsToIsoKeywordsStyleSheet;
  private String         cdmCoordsToIsoMcpDataParametersStyleSheet;
  private String         fragmentStylesheetDirectory;
  private String          metadataGetService;
  private Map<String,ThreddsService> services = new HashMap<String, Harvester.ThreddsService>();
  private InvCatalogImpl catalog;
 
  private FragmentHarvester atomicFragmentHarvester;
  private FragmentHarvester collectionFragmentHarvester;

  private static class ThreddsService {
    public Map<String,String> datasets = new HashMap<String, String>();
    public InvService service;
    public String version;
    public String ops;
  };

  static private final Namespace difNS = Namespace.getNamespace("http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/");
  static private final Namespace invCatalogNS = Namespace.getNamespace("http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0");
  static private final Namespace gmd   = Namespace.getNamespace("gmd", "http://www.isotc211.org/2005/gmd");
  static private final Namespace srv   = Namespace.getNamespace("srv", "http://www.isotc211.org/2005/srv");
  static private final Namespace xlink = Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");
 
  private List<HarvestError> errors = new LinkedList<HarvestError>();
  @Override
  public List<HarvestError> getErrors() {
    return errors;
  }
   
}