Package org.archive.spring

Source Code of org.archive.spring.PathSharingContext

/*
*  This file is part of the Heritrix web crawler (crawler.archive.org).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.archive.spring;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.io.FileUtils;
import org.archive.util.ArchiveUtils;
import org.archive.util.FilesystemLinkMaker;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.config.ConfigurableListableBeanFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.validation.BeanPropertyBindingResult;
import org.springframework.validation.Errors;
import org.springframework.validation.Validator;

/**
* Spring ApplicationContext extended for Heritrix use.
*
* Notable extensions:
*
* Remembers its primary XML configuration file, and can report its filesystem
* path.
*
* Reports a summary of Errors collected from self-Validating Beans.
*
* Generates launchId from timestamp, creates launch directory
* {jobDir}/{launchId}, and snapshots crawl configuration file into the launch
* directory. Other configuration files, if any, are automatically snapshotted
* into the launch directory when they are read (see
* {@link ConfigFile#obtainReader()}). The token ${launchId} will be
* interpolated in configuration-relative paths (see
* {@link ConfigPathConfigurer}) so that launch-specific paths can be used for
* logs, reports, warcs, etc.
*
* @contributor gojomo
*/
public class PathSharingContext extends FileSystemXmlApplicationContext {
    private static Logger LOGGER =
        Logger.getLogger(PathSharingContext.class.getName());

    public PathSharingContext(String configLocation) throws BeansException {
        super(configLocation);
    }
    public PathSharingContext(String[] configLocations, ApplicationContext parent) throws BeansException {
        super(configLocations, parent);
    }
    public PathSharingContext(String[] configLocations, boolean refresh, ApplicationContext parent) throws BeansException {
        super(configLocations, refresh, parent);
    }
    public PathSharingContext(String[] configLocations, boolean refresh) throws BeansException {
        super(configLocations, refresh);
    }
    public PathSharingContext(String[] configLocations) throws BeansException {
        super(configLocations);
    }

    public String getPrimaryConfigurationPath() {
        return getConfigLocations()[0];
    }

    //
    // Cascading self-validation
    //
    protected HashMap<String,Errors> allErrors; // bean name -> Errors
    public void validate() {
        allErrors = new HashMap<String,Errors>();
           
        for(Entry<String, HasValidator> entry : getBeansOfType(HasValidator.class).entrySet()) {
            String name = entry.getKey();
            HasValidator hv = entry.getValue();
            Validator v = hv.getValidator();
            Errors errors = new BeanPropertyBindingResult(hv,name);
            v.validate(hv, errors);
            if(errors.hasErrors()) {
                allErrors.put(name,errors);
            }
        }
        for(String name : allErrors.keySet()) {
            for(Object obj : allErrors.get(name).getAllErrors()) {
                LOGGER.fine("validation error for '"+name+"': "+obj);
            }
        }
    }

    @Override
    public void start() {
        initLaunchDir();
        super.start();
    }
   
    public HashMap<String,Errors> getAllErrors() {
        return allErrors;
    }
   
    protected transient String currentLaunchId;
    protected void initLaunchId() {
        currentLaunchId = ArchiveUtils.getUnique14DigitDate();
        LOGGER.info("launch id " + currentLaunchId);
    }
    public String getCurrentLaunchId() {
        return currentLaunchId;
    }

    protected transient File currentLaunchDir;
    public File getCurrentLaunchDir() {
        return currentLaunchDir;
    }
   
    protected File getConfigurationFile() {
        String primaryConfigurationPath =  getPrimaryConfigurationPath();
        if (primaryConfigurationPath.startsWith("file:")) {
            // strip URI-scheme if present (as is usual)
            try {
                return new File(new URI(primaryConfigurationPath));
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        } else {
            return new File(primaryConfigurationPath);
        }
    }
   
    protected void initLaunchDir() {
        initLaunchId();
        try {
            currentLaunchDir = new File(getConfigurationFile().getParentFile(), getCurrentLaunchId());
            if (!currentLaunchDir.mkdir()) {
                throw new IOException("failed to create directory " + currentLaunchDir);
            }
           
            // copy cxml to launch dir
            FileUtils.copyFileToDirectory(getConfigurationFile(), currentLaunchDir);
           
            // attempt to symlink "latest" to launch dir
            File latestSymlink = new File(getConfigurationFile().getParentFile(), "latest");
            latestSymlink.delete();
            boolean success = FilesystemLinkMaker.makeSymbolicLink(currentLaunchDir.getName(), latestSymlink.getPath());
            if (!success) {
                LOGGER.warning("failed to create symlink from " + latestSymlink + " to " + currentLaunchDir);
            }
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "failed to initialize launch directory: " + e);
            currentLaunchDir = null;
        }
    }
   
    /**
     * Initialize the LifecycleProcessor.
     * Uses HeritrixLifecycleProcessor, which prevents an automatic lifecycle
     * start(), if none defined in the context.
     * @see org.springframework.context.support.DefaultLifecycleProcessor
     */
    protected void initLifecycleProcessor() {
        ConfigurableListableBeanFactory beanFactory = getBeanFactory();
        if (!beanFactory.containsLocalBean(LIFECYCLE_PROCESSOR_BEAN_NAME)) {
            HeritrixLifecycleProcessor obj = (HeritrixLifecycleProcessor)beanFactory.createBean(HeritrixLifecycleProcessor.class);
            beanFactory.registerSingleton(LIFECYCLE_PROCESSOR_BEAN_NAME,obj);
        }
        super.initLifecycleProcessor();
    }
   
    protected ConcurrentHashMap<Object, Object> data;

    /**
     * @return a shared map for arbitrary use during a crawl; for example, could
     *         be used for state persisting for the duration of the crawl,
     *         shared among ScriptedProcessor, scripting console, etc scripts
     */
    public ConcurrentHashMap<Object, Object> getData() {
        if (data == null) {
            data = new ConcurrentHashMap<Object, Object>();
        }
        return data;
    }
}
TOP

Related Classes of org.archive.spring.PathSharingContext

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.