/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.spring;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.archive.util.ArchiveUtils;
import org.archive.util.FilesystemLinkMaker;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.config.ConfigurableListableBeanFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.validation.BeanPropertyBindingResult;
import org.springframework.validation.Errors;
import org.springframework.validation.Validator;
/**
 * Spring ApplicationContext extended for Heritrix use.
 *
 * <p>Notable extensions:
 *
 * <ul>
 * <li>Remembers its primary XML configuration file, and can report its
 * filesystem path.</li>
 * <li>Reports a summary of Errors collected from self-Validating Beans.</li>
 * <li>Generates launchId from timestamp, creates launch directory
 * {jobDir}/{launchId}, and snapshots crawl configuration file into the launch
 * directory. Other configuration files, if any, are automatically snapshotted
 * into the launch directory when they are read (see
 * {@link ConfigFile#obtainReader()}). The token ${launchId} will be
 * interpolated in configuration-relative paths (see
 * {@link ConfigPathConfigurer}) so that launch-specific paths can be used for
 * logs, reports, warcs, etc.</li>
 * </ul>
 *
 * @contributor gojomo
 */
public class PathSharingContext extends FileSystemXmlApplicationContext {
    private static final Logger LOGGER =
        Logger.getLogger(PathSharingContext.class.getName());

    /**
     * @param configLocation location of the primary XML configuration
     * @throws BeansException if the superclass fails to build the context
     */
    public PathSharingContext(String configLocation) throws BeansException {
        super(configLocation);
    }

    /**
     * @param configLocations configuration locations; the first one is treated
     *        as the primary configuration
     * @param parent parent context
     * @throws BeansException if the superclass fails to build the context
     */
    public PathSharingContext(String[] configLocations, ApplicationContext parent) throws BeansException {
        super(configLocations, parent);
    }

    /**
     * @param configLocations configuration locations; the first one is treated
     *        as the primary configuration
     * @param refresh passed through to the superclass constructor
     * @param parent parent context
     * @throws BeansException if the superclass fails to build the context
     */
    public PathSharingContext(String[] configLocations, boolean refresh, ApplicationContext parent) throws BeansException {
        super(configLocations, refresh, parent);
    }

    /**
     * @param configLocations configuration locations; the first one is treated
     *        as the primary configuration
     * @param refresh passed through to the superclass constructor
     * @throws BeansException if the superclass fails to build the context
     */
    public PathSharingContext(String[] configLocations, boolean refresh) throws BeansException {
        super(configLocations, refresh);
    }

    /**
     * @param configLocations configuration locations; the first one is treated
     *        as the primary configuration
     * @throws BeansException if the superclass fails to build the context
     */
    public PathSharingContext(String[] configLocations) throws BeansException {
        super(configLocations);
    }

    /**
     * @return the first configured location, exactly as it was supplied (it
     *         may still carry a "file:" scheme prefix)
     */
    public String getPrimaryConfigurationPath() {
        return getConfigLocations()[0];
    }

    //
    // Cascading self-validation
    //

    /** Errors found by the most recent {@link #validate()}: bean name -> Errors. */
    protected HashMap<String,Errors> allErrors;

    /**
     * Runs every {@link HasValidator} bean through its own Validator and
     * remembers the beans that reported problems. Results of any previous
     * run are discarded. Each individual error is logged at FINE level.
     */
    public void validate() {
        allErrors = new HashMap<String,Errors>();
        for (Entry<String, HasValidator> entry : getBeansOfType(HasValidator.class).entrySet()) {
            String name = entry.getKey();
            HasValidator hv = entry.getValue();
            Validator v = hv.getValidator();
            Errors errors = new BeanPropertyBindingResult(hv, name);
            v.validate(hv, errors);
            if (errors.hasErrors()) {
                allErrors.put(name, errors);
            }
        }
        for (Entry<String, Errors> failed : allErrors.entrySet()) {
            for (Object obj : failed.getValue().getAllErrors()) {
                LOGGER.fine("validation error for '" + failed.getKey() + "': " + obj);
            }
        }
    }

    /**
     * Prepares a fresh launch id and launch directory, then delegates to the
     * superclass start.
     */
    @Override
    public void start() {
        initLaunchDir();
        super.start();
    }

    /**
     * @return bean name -> Errors from the most recent {@link #validate()};
     *         null if validate() has not been called yet
     */
    public HashMap<String,Errors> getAllErrors() {
        return allErrors;
    }

    /** Identifier of the current launch; assigned by {@link #initLaunchId()}. */
    protected transient String currentLaunchId;

    /**
     * Assigns a new launch id obtained from
     * {@link ArchiveUtils#getUnique14DigitDate()} and logs it.
     */
    protected void initLaunchId() {
        currentLaunchId = ArchiveUtils.getUnique14DigitDate();
        LOGGER.info("launch id " + currentLaunchId);
    }

    /**
     * @return the id of the current launch, or null if none has been
     *         generated yet
     */
    public String getCurrentLaunchId() {
        return currentLaunchId;
    }

    /** Directory of the current launch; null if it could not be set up. */
    protected transient File currentLaunchDir;

    /**
     * @return the directory of the current launch; null before the first
     *         {@link #start()} or if the directory could not be initialized
     */
    public File getCurrentLaunchDir() {
        return currentLaunchDir;
    }

    /**
     * Resolves the primary configuration path to a File.
     *
     * @return the primary configuration file
     * @throws RuntimeException wrapping a URISyntaxException if the path
     *         starts with "file:" but is not a syntactically valid URI
     */
    protected File getConfigurationFile() {
        String primaryConfigurationPath = getPrimaryConfigurationPath();
        if (primaryConfigurationPath.startsWith("file:")) {
            // strip URI-scheme if present (as is usual)
            try {
                return new File(new URI(primaryConfigurationPath));
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        } else {
            return new File(primaryConfigurationPath);
        }
    }

    /**
     * Generates a new launch id, creates the launch directory next to the
     * primary configuration file, copies that configuration file into it and
     * tries to point a "latest" symlink at it.
     *
     * <p>This is best-effort: an IOException is logged (with its stack trace)
     * and leaves {@link #currentLaunchDir} null instead of aborting the start;
     * a failed symlink only produces a warning.
     */
    protected void initLaunchDir() {
        initLaunchId();
        try {
            // resolve once; every path below is relative to the same file
            File configFile = getConfigurationFile();
            File jobDir = configFile.getParentFile();

            currentLaunchDir = new File(jobDir, getCurrentLaunchId());
            if (!currentLaunchDir.mkdir()) {
                throw new IOException("failed to create directory " + currentLaunchDir);
            }

            // copy cxml to launch dir
            FileUtils.copyFileToDirectory(configFile, currentLaunchDir);

            // attempt to symlink "latest" to launch dir; the old link (if any)
            // is removed first, and its absence is not an error
            File latestSymlink = new File(jobDir, "latest");
            latestSymlink.delete();
            boolean success = FilesystemLinkMaker.makeSymbolicLink(currentLaunchDir.getName(), latestSymlink.getPath());
            if (!success) {
                LOGGER.warning("failed to create symlink from " + latestSymlink + " to " + currentLaunchDir);
            }
        } catch (IOException e) {
            // pass the exception as the 'thrown' argument so the stack trace
            // reaches the log handlers instead of only its toString()
            LOGGER.log(Level.SEVERE, "failed to initialize launch directory: " + e, e);
            currentLaunchDir = null;
        }
    }

    /**
     * Initialize the LifecycleProcessor.
     * Uses HeritrixLifecycleProcessor, which prevents an automatic lifecycle
     * start(), if none defined in the context.
     * @see org.springframework.context.support.DefaultLifecycleProcessor
     */
    protected void initLifecycleProcessor() {
        ConfigurableListableBeanFactory beanFactory = getBeanFactory();
        if (!beanFactory.containsLocalBean(LIFECYCLE_PROCESSOR_BEAN_NAME)) {
            HeritrixLifecycleProcessor obj = (HeritrixLifecycleProcessor)beanFactory.createBean(HeritrixLifecycleProcessor.class);
            beanFactory.registerSingleton(LIFECYCLE_PROCESSOR_BEAN_NAME,obj);
        }
        super.initLifecycleProcessor();
    }

    /**
     * Backing map for {@link #getData()}. Deliberately has no field
     * initializer: an initializer would run only after the superclass
     * constructor returns and would replace a map already created by a
     * getData() call made during that constructor (NOTE(review): presumably
     * possible when the superclass constructor refreshes the context).
     */
    protected ConcurrentHashMap<Object, Object> data;

    /**
     * Lazily creates the shared map. Synchronized so that concurrent first
     * callers all receive the same instance.
     *
     * @return a shared map for arbitrary use during a crawl; for example, could
     *         be used for state persisting for the duration of the crawl,
     *         shared among ScriptedProcessor, scripting console, etc scripts
     */
    public synchronized ConcurrentHashMap<Object, Object> getData() {
        if (data == null) {
            data = new ConcurrentHashMap<Object, Object>();
        }
        return data;
    }
}