Package org.apache.stanbol.entityhub.indexing.core.source

Source Code of org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.core.source;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ResourceLoader {
    private final boolean failOnError;
   
    private static final Logger log = LoggerFactory.getLogger(ResourceLoader.class);
    private final ResourceImporter resourceImporter;
    private final Map<String,ResourceState> files;
    private File importedDir;
    /**
     * for future uses to activate/deactivate parsing of entries within ZIP
     * archives. If <code>false</code> the ZIP archive will be parsed as a
     * whole. If <code>true</code> the Entries of the ZIP archive will be
     * parsed to the resource handler.
     */
    private boolean loadEntriesWithinZipArchives = true;
    public ResourceLoader(ResourceImporter resourceImporter,boolean failOnError) {
        this(resourceImporter,true,failOnError);
    }
    public ResourceLoader(ResourceImporter resourceImporter, boolean processEntriesWithinArchives,boolean failOnError) {
        if(resourceImporter == null){
            throw new IllegalStateException("The parsed ResourceProcessor instance MUST NOT be NULL!");
        }
        this.resourceImporter = resourceImporter;
        this.loadEntriesWithinZipArchives = processEntriesWithinArchives;
        //use a tree map to have the files sorted
        this.files = new TreeMap<String,ResourceState>();
        this.failOnError = failOnError;
    }

    public void setImportedDir(File importedDir) {
        this.importedDir = importedDir;
        if(importedDir != null){
            if(importedDir.isFile()){
                throw new IllegalArgumentException("The parsed imported directory MUST NOT be a File");
            }
            if(!importedDir.isDirectory()){
                if(!importedDir.mkdirs()){
                    throw new IllegalStateException("Unable to create imported directory "+importedDir);
                }
            }
        }
    }
    /**
     * Adds a new {@link File} resource to this resource loader. In case a
     * directory is parsed, all files directly within this directory will be
     * also added. Note that hidden Files are ignored.
     * @param fileOrDirectory the file/directory to add.
     */
    public void addResource(File fileOrDirectory){
        if(fileOrDirectory != null){
            for(String file : getFiles(fileOrDirectory)){
                ResourceState state = files.get(file);
                if(state == null){
                    log.debug("File {} registered to this RdfLoader",file);
                    files.put(file, ResourceState.REGISTERED);
                } else if(state == ResourceState.ERROR){
                    log.info("Readding file {} after previous error while loading",file);
                } else {
                    log.info("Ignore file {} because it already present with state {}",file,state);
                }
            }
        }
    }
   
    /**
     * Getter for the read only status of the resource loader.
     * @return the read only view of the status
     */
    public Map<String,ResourceState> getResourceStates(){
        return Collections.unmodifiableMap(files);
    }
    /**
     * Getter for all resources that are currently in the parsed state.
     * This Method returns a copy of all resources in the parsed state.
     * @param state the processing state
     * @return A copy of all resources in the parsed state
     */
    public Collection<String> getResources(ResourceState state){
        if(state == null){
            return Collections.emptySet();
        } else {
            return getResources(EnumSet.of(state));
        }
    }
    /**
     * Getter for all resources that are currently in on of the parsed states.
     * This Method returns a copy of all resources in such states.
     * @param states the processing states
     * @return A copy of all resources in one of the parsed states
     */
    public Collection<String> getResources(Set<ResourceState> states){
        if(states == null){
            return Collections.emptySet();
        } else {
            Collection<String> files = new HashSet<String>();
            synchronized (this.files) {
                for(Entry<String,ResourceState> entry : this.files.entrySet()){
                    if(states.contains(entry.getValue())){
                        files.add(entry.getKey());
                    }
                }
            }
            return files;
        }
    }
    public void loadResources(){
        Collection<String> fileToLoad;
        do { //to support adding of new files while loading
            fileToLoad = getResources(ResourceState.REGISTERED);
            long start=System.currentTimeMillis();
            log.info("Loding {} File{} ...",fileToLoad.size(),fileToLoad.size()>1?"s":"");
            for (String file : fileToLoad) {
                loadResource(file);
            }
            log.info(" ... {} files imported in {} seconds",
                fileToLoad.size(),(System.currentTimeMillis()-start)/1000);
        } while(!fileToLoad.isEmpty());
    }
    /**
     * Loads a resource from a file
     * @param file the file resource
     */
    private void loadResource(String file) {
        synchronized (files) {
            //sync to files to avoid two threads loading the same file
            ResourceState state = files.get(file);
            if(state == null || state != ResourceState.REGISTERED){
                log.info("Do not load File {} because of its state {} (null means removed from list)",
                    file,state);
                return; //someone removed it in between
            } else { //set to loading
                setResourceState(file, ResourceState.LOADING, null);
            }
        }
        long startFile = System.currentTimeMillis();
        log.info(" > loading '{}' ...", file);
        String extension = FilenameUtils.getExtension(file);
        if(loadEntriesWithinZipArchives && (
                "zip".equalsIgnoreCase(extension) ||
                "jar".equalsIgnoreCase(extension))){
            log.info("  - processing {}-archive entries:",extension);
            ZipFile zipArchive;
            try {
                zipArchive = new ZipFile(file);
            } catch (IOException e) {
                zipArchive = null;
                setResourceState(file, ResourceState.ERROR,e);
            }
            if(zipArchive != null){
                boolean isError = false;
                Enumeration<ZipArchiveEntry> entries = zipArchive.getEntries();
                while(entries.hasMoreElements()){
                    ZipArchiveEntry entry = entries.nextElement();
                    if(!entry.isDirectory()){
                        String entryName = entry.getName();
                        log.info("     o loading entry '{}'", entryName);
                        try {
                            ResourceState state = resourceImporter.importResource(
                                zipArchive.getInputStream(entry),
                                FilenameUtils.getName(entryName));
                            if(state == ResourceState.ERROR){
                                isError = true;
                            }
                        } catch (IOException e) {
                            isError = true;
                        }
                    }
                }
                //set the state for the Archive as a whole
                setResourceState(file,
                    isError ? ResourceState.ERROR : ResourceState.LOADED, null);
            }
        } else {
            InputStream is;
            try {
                is = new FileInputStream(file);
                ResourceState state = resourceImporter.importResource(is,
                    FilenameUtils.getName(file));
                setResourceState(file, state, null);
            } catch (FileNotFoundException e) {
                //during init it is checked that files exists and are files
                //and there is read access so this can only happen if
                //someone deletes the file in between
                log.warn("Unable to load resource " + file, e);
                setResourceState(file, ResourceState.ERROR, e);
            } catch (IOException e) {
                log.error("Unable to load resource " + file ,e);
                setResourceState(file, ResourceState.ERROR, e);
            } catch (Exception e) {
                log.error("Unable to load resource " + file, e);
                setResourceState(file, ResourceState.ERROR, e);
            }
        }
        log.info("   - completed in {} seconds",
            (System.currentTimeMillis()-startFile)/1000);
    }
    /**
     * Getter for the files based on a parsed File or Directory. Hidden Files
     * are ignored. Doese not search recursively to the directory structure!
     * @param fileOrDir The file or directory
     * @return the Collection of files found based on the parameter
     */
    private static Collection<String> getFiles(File fileOrDir){
        if(fileOrDir == null){
            return Collections.emptySet();
        } else if(fileOrDir.isHidden()){
            return Collections.emptySet();
        } else if(fileOrDir.isFile()){
            return Collections.singleton(fileOrDir.getPath());
        } else if(fileOrDir.isDirectory()){
            Collection<String> files = new ArrayList<String>();
            for(File file : fileOrDir.listFiles()){
                if(file.isFile() && !file.isHidden()){
                    files.add(FilenameUtils.concat(fileOrDir.getPath(), file.getPath()));
                }
            }
            return files;
        } else { //file does not exist
            return Collections.emptySet();
        }
    }
    /**
     * Logs the Exception and sets the file to the {@link ResourceState#ERROR}
     * state
     * @param file the affected file
     * @param e the Exception
     */
    private void setResourceState(String file, ResourceState state,Exception e) {
        if(e != null){
            log.error("Exception while loading file "+file,e);
        }
        if(state == null){
            //ensure that there are no null values in the map
            throw new IllegalArgumentException("The parsed ProcessingState MUST NOT be NULL!");
        }
        if(file == null){
            //ignore calls if file is null
            return;
        }
        synchronized (files) {
            if(files.containsKey(file)){
                log.debug("File {} now in state {}",file,state);
                if(importedDir != null && ResourceState.LOADED == state){
                    files.remove(file);
                    try {
                        files.put(moveToImportedFolder(new File(file)).toString(), state);
                    } catch (IOException ioe) {
                       log.warn("Unable to move loaded Resource {} to imported Directory! "
                           + "Please move the file manually to {}!",file,importedDir);
                       log.warn("Reason: "+ioe.getMessage(),ioe);
                       files.put(file, state);
                    }
                } else { //this does not use an imported folder or the state is not LOADED
                    files.put(file, state);
                }
                //if failOnError is activated we stop the loading on the first
                //error!
                if(failOnError && ResourceState.ERROR == state){
                    String msg = "Error while loading Resource "+file;
                    if(e != null){
                        throw new IllegalStateException(msg,e);
                    } else {
                        throw new IllegalStateException(msg);
                    }
                }
            } else {
                log.info("Ignore Error for File {} because it is no longer registered with this RdfLoader",
                    file);
            }
        }
    }
    private File moveToImportedFolder(File file) throws IOException {
        if(importedDir == null){
            return file;
        }
        File moved = new File(importedDir,file.getName());
        int i=0;
        while(moved.isFile()){
            i++;
            moved = new File(importedDir,i+"_"+file.getName());
        }
        log.info("   ... moving imported file {} to {}/{}",new Object[] {
                file.getName(),importedDir.getName(),moved.getName()});
        FileUtils.moveFile(file, moved);
        return moved;
    }
}
TOP

Related Classes of org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.