/*
* @(#)FileUtils.java 13/11/2004
*
* Copyright (c) 2004, 2005 jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* 3. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* 4. Any modification or additions to the software must be contributed back
* to the project.
*
* 5. Any investigation or reverse engineering of source code or binary to
* enable emails to bypass the filters, and hence inflict spam and or viruses
* onto users who use or do not use jASEN could subject the perpetrator to
* criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.jasen.util;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Hashtable;
import java.util.Map;
/**
 * <P>
 * General file utility methods: duplicate removal, content fingerprinting,
 * path and filename manipulation, file copying and directory listing.
 * </P>
 * <p>
 * All functionality is exposed through static methods; the public constructor
 * is retained only so existing callers that instantiate the class keep working.
 * </p>
 * @author Jason Polites
 */
public class FileUtils
{

    /** Fingerprint length (number of sample points) used by the dedupe methods. */
    private static final int DEDUPE_FINGERPRINT_LENGTH = 64;

    /** Buffer size, in bytes, passed to IOUtils.pipe. */
    private static final int PIPE_BUFFER_SIZE = 1024;

    /**
     * Charset used to turn the raw bytes of small files into a fingerprint.
     * ISO-8859-1 maps every byte value to exactly one character, so distinct
     * byte sequences always yield distinct strings.
     */
    private static final String FINGERPRINT_CHARSET = "ISO-8859-1";

    /**
     * Default constructor. The class holds no instance state.
     */
    public FileUtils() {
        super ();
    }

    /**
     * Removes (deletes) all duplicate files found in the given folder.
     * @param folder The folder in which to look for duplicates
     * @param filename If true, files with the same root name are considered duplicates irrespective of their fingerprint
     * @throws IOException If folder is not a directory or a file cannot be read
     */
    public static void dedupe(File folder, boolean filename) throws IOException {
        dedupe(folder, null, null, filename);
    }

    /**
     * Removes all duplicate files found in the given folder and moves them to the deposit folder
     * @param folder The folder in which to look for duplicates
     * @param deposit The directory to which duplicates are moved; if null, duplicates are deleted
     * @param filename If true, files with the same root name are considered duplicates irrespective of their fingerprint
     * @throws IOException If folder or deposit is not a directory, or a file cannot be read
     */
    public static void dedupe(File folder, File deposit, boolean filename) throws IOException {
        dedupe(folder, null, deposit, filename);
    }

    /**
     * Removes (deletes) all duplicate files found in the given folder with the given filter
     * @param folder The folder in which to look for duplicates
     * @param filter The file filter to use when listing files; if null, all entries are listed
     * @param filename If true, files with the same root name are considered duplicates irrespective of their fingerprint
     * @throws IOException If folder is not a directory or a file cannot be read
     */
    public static void dedupe(File folder, FileFilter filter, boolean filename) throws IOException {
        dedupe(folder, filter, null, filename);
    }

    /**
     * Removes duplicate files from the given folder, either by moving them to
     * the deposit directory or, when deposit is null, by deleting them.
     * <p>
     * Files are examined in the order returned by File.listFiles; the first
     * file seen with a given root name (when filename is true) or a given
     * fingerprint is kept, and later matches are treated as duplicates.
     * Sub-directories are ignored.
     * </p>
     * <p>
     * Each duplicate is reported on System.out. A duplicate that cannot be
     * moved or deleted is reported on System.err and processing continues.
     * </p>
     * @param folder The folder in which to look for duplicates
     * @param filter The file filter to use when listing files; if null, all entries are listed
     * @param deposit The path to which duplicates are moved (must be a directory); if null, duplicates are deleted
     * @param filename If true, files with the same root name are considered duplicates irrespective of their fingerprint
     * @throws IOException If folder is not a directory, deposit is non-null and not a directory, or a file cannot be fingerprinted
     */
    public static void dedupe(File folder, FileFilter filter, File deposit, boolean filename) throws IOException {
        if (!folder.isDirectory()) {
            throw new IOException("folder parameter must be a directory");
        }
        if (deposit != null && !deposit.isDirectory()) {
            throw new IOException("deposit parameter must be a directory");
        }

        File[] files = (filter != null) ? folder.listFiles(filter) : folder.listFiles();
        if (files == null) {
            return;
        }

        // Root names and fingerprints are tracked separately. A small file's
        // fingerprint is its own content, so sharing one map would let a file
        // whose content equals another file's root name be flagged (and
        // removed) as a duplicate.
        Map seenRootNames = new Hashtable();
        Map seenFingerprints = new Hashtable();

        for (int i = 0; i < files.length; i++) {
            File candidate = files[i];
            if (!candidate.isFile()) {
                continue;
            }

            // Name of the previously seen file this one duplicates, if any
            String original = null;

            if (filename) {
                String rootName = getFilenameWithoutExtension(candidate.getName());
                original = (String) seenRootNames.get(rootName);
                if (original == null) {
                    seenRootNames.put(rootName, candidate.getName());
                }
            }

            // Only fingerprint when the name check did not already decide
            if (original == null) {
                String fingerprint = fingerPrintFile(candidate, DEDUPE_FINGERPRINT_LENGTH);
                original = (String) seenFingerprints.get(fingerprint);
                if (original == null) {
                    seenFingerprints.put(fingerprint, candidate.getName());
                }
            }

            if (original != null) {
                removeDuplicate(candidate, original, deposit);
            }
        }
    }

    /**
     * Reports a duplicate and then moves it into deposit, or deletes it when
     * deposit is null. Failures are reported on System.err rather than thrown.
     */
    private static void removeDuplicate(File duplicate, String original, File deposit) {
        System.out.println (duplicate.getName() + " is a duplicate of " + original);

        if (deposit != null) {
            File moveTo = new File(deposit, duplicate.getName());
            if (!duplicate.renameTo(moveTo)) {
                System.err.println ("ERROR: Could not move " + duplicate.getName() + " to " + moveTo.getAbsolutePath());
            }
        }
        else if (!duplicate.delete()) {
            System.err.println ("ERROR: Could not delete " + duplicate.getName());
        }
    }

    /**
     * Creates a "fingerprint" of the given file such that two files
     * with the same content will have the same fingerprint.
     * <p>
     * If the file is no larger than length bytes, the fingerprint is the file
     * content itself (one character per byte). Otherwise single bytes are
     * sampled at evenly spaced offsets, appended as signed decimal numbers,
     * and followed by the file size. Large files that differ only in bytes
     * that are not sampled therefore share a fingerprint.
     * </p>
     * @param file The file to fingerprint
     * @param length The length of the fingerprint; must be greater than zero. The longer the length, the more accurate the fingerprint. NOTE: The size of the actual string returned may be greater than "length" characters
     * @return A String representing a non-unique representation of the file
     * @throws IOException If the file cannot be opened or read
     * @throws IllegalArgumentException If length is not greater than zero
     */
    public static String fingerPrintFile(File file, int length) throws IOException {
        // TODO Research the PROPER way to do this...
        if (length <= 0) {
            // A zero length would divide by zero below and a negative one can
            // prevent the sampling loop from terminating.
            throw new IllegalArgumentException("length must be greater than zero: " + length);
        }

        long size = file.length();

        if (size <= length) {
            // Small file: use the actual content as the fingerprint
            FileInputStream fin = new FileInputStream(file);
            try {
                ByteArrayOutputStream bout = new ByteArrayOutputStream();
                // NOTE(review): IOUtils.pipe is presumed to copy the whole stream into bout
                IOUtils.pipe(fin, bout, PIPE_BUFFER_SIZE);
                // Decode losslessly; the platform default charset may collapse
                // different byte sequences into the same string.
                return new String(bout.toByteArray(), FINGERPRINT_CHARSET);
            }
            finally {
                try {
                    fin.close();
                }
                catch (IOException ignore) {}
            }
        }

        // Large file: one byte is read, then (size / length) bytes are skipped,
        // so consecutive sample offsets are (size / length) + 1 apart.
        long stride = Math.min(size / length, (long) Integer.MAX_VALUE) + 1;

        RandomAccessFile raf = new RandomAccessFile(file, "r");
        try {
            StringBuffer buffer = new StringBuffer();
            try {
                for (long position = 0; position < size; position += stride) {
                    raf.seek(position);
                    buffer.append(raf.readByte());
                }
            }
            catch (EOFException e) {
                // The file is shorter than File.length() reported; stop sampling
            }
            buffer.append(size);
            return buffer.toString();
        }
        finally {
            try {
                raf.close();
            }
            catch (IOException ignore) {}
        }
    }

    /**
     * Gets the absolute path without the file (root path).
     * <p>
     * For a directory this is the directory's own absolute path; for anything
     * else it is the absolute path of the containing directory. The result is
     * passed through getSafePath, so it ends with a separator.
     * </p>
     * @param pathname The file or directory to resolve
     * @return The absolute path to the file excluding the filename
     */
    public static String getAbsolutePathWithoutFile(File pathname) {
        if (pathname.isDirectory()) {
            return getSafePath(pathname.getAbsolutePath());
        }

        // Resolve to an absolute file first: a relative File such as
        // new File("a.txt") has no parent until it is made absolute.
        File absolute = pathname.getAbsoluteFile();
        File parent = absolute.getParentFile();
        return getSafePath((parent != null ? parent : absolute).getAbsolutePath());
    }

    /**
     * Ensures the path is terminated with a file separator.
     * <p>
     * A path already ending in '/' or '\' is returned unchanged. Otherwise the
     * appended separator follows the style already present in the path ('/'
     * takes precedence over '\'); if the path contains neither, the platform
     * separator is used.
     * </p>
     * @param path The path to terminate; may be null
     * @return The path ending with a separator, or null if path was null
     */
    public static String getSafePath(String path) {
        if (path == null) {
            return null;
        }
        if (path.endsWith("/") || path.endsWith("\\")) {
            // Already terminated; never append a second separator
            return path;
        }
        if (path.indexOf('/') > -1) {
            return path + '/';
        }
        if (path.indexOf('\\') > -1) {
            return path + '\\';
        }
        return path + System.getProperty("file.separator");
    }

    /**
     * Gets the name of a file without the extension (text after last dot)
     * @param filename The file name to strip
     * @return The text before the last dot, or the whole name if it contains no dot
     */
    public static String getFilenameWithoutExtension(String filename) {
        int lastDot = filename.lastIndexOf('.');
        return (lastDot > -1) ? filename.substring(0, lastDot) : filename;
    }

    /**
     * Returns the String that occurs after the last "dot" in the filename
     * @param pathname The file whose name is examined
     * @return The extension of the file, or null if the name contains no dot
     */
    public static String getFileExtension(File pathname) {
        return getFileExtension(pathname.getName());
    }

    /**
     * Returns the String that occurs after the last "dot" in the filename
     * @param filename The file name to examine
     * @return The extension (empty if the name ends with a dot), or null if the name contains no dot
     */
    public static String getFileExtension(String filename) {
        int lastDot = filename.lastIndexOf('.');
        return (lastDot > -1) ? filename.substring(lastDot + 1) : null;
    }

    /**
     * Convenience file copy method
     * @param source The file to read from
     * @param destination The file to write to
     * @throws IOException If either file cannot be opened, the copy fails, or the destination cannot be closed
     */
    public static final void copy(File source, File destination) throws IOException {
        FileInputStream fin = null;
        FileOutputStream fout = null;
        try {
            fin = new FileInputStream(source);
            fout = new FileOutputStream(destination);
            IOUtils.pipe(fin, fout, PIPE_BUFFER_SIZE);

            // Close the destination on the success path so that a failure to
            // finish writing is reported to the caller instead of swallowed.
            FileOutputStream finished = fout;
            fout = null;
            finished.close();
        }
        finally {
            if (fin != null) {
                try {
                    fin.close();
                } catch (IOException ignore) {}
            }
            if (fout != null) {
                // Only reached when the copy already failed; keep that error
                try {
                    fout.close();
                } catch (IOException ignore) {}
            }
        }
    }

    /**
     * Pipes from one stream to another, reading until end of stream and then
     * flushing the output. Neither stream is closed.
     * @param in The stream to read from
     * @param out The stream to write to
     * @param buffer The transfer buffer
     * @throws IOException If reading or writing fails
     * @deprecated use IOUtils
     * @see IOUtils#pipe(InputStream, OutputStream, int)
     */
    private static void pipe(InputStream in, OutputStream out, byte[] buffer) throws IOException {
        int count;
        while ((count = in.read(buffer, 0, buffer.length)) != -1) {
            out.write(buffer, 0, count);
        }
        out.flush();
    }

    /**
     * Lists all the .jar files in the given folder as URL references
     * @param folder The folder in which to look
     * @return An array of java.net.URL objects, or null if the folder could not be listed
     * @throws MalformedURLException If an entry cannot be converted to a URL
     */
    public static URL[] listJars(File folder) throws MalformedURLException {
        return listFiles(folder, ".jar");
    }

    /**
     * Lists all the files in the given folder with the given extension
     * @param folder The folder in which to look
     * @param extension The file extension (case sensitive)
     * @return An array of java.net.URL objects, or null if the folder could not be listed
     * @throws MalformedURLException If an entry cannot be converted to a URL
     */
    public static URL[] listFiles(File folder, String extension) throws MalformedURLException {
        File[] files = folder.listFiles(new ExtensionFileFilter(extension));
        if (files == null) {
            return null;
        }

        URL[] urls = new URL[files.length];
        for (int i = 0; i < files.length; i++) {
            // Go through URI so characters that are illegal in URLs are
            // escaped; File.toURL() does not escape them.
            urls[i] = files[i].toURI().toURL();
        }
        return urls;
    }

    /**
     * <p>
     * Inner file filter class for listing files of known extension.
     * </p>
     * <p>
     * The match is a case-sensitive suffix test on the entry name, so any
     * entry (including a directory) whose name ends with the extension is accepted.
     * </p>
     */
    public static final class ExtensionFileFilter implements FileFilter {

        /** The name suffix an entry must end with to be accepted. */
        String extension;

        /**
         * @param extension The name suffix to match, e.g. ".jar"
         */
        public ExtensionFileFilter(String extension) {
            this.extension = extension;
        }

        /**
         * @param pathname The entry to test
         * @return true if the entry's name ends with the configured extension
         */
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(extension);
        }
    }
}