package net.sf.jabref.external;
import net.sf.jabref.*;
import java.io.File;
import java.io.IOException;
import java.io.FilenameFilter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
/**
 * Static helpers for locating files that belong to BibTeX entries by matching
 * file names below a set of root directories against a search pattern.
 *
 * The pattern is a "/"-separated path whose last component is a regular
 * expression for the file name; see
 * {@link #findFile(BibtexEntry, BibtexDatabase, Collection, String, String, boolean)}
 * for the supported syntax.
 *
 * Created by IntelliJ IDEA.
 * User: alver
 * Date: Apr 12, 2008
 * Time: 1:46:44 PM
 */
public class RegExpFileSearch {

    /**
     * Placeholder that stands in for "[extension]" while the remaining bracketed
     * fields are expanded, so that the extension is not treated as an entry field.
     */
    final static String EXT_MARKER = "__EXTENSION__";

    /**
     * Matches a single backslash that has a non-backslash character on both sides.
     * Lookarounds are used so the neighbouring characters are not consumed, which
     * lets consecutive separators such as "a\b\c" all be recognised.
     */
    private static final Pattern SINGLE_BACKSLASH =
            Pattern.compile("(?<=[^\\\\])\\\\(?=[^\\\\])");

    /**
     * Small manual test driver: searches a hard-coded directory for files
     * belonging to a sample entry and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        BibtexEntry entry = new BibtexEntry(Util.createNeutralId());
        entry.setField(BibtexFields.KEY_FIELD, "raffel01");
        entry.setField("year", "2001");

        List<String> extensions = new ArrayList<String>();
        extensions.add("pdf");
        extensions.add("ps");
        extensions.add("txt");

        List<File> dirs = new ArrayList<File>();
        dirs.add(new File("/home/alver/Desktop/Tromso_2008"));

        System.out.println(findFiles(entry, extensions, dirs,
                "**/[bibtexkey].*\\\\.[extension]"));
    }

    /**
     * Search for file links for a set of entries using regexp. Lists of extensions and
     * directories are given.
     *
     * @param entries The entries to search for.
     * @param extensions The extensions that are acceptable.
     * @param directories The root directories to search.
     * @param regExp The expression deciding which names are acceptable.
     * @return A map linking each given entry to a list of files matching the given criteria.
     */
    public static Map<BibtexEntry, java.util.List<File>> findFilesForSet(Collection<BibtexEntry> entries,
            Collection<String> extensions, List<File> directories, String regExp) {
        Map<BibtexEntry, java.util.List<File>> res = new HashMap<BibtexEntry, List<File>>();
        for (BibtexEntry entry : entries) {
            res.put(entry, findFiles(entry, extensions, directories, regExp));
        }
        return res;
    }

    /**
     * Method for searching for files using regexp. A list of extensions and directories can
     * be given. The extensions are combined into one alternation group, e.g. "(pdf|ps|txt)",
     * which replaces "[extension]" in the file-name part of the expression.
     *
     * @param entry The entry to search for.
     * @param extensions The extensions that are acceptable. They are inserted into the
     *                   regular expression as given, without quoting.
     * @param directories The root directories to search.
     * @param regularExpression The expression deciding which names are acceptable.
     * @return A list of file paths matching the given criteria.
     */
    public static List<File> findFiles(BibtexEntry entry, Collection<String> extensions,
            Collection<File> directories, String regularExpression) {
        StringBuilder alternatives = new StringBuilder("(");
        for (Iterator<String> i = extensions.iterator(); i.hasNext();) {
            alternatives.append(i.next());
            if (i.hasNext()) {
                alternatives.append("|");
            }
        }
        alternatives.append(")");
        return findFile(entry, null, directories, regularExpression, alternatives.toString(), true);
    }

    /**
     * Searches the given directories for files matching the file name pattern for the
     * bibtexentry.
     *
     * Used to fix:
     *
     * http://sourceforge.net/tracker/index.php?func=detail&aid=1503410&group_id=92314&atid=600309
     *
     * Requirements:
     * - Be able to find the associated PDF in a set of given directories.
     * - Be able to return a relative path or absolute path.
     * - Be fast.
     * - Allow for flexible naming schemes in the PDFs.
     *
     * Syntax scheme for file:
     * <ul>
     * <li>* Any subDir</li>
     * <li>** Any subDir (recursive)</li>
     * <li>[key] Key from bibtex file and database</li>
     * <li>.* Anything else is taken to be a Regular expression.</li>
     * </ul>
     *
     * @param entry
     *            the entry whose fields are used to expand bracketed parts of the pattern
     * @param database
     *            passed on to the bracket expansion; {@link #findFiles} passes null here
     * @param dirs
     *            A set of root directories to start the search from. Directories that do
     *            not exist are skipped.
     * @param file
     *            non-null search pattern, see the syntax scheme above
     * @param extensionRegExp
     *            regular expression substituted for "[extension]" in the file name part
     * @param relative
     *            passed on unchanged; see the note on the String-directory overload
     *
     * @return All files found to match the given criteria, in directory order. The list
     *         is empty if none was found.
     */
    public static List<File> findFile(BibtexEntry entry, BibtexDatabase database, Collection<File> dirs,
            String file, String extensionRegExp, boolean relative) {
        List<File> res = new ArrayList<File>();
        for (File directory : dirs) {
            List<File> found = findFile(entry, database, directory.getPath(), file,
                    extensionRegExp, relative);
            // The per-directory search returns null when the directory does not exist.
            if (found != null) {
                res.addAll(found);
            }
        }
        return res;
    }

    /**
     * Internal Version of findFile, which also accepts a current directory to
     * base the search on.
     *
     * Every hit located below the root directory is rewritten as a path relative to that
     * root. A hit outside the root (possible when the pattern uses "..", a drive letter or
     * a leading "/") cannot be expressed that way and is returned as a canonical absolute
     * path instead.
     *
     * NOTE(review): the {@code relative} flag is not consulted; paths are always made
     * relative. This is kept as-is because callers may depend on it.
     *
     * @param directory root directory of the search; null means the current directory "."
     * @param relative currently unused, see the note above
     * @return the matching files, or null if the root directory does not exist
     */
    public static List<File> findFile(BibtexEntry entry, BibtexDatabase database, String directory,
            String file, String extensionRegExp, boolean relative) {
        File root = (directory == null) ? new File(".") : new File(directory);
        if (!root.exists()) {
            return null;
        }

        List<File> res = findFile(entry, database, root, file, extensionRegExp);
        if (res.isEmpty()) {
            return res;
        }

        String rootPath;
        try {
            rootPath = root.getCanonicalPath();
        } catch (IOException e) {
            // Without a canonical root nothing can be relativized; return the hits untouched.
            e.printStackTrace();
            return res;
        }

        for (int i = 0; i < res.size(); i++) {
            try {
                String path = res.get(i).getCanonicalPath();
                if (!isBelow(path, rootPath)) {
                    // Cutting off rootPath.length() characters would garble the path or
                    // run past its end, so keep the absolute form.
                    res.set(i, new File(path));
                    continue;
                }
                /**
                 * [ 1601651 ] PDF subdirectory - missing first character
                 *
                 * http://sourceforge.net/tracker/index.php?func=detail&aid=1601651&group_id=92314&atid=600306
                 */
                // Changed by M. Alver 2007.01.04:
                // Remove first character if it is a directory separator character:
                String tmp = path.substring(rootPath.length());
                if ((tmp.length() > 1) && (tmp.charAt(0) == File.separatorChar)) {
                    tmp = tmp.substring(1);
                }
                res.set(i, new File(tmp));
            } catch (IOException e) {
                // Best effort: this hit keeps the path the search produced.
                e.printStackTrace();
            }
        }
        return res;
    }

    /**
     * The actual work-horse. Will find filepaths starting from the given directory using
     * the given regular expression string for search.
     *
     * All path components but the last select directories: a drive letter ("c:") or ".."
     * changes the current directory, "." keeps it, "*" searches every direct
     * subdirectory with the rest of the pattern and "**" does so for every direct and
     * indirect subdirectory. After the directory components have been processed, the last
     * component is matched (case-insensitively, against the whole name) against the
     * entries of the current directory.
     *
     * NOTE(review): directory components other than those listed above are not descended
     * into, and "*" / "**" do not end the processing of the current directory, so its own
     * files are matched as well.
     *
     * @param entry entry used for expanding bracketed fields
     * @param database passed on to the bracket expansion
     * @param directory directory to start from; replaced by "." if the pattern starts
     *                  with "/"
     * @param file the search pattern; single backslashes are treated as path separators,
     *             doubled backslashes as regular expression escapes
     * @param extensionRegExp regular expression substituted for "[extension]"
     * @return the files found, possibly empty, never null
     */
    protected static List<File> findFile(BibtexEntry entry, BibtexDatabase database, File directory,
            String file, String extensionRegExp) {
        List<File> res = new ArrayList<File>();

        if (file.startsWith("/")) {
            directory = new File(".");
            file = file.substring(1);
        }

        // Escape handling: a lone backslash is a path separator. The replacement is the
        // literal "/", so characters such as '$' in the pattern need no special care.
        file = SINGLE_BACKSLASH.matcher(file).replaceAll("/");

        String[] fileParts = file.split("/");
        if (fileParts.length == 0) {
            return res;
        }

        int last = fileParts.length - 1;
        for (int i = 0; i < last; i++) {
            String dirToProcess = Util.expandBrackets(fileParts[i], entry, database);

            if (dirToProcess.matches("^.:$")) { // Windows Drive Letter
                directory = new File(dirToProcess + "/");
                continue;
            }
            if (dirToProcess.equals(".")) { // Stay in current directory
                continue;
            }
            if (dirToProcess.equals("..")) {
                directory = parentOf(directory);
                continue;
            }

            if (dirToProcess.equals("*")) { // Do for all direct subdirs
                File[] subDirs = directory.listFiles();
                if (subDirs != null) {
                    String restOfFileString = Util.join(fileParts, "/", i + 1, fileParts.length);
                    for (File subDir : subDirs) {
                        if (subDir.isDirectory()) {
                            res.addAll(findFile(entry, database, subDir, restOfFileString,
                                    extensionRegExp));
                        }
                    }
                }
            } else if (dirToProcess.equals("**")) { // Do for all direct and indirect subdirs
                String restOfFileString = Util.join(fileParts, "/", i + 1, fileParts.length);
                LinkedList<File> toDo = new LinkedList<File>();
                toDo.add(directory);
                while (!toDo.isEmpty()) {
                    File[] children = toDo.removeFirst().listFiles();
                    if (children == null) { // No permission?
                        continue;
                    }
                    for (File child : children) {
                        if (!child.isDirectory()) {
                            continue;
                        }
                        // Only directories can contain further subdirectories, so plain
                        // files are not queued.
                        toDo.add(child);
                        res.addAll(findFile(entry, database, child, restOfFileString,
                                extensionRegExp));
                    }
                }
            }
        } // End process directory information

        // Last step: check if the given file can be found in this directory.
        // "[extension]" is hidden behind a marker while the other brackets are expanded,
        // then replaced literally so that '$' or '\' in the extension expression are not
        // interpreted as replacement syntax.
        String filePart = fileParts[last].replace("[extension]", EXT_MARKER);
        String filenameToLookFor = Util.expandBrackets(filePart, entry, database)
                .replace(EXT_MARKER, extensionRegExp);

        // Doubled backslashes in the pattern denote a single regular expression escape.
        final Pattern toMatch = Pattern.compile("^"
                + filenameToLookFor.replaceAll("\\\\\\\\", "\\\\") + "$", Pattern.CASE_INSENSITIVE);

        File[] matches = directory.listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return toMatch.matcher(name).matches();
            }
        });
        if (matches != null) {
            res.addAll(Arrays.asList(matches));
        }
        return res;
    }

    /**
     * Returns the directory one level above the given one. Falls back to appending ".."
     * when the path has no parent component (e.g. "." or a bare relative name), for which
     * File.getParent() returns null.
     */
    private static File parentOf(File directory) {
        File parent = directory.getParentFile();
        return (parent != null) ? parent : new File(directory, "..");
    }

    /**
     * Tells whether the canonical path is the canonical root path itself or lies below
     * it, comparing on path-component boundaries so "/a/bc" is not taken to be below "/a/b".
     */
    private static boolean isBelow(String path, String rootPath) {
        if (!path.startsWith(rootPath)) {
            return false;
        }
        if (path.length() == rootPath.length()) {
            return true;
        }
        return rootPath.endsWith(File.separator)
                || path.charAt(rootPath.length()) == File.separatorChar;
    }
}