//*-- Locals shared by the scan, de-duplication and write-back steps of task file creation
String filtersFile = Constants.FILTER_FILE; //*-- file containing the list of handlers; passed to DirTools.dirScan
String errMsg = null; //*-- Error message; stays null unless an IO error is caught
RandomAccessFile tfile = null; //*-- random access handle on the task file (the list of files to scan); used to truncate it
BufferedReader filein = null; //*-- reader over the task file, used while removing dups
PrintWriter outp = null; //*-- writer to create the unique list of files
HashMapOfString h = null; //*-- map used as a set: each task file line is stored as a key with an empty value
logger.info("Start creating task file for crawl");
//*-- before creating a task file, check for a restart
if (crawlConfig.getStartPosition() != -1)
{ ctRef.fileReadTime -= new Date().getTime();
int numFiles = 0; LineNumberReader fileno = null;
try
{ fileno = new LineNumberReader(new FileReader(taskFile));
while ((fileno.readLine()) != null) numFiles++;
}
catch (IOException ie) { logger.error("IO Error in task file " + taskFile + " " + ie.getMessage()); }
finally { try { if (fileno != null) fileno.close(); }
catch (IOException ie) { logger.error("Ignore error"); } }
setNumFiles(numFiles);
ctRef.fileReadTime += new Date().getTime();
return;
}
//*-- Start timing the task file build and read the crawl settings
ctRef.fileReadTime -= new Date().getTime();
String iDirs = crawlConfig.getIncDirs();
String eDirs = crawlConfig.getExcDirs();
boolean skipHidden = crawlConfig.isSkipHidden();
boolean followLinks = crawlConfig.isFollowLinks();

//*-- Both lists are ';' separated; make sure a non-empty list ends with ';'
if (iDirs.length() > 0 && !iDirs.endsWith(";")) { iDirs = iDirs + ";"; }
if (eDirs.length() > 0 && !eDirs.endsWith(";")) { eDirs = eDirs + ";"; }

//*-- An empty list yields an empty array
String[] incDirs;
if (iDirs.length() == 0) { incDirs = new String[0]; }
else { incDirs = iDirs.split(";"); }
String[] excDirs;
if (eDirs.length() == 0) { excDirs = new String[0]; }
else { excDirs = eDirs.split(";"); }

//*-- One prefix pattern per exclusion directory: matches any name starting with it
//*-- NOTE(review): the directory text is used as a regular expression without quoting,
//*-- so regex metacharacters in a path (e.g. '\' or '(') change the match or fail to
//*-- compile -- confirm how exclusion directories are stored before adding Pattern.quote
Pattern[] excPatts = new Pattern[excDirs.length];
for (int k = 0; k < excPatts.length; k++)
{ excPatts[k] = Pattern.compile("^" + excDirs[k] + ".*$"); }
try
{
tfile = new RandomAccessFile(taskFile, "rw");
tfile.setLength(0); tfile.close();
// *-- Run the directory scan for each directory in the list
DirTools dirTools = new DirTools();
for (int i = 0; i < incDirs.length; i++)
{ logger.info("Start scanning " + incDirs[i]);
dirTools.dirScan(incDirs[i], filtersFile, taskFile, skipHidden, followLinks);
logger.info("Finished scanning " + incDirs[i]);
}
// *-- Read the task file into a hashset and remove dups.
logger.info("Removing duplicate file names from task file");
filein = new BufferedReader(new FileReader(taskFile));
String s; h = new HashMapOfString();
while ((s = filein.readLine()) != null) h.put(s, "");
}
catch (IOException ie)
{ errMsg = "IO Error in task file " + ie.getMessage(); }
finally
{ if (filein != null) try { filein.close(); } catch (IOException ie) { logger.error("Ignore error"); }
if (errMsg != null) ctRef.cleanUp(errMsg);
}
//*-- Dump the file back into tfile with a list of unique file names
//*-- Check if any of the files belong to the list of exclusion directories
try
{
tfile = new RandomAccessFile(taskFile, "rw");
tfile.setLength(0); tfile.seek(0);
outp = new PrintWriter(new FileWriter(taskFile));
if (h != null)
{ String[] keys = h.keys();
FLOOP: for (int i = 0; i < keys.length; i++)
{ for (int j = 0; j < excPatts.length; j++)
{ Matcher matcher = excPatts[j].matcher(keys[i]);
if (matcher.matches()) continue FLOOP;
}