InfiniteFile file = null;
_deleteExistingFilesBySourceKey = false;
try
{
// Resolve the source URL to an InfiniteFile handle. The creation/authentication path depends on the URL and file config:
//  - "inf://" URLs (Infinit.e share/custom objects): created with an auth object built from the source's community ids and owner id
//  - no file config, or no username/password in it: created without credentials (in secure mode, "file:" URLs require an admin owner)
//  - otherwise: created with the configured domain/username/password
// The "inf://" branch can return "files" early when the duplicate manager reports that nothing needs updating.
if (source.getUrl().startsWith("inf://")) { // Infinit.e share/custom object
// (the stringified community id list and the owner id go in the first two constructor arguments - the same positions that
// the credentialed branch at the bottom of this chain uses for domain and username; the third argument is null here)
NtlmPasswordAuthentication auth = new NtlmPasswordAuthentication(Arrays.toString(source.getCommunityIds().toArray()), source.getOwnerId().toString(), null);
file = InfiniteFile.create(source.getUrl(), auth);
if (source.getUrl().startsWith("inf://custom/")) {
_customJob = true;
// A few cases:
// 1] If first time, or source has completed:
// Quick check of share/custom date vs last imported doc in this case:
// (stays null unless the duplicate manager supplies a last-modified doc id below)
ObjectId customLastRecordId = null;
// Here are the two cases (whether in success/error/success_iteration):
// 1) non-append mode ... any time the first_record.time > last_doc.time then re-run (delete all docs)
// 2) append-mode ... any time the last_record.time > last_doc.time then re-run/keep going
// (the status clause below just determines if you keep going or not;
// the file.getTime() call will automatically give you the correct version of 1 vs 2 depending on its status)
// NOTE(review): the code below calls file.getDate(), not getTime() - presumably that is the call meant above; confirm
if ((null == source.getHarvestStatus()) || (HarvestEnum.success == source.getHarvestStatus().getHarvest_status()))
{
// No harvest status yet, or the last harvest succeeded: stop here if the duplicate manager says no update is needed
if (!_context.getDuplicateManager().needsUpdated_Url(new Date(file.getDate()), null, source)) {
return files;
}//TESTED
else {
// Update needed: capture the duplicate manager's last-modified date/doc id before resetting it,
// and provisionally mark the existing docs for deletion (the checks further down may clear this)
_customLastRecordWritten = _context.getDuplicateManager().getLastModifiedDate();
customLastRecordId = _context.getDuplicateManager().getLastModifiedDocId();
_context.getDuplicateManager().resetForNewSource();
// (reset the saved state since I faked my harvest status)
// NOTE(review): the harvest status is only faked in the else branch below, not in this one - the remark above looks copied; confirm
_deleteExistingFilesBySourceKey = true;
}//TESTED
}
else { // 2] If in the middle of a multiple harvest cycle....
// (reached only when the harvest status is non-null and is not HarvestEnum.success)
// Specifically for custom, need to handle m/r changing ... we'll fake the harvest status
// to force it to check the last doc's modified time vs the current file time...
HarvestEnum saved = source.getHarvestStatus().getHarvest_status();
source.getHarvestStatus().setHarvest_status(HarvestEnum.success);
try {
if (_context.getDuplicateManager().needsUpdated_Url(new Date(file.getDate()), null, source)) {
_deleteExistingFilesBySourceKey = true;
}
// Unlike case 1], there is no early return here: the last-modified date/doc id are captured
// and the duplicate manager is reset whether or not an update was needed
_customLastRecordWritten = _context.getDuplicateManager().getLastModifiedDate();
customLastRecordId = _context.getDuplicateManager().getLastModifiedDocId();
_context.getDuplicateManager().resetForNewSource();
// (reset the saved state since I faked my harvest status)
}
finally { // (rewrite original)
// (restores the real harvest status even if the duplicate manager calls above throw)
source.getHarvestStatus().setHarvest_status(saved);
}
}//TESTED
// The next three checks can each veto the deletion decided above:
if (_streaming) { // Never delete files...
_deleteExistingFilesBySourceKey = false;
}//TESTED
if (null == customLastRecordId) { // no docs, so no need for this
// (or -in the case of distributed sources- the new harvest has already begun)
_deleteExistingFilesBySourceKey = false;
}//TESTED
// Custom append mode: never delete anything, only process new objects
// NOTE(review): unchecked downcast - presumably InfiniteFile.create() always returns an InternalInfiniteFile
// for "inf://custom/" URLs; confirm, since any other type would throw ClassCastException here
InternalInfiniteFile customHandle = (InternalInfiniteFile)file;
if (customHandle.isAppendingNotReplacing()) {
_deleteExistingFilesBySourceKey = false;
}//TESTED
// Finally, if we wanted to delete the files then go ahead now:
if (_deleteExistingFilesBySourceKey) {
// For now, support only "non-append" mode efficiently:
// Always delete all the old docs, updated docs will work but inefficiently (will delete and re-create)
// (a placeholder document carrying the source key and a community id is queued in docsToRemove)
DocumentPojo docRepresentingSrcKey = new DocumentPojo();
if (null != source.getDistributionFactor()) {
// If split across multiple docs then need a more expensive delete (note: still indexed)
docRepresentingSrcKey.setId(customLastRecordId);
}
// (uses whichever community id the collection's iterator returns first)
docRepresentingSrcKey.setCommunityId(source.getCommunityIds().iterator().next());
docRepresentingSrcKey.setSourceKey(source.getKey());
this.docsToRemove.add(docRepresentingSrcKey);
}//TESTED
}
else { // share - this is much simpler:
// (just stop if the duplicate manager says the share has not changed; nothing is marked for deletion)
if (!_context.getDuplicateManager().needsUpdated_Url(new Date(file.getDate()), null, source)) {
return files;
}//TESTED
}
}//TESTED
// No file config, or no username/password in it: create the handle without credentials
else if( source.getFileConfig() == null || source.getFileConfig().password == null || source.getFileConfig().username == null)
{
// Local file: => must be admin to continue
if (harvestSecureMode) { // secure mode, must be admin
// (only "file:" URLs are restricted; other schemes reaching this branch are not checked here)
if (source.getUrl().startsWith("file:")) {
if (!AuthUtils.isAdmin(source.getOwnerId())) {
throw new ExtractorSourceLevelMajorException("Permission denied");
}
}
}//TODO (INF-2119): come up with something better than this...(this is at least consistent with SAH/UAH security, apart from allowing admin more rights)
file = InfiniteFile.create(source.getUrl());
}
// Username and password are both configured: create the handle with explicit credentials
else
{
// A missing domain is replaced with the empty string (note: this is written back onto the file config's domain field)
if (source.getFileConfig().domain == null) {
source.getFileConfig().domain = "";
}
NtlmPasswordAuthentication auth = new NtlmPasswordAuthentication(source.getFileConfig().domain, source.getFileConfig().username, source.getFileConfig().password);
file = InfiniteFile.create(source.getUrl(), auth);
}
traverse(file, source, maxDepth);
}
catch (Exception e) {