// That metadata is the ListState.InheritedSecurity flag, which is
// important when processing ACL-related changes.
// TODO: with some restructuring of the code, this extra iteration
// could still be avoided.
for (ListState currentListState : listCollection) {
ListState listState = webState.lookupList(currentListState.getPrimaryKey());
if (null != listState) {
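// A mismatch here means the list's default view URL changed on
// SharePoint; record the old URL in the dedicated change log before
// updating the in-memory state below.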
if (!listState.getListURL().equalsIgnoreCase(
currentListState.getListURL())) {
tempCtx.logToFile(SPConstants.DEFAULT_VIEW_URL_CHANGE_LOG,
listState.getListURL());
}
listState.updateList(currentListState);
}
}
/*
* If nextList belongs to the current web and still exists on the
* SharePoint site, resume traversal from that list onwards.
*/
if (null != nextList && nextList.getParentWebState().equals(webState)
&& listCollection.contains(nextList)) {
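// Collections.rotate with a negative distance shifts elements left,
// so the element at indexOf(nextList) lands at index 0 and the crawl
// loop below resumes from nextList instead of the first list.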
Collections.rotate(listCollection, -(listCollection.indexOf(nextList)));
}
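// Pull ACL changes made since the last change token and fold them
// into the web state before the per-list crawl starts.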
AclHelper aclHelper = new AclHelper(tempCtx, webState.getWebUrl());
try {
aclHelper.fetchAclChangesSinceTokenAndUpdateState(webState);
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Problem Interacting with Custom ACL WS. web site [ "
+ webState.getWebUrl() + " ]. ", e);
}
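// aclChangedItems collects documents whose ACLs changed; they are
// merged with the regular crawl results at the end of each list's
// processing (see below).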
List<SPDocument> aclChangedItems = null;
final ListsHelper listsHelper = new ListsHelper(tempCtx);
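// Main per-list crawl loop: honor SharePoint search visibility,
// register newly discovered lists, and fetch incremental changes for
// lists that are already known.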
for (int i = 0; i < listCollection.size(); i++) {
final ListState currentList = listCollection.get(i);
ListState listState = webState.lookupList(currentList.getPrimaryKey());
if (sharepointClientContext.isUseSPSearchVisibility()) {
// If this list is marked for no crawling on SharePoint, skip it.
// Note that if the list is already known to the connector, it
// remains in the connector's state. This means that if a list is
// marked as NoCrawl in the middle of the connector's traversal,
// crawling of that list is paused at whatever state it is in. As
// soon as the NoCrawl flag is reverted on SharePoint, crawling
// resumes from the saved state.
if (currentList.isNoCrawl()) {
LOGGER.log(Level.WARNING, "Skipping List URL [ "
+ currentList.getListURL()
+ " ] while crawling because it has been marked for No Crawling on SharePoint. ");
if (null == listState) {
// Record the list in the state so it is known, but do not
// crawl it.
webState.AddOrUpdateListStateInWebState(currentList, currentList.getLastMod());
}
continue;
}
}
/*
* If we already knew about this list, then only fetch docs that have
* changed since the last doc we processed. If it's a new list (e.g. the
* first SharePoint traversal), we fetch everything.
*/
if (listState == null) {
listState = currentList;
listState.setNewList(true);
webState.AddOrUpdateListStateInWebState(listState, listState.getLastMod());
LOGGER.info("discovered new listState. List URL: "
+ listState.getListURL());
if (SPType.SP2007 == webState.getSharePointType()) {
if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()) {
// For content feeds we must track folders and the items under
// them; this is required for sending delete feeds for documents
// when their parent folder is deleted.
LOGGER.log(Level.CONFIG, "Discovering all folders under current list/library [ "
+ listState.getListURL() + " ] ");
try {
listsHelper.getSubFoldersRecursively(listState, null, null);
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Exception occured while getting the folders hierarchy for list [ "
+ listState.getListURL() + " ]. ", e);
} catch (final Throwable t) {
LOGGER.log(Level.WARNING, "Error occured while getting the folders hierarchy for list [ "
+ listState.getListURL() + " ]. ", t);
}
}
try {
listItems = listsHelper.getListItemChangesSinceToken(listState, allWebs);
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", e);
} catch (final Throwable t) {
LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", t);
}
} else {
try {
listItems = listsHelper.getListItems(listState, null, null, allWebs);
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", e);
}
}
} else {
LOGGER.info("revisiting listState [ " + listState.getListURL() + " ]. ");
listState.setExisting(true);
listState.setNextPage(null);
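// Reset the pagination cursor so this cycle's fetch starts a fresh
// page sequence for the list.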
String lastDocID = null;
SPDocument lastDoc = listState.getLastDocForWSRefresh();
/*
* We must ensure that the last doc we use here was actually sent as
* an ADD feed and not as a DELETE feed. It is possible that in one
* cycle we identified the list as non-existent and started sending
* delete feeds for it, but the list was restored in the next cycle.
* In that case we cannot rely on a lastDoc that was set by a delete
* feed, and we must also reset the change token to start a full
* crawl.
*/
if (lastDoc != null) {
if (FeedType.CONTENT_FEED == sharepointClientContext.getFeedType()
&& ActionType.DELETE.equals(lastDoc.getAction())) {
listState.resetState();
// The enclosing condition already guarantees a content feed.
// For content feeds we must track folders and the items under
// them; this is required for sending delete feeds for
// documents when their parent folder is deleted.
LOGGER.log(Level.CONFIG, "Discovering all folders under current list/library [ "
+ listState.getListURL() + " ] ");
try {
listsHelper.getSubFoldersRecursively(listState, null, null);
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Exception occurred while getting the folders hierarchy for list [ "
+ listState.getListURL() + " ]. ", e);
} catch (final Throwable t) {
LOGGER.log(Level.WARNING, "Error occurred while getting the folders hierarchy for list [ "
+ listState.getListURL() + " ]. ", t);
}
LOGGER.info("recrawling the items under listState [ "
+ listState.getListURL()
+ " ] because this list has been restored after deletion.");
} else {
lastDocID = Util.getOriginalDocId(lastDoc.getDocId(), sharepointClientContext.getFeedType());
}
}
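// SP2007 and later support the change-token based web service and
// ACL change detection; earlier versions fall back to the
// date-based fetch in the else branch.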
if (SPType.SP2007.equals(webState.getSharePointType())) {
try {
webState.AddOrUpdateListStateInWebState(listState, currentList.getLastMod());
// Fetch any documents that need recrawling because of ACL changes.
aclChangedItems = aclHelper.
getListItemsForAclChangeAndUpdateState(listState, listsHelper);
if (null == aclChangedItems
|| aclChangedItems.size() < sharepointClientContext.getBatchHint()) {
// The ACL changes did not exhaust the batch hint, so there is
// room to also do a regular incremental crawl in this batch.
listItems = listsHelper.getListItemChangesSinceToken(listState, allWebs);
}
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", e);
} catch (final Throwable t) {
LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", t);
}
} else {
try {
final Calendar dateSince = listState.getDateForWSRefresh();
webState.AddOrUpdateListStateInWebState(listState, currentList.getLastMod());
LOGGER.info("fetching changes since " + Util.formatDate(dateSince)
+ " for list [ " + listState.getListURL() + " ]. ");
// Check whether the document library was modified after our last refresh point.
final Calendar dateCurrent = listState.getLastModCal();
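// If the library changed after our last refresh point, flag the
// list as new so its own document is re-emitted with this batch
// (see the nextPage handling below).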
if (dateSince.before(dateCurrent)) {
listState.setNewList(true);
}
listItems = listsHelper.getListItems(listState, dateSince, lastDocID, allWebs);
} catch (final Exception e) {
LOGGER.log(Level.WARNING, "Exception thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", e);
} catch (final Throwable t) {
LOGGER.log(Level.WARNING, "Error thrown while getting the documents under list [ "
+ listState.getListURL() + " ].", t);
}
}
}
// Get the attachments for each discovered item, if the list allows
// attachments.
if (listState.canContainAttachments() && (listItems != null)) {
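// Collect attachments for added documents into a separate list and
// append them after the loop, so listItems is not modified while it
// is being iterated.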
final List<SPDocument> attachmentItems = new ArrayList<SPDocument>();
for (int j = 0; j < listItems.size(); j++) {
final SPDocument doc = listItems.get(j);
if (ActionType.ADD.equals(doc.getAction())) {
final List<SPDocument> attachments = listsHelper.getAttachments(listState, doc);
attachmentItems.addAll(attachments);
}
}
listItems.addAll(attachmentItems);
}
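// A null nextPage means this list was traversed completely in the
// current batch; otherwise the traversal is only partial.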
if (listState.getNextPage() == null) {
if (((listItems != null) && (listItems.size() > 0))
|| (listState.isNewList())) {
SPDocument listDoc = listState.getDocumentInstance(
sharepointClientContext.getFeedType());
listItems.add(listDoc);
listState.setNewList(false);
}
} else {
// Send the list home page as part of this batch to complete the
// inheritance chain for child items discovered in a partially
// traversed list.
if (listState.isNewList() && listItems != null && listItems.size() > 0
&& sharepointClientContext.getTraversalContext()
.supportsInheritedAcls() && !Strings.isNullOrEmpty(
listState.getListItemCollectionPositionNext())) {
SPDocument listDoc = listState.getDocumentInstance(
sharepointClientContext.getFeedType());
listItems.add(listDoc);
}
// If any list has not been traversed completely, doCrawl must not
// be set to true.
doCrawl = false;
}
// Add aclChangedItems to the docs crawled under regular crawling.
// This is the right place to do so because all operations
// pertaining to regular crawling have completed, while the
// batch-hint check is still to come.
if (null != aclChangedItems) {
if (null != listItems) {
listItems.addAll(aclChangedItems);
} else {
listItems = aclChangedItems;
}
}
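// Queue the assembled batch of regular and ACL-changed items on the
// list state.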
listState.setCrawlQueue(listItems);
// Set the last-crawled timestamp. This is an informational value
// for users viewing the state file.
listState.setLastCrawledDateTime(Util.getCurrentTimestampString());
if (null == listItems || listItems.size() == 0) {
LOGGER.log(Level.CONFIG, "No items found from list " + listState);
} else {
Collections.sort(listItems);