private void storeDocumentIndex(final Segments.Process process, final Response queueEntry, final Document document, final Condenser condenser, final SearchEvent searchEvent, final String sourceName) {
// CREATE INDEX
final String dc_title = document.dc_title();
final DigestURI referrerURL = queueEntry.referrerURL();
EventOrigin processCase = queueEntry.processCase(this.peers.mySeed().hash);
if (process == Segments.Process.SURROGATES) {
processCase = EventOrigin.SURROGATES;
}
if (condenser == null || document.indexingDenied()) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, FailCategory.FINAL_PROCESS_CONTEXT, "denied by rule in document, process case=" + processCase);
return;
}
if (!queueEntry.profile().indexText() && !queueEntry.profile().indexMedia()) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, FailCategory.FINAL_LOAD_CONTEXT, "denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
return;
}
// remove stopwords
this.log.logInfo("Excluded " + condenser.excludeWords(stopwords) + " words in URL " + queueEntry.url());
// STORE WORD INDEX
URIMetadataRow newEntry = null;
try {
newEntry = this.indexSegments.segment(process).storeDocument(
queueEntry.url(),
referrerURL,
queueEntry.lastModified(),
new Date(),
queueEntry.size(),
document,
condenser,
searchEvent,
sourceName);
final RSSFeed feed = yacyChannel.channels(queueEntry.initiator() == null ? yacyChannel.PROXY : Base64Order.enhancedCoder.equal(queueEntry.initiator(), ASCII.getBytes(this.peers.mySeed().hash)) ? yacyChannel.LOCALINDEXING : yacyChannel.REMOTEINDEXING);
feed.addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url().toNormalform(true, false)));
} catch (final IOException e) {
//if (this.log.isFine()) log.logFine("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase);
addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, FailCategory.FINAL_LOAD_CONTEXT, "error storing url: " + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase + ", error = " + e.getMessage());
return;
}
// store rss feeds in document into rss table
for (final Map.Entry<MultiProtocolURI, String> rssEntry : document.getRSS().entrySet()) {
final Tables.Data rssRow = new Tables.Data();
rssRow.put("referrer", queueEntry.url().hash());
rssRow.put("url", UTF8.getBytes(rssEntry.getKey().toNormalform(true, false)));
rssRow.put("title", UTF8.getBytes(rssEntry.getValue()));
rssRow.put("recording_date", new Date());
try {
this.tables.update("rss", new DigestURI(rssEntry.getKey()).hash(), rssRow);
} catch (final IOException e) {
Log.logException(e);
}
}