final boolean pre,
final int snippetMaxLength,
final int maxDocLen,
final boolean reindexing) {
final DigestURI url = comp.url();
if (queryhashes.isEmpty()) {
init(url.hash(), null, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given");
return;
}
// try to get snippet from snippetCache
ResultClass source = ResultClass.SOURCE_CACHE;
final String wordhashes = yacySearch.set2string(queryhashes);
final String urls = ASCII.String(url.hash());
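// the snippet cache is keyed by the concatenated query word hashes and the url hash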
String snippetLine = snippetsCache.get(wordhashes, urls);
if (snippetLine != null) {
// found the snippet
init(url.hash(), snippetLine, source, null);
return;
}
/* ===========================================================================
* LOAD RESOURCE DATA
* =========================================================================== */
// the snippet is not in the snippet cache, so try to compute it from the resource itself (HTCache or, if allowed, the web)
final Response response;
try {
// first try to get the snippet from metadata
String loc;
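// resources on the local file system or on SMB shares, and loads without a cache strategy, must not use the web cache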
final boolean noCacheUsage = url.isFile() || url.isSMB() || cacheStrategy == null;
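// containsAllHashes checks whether every query word hash occurs in the given text, so a matching metadata field can serve directly as the snippet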
if (containsAllHashes(loc = comp.dc_title(), queryhashes)) {
// try to create the snippet from the document title
init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return;
} else if (containsAllHashes(loc = comp.dc_creator(), queryhashes)) {
// try to create the snippet from information given in the creator metadata
init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return;
} else if (containsAllHashes(loc = comp.dc_subject(), queryhashes)) {
// try to create the snippet from information given in the subject metadata
init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return;
} else if (containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
// try to create the snippet from information given in the url
init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
return;
} else {
// try to load the resource from the cache, or from the web if the cache strategy permits it
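// note: the Long.MAX_VALUE argument to loader.load() presumably lifts any file-size limit for this load (assumption, not verified against the loader API)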
response = loader == null ? null : loader.load(loader.request(url, true, reindexing), noCacheUsage ? CacheStrategy.NOCACHE : cacheStrategy, Long.MAX_VALUE, true);
if (response == null) {
// no result: if we are not allowed to go online, report that the network load was omitted instead of attempting a download
if (cacheStrategy == null || cacheStrategy.mustBeOffline()) {
init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry");
return;
}
// if it is still not available, report an error
init(url.hash(), null, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry");
return;
} else {
// place entry on indexing queue
Switchboard.getSwitchboard().toIndexer(response);
source = ResultClass.SOURCE_WEB;
}
}
} catch (final Exception e) {
init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage());
return;
}
/* ===========================================================================
* PARSE RESOURCE
* =========================================================================== */
Document document = null;
try {
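// parse the response and merge the parser results into a single Document; a response can presumably yield more than one document (e.g. container formats), hence the merge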
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Parser.Failure e) {
init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
return;
}
if (document == null) {
init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
return;
}
/* ===========================================================================
* COMPUTE SNIPPET
* =========================================================================== */
// we have found a parseable non-empty file: use the lines
// compute snippet from text
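// the 'pre' flag is passed through to the sentence reader; it presumably marks pre-formatted text and affects how sentences are split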
final Collection<StringBuilder> sentences = document.getSentences(pre);
if (sentences == null) {
init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
return;
}
final SnippetExtractor tsr;
String textline = null;
HandleSet remainingHashes = queryhashes;
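// the extractor searches the sentences for a passage that matches the query word hashes;
// hashes that could not be matched are returned as remaining words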
try {
tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords();
} catch (final UnsupportedOperationException e) {
init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
return;
}
// compute snippet from media links (currently disabled)
//String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
//String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
//String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
//String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
//String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
snippetLine = "";
//if (audioline != null) line += (line.length() == 0) ? audioline : "<br />" + audioline;
//if (videoline != null) line += (line.length() == 0) ? videoline : "<br />" + videoline;
//if (appline != null) line += (line.length() == 0) ? appline : "<br />" + appline;
//if (hrefline != null) line += (line.length() == 0) ? hrefline : "<br />" + hrefline;
if (textline != null) snippetLine += (snippetLine.length() == 0) ? textline : "<br />" + textline;
// without a matching text line, or with unmatched query hashes left over, there is no usable snippet
if (snippetLine.isEmpty() || !remainingHashes.isEmpty()) {
init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
return;
}
if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
// finally store this snippet in our own cache
snippetsCache.put(wordhashes, urls, snippetLine);
document.close();
init(url.hash(), snippetLine, source, null);
}