Package net.yacy.search.snippet

Examples of net.yacy.search.snippet.ResultEntry


        final String target = sb.getConfig(SwitchboardConstants.SEARCH_TARGET, "_self");
        if (theQuery.contentdom == ContentDomain.TEXT) {
            // text search

            // generate result object
            final ResultEntry result = theSearch.oneResult(item, theQuery.isLocal() ? 1000 : 5000);
            if (result == null) return prop; // no content

            final DigestURI resultURL = result.url();
            final int port = resultURL.getPort();
            DigestURI faviconURL = null;
            if ((fileType == FileType.HTML || fileType == FileType.JSON) && !sb.isIntranetMode() && !resultURL.isLocal()) try {
                faviconURL = new DigestURI(resultURL.getProtocol() + "://" + resultURL.getHost() + ((port != -1) ? (":" + port) : "") + "/favicon.ico");
            } catch (final MalformedURLException e1) {
                Log.logException(e1);
                faviconURL = null;
            }
            final String resource = theQuery.domType.toString();
            prop.put("content", 1); // switch on specific content
            prop.put("content_showDate", sb.getConfigBool("search.result.show.date", true) ? 1 : 0);
            prop.put("content_showSize", sb.getConfigBool("search.result.show.size", true) ? 1 : 0);
            prop.put("content_showMetadata", sb.getConfigBool("search.result.show.metadata", true) ? 1 : 0);
            prop.put("content_showParser", sb.getConfigBool("search.result.show.parser", true) ? 1 : 0);
            prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0);
            prop.put("content_authorized", authenticated ? "1" : "0");
            final String urlhash = ASCII.String(result.hash());
            prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1");
            prop.putHTML("content_authorized_bookmark_bookmarklink", "/yacysearch.html?query=" + theQuery.queryString.replace(' ', '+') + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=" + resource + "&time=3&bookmarkref=" + urlhash + "&urlmaskfilter=.*");
            prop.put("content_authorized_recommend", (sb.peers.newsPool.getSpecific(NewsPool.OUTGOING_DB, NewsPool.CATEGORY_SURFTIPP_ADD, "url", result.urlstring()) == null) ? "1" : "0");
            prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?query=" + theQuery.queryString.replace(' ', '+') + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=" + resource + "&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
            prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?query=" + theQuery.queryString.replace(' ', '+') + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=" + resource + "&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
            prop.put("content_authorized_urlhash", urlhash);
            final String resulthashString = urlhash;
            prop.putHTML("content_title", result.title());
            prop.putXML("content_title-xml", result.title());
            prop.putJSON("content_title-json", result.title());
            prop.putHTML("content_link", result.urlstring());
            prop.putHTML("content_showPictures_link", result.urlstring());
            prop.putHTML("content_target", target);
            if (faviconURL != null && fileType == FileType.HTML) sb.loader.loadIfNotExistBackground(faviconURL, 1024 * 1024 * 10);
            prop.putHTML("content_faviconCode", sb.licensedURLs.aquireLicense(faviconURL)); // acquire license for favicon url loading
            prop.put("content_urlhash", resulthashString);
            prop.put("content_ranking", result.ranking);
            prop.put("content_showMetadata_urlhash", resulthashString);
            prop.put("content_showParser_urlhash", resulthashString);
            prop.put("content_urlhexhash", Seed.b64Hash2hexHash(resulthashString));
            prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), MAX_URL_LENGTH));
            prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(result.modified()));
            prop.put("content_date822", HeaderFramework.formatRFC1123(result.modified()));
            //prop.put("content_ybr", RankingProcess.ybr(result.hash()));
            prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
            prop.putHTML("content_sizename", sizename(result.filesize()));
            prop.putHTML("content_showSize_sizename", sizename(result.filesize()));
            prop.putHTML("content_host", resultURL.getHost() == null ? "" : resultURL.getHost());
            prop.putHTML("content_file", resultURL.getFile());
            prop.putHTML("content_path", resultURL.getPath());
            prop.put("content_nl", (item == theQuery.offset) ? 0 : 1);
            prop.putHTML("content_publisher", result.publisher());
            prop.putHTML("content_creator", result.creator());// author
            prop.putHTML("content_subject", result.subject());
            final Set<String>[] query = theQuery.queryWords();
            final StringBuilder s = new StringBuilder(query[0].size() * 20);
            for (final String t: query[0]) {
                s.append('+').append(t);
            }
            final String words = (s.length() > 0) ? s.substring(1) : "";
            prop.putHTML("content_words", words);
            prop.putHTML("content_showParser_words", words);
            prop.putHTML("content_former", theQuery.queryString);
            prop.putHTML("content_showPictures_former", theQuery.queryString);
            final TextSnippet snippet = result.textSnippet();
            final String desc = (snippet == null) ? "" : snippet.getLineMarked(theQuery.fullqueryHashes);
            prop.put("content_description", desc);
            prop.putXML("content_description-xml", desc);
            prop.putJSON("content_description-json", desc);
            final SearchEvent.HeuristicResult heuristic = theSearch.getHeuristic(result.hash());
            if (heuristic == null) {
                prop.put("content_heuristic", 0);
            } else {
                if (heuristic.redundant) {
                    prop.put("content_heuristic", 1);
                } else {
                    prop.put("content_heuristic", 2);
                }
                prop.put("content_heuristic_name", heuristic.heuristicName);
            }
            EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEvent.Type.FINALIZATION, "" + item, 0, 0), false);
            final String ext = resultURL.getFileExtension().toLowerCase();
            if (ext.equals("png") || ext.equals("jpg") || ext.equals("gif")) {
                final String license = sb.licensedURLs.aquireLicense(resultURL);
                prop.put("content_code", license);
            } else {
                prop.put("content_code", "");
            }
            if (result.lat() == 0.0f || result.lon() == 0.0f) {
                prop.put("content_loc", 0);
            } else {
                prop.put("content_loc", 1);
                prop.put("content_loc_lat", result.lat());
                prop.put("content_loc_lon", result.lon());
            }
            theQuery.transmitcount = item + 1;
            return prop;
        }

        if (theQuery.contentdom == ContentDomain.IMAGE) {
            // image search; shows thumbnails

            prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
            final MediaSnippet ms = theSearch.result().oneImage(item);
            if (ms == null) {
                prop.put("content_item", "0");
            } else {
                final String license = sb.licensedURLs.aquireLicense(ms.href);
                sb.loader.loadIfNotExistBackground(ms.href, 1024 * 1024 * 10);
                prop.putHTML("content_item_hrefCache", (auth) ? "/ViewImage.png?url=" + ms.href.toNormalform(true, false) : ms.href.toNormalform(true, false));
                prop.putHTML("content_item_href", ms.href.toNormalform(true, false));
                prop.putHTML("content_item_target", target);
                prop.put("content_item_code", license);
                prop.putHTML("content_item_name", shorten(ms.name, MAX_NAME_LENGTH));
                prop.put("content_item_mimetype", ms.mime);
                prop.put("content_item_fileSize", ms.fileSize);
                prop.put("content_item_width", ms.width);
                prop.put("content_item_height", ms.height);
                prop.put("content_item_attr", (ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"); // attributes, here: original size of image
                prop.put("content_item_urlhash", ASCII.String(ms.source.hash()));
                prop.put("content_item_source", ms.source.toNormalform(true, false));
                prop.putXML("content_item_source-xml", ms.source.toNormalform(true, false));
                prop.put("content_item_sourcedom", ms.source.getHost());
                prop.put("content_item_nl", (item == theQuery.offset) ? 0 : 1);
                prop.put("content_item", 1);
            }
            theQuery.transmitcount = item + 1;
            return prop;
        }

        if ((theQuery.contentdom == ContentDomain.AUDIO) ||
            (theQuery.contentdom == ContentDomain.VIDEO) ||
            (theQuery.contentdom == ContentDomain.APP)) {
            // any other media content

            // generate result object
            final ResultEntry result = theSearch.oneResult(item, 500);
            if (result == null) return prop; // no content

            prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
            final List<MediaSnippet> media = result.mediaSnippets();
            if (item == 0) col = true;
            if (media != null) {
                int c = 0;
                for (final MediaSnippet ms : media) {
                    prop.putHTML("content_items_" + c + "_href", ms.href.toNormalform(true, false));
View Full Code Here


        @Override
        public void run() {

            // start fetching urls and snippets
            URIMetadataRow page;
            ResultEntry resultEntry;
            //final int fetchAhead = snippetMode == 0 ? 0 : 10;
            final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics",0) >= 0;
            try {
                //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis()));
                int loops = 0;
                while (this.shallrun && System.currentTimeMillis() < this.timeout) {
                    this.lastLifeSign = System.currentTimeMillis();

                    if (MemoryControl.shortStatus()) {
                      break;
                    }

                    // check if we have enough
                    if (SnippetProcess.this.result.sizeAvailable() >= this.neededResults) {
                        //Log.logWarning("ResultFetcher", SnippetProcess.this.result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults);
                        break;
                    }

                    // check if we can succeed if we try to take another url
                    if (SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.sizeQueue() == 0) {
                        //Log.logWarning("ResultFetcher", "rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0");
                        break;
                    }

                    // get next entry
                    page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis()));
                    //if (page != null) Log.logInfo("ResultFetcher", "got one page: " + page.metadata().url().toNormalform(true, false));
                    //if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
                    if (page == null) {
                        //Log.logWarning("ResultFetcher", "page == null");
                        break; // no more available
                    }
                    if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) continue;

                    // in case that we have an attached solr, we load also the solr document
                    String solrContent = null;
                    if (this.solr != null) {
                        SolrDocument sd = null;
                        final SolrDocumentList sdl = this.solr.get("id:" + ASCII.String(page.hash()), 0, 1);
                        if (sdl.size() > 0) sd = sdl.get(0);
                        if (sd != null) solrContent = this.solr.getScheme().solrGetText(sd);
                    }

                    loops++;
                    resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0
                    if (resultEntry == null) continue; // the entry had some problems, cannot be used
                    //final String rawLine = resultEntry.textSnippet() == null ? null : resultEntry.textSnippet().getLineRaw();
                    //System.out.println("***SNIPPET*** raw='" + rawLine + "', pattern='" + this.snippetPattern.toString() + "'");
                    //if (rawLine != null && !this.snippetPattern.matcher(rawLine).matches()) continue;

                    //if (result.contains(resultEntry)) continue;
                    SnippetProcess.this.urlRetrievalAllTime += resultEntry.dbRetrievalTime;
                    SnippetProcess.this.snippetComputationAllTime += resultEntry.snippetComputationTime;

                    // place the result to the result vector
                    // apply post-ranking
                    long ranking = Long.valueOf(SnippetProcess.this.rankingProcess.getOrder().cardinal(resultEntry.word()));
                    ranking += postRanking(resultEntry, SnippetProcess.this.rankingProcess.getTopicNavigator(10));
                    resultEntry.ranking = ranking;
                    SnippetProcess.this.result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
                    if (nav_topics) SnippetProcess.this.rankingProcess.addTopics(resultEntry);
                }
View Full Code Here

            try { Thread.sleep(10); } catch (final InterruptedException e1) {}
        }

        if (this.result.sizeAvailable() > item) {
            // we have the wanted result already in the result array .. return that
            final ResultEntry re = this.result.element(item).getElement();
            EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "prefetched, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false);
            return re;
        }

        // finally wait until enough results are there produced from the snippet fetch process
        WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
        while (System.currentTimeMillis() < finishTime) {

            if (!anyWorkerAlive() && !this.rankingProcess.isAlive() && this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && this.rankingProcess.feedingIsFinished()) {
                //Log.logInfo("SnippetProcess", "interrupted result fetching; item = " + item + "; this.result.sizeAvailable() = " + this.result.sizeAvailable() + ", this.rankingProcess.sizeQueue() = " + this.rankingProcess.sizeQueue());
                break; // the fail case
            }

            // deploy worker to get more results
            if (!anyWorkerAlive()) {
                final int neededInclPrefetch = this.query.neededResults() + ((MemoryControl.available() > 100 * 1024 * 1024) ? this.query.itemsPerPage : 0);
                deployWorker(Math.min(20, this.query.itemsPerPage), neededInclPrefetch);
            }

            try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}
            if (entry != null) break;
        }

        // finally, if there is something, return the result
        if (entry == null) {
            EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "not found, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);
            return null;
        }
        final ResultEntry re = entry.getElement();
        EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false);
        return re;
    }
View Full Code Here

        return re;
    }

    private int resultCounter = 0;
    public ResultEntry nextResult() {
        final ResultEntry re = oneResult(this.resultCounter, 3000);
        this.resultCounter++;
        return re;
    }
View Full Code Here

        // now take the specific item from the image stack
        return this.images.element(item).getElement();
    }

    private int fillImagesCache() {
        final ResultEntry result = nextResult();
        int c = 0;
        if (result == null) return c;
        // iterate over all images in the result
        final List<MediaSnippet> imagemedia = result.mediaSnippets();
        if (imagemedia != null) {
          ResponseHeader header;
            feedloop: for (final MediaSnippet ms: imagemedia) {
                // check cache to see if the mime type of the image url is correct
                header = Cache.getResponseHeader(ms.href.hash());
View Full Code Here

                    null,
                    ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
                    220,
                    Integer.MAX_VALUE,
                    !this.query.isLocal());
            return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, dbRetrievalTime, 0); // result without snippet
        }

        // load snippet
        if (this.query.contentdom == ContentDomain.TEXT) {
            // attach text snippet
            startTime = System.currentTimeMillis();
            final TextSnippet snippet = new TextSnippet(
                    this.loader,
                    solrText,
                    metadata,
                    this.snippetFetchWordHashes,
                    cacheStrategy,
                    ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
                    180,
                    Integer.MAX_VALUE,
                    !this.query.isLocal());
            final long snippetComputationTime = System.currentTimeMillis() - startTime;
            Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + (!snippet.getErrorCode().fail() ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));

            if (!snippet.getErrorCode().fail()) {
                // we loaded the file and found the snippet
                return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached
            } else if (cacheStrategy.mustBeOffline()) {
                // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
                // this may happen during a remote search, because snippet loading is omitted to retrieve results faster
                return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet
            } else {
                // problems with snippet fetch
                final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
                if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), metadata.url(), this.query.queryHashes, reason);
                Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
                return null;
            }
        } else {
            // attach media information
            startTime = System.currentTimeMillis();
            final List<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal());
            final long snippetComputationTime = System.currentTimeMillis() - startTime;
            Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);

            if (mediaSnippets != null && !mediaSnippets.isEmpty()) {
                // found media snippets, return entry
                return new ResultEntry(page, this.query.getSegment(), this.peers, null, mediaSnippets, dbRetrievalTime, snippetComputationTime);
            } else if (cacheStrategy.mustBeOffline()) {
                return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime);
            } else {
                // problems with snippet fetch
                final String reason = "no media snippet";
                if (this.deleteIfSnippetFail) this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), metadata.url(), this.query.queryHashes, reason);
                Log.logInfo("SEARCH", "sorted out url " + metadata.url().toNormalform(true, false) + " during search: " + reason);
View Full Code Here

TOP

Related Classes of net.yacy.search.snippet.ResultEntry

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.