Package de.anomic.search

Examples of de.anomic.search.Switchboard$indexingQueueEntry


    private final static int CONCURRENT_RUNNER = 100;
   
    /**
     * Servlet entry point of the network scanner page (the scheduled API call below is
     * recorded under "CrawlStartScanner_p.html").
     * Fills the template properties, optionally scans a host list or the intranet for
     * services, starts crawls for marked scan results, and handles scheduled re-scans.
     * NOTE(review): this listing is truncated inside the scheduler loop; the end of the
     * method (including its return statement) is not visible.
     *
     * @param header request header; not read in the visible part
     * @param post   request parameters; may be null, in which case only defaults are set
     * @param env    server environment; cast to Switchboard
     * @return presumably prop -- the return statement is outside this excerpt
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
       
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard)env;

        // clean up all search events
        SearchEventCache.cleanupEvents(true);
       
        // default template values
        prop.put("noserverdetected", 0);
        prop.put("servertable", 0);
        prop.put("hosts", "");
        prop.put("intranet.checked", sb.isIntranetMode() ? 1 : 0);

        // default timeout depends on the mode; a "timeout" request parameter overrides it
        // (unit is not visible here -- presumably milliseconds, confirm against Scanner)
        int timeout = sb.isIntranetMode() ? 200 : 3000;
        timeout = post == null ? timeout : post.getInt("timeout", timeout);
       
        // make a scanhosts entry
        String hosts = post == null ? "" : post.get("scanhosts", "");
        Set<InetAddress> ips = Domains.myIntranetIPs();
        prop.put("intranethosts", ips.toString());
        prop.put("intranetHint", sb.isIntranetMode() ? 0 : 1);
        if (hosts.length() == 0) {
            // no hosts given: derive a default host from the local network configuration
            InetAddress ip;
            if (sb.isIntranetMode()) {
                if (ips.size() > 0) ip = ips.iterator().next();
                else ip = Domains.dnsResolve("192.168.0.1");
            } else {
                ip = Domains.myPublicLocalIP();
                if (Domains.isThisHostIP(ip)) ip = sb.peers.mySeed().getInetAddress();
            }
            if (ip != null) hosts = ip.getHostAddress();
        }
        prop.put("scanhosts", hosts);
       
        // parse post requests
        if (post != null) {
            int repeat_time = 0;
            String repeat_unit = "seldays";
            long validTime = 0;

            // check scheduler
            if (post.get("rescan", "").equals("scheduler")) {
                repeat_time = post.getInt("repeat_time", -1);
                repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays
                // NOTE(review): the products below are computed in int arithmetic and only then
                // widened to long; e.g. for "seldays" the result overflows for repeat_time >= 25.
                // A long literal (1000L) would avoid this. With the default repeat_time of -1
                // validTime becomes negative.
                if (repeat_unit.equals("selminutes")) validTime = repeat_time * 60 * 1000;
                if (repeat_unit.equals("selhours")) validTime = repeat_time * 60 * 60 * 1000;
                if (repeat_unit.equals("seldays")) validTime = repeat_time * 24 * 60 * 60 * 1000;
            }
           
            boolean bigrange = post.getBoolean("bigrange", false);
           
            // case: an IP range was given; scan the range for services and display result
            if (post.containsKey("scan") && "hosts".equals(post.get("source", ""))) {
                final Set<InetAddress> ia = new HashSet<InetAddress>();
                for (String host : hosts.split(",")) {
                    // strip a leading protocol and any path so that only the host name remains
                    if (host.startsWith("http://")) host = host.substring(7);
                    if (host.startsWith("https://")) host = host.substring(8);
                    if (host.startsWith("ftp://")) host = host.substring(6);
                    if (host.startsWith("smb://")) host = host.substring(6);
                    int p = host.indexOf('/');
                    if (p >= 0) host = host.substring(0, p);
                    // NOTE(review): the resolved address is added unchecked; the null check on a
                    // dnsResolve result earlier in this method suggests it may return null -- confirm
                    ia.add(Domains.dnsResolve(host));
                }
                final Scanner scanner = new Scanner(ia, CONCURRENT_RUNNER, timeout);
                // enable one scan per protocol whose checkbox parameter is "on"
                if (post.get("scanftp", "").equals("on")) scanner.addFTP(bigrange);
                if (post.get("scanhttp", "").equals("on")) scanner.addHTTP(bigrange);
                if (post.get("scanhttps", "").equals("on")) scanner.addHTTPS(bigrange);
                if (post.get("scansmb", "").equals("on")) scanner.addSMB(bigrange);
                scanner.start();
                scanner.terminate();
                // extend the scan cache only if accumulation is requested and this is not a scheduler run;
                // otherwise replace it
                if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) {
                    Scanner.scancacheExtend(scanner, validTime);
                } else {
                    Scanner.scancacheReplace(scanner, validTime);
                }
            }
           
            // case: scan the intranet addresses; same protocol selection and cache handling as above
            if (post.containsKey("scan") && "intranet".equals(post.get("source", ""))) {
                final Scanner scanner = new Scanner(Domains.myIntranetIPs(), CONCURRENT_RUNNER, timeout);
                if ("on".equals(post.get("scanftp", ""))) scanner.addFTP(bigrange);
                if ("on".equals(post.get("scanhttp", ""))) scanner.addHTTP(bigrange);
                if ("on".equals(post.get("scanhttps", ""))) scanner.addHTTPS(bigrange);
                if ("on".equals(post.get("scansmb", ""))) scanner.addSMB(bigrange);
                scanner.start();
                scanner.terminate();
                if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) {
                    Scanner.scancacheExtend(scanner, validTime);
                } else {
                    Scanner.scancacheReplace(scanner, validTime);
                }
            }
           
            // check crawl request
            if (post.containsKey("crawl")) {
                // make a pk/url mapping
                final Iterator<Map.Entry<Scanner.Service, Scanner.Access>> se = Scanner.scancacheEntries();
                final Map<byte[], DigestURI> pkmap = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
                while (se.hasNext()) {
                    Scanner.Service u = se.next().getKey();
                    DigestURI uu;
                    try {
                        uu = new DigestURI(u.url());
                        pkmap.put(uu.hash(), uu);
                    } catch (MalformedURLException e) {
                        // entries with a malformed URL are logged and left out of the mapping
                        Log.logException(e);
                    }
                }
                // search for crawl start requests in this mapping
                for (final Map.Entry<String, String> entry: post.entrySet()) {
                    // a request VALUE of the form "mark_<hash>" selects the scan result to crawl
                    if (entry.getValue().startsWith("mark_")) {
                        // NOTE(review): getBytes() without a charset uses the platform default encoding
                        byte [] pk = entry.getValue().substring(5).getBytes();
                        DigestURI url = pkmap.get(pk);
                        if (url != null) {
                            String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
                            // NOTE(review): the URL is concatenated into the query string without
                            // explicit URL-encoding -- confirm toNormalform output is safe here
                            path += "&crawlingURL=" + url.toNormalform(true, false);
                            WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, pk);
                        }
                    }
                }
            }
           
            // check scheduler
            if ("scheduler".equals(post.get("rescan", ""))) {
               
                // store this call as api call
                if (repeat_time > 0) {
                    // store as scheduled api call
                    // repeat_unit.substring(3) drops the "sel" prefix (e.g. "seldays" -> "days")
                    sb.tables.recordAPICall(post, "CrawlStartScanner_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "network scanner for hosts: " + hosts, repeat_time, repeat_unit.substring(3));
                }
               
                // execute the scan results
                if (Scanner.scancacheSize() > 0) {
                    // make a comment cache
                    final Map<byte[], String> apiCommentCache = WorkTables.commentCache(sb);
                   
                    String urlString;
                    DigestURI u;
                    try {
                        // i counts the entries processed without a MalformedURLException;
                        // its use is outside the visible excerpt
                        int i = 0;
                        final Iterator<Map.Entry<Scanner.Service, Scanner.Access>> se = Scanner.scancacheEntries();
                        Map.Entry<Scanner.Service, Scanner.Access> host;
                        while (se.hasNext()) {
                            host = se.next();
                            try {
                                u = new DigestURI(host.getKey().url());
                                urlString = u.toNormalform(true, false);
                                // start a crawl only for services with granted access for which
                                // Scanner.inIndex returns null for this URL
                                if (host.getValue() == Access.granted && Scanner.inIndex(apiCommentCache, urlString) == null) {
                                    String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
                                    path += "&crawlingURL=" + urlString;
                                    WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, u.hash());
                                }
                                i++;
                            } catch (MalformedURLException e) {
                                Log.logException(e);
                            }
View Full Code Here


public class IndexImportOAIPMH_p {

    /**
     * Servlet entry point of the OAI-PMH import page.
     * NOTE(review): this listing is truncated after the first statements of the method.
     *
     * @param header request header; not read in the visible part
     * @param post   request parameters; not read in the visible part
     * @param env    server environment; cast to Switchboard
     * @return presumably prop -- the return statement is outside this excerpt
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        // default template values
        prop.put("import-one", 0);
        prop.put("status", 0);
        prop.put("defaulturl", "");
        // total number of import jobs across the running, started and finished collections
        int jobcount = OAIPMHImporter.runningJobs.size() + OAIPMHImporter.startedJobs.size() + OAIPMHImporter.finishedJobs.size();
View Full Code Here

    private static           long      imgZIndex[]   = new long[fifoMax];
    private static final     Random rand = new Random();
   
    /**
     * Servlet entry point that maintains a ring buffer of image origins in static fields
     * (origins, imgWidth, imgHeight, imgPosX, imgPosY, imgZIndex, fifoPos, fifoSize, zIndex;
     * most are declared outside this excerpt) and emits one positioned image link per
     * buffered entry.
     * NOTE(review): this listing is truncated in the middle of the output loop.
     *
     * @param header request header; used for the authentication check
     * @param post   request parameters ("emb", "width", "height", "max"); may be null
     * @param env    server environment; cast to Switchboard
     * @return presumably prop -- the return statement is outside this excerpt
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;
        final boolean authenticated = sb.verifyAuthentication(header, false);
        // the negated authentication flag is passed on to ResultImages.next
        final ResultImages.OriginEntry nextOrigin = ResultImages.next(!authenticated);
        // default size of the area in which images are placed; overridable by "width"/"height"
        int posXMax  = 800;
        int posYMax  = 500;
        boolean embed = false;
       
        if (post != null) {
          embed = post.containsKey("emb");
          posXMax = post.getInt("width", posXMax);
          posYMax = post.getInt("height", posYMax);
          // NOTE(review): this changes the static fifoMax per request, but imgZIndex was
          // allocated with the initial fifoMax (see its declaration) and is not resized here;
          // a larger "max" may lead to out-of-bounds indices below -- confirm for the other arrays
          if (post.containsKey("max")) fifoMax = post.getInt("max", fifoMax);
        }
        // note the inversion: "emb" is "0" when embedding was requested, "1" otherwise
        prop.put("emb", (embed) ? "0" : "1");
       
        if (nextOrigin != null) {
          // NOTE(review): debug output written directly to stdout
          System.out.println("NEXTORIGIN=" + nextOrigin.imageEntry.url().toNormalform(true, false));
            // store the new origin unless it is the same object as the most recently stored one
            if (fifoSize == 0 || origins[fifoPos] != nextOrigin) {
                // advance the write position with wrap-around; the size saturates at fifoMax
                fifoPos = fifoPos + 1 >= fifoMax ? 0 : fifoPos + 1;
                fifoSize = fifoSize + 1 > fifoMax ? fifoMax : fifoSize + 1;
                origins[fifoPos] = nextOrigin;
               
                // random shrink factor in the range [1.0, 2.5)
                final float scale = rand.nextFloat() * 1.5f + 1;
                // NOTE(review): the next line is missing the closing ']' after fifoPos and does not
                // compile as shown; this looks like an extraction error, the intended statement is
                // presumably imgWidth[fifoPos] = (int) ((nextOrigin.imageEntry.width()) / scale);
                imgWidth[fifoPos= (int) ((nextOrigin.imageEntry.width()) / scale);
                imgHeight[fifoPos] = (int) ((nextOrigin.imageEntry.height()) / scale);

                // random position; Math.max(1, ...) keeps the bound positive when the image is
                // larger than the area.
                // NOTE(review): for a zero-sized image the bound is posXMax / 2 (posYMax / 2), which
                // is <= 0 for a "width"/"height" parameter below 2 and makes Random.nextInt throw
                // IllegalArgumentException
                imgPosX[fifoPos]   = rand.nextInt((imgWidth[fifoPos] == 0) ? posXMax / 2 : Math.max(1, posXMax - imgWidth[fifoPos]));
                imgPosY[fifoPos]   = rand.nextInt((imgHeight[fifoPos] == 0) ? posYMax / 2 : Math.max(1, posYMax - imgHeight[fifoPos]));
               
                // each newly stored image gets the next z-index value
                imgZIndex[fifoPos] = zIndex;
                zIndex += 1;
            }
        }
       
        if (fifoSize > 0) {
            prop.put("imgurl", "1");       
            // c is used as the index in the emitted "imgurl_list_<c>_..." property keys
            int c = 0;
            final int yOffset = embed ? 0 : 70;
            for (int i = 0; i < fifoSize; i++) {
            
                final MultiProtocolURI baseURL = origins[i].baseURL;
                final MultiProtocolURI imageURL = origins[i].imageEntry.url();
               
                // check if this loads a page from localhost, which must be prevented to protect the server
                // against attacks to the administration interface when localhost access is granted
                if ((Domains.isLocal(baseURL.getHost()) || Domains.isLocal(imageURL.getHost())) &&
                    sb.getConfigBool("adminAccountForLocalhost", false)) continue;
               
                final long z = imgZIndex[i];
                prop.put("imgurl_list_" + c + "_url",
                       "<a href=\"" + baseURL.toNormalform(true, false) + "\">"
                       + "<img src=\"" + imageURL.toNormalform(true, false) + "\" "
View Full Code Here

    private static final int MAX_HIGHLIGHTS = 6;

    /**
     * Servlet entry point of a document viewer: resolves the document either from a URL hash
     * looked up in the index segment or from a URL string (the latter requires authentication)
     * and fills the template properties accordingly.
     * NOTE(review): this listing is truncated inside the URL-string branch.
     *
     * @param header request header; used for the authentication checks
     * @param post   request parameters; when null, an error page with default values is returned
     * @param env    server environment; cast to Switchboard
     * @return the filled property set
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {

        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard)env;

        // no parameters at all: return the page with error code "1" and empty defaults
        if (post == null) {
            prop.put("display", 1);
            prop.put("error_display", 0);
            prop.putHTML("error_words", "");
            prop.put("error_vMode-sentences", "1");
            prop.put("error", "1");
            prop.put("url", "");
            prop.put("viewMode", VIEW_MODE_NO_TEXT);
            return prop;
        }

        final int display = post.getInt("display", 1);

        // get segment
        // a named segment is only honoured for authenticated requests; otherwise the public one is used
        Segment indexSegment = null;
        final boolean authorized = sb.verifyAuthentication(header, false);
        // NOTE(review): "post != null" is always true here because of the early return above
        if (post != null && post.containsKey("segment") && authorized) {
            indexSegment = sb.indexSegments.segment(post.get("segment"));
        } else {
            indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
        }

        prop.put("display", display);
        prop.put("error_display", display);

        // echo the search words back into the template (HTML-escaping presumably done by putHTML)
        if (post.containsKey("words"))
            prop.putHTML("error_words", post.get("words"));
        else {
            prop.putHTML("error_words", "");
        }

        // the requested view mode becomes part of the property key
        final String viewMode = post.get("viewMode","parsed");
        prop.put("error_vMode-" + viewMode, "1");

        DigestURI url = null;
        String descr = "";
        // NOTE(review): wordCount is final and fixed at 0 in the visible part; the call that
        // would supply it is commented out below
        final int wordCount = 0;
        int size = 0;
        boolean pre = false;

        // get the url hash from which the content should be loaded
        String urlHash = post.get("urlHash", "");
        URIMetadataRow urlEntry = null;
        // get the urlEntry that belongs to the url hash
        if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(ASCII.getBytes(urlHash))) != null) {
            // get the url that belongs to the entry
            final URIMetadataRow.Components metadata = urlEntry.metadata();
            // entry without usable metadata: report error code "3"
            if ((metadata == null) || (metadata.url() == null)) {
                prop.put("error", "3");
                prop.put("viewMode", VIEW_MODE_NO_TEXT);
                return prop;
            }
            url = metadata.url();
            descr = metadata.dc_title();
            //urlEntry.wordCount();
            size = urlEntry.size();
            pre = urlEntry.flags().get(Condenser.flag_cat_indexof);
        }

        // 1 if an entry was loaded for the given hash, 0 otherwise
        prop.put("error_inurldb", urlEntry == null ? 0 : 1);

        // alternatively, get the url simply from a url String
        // this can be used as a simple tool to test the text parser
        final String urlString = post.get("url", "");
        if (urlString.length() > 0) try {
            // this call forces the peer to download  web pages
            // it is therefore protected by the admin password

            // NOTE(review): repeats the verifyAuthentication call whose result is already
            // held in "authorized" above
            if (!sb.verifyAuthentication(header, false)) {
                prop.put("AUTHENTICATE", "admin log-in"); // force log-in
                return prop;
            }

            // define an url by post parameter
View Full Code Here

public class IndexCreateLoaderQueue_p {
   
    /**
     * Servlet entry point of the loader queue page.
     * NOTE(review): this listing is truncated right after the empty-queue check.
     *
     * @param header request header; not read in the visible part
     * @param post   request parameters; not read in the visible part
     * @param env    server environment; cast to Switchboard
     * @return presumably prop -- the return statement is outside this excerpt
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
       

        // no crawl workers: mark the loader set as empty
        if (sb.crawlQueues.workerSize() == 0) {
            prop.put("loader-set", "0");
View Full Code Here

import de.anomic.yacy.yacySeed;

public class News {
   
    /**
     * Servlet entry point of the news page: shows either an overview or one news table and
     * executes delete commands on the selected table.
     * NOTE(review): this listing is truncated inside the "deleteall" branch.
     *
     * @param header request header; used for the admin authentication check
     * @param post   request parameters; may be null (overview is shown)
     * @param env    server environment; cast to Switchboard
     * @return the property set; when authentication is missing it only carries "AUTHENTICATE"
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
        // overview mode when there are no parameters or "page" is "0"
        final boolean overview = (post == null) || "0".equals(post.get("page", "0"));
        // table index is page - 1; -1 denotes the overview.
        // NOTE(review): the "post == null ? 0" branch is unreachable, because overview is
        // already true whenever post is null
        final int tableID = (overview) ? -1 : (post == null ? 0 : post.getInt("page", 0)) - 1;

        // execute commands
        if (post != null) {
           
            // delete only the entries whose "del_<id>" checkbox is "on"
            if ((post.containsKey("deletespecific")) && (tableID >= 0)) {
                if (sb.adminAuthenticated(header) < 2) {
                    prop.put("AUTHENTICATE", "admin log-in");
                    return prop; // this button needs authentication, force log-in
                }
                final Iterator<String> e = post.keySet().iterator();
                String check;
                String id;
                while (e.hasNext()) {
                    check = e.next();
                    if ((check.startsWith("del_")) && "on".equals(post.get(check, "off"))) {
                        // the part after the "del_" prefix is the news id
                        id = check.substring(4);
                        try {
                            sb.peers.newsPool.moveOff(tableID, id);
                        // any failure is logged and the loop continues with the next key
                        } catch (final Exception ee) {Log.logException(ee);}
                    }
                }
            }
           
            // delete all entries of the selected table
            if ((post.containsKey("deleteall")) && (tableID >= 0)) {
                if (sb.adminAuthenticated(header) < 2) {
                    prop.put("AUTHENTICATE", "admin log-in");
                    return prop; // this button needs authentication, force log-in
                }
                try {
                    if ((tableID == yacyNewsPool.PROCESSED_DB) || (tableID == yacyNewsPool.PUBLISHED_DB)) {
View Full Code Here

    private static final int INITIATOR  = 5;
    private static final int MODIFIED   = 6;

    /**
     * Servlet entry point of a crawl queue page: reads the display limit and handles the
     * "deleteEntries" command on the CORE stack of the notice-URL queue.
     * NOTE(review): this listing is truncated inside the pattern-based delete branch.
     *
     * @param header request header; not read in the visible part
     * @param post   request parameters ("limit", "deleteEntries", "pattern", "option"); may be null
     * @param env    server environment; cast to Switchboard
     * @return presumably prop -- the return statement is outside this excerpt
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
        int showLimit = 100;
        if (post != null) {
            showLimit = post.getInt("limit", 100);
           
            if (post.containsKey("deleteEntries")) {
                // c receives the stack size before clearing in the ".*" branch;
                // its further use is outside the visible excerpt
                int c = 0;
               
                final String pattern = post.get("pattern", ".*").trim();
                // INVALID is a constant declared outside this excerpt
                final int option  = post.getInt("option", INVALID);
                // the match-all pattern clears the whole CORE stack without compiling a regex
                if (".*".equals(pattern)) {
                    c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
                    sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.CORE);
                    // NOTE(review): the InterruptedException is swallowed without restoring
                    // the thread's interrupt status
                    try { sb.cleanProfiles(); } catch (final InterruptedException e) {/* ignore this */}
                } else if (option > INVALID) {
                    try {
                        // compiling the regular expression
                        final Pattern compiledPattern = Pattern.compile(pattern);
                       
View Full Code Here

    private static final String PEERSTATUS = "peerStatus";

    /**
     * Servlet entry point of the status page: executes simple admin commands (pause/continue
     * crawl jobs, reset traffic counters, toggle popup/tray settings) and then fills the
     * template with peer, protection, proxy, seed-upload, memory, traffic, connection and
     * queue information.
     * NOTE(review): this listing is truncated before the end of the method.
     *
     * @param header request header; used for the admin authentication checks
     * @param post   request parameters; may be null
     * @param env    server environment; cast to Switchboard
     * @return the property set; command requests return early with "LOCATION" or "AUTHENTICATE"
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        // check if the basic configuration was accessed before and forward
        // forwarding needs all three: no "noforward" parameter, ConfigBasic.html not yet listed
        // in "server.servlets.submitted", and the peer still carrying a default name
        prop.put("forwardToConfigBasic", 0);
        if ((post == null || !post.containsKey("noforward")) &&
            sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html") < 0 &&
            yacySeed.isDefaultPeerName(sb.peers.mySeed().getName())) {
            // forward to ConfigBasic
            prop.put("forwardToConfigBasic", 1);
        }
        // "noforward" is removed so that it does not count as a command below
        if (post != null) post.remove("noforward");
       
        // any remaining parameter is treated as a command and requires admin authentication
        if (post != null && post.size() > 0) {
            if (sb.adminAuthenticated(header) < 2) {
                prop.put("AUTHENTICATE","admin log-in");
                return prop;
            }
            boolean redirect = false;
            if (post.containsKey("login")) {
                prop.put("LOCATION","");
                return prop;
            } else if (post.containsKey("pauseCrawlJob")) {
                final String jobType = post.get("jobType");
                if ("localCrawl".equals(jobType)) {
                    sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
                } else if ("remoteTriggeredCrawl".equals(jobType)) {
                    sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
                }
                redirect = true;
            } else if (post.containsKey("continueCrawlJob")) {
                final String jobType = post.get("jobType");
                if ("localCrawl".equals(jobType)) {
                    sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
          } else if ("remoteTriggeredCrawl".equals(jobType)) {
                    sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
                }
                redirect = true;
            } else if (post.containsKey("ResetTraffic")) {
                ByteCount.resetCount();
                redirect = true;
            } else if (post.containsKey("popup")) {
                final boolean trigger_enabled = post.getBoolean("popup", false);
                sb.setConfig("browserPopUpTrigger", trigger_enabled);
                redirect = true;
            } else if (post.containsKey("tray")) {
                final boolean trigger_enabled = post.getBoolean("tray", false);
                sb.setConfig("trayIcon", trigger_enabled);
                redirect = true;
            }
         
            // after a command was executed, return with an empty "LOCATION" property
            if (redirect) {
                    prop.put("LOCATION","");
                    return prop;
            }
        }

        // update seed info
        sb.updateMySeed();

        // the private status table is only included for admin-authenticated requests
        final boolean adminaccess = sb.adminAuthenticated(header) >= 2;
        if (adminaccess) {
            prop.put("showPrivateTable", "1");
            prop.put("privateStatusTable", "Status_p.inc");
        } else {
            prop.put("showPrivateTable", "0");
            prop.put("privateStatusTable", "");
        }

        // password protection
        // unprotected means: no admin account hash configured and no localhost admin access
        if ((sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) && (!sb.getConfigBool("adminAccountForLocalhost", false))) {
            prop.put("protection", "0"); // not protected
            prop.put("urgentSetPassword", "1");
        } else {
            prop.put("protection", "1"); // protected
        }

        if (sb.getConfigBool("adminAccountForLocalhost", false)) {
            prop.put("unrestrictedLocalAccess", 1);
        }

        // resource observer status
        if (adminaccess) {
            if (!sb.observer.getDiskAvailable()){
                final String minFree = Formatter.bytesToString(sb.observer.getMinFreeDiskSpace());
                prop.put("warningDiskSpaceLow", "1");
                prop.put("warningDiskSpaceLow_minSpace", minFree);
            }
            if (!sb.observer.getMemoryAvailable()){
                // the value is multiplied by 1024 before formatting as bytes
                // (presumably getMinFreeMemory returns KiB -- confirm)
                final String minFree = Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L);
                prop.put("warningMemoryLow", "1");
                prop.put("warningMemoryLow_minSpace", minFree);
            }
         
        }
       
        // version information
        //final String versionstring = yacyVersion.combined2prettyVersion(sb.getConfig("version","0.1"));
        final String versionstring = yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
        prop.put("versionpp", versionstring);
       
        // place some more hints
        // empty local crawl queue: hint to start a crawl; more than 500 jobs: hint to the monitor
        if ((adminaccess) && (sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount() == 0)) {
            prop.put("hintCrawlStart", "1");
        }
       
        if ((adminaccess) && (sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount() > 500)) {
            prop.put("hintCrawlMonitor", "1");
        }
       
        // hostname and port
        // a ':' in the configured port string selects the extended port format
        final String extendedPortString = sb.getConfig("port", "8090");
        final int pos = extendedPortString.indexOf(":");
        prop.put("port",serverCore.getPortNr(extendedPortString));
        if (pos != -1) {
            prop.put("extPortFormat", "1");
            prop.putHTML("extPortFormat_extPort",extendedPortString);
        } else {
            prop.put("extPortFormat", "0");
        }
        InetAddress hostIP = Domains.myPublicLocalIP();
        // NOTE(review): "Unkown IP" is a typo for "Unknown IP" in a user-visible string
        prop.put("host", hostIP != null ? hostIP.getHostAddress() : "Unkown IP");
       
        // ssl support
        prop.put("sslSupport",sb.getConfig("keyStore", "").length() == 0 ? "0" : "1");

        if (sb.getConfigBool("remoteProxyUse", false)) {
            prop.put("remoteProxy", "1");
            prop.putXML("remoteProxy_host", sb.getConfig("remoteProxyHost", "<unknown>"));
            prop.putXML("remoteProxy_port", sb.getConfig("remoteProxyPort", "<unknown>"));
            prop.put("remoteProxy_4Yacy", sb.getConfigBool("remoteProxyUse4Yacy", true) ? "0" : "1");
        } else {
            prop.put("remoteProxy", "0"); // not used
        }

        // peer information
        String thisHash = "";
        // NOTE(review): mySeed() is dereferenced here (and in the forward check at the top)
        // before the null check on the next line; if mySeed() can be null this throws a
        // NullPointerException and the "not assigned" branch is never reached
        final String thisName = sb.peers.mySeed().getName();
        if (sb.peers.mySeed() == null)  {
            thisHash = "not assigned";
            prop.put("peerAddress", "0");    // not assigned
            prop.put("peerStatistics", "0"); // unknown
        } else {
            // the stored uptime value is multiplied by 60000 (presumably minutes -> milliseconds)
            final long uptime = 60000 * sb.peers.mySeed().getLong(yacySeed.UPTIME, 0L);
            prop.put("peerStatistics", "1");
            prop.put("peerStatistics_uptime", yacyPeerActions.formatInterval(uptime));
            prop.putNum("peerStatistics_pagesperminute", sb.peers.mySeed().getPPM());
            // 60 * QPM, rounded to two decimal places
            prop.putNum("peerStatistics_queriesperhour", Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
            prop.putNum("peerStatistics_links", sb.peers.mySeed().getLinkCount());
            prop.put("peerStatistics_words", Formatter.number(sb.peers.mySeed().getWordCount()));
            prop.putNum("peerStatistics_disconnects", sb.peers.peerActions.disconnects);
            prop.put("peerStatistics_connects", Formatter.number(sb.peers.mySeed().get(yacySeed.CCOUNT, "0")));
            thisHash = sb.peers.mySeed().hash;
            if (sb.peers.mySeed().getPublicAddress() == null) {
                prop.put("peerAddress", "0"); // not assigned + instructions
                prop.put("warningGoOnline", "1");
            } else {
                prop.put("peerAddress", "1"); // Address
                prop.put("peerAddress_address", sb.peers.mySeed().getPublicAddress());
                prop.putXML("peerAddress_peername", sb.peers.mySeed().getName().toLowerCase());
            }
        }
        // map the peer type to a status code; the virgin and junior cases only apply in the
        // "freeworld" network, so other combinations leave PEERSTATUS unset
        final String peerStatus = ((sb.peers.mySeed() == null) ? yacySeed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN));
        if (yacySeed.PEERTYPE_VIRGIN.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
            prop.put(PEERSTATUS, "0");
            prop.put("urgentStatusVirgin", "1");
        } else if (yacySeed.PEERTYPE_JUNIOR.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
            prop.put(PEERSTATUS, "1");
            prop.put("warningStatusJunior", "1");
        } else if (yacySeed.PEERTYPE_SENIOR.equals(peerStatus)) {
            prop.put(PEERSTATUS, "2");
            prop.put("hintStatusSenior", "1");
        } else if (yacySeed.PEERTYPE_PRINCIPAL.equals(peerStatus)) {
            prop.put(PEERSTATUS, "3");
            prop.put("hintStatusPrincipal", "1");
            prop.put("hintStatusPrincipal_seedURL", sb.peers.mySeed().get(yacySeed.SEEDLISTURL, "?"));
        }
        prop.putHTML("peerName", thisName);
        prop.put("hash", thisHash);
       
        // seed upload status (SEEDSERVER is a constant declared outside this excerpt)
        final String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
        // NOTE(review): the right-hand side of "||" is only evaluated when the method equals
        // "none" (ignoring case), and then "".equals(seedUploadMethod) is false; the whole
        // condition is therefore equivalent to !"none".equalsIgnoreCase(seedUploadMethod)
        if (!"none".equalsIgnoreCase(seedUploadMethod) ||
            ("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 ||
            sb.getConfig("seedFilePath", "").length() > 0))) {
            // no method configured: derive one from the other settings and store it;
            // if both are set, "File" overwrites "Ftp".
            // NOTE(review): only the configuration is updated; the local seedUploadMethod stays "",
            // so none of the branches below matches and SEEDSERVER is not set in this request
            if ("".equals(seedUploadMethod)) {
                if (sb.getConfig("seedFTPPassword", "").length() > 0) {
                    sb.setConfig("seedUploadMethod","Ftp");
                }
                if (sb.getConfig("seedFilePath", "").length() > 0) {
                    sb.setConfig("seedUploadMethod","File");
                }
            }

            if ("ftp".equalsIgnoreCase(seedUploadMethod)) {
                prop.put(SEEDSERVER, "1"); // enabled
                prop.putHTML("seedServer_seedServer", sb.getConfig("seedFTPServer", ""));
            } else if ("scp".equalsIgnoreCase(seedUploadMethod)) {
                prop.put(SEEDSERVER, "1"); // enabled
                prop.putHTML("seedServer_seedServer", sb.getConfig("seedScpServer", ""));
            } else if ("file".equalsIgnoreCase(seedUploadMethod)) {
                prop.put(SEEDSERVER, "2"); // enabled
                prop.putHTML("seedServer_seedFile", sb.getConfig("seedFilePath", ""));
            }
            // time elapsed since the last seed upload
            prop.put("seedServer_lastUpload",
                    yacyPeerActions.formatInterval(System.currentTimeMillis() - sb.peers.lastSeedUpload_timeStamp));
        } else {
            prop.put(SEEDSERVER, "0"); // disabled
        }
       
        // NOTE(review): sb.peers has already been dereferenced several times above, so the
        // null check here cannot be the first line of defence
        if (sb.peers != null && sb.peers.sizeConnected() > 0){
            prop.put("otherPeers", "1");
            prop.putNum("otherPeers_num", sb.peers.sizeConnected());
        }else{
            prop.put("otherPeers", "0"); // not online
        }

        if (!sb.getConfigBool("browserPopUpTrigger", false)) {
            prop.put("popup", "0");
        } else {
            prop.put("popup", "1");
        }
       
        // tray: "2" on non-Windows systems, otherwise "0"/"1" according to the trayIcon setting
        if (!OS.isWindows) {
          prop.put("tray", "2");
        } else if (!sb.getConfigBool("trayIcon", false)) {
            prop.put("tray", "0");
        } else {
            prop.put("tray", "1");
        }

        // memory usage and system attributes
        prop.put("freeMemory", Formatter.bytesToString(MemoryControl.free()));
        prop.put("totalMemory", Formatter.bytesToString(MemoryControl.total()));
        prop.put("maxMemory", Formatter.bytesToString(MemoryControl.maxMemory));
        prop.put("processors", WorkflowProcessor.availableCPU);

        // proxy traffic
        //prop.put("trafficIn",bytesToString(httpdByteCountInputStream.getGlobalCount()));
        prop.put("trafficProxy", Formatter.bytesToString(ByteCount.getAccountCount(ByteCount.PROXY)));
        prop.put("trafficCrawler", Formatter.bytesToString(ByteCount.getAccountCount(ByteCount.CRAWLER)));

        // connection information
        // the thread registered as "10_httpd" is cast to serverCore
        final serverCore httpd = (serverCore) sb.getThread("10_httpd");
        prop.putNum("connectionsActive", httpd.getJobCount());
        prop.putNum("connectionsMax", httpd.getMaxSessionCount());
       
        // Queue information
        final int loaderJobCount = sb.crawlQueues.workerSize();
        final int loaderMaxCount = sb.getConfigInt(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10);
        // guard against division by zero; the emitted percentage is capped at 100 below
        final int loaderPercent = (loaderMaxCount == 0) ? 0 : loaderJobCount * 100 / loaderMaxCount;
        prop.putNum("loaderQueueSize", loaderJobCount);
        prop.putNum("loaderQueueMax", loaderMaxCount);       
        prop.put("loaderQueuePercent", (loaderPercent>100) ? 100 : loaderPercent);
       
        prop.putNum("localCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
        prop.put("localCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? "1" : "0");

        prop.putNum("remoteTriggeredCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
        prop.put("remoteTriggeredCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
       
        prop.putNum("stackCrawlQueueSize", sb.crawlStacker.size());

        // return rewrite properties
        prop.put("date",(new Date()).toString());
View Full Code Here

     * this is used to respond to a remote crawling request
     */

    /**
     * Servlet entry point that processes a peer's report about a crawl result.
     * The visible part validates the request, reads the request values and
     * rejects the request (by setting a "delay" property) when it is not
     * addressed to this peer, cannot be decoded, or comes from outside the
     * cluster while this peer is in robinson mode.
     *
     * @param header the request header (not read in the visible part of the method)
     * @param post   the request parameters; an empty result is returned when null
     * @param env    the server environment, cast to {@link Switchboard}; an empty result is returned when null
     * @return the property set for the response; empty when the request is missing or not authenticated
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
        // refuse to do anything without parameters, environment or a successful authentication
        if ((post == null) || (env == null) || !yacyNetwork.authentifyRequest(post, env)) {
            return prop;
        }
       
        final Log log = sb.getLog();

        //int proxyPrefetchDepth = Integer.parseInt(env.getConfig("proxyPrefetchDepth", "0"));
        //int crawlingDepth = Integer.parseInt(env.getConfig("crawlingDepth", "0"));

        // request values
        final String iam        = post.get("iam", "");      // seed hash of requester
        final String youare     = post.get("youare", "");    // seed hash of the target peer, needed for network stability
        //String process    = post.get("process", "");  // process type
        final String key        = post.get("key", "");      // transmission key
        //String receivedUrlhash    = post.get("urlhash", "");  // the url hash that has been crawled
        final String result     = post.get("result", "");   // the result; either "ok" or "fail"
        final String reason     = post.get("reason", "");   // the reason for that result
        //String words      = post.get("wordh", "");    // priority word hashes
        // the "lurlEntry" parameter is decoded with the transmission key; a null result is treated as an error below
        final String propStr    = crypt.simpleDecode(post.get("lurlEntry", ""), key);
       
        /*
         the result can have one of the following values:
         negative cases, no retry
           unavailable - the resource is not available (a broken link); not found or interrupted
           exception   - an exception occurred
           robot       - a robot-file has denied to crawl that resource

         negative cases, retry possible
           rejected    - the peer has rejected to load the resource
           dequeue     - peer too busy - rejected to crawl
        
         positive cases with crawling
           fill        - the resource was loaded and processed
           update      - the resource was already in database but re-loaded and processed
  
         positive cases without crawling  
           known       - the resource is already in database, believed to be fresh and not reloaded
           stale       - the resource was reloaded but not processed because source had no changes

        */
       
        // look up the requesting peer; it may be unknown (null), which is reflected in the display name
        final yacySeed otherPeer = sb.peers.get(iam);
        final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));       

        // the request must be addressed to this peer: our own seed hash has to match "youare"
        if ((sb.peers.mySeed() == null) || (!(sb.peers.mySeed().hash.equals(youare)))) {
            // no yacy connection / unknown peers
            prop.put("delay", "3600"); // NOTE(review): unit of "delay" is not visible here, presumably seconds - confirm
            return prop;
        }
       
        if (propStr == null) {
            // error with url / wrong key
            prop.put("delay", "3600");
            return prop;
        }
       
        if ((sb.isRobinsonMode()) && (!sb.isInMyCluster(otherPeer))) {
          // we reject urls that are from outside our cluster
          prop.put("delay", "9999");
          return prop;
      }
View Full Code Here

public final class transferRWI {

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException {
       
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
        final String contentType = header.getContentType();
        if ((post == null) || (env == null)) {
            logWarning(contentType, "post or env is null!");
            return prop;
        }
        if (!yacyNetwork.authentifyRequest(post, env)) {
            logWarning(contentType, "not authentified");
            return prop;
        }
        if (!post.containsKey("wordc")) {
            logWarning(contentType, "missing wordc");
            return prop;
        }
        if (!post.containsKey("entryc")) {
            logWarning(contentType, "missing entryc");
            return prop;
        }
       
        // request values
        final String iam      = post.get("iam", "");                      // seed hash of requester
        final String youare   = post.get("youare", "");                   // seed hash of the target peer, needed for network stability
//      final String key      = (String) post.get("key", "");             // transmission key
        final int wordc       = post.getInt("wordc", 0);                  // number of different words
        final int entryc      = post.getInt("entryc", 0);                 // number of entries in indexes
        byte[] indexes        = post.get("indexes", "").getBytes();       // the indexes, as list of word entries
        boolean granted       = sb.getConfigBool("allowReceiveIndex", false);
        final boolean blockBlacklist = sb.getConfigBool("indexReceiveBlockBlacklist", false);
        final long cachelimit = sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000);
        final yacySeed otherPeer = sb.peers.get(iam);
        final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));               
       
        // response values
        int pause = 0;
        String result = "ok";
        final StringBuilder unknownURLs = new StringBuilder(6000);
       
        if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
          sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
            result = "wrong_target";
            pause = 0;
        } else if (otherPeer == null) {
            // we dont want to receive indexes
            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. Other Peer is unknown");
            result = "not_granted";
            pause = 60000;
        } else if (!granted) {
            // we dont want to receive indexes
            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Granted is false");
            result = "not_granted";
            pause = 60000;
        } else if (sb.isRobinsonMode()) {
            // we dont want to receive indexes
            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted. This peer is in robinson mode");
            result = "not_granted";
            pause = 60000;
        } else if (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() > cachelimit) {
            // we are too busy to receive indexes
            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() + ").");
            granted = false; // don't accept more words if there are too many words to flush
            result = "busy";
            pause = 60000;
        } else if (otherPeer.getVersion() < 0.75005845 && otherPeer.getVersion() >= 0.75005821) {
          // version that sends [B@... hashes
            sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Bad version.");
            result = "not_granted";
            pause = 1800000;
        } else {
            // we want and can receive indexes
            // log value status (currently added to find outOfMemory error
            if (sb.getLog().isFine()) sb.getLog().logFine("Processing " + indexes.length + " bytes / " + wordc + " words / " + entryc + " entries from " + otherPeerName);
            final long startProcess = System.currentTimeMillis();

            // decode request
            System.out.println("STRINGS " + UTF8.String(indexes));
            Iterator<String> it = FileUtils.strings(indexes);

            // free memory
            indexes = null;

            // now parse the Strings in the value-vector and write index entries
            String estring;
            int p;
            String wordHash;
            byte[] urlHash;
            WordReferenceRow iEntry;
            final HandleSet unknownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final HandleSet knownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final ArrayList<String> wordhashes = new ArrayList<String>();
            int received = 0;
            int blocked = 0;
            int receivedURL = 0;
            while (it.hasNext()) {
                serverCore.checkInterruption();
                estring = it.next();
               
                // check if RWI entry is well-formed
                p = estring.indexOf('{');
                if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) {
                    blocked++;
                    continue;
                }
                wordHash = estring.substring(0, p);
                wordhashes.add(wordHash);
                iEntry = new WordReferenceRow(estring.substring(p));
                urlHash = iEntry.urlhash();
               
                // block blacklisted entries
                if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
                    if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
                    blocked++;
                    continue;
                }
               
                // check if the entry is in our network domain
                final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomainHash(urlHash);
                if (urlRejectReason != null) {
                    yacyCore.log.logWarning("transferRWI: blocked URL hash '" + ASCII.String(urlHash) + "' (" + urlRejectReason + ") from peer " + otherPeerName + "; peer is suspected to be a spam-peer (or something is wrong)");
                    //if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked URL hash '" + urlHash + "' (" + urlRejectReason + ") from peer " + otherPeerName);
                    blocked++;
                    continue;
                }
               
                // learn entry
                try {
                    sb.indexSegments.termIndex(Segments.Process.DHTIN).add(wordHash.getBytes(), iEntry);
                } catch (Exception e) {
                    Log.logException(e);
                }
                serverCore.checkInterruption();

                // check if we need to ask for the corresponding URL
                if (!(knownURL.has(urlHash) || unknownURL.has(urlHash)))  try {
                    if (sb.indexSegments.urlMetadata(Segments.Process.DHTIN).exists(urlHash)) {
                        knownURL.put(urlHash);
                    } else {
                        unknownURL.put(urlHash);
                    }
                    receivedURL++;
                } catch (final Exception ex) {
                    sb.getLog().logWarning(
                                "transferRWI: DB-Error while trying to determine if URL with hash '" +
                                ASCII.String(urlHash) + "' is known.", ex);
                }
                received++;
            }
            sb.peers.mySeed().incRI(received);

            // finally compose the unknownURL hash list
            Iterator<byte[]> bit = unknownURL.iterator()
            unknownURLs.ensureCapacity(unknownURL.size() * 25);
            while (bit.hasNext()) {
                unknownURLs.append(",").append(UTF8.String(bit.next()));
            }
            if (unknownURLs.length() > 0) { unknownURLs.delete(0, 1); }
            if (wordhashes.isEmpty() || received == 0) {
                sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs");
            } else {
                String firstHash = wordhashes.get(0);
                String lastHash = wordhashes.get(wordhashes.size() - 1);
                final long avdist = (FlatWordPartitionScheme.std.dhtDistance(firstHash.getBytes(), null, sb.peers.mySeed()) + FlatWordPartitionScheme.std.dhtDistance(lastHash.getBytes(), null, sb.peers.mySeed())) / 2;
                sb.getLog().logInfo("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName);
                yacyChannel.channels(yacyChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs, " + wordc + " Words [" + firstHash + " .. " + lastHash + "]/" + avdist + ", blocked " + blocked + ", requesting " + unknownURL.size() + "/" + receivedURL + " URLs from " + otherPeerName, "", otherPeer.hash));
            }
            result = "ok";
           
            pause = (int) (sb.indexSegments.termIndex(Segments.Process.DHTIN).getBufferSize() * 20000 / sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 100000)); // estimation of necessary pause time
        }

        prop.put("unknownURL", unknownURLs.toString());
        prop.put("result", result);
        prop.put("pause", pause);
View Full Code Here

TOP

Related Classes of de.anomic.search.Switchboard$indexingQueueEntry

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.