Package net.yacy.kelondro.logging

Examples of net.yacy.kelondro.logging.Log

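All of the excerpts below use the same small API: construct a Log with an upper-case module name, guard expensive messages with the is*() level checks, and emit through logConfig/logInfo/logWarning/logSevere. A minimal usage sketch, assembled only from calls that appear in the excerpts on this page (the class name, message texts, and the buildReport() helper are stand-ins):

import net.yacy.kelondro.logging.Log;

public class LogUsageSketch {

    // one logger per module, identified by an upper-case name (stand-in name)
    private final Log log = new Log("EXAMPLE");

    public void run() {
        log.logConfig("starting example module");     // configuration-level message
        log.logInfo("processed 500 URLs so far");     // informational message
        log.logWarning("received a malformed entry"); // recoverable problem

        // guard costly message construction behind the level check,
        // as the crawlReceipt excerpt below does with isWarning()/isInfo()
        if (log.isInfo()) log.logInfo("report: " + buildReport());

        try {
            throw new java.io.IOException("disk full");
        } catch (final java.io.IOException e) {
            // message plus throwable, as in deadlinkCleaner below
            log.logSevere("IOException while storing an entry", e);
        }
    }

    private String buildReport() {
        return "…"; // stand-in for an expensive computation
    }
}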

        final serverObjects prop = new serverObjects();
        if ((post == null) || (env == null) || !yacyNetwork.authentifyRequest(post, env)) {
            return prop;
        }
       
        final Log log = sb.getLog();

        //int proxyPrefetchDepth = Integer.parseInt(env.getConfig("proxyPrefetchDepth", "0"));
        //int crawlingDepth = Integer.parseInt(env.getConfig("crawlingDepth", "0"));

        // request values
        final String iam        = post.get("iam", "");      // seed hash of requester
        final String youare     = post.get("youare", "");    // seed hash of the target peer, needed for network stability
        //String process    = post.get("process", "");  // process type
        final String key        = post.get("key", "");      // transmission key
        //String receivedUrlhash    = post.get("urlhash", "");  // the url hash that has been crawled
        final String result     = post.get("result", "");   // the result; either "ok" or "fail"
        final String reason     = post.get("reason", "");   // the reason for that result
        //String words      = post.get("wordh", "");    // priority word hashes
        final String propStr    = crypt.simpleDecode(post.get("lurlEntry", ""), key);
       
        /*
         the result can have one of the following values:
         negative cases, no retry
           unavailable - the resource is not available (a broken link); not found or interrupted
           exception   - an exception occurred
           robot       - a robots.txt file denied crawling of the resource

         negative cases, retry possible
           rejected    - the peer refused to load the resource
           dequeue     - the peer was too busy and refused to crawl

         positive cases with crawling
           fill        - the resource was loaded and processed
           update      - the resource was already in the database but was re-loaded and processed

         positive cases without crawling
           known       - the resource is already in the database, believed to be fresh, and was not reloaded
           stale       - the resource was reloaded but not processed because the source had not changed

        */
       
        final yacySeed otherPeer = sb.peers.get(iam);
        final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));       

        if ((sb.peers.mySeed() == null) || (!(sb.peers.mySeed().hash.equals(youare)))) {
            // no yacy connection / unknown peers
            prop.put("delay", "3600");
            return prop;
        }
       
        if (propStr == null) {
            // error with url / wrong key
            prop.put("delay", "3600");
            return prop;
        }
       
        if ((sb.isRobinsonMode()) && (!sb.isInMyCluster(otherPeer))) {
            // we reject URLs that come from outside our cluster
            prop.put("delay", "9999");
            return prop;
        }
       
        // generating a new loaded URL entry
        final URIMetadataRow entry = URIMetadataRow.importEntry(propStr);
        if (entry == null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (entry null) from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }
       
        final URIMetadataRow.Components metadata = entry.metadata();
        if (metadata.url() == null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }
       
        // check if the entry is in our network domain
        final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
        if (urlRejectReason != null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "9999");
            return prop;
        }
       
        if ("fill".equals(result)) try {
            // put new entry into database
            sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry);
            ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
            sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
            if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + metadata.url().toNormalform(false, true));

            // ready for more
            prop.put("delay", "10");
            return prop;
        } catch (final IOException e) {
            // … (rest of the method elided in this excerpt)

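Note how the servlet drives remote flow control through the returned "delay" value: 10 seconds after a receipt was stored ("ready for more"), 3600 after recoverable errors such as a missing or undecodable entry, and 9999 when the peer or URL is rejected outright. Every warning is also guarded by log.isWarning(), so the fairly long message strings are only concatenated when the WARNING level is actually enabled.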

            // … (earlier constructor parameters elided in this excerpt)
            final int timeout
            ) {
        this.transmissionCloud = new ConcurrentHashMap<ByteArray, Transmission.Chunk>();
        this.segment = segment;
        this.seeds = seeds;
        this.log = new Log("INDEX-TRANSFER-DISPATCHER");
        this.transmission = new Transmission(
            this.log,
            segment,
            seeds,
            gzipBody,
            // … (remaining constructor arguments elided in this excerpt)

        final List<String> processArgs = new ArrayList<String>();
        processArgs.add("java");
        processArgs.add("-Xms4m");
        processArgs.add("-Xmx" + Integer.toString(mem) + "m");
        try {
            line = ConsoleInterface.getLastLineConsoleOutput(processArgs, new Log("MEMCHECK"));
        } catch (final IOException e) {
            return false;
        }
        return line.indexOf("space for object heap") == -1;
    }

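The excerpt above checks whether the requested heap size is actually available: it spawns a child JVM with the desired -Xmx setting, captures its console output through a Log("MEMCHECK") instance, and treats the HotSpot fragment "space for object heap" (from the error "Could not reserve enough space for object heap") in the last output line as failure.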
    static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
   
   
    public DocumentIndex(final File segmentPath, CallbackListener callback, int cachesize) throws IOException {
        super(new Log("DocumentIndex"), segmentPath, cachesize, targetFileSize * 4 - 1, false, false);
        int cores = Runtime.getRuntime().availableProcessors() + 1;
        this.callback = callback;
        this.queue = new LinkedBlockingQueue<DigestURI>(cores * 300);
        this.worker = new Worker[cores];
        for (int i = 0; i < cores; i++) {
            // … (loop body elided in this excerpt)

    /**
     * Damaged URL entries are collected in a HashSet and removed at the end of the method.
     */
    public void deadlinkCleaner() {
        final Log log = new Log("URLDBCLEANUP");
        final HashSet<String> damagedURLS = new HashSet<String>();
        try {
            final Iterator<URIMetadataRow> eiter = entries(true, null);
            int iteratorCount = 0;
            while (eiter.hasNext()) try {
                eiter.next();
                iteratorCount++;
            } catch (final RuntimeException e) {
                final String m = e.getMessage();
                if (m != null) {
                    // the last 12 characters of the message carry the damaged URL hash
                    damagedURLS.add(m.substring(m.length() - 12));
                } else {
                    log.logSevere("RuntimeException:", e);
                }
            }
            log.logInfo("URLs vorher: " + urlIndexFile.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());

            final HTTPClient client = new HTTPClient();
            final Iterator<String> eiter2 = damagedURLS.iterator();
            byte[] urlHashBytes;
            while (eiter2.hasNext()) {
                urlHashBytes = ASCII.getBytes(eiter2.next());

                // trying to fix the invalid URL
                String oldUrlStr = null;
                try {
                    // getting the url data as byte array
                    final Row.Entry entry = urlIndexFile.get(urlHashBytes);

                    // getting the wrong url string
                    oldUrlStr = entry.getColString(1).trim();

                    int pos = -1;
                    if ((pos = oldUrlStr.indexOf("://")) != -1) {
                        // trying to correct the url
                        final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
                        final DigestURI newUrl = new DigestURI(newUrlStr);

                        if (client.HEADResponse(newUrl.toString()) != null
                            && client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
                            entry.setCol(1, UTF8.getBytes(newUrl.toString()));
                            urlIndexFile.put(entry);
                            if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
                        } else {
                            remove(urlHashBytes);
                            if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + (client.getHttpResponse() == null ? "null" : client.getHttpResponse().getStatusLine()));
                        }
                    }
                } catch (final Exception e) {
                    remove(urlHashBytes);
                    if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tExecption: " + e.getMessage());
                }
            }

            log.logInfo("URLs nachher: " + size() + " kaputte URLs: " + damagedURLS.size());
        } catch (final IOException e) {
            log.logSevere("IOException", e);
        }
    }

        this.denyHost = (blockAttack) ? new ConcurrentHashMap<String, String>() : null;
        this.handlerPrototype = handlerPrototype;
        this.switchboard = switchboard;
       
        // initialize logger
        this.log = new Log("SERVER");

        // init the ssl socket factory
        this.sslSocketFactory = initSSLFactory();

        // init session parameter
        // … (rest of the constructor elided in this excerpt)

        super(dataPath, appPath, initPath, configPath);
        MemoryTracker.startSystemProfiling();
        sb = this;

        // set loglevel and log
        setLog(new Log("PLASMA"));

        // set default peer name
        yacySeed.ANON_PREFIX = getConfig("peernameprefix", "_anon");

        // UPnP port mapping
        // … (rest of the constructor elided in this excerpt)

        thread.start();
        return thread;
    }
   
    public static WorkflowThread oneTimeJob(final Runnable thread, final long startupDelay) {
        final Log log = new Log(thread.getClass().getName() + "/run");
        log.setLevel(java.util.logging.Level.INFO);
        return oneTimeJob(thread, "run", log, startupDelay);
    }

    public  NoticedURL noticeURL;
    public  ZURL errorURL, delegatedURL;

    public CrawlQueues(final Switchboard sb, final File queuePath) {
        this.sb = sb;
        this.log = new Log("CRAWLER");
        this.workers = new ConcurrentHashMap<Integer, Loader>();
        this.remoteCrawlProviderHashes = new ArrayList<String>();

        // start crawling management
        this.log.logConfig("Starting Crawling Management");
        // … (rest of the constructor elided in this excerpt)

    public static void minimizeUrlDB(final File dataHome, final File appHome, final String networkName) {
        // run with "java -classpath classes yacy -minimizeUrlDB"
        try {Log.configureLogging(dataHome, appHome, new File(dataHome, "DATA/LOG/yacy.logging"));} catch (final Exception e) {}
        final File indexPrimaryRoot = new File(dataHome, "DATA/INDEX");
        final File indexRoot2 = new File(dataHome, "DATA/INDEX2");
        final Log log = new Log("URL-CLEANUP");
        try {
            log.logInfo("STARTING URL CLEANUP");

            // DB containing all currently loaded URLs
            final MetadataRepository currentUrlDB = new MetadataRepository(new File(new File(indexPrimaryRoot, networkName), "TEXT"), "text.urlmd", false, false);

            // DB used to hold all needed URLs
            final MetadataRepository minimizedUrlDB = new MetadataRepository(new File(new File(indexRoot2, networkName), "TEXT"), "text.urlmd", false, false);

            final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total());
            if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");

            final Segment wordIndex = new Segment(
                    log,
                    new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
                    10000,
                    (long) Integer.MAX_VALUE, false, false);
            final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().references("AAAAAAAAAAAA".getBytes(), false, false);

            long urlCounter = 0, wordCounter = 0;
            long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0;
            String wordChunkStartHash = "AAAAAAAAAAAA", wordChunkEndHash;

            while (indexContainerIterator.hasNext()) {
                ReferenceContainer<WordReference> wordIdxContainer = null;
                try {
                    wordCounter++;
                    wordIdxContainer = indexContainerIterator.next();

                    // the combined container will fit, read the container
                    final Iterator<WordReference> wordIdxEntries = wordIdxContainer.entries();
                    Reference iEntry;
                    while (wordIdxEntries.hasNext()) {
                        iEntry = wordIdxEntries.next();
                        final byte[] urlHash = iEntry.urlhash();
                        if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
                            final URIMetadataRow urlEntry = currentUrlDB.load(urlHash);
                            urlCounter++;
                            minimizedUrlDB.store(urlEntry);
                            if (urlCounter % 500 == 0) {
                                log.logInfo(urlCounter + " URLs found so far.");
                            }
                        } catch (final IOException e) {}
                    }

                    if (wordCounter % 500 == 0) {
                        wordChunkEndHash = ASCII.String(wordIdxContainer.getTermHash());
                        wordChunkEnd = System.currentTimeMillis();
                        final long duration = wordChunkEnd - wordChunkStart;
                        log.logInfo(wordCounter + " words scanned " +
                                "[" + wordChunkStartHash + " .. " + wordChunkEndHash + "]\n" +
                                "Duration: "+ 500*1000/duration + " words/s" +
                                " | Free memory: " + MemoryControl.free() +
                                " | Total memory: " + MemoryControl.total());
                        wordChunkStart = wordChunkEnd;
                        wordChunkStartHash = wordChunkEndHash;
                    }

                    // all elements have been read; release the container
                    wordIdxContainer = null;

                } catch (final Exception e) {
                    log.logSevere("Exception", e);
                } finally {
                    wordIdxContainer = null; // release the reference in every case
                }
            }
            log.logInfo("current LURL DB contains " + currentUrlDB.size() + " entries.");
            log.logInfo("mimimized LURL DB contains " + minimizedUrlDB.size() + " entries.");

            currentUrlDB.close();
            minimizedUrlDB.close();
            wordIndex.close();

            // TODO: rename the minimized UrlDB to the name of the previous UrlDB

            log.logInfo("FINISHED URL CLEANUP, WAIT FOR DUMP");
            log.logInfo("You can now backup your old URL DB and rename minimized/urlHash.db to urlHash.db");

            log.logInfo("TERMINATED URL CLEANUP");
        } catch (final Exception e) {
            log.logSevere("Exception: " + e.getMessage(), e);
        } catch (final Error e) {
            log.logSevere("Error: " + e.getMessage(), e);
        }
    }

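The minimizeUrlDB excerpt also shows the static side of the class: Log.configureLogging loads a java.util.logging configuration file before the first logger is used. A hedged bootstrap sketch following that call (the directories are placeholders; swallowing the configuration exception mirrors the excerpt above):

import java.io.File;
import net.yacy.kelondro.logging.Log;

public class LoggingBootstrap {

    public static void main(final String[] args) {
        // placeholder directories; real callers pass YaCy's data and application homes
        final File dataHome = new File(".");
        final File appHome  = new File(".");

        // load the logging configuration file, as minimizeUrlDB does
        try {
            Log.configureLogging(dataHome, appHome, new File(dataHome, "DATA/LOG/yacy.logging"));
        } catch (final Exception e) {
            // fall back to the java.util.logging defaults if the file is missing
        }

        final Log log = new Log("BOOTSTRAP");
        log.setLevel(java.util.logging.Level.INFO); // per-logger level, as in the oneTimeJob excerpt
        log.logInfo("logging configured");
    }
}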