Package net.yacy.kelondro.logging

Examples of net.yacy.kelondro.logging.Log
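
The snippets below all exercise the same small surface of the Log class: a constructor taking an appender name, leveled calls (logConfig, logInfo, logWarning, logSevere, the latter two optionally with a Throwable), level guards (isInfo, isWarning), setLevel, and the static Log.configureLogging setup. As orientation, here is a minimal sketch that strings those calls together; the paths and the appender name are hypothetical, and only methods visible in the snippets below are used.

import java.io.File;
import java.util.logging.Level;

import net.yacy.kelondro.logging.Log;

public class LogUsageSketch {
    public static void main(final String[] args) {
        final File dataHome = new File("DATA_HOME"); // hypothetical path
        final File appHome = new File("APP_HOME");   // hypothetical path
        // one-time global setup; the yacy.logging file configures the handlers
        try {
            Log.configureLogging(dataHome, appHome, new File(dataHome, "DATA/LOG/yacy.logging"));
        } catch (final Exception e) { /* fall back to the default configuration */ }

        final Log log = new Log("EXAMPLE"); // each component names its own appender
        log.setLevel(Level.INFO);

        log.logConfig("component configured");
        log.logInfo("progress message");
        if (log.isWarning()) log.logWarning("guard skips message building when the level is disabled");
        try {
            throw new IllegalStateException("demo");
        } catch (final Exception e) {
            log.logSevere("Exception: " + e.getMessage(), e); // message plus stack trace
        }
    }
}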


        currentUrlDB.close();
    }

    private static void RWIHashList(final File dataHome, final File appHome, final String targetName, final String resource, final String format) {
        Segment WordIndex = null;
        final Log log = new Log("HASHLIST");
        final File indexPrimaryRoot = new File(dataHome, "DATA/INDEX");
        final String wordChunkStartHash = "AAAAAAAAAAAA";
        try {Log.configureLogging(dataHome, appHome, new File(dataHome, "DATA/LOG/yacy.logging"));} catch (final Exception e) {}
        log.logInfo("STARTING CREATION OF RWI-HASHLIST");
        final File root = dataHome;
        try {
            Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
            if (resource.equals("all")) {
                WordIndex = new Segment(
                        log,
                        new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
                        10000,
                        (long) Integer.MAX_VALUE, false, false);
                indexContainerIterator = WordIndex.termIndex().references(wordChunkStartHash.getBytes(), false, false);
            }
            int counter = 0;
            ReferenceContainer<WordReference> container = null;
            if (format.equals("zip")) {
                log.logInfo("Writing Hashlist to ZIP-file: " + targetName + ".zip");
                final ZipEntry zipEntry = new ZipEntry(targetName + ".txt");
                final File file = new File(root, targetName + ".zip");
                final ZipOutputStream bos = new ZipOutputStream(new FileOutputStream(file));
                bos.putNextEntry(zipEntry);
                if (indexContainerIterator != null) {
                    while (indexContainerIterator.hasNext()) {
                        counter++;
                        container = indexContainerIterator.next();
                        bos.write(container.getTermHash());
                        bos.write(serverCore.CRLF);
                        if (counter % 500 == 0) {
                            log.logInfo("Found " + counter + " hashes so far. Last found hash: " + ASCII.String(container.getTermHash()));
                        }
                    }
                }
                bos.flush();
                bos.close();
            } else {
                log.logInfo("Writing Hashlist to TXT-file: " + targetName + ".txt");
                final File file = new File(root, targetName + ".txt");
                final BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
                if (indexContainerIterator != null) {
                    while (indexContainerIterator.hasNext()) {
                        counter++;
                        container = indexContainerIterator.next();
                        bos.write(container.getTermHash());
                        bos.write(serverCore.CRLF);
                        if (counter % 500 == 0) {
                            log.logInfo("Found " + counter + " hashes so far. Last found hash: " + ASCII.String(container.getTermHash()));
                        }
                    }
                }
                bos.flush();
                bos.close();
            }
            log.logInfo("Total number of hashes: " + counter + ". Last found hash: " + (container == null ? "null" : ASCII.String(container.getTermHash())));
        } catch (final IOException e) {
            log.logSevere("IOException", e);
        }
        if (WordIndex != null) {
            WordIndex.close();
            WordIndex = null;
        }
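
A fragility worth noting in this exporter: bos.flush() and bos.close() sit on the success path only, so an IOException inside the write loop leaks the file handle. Below is a possible restructuring with try-with-resources, sketched against plain java.util.zip with an Iterator<byte[]> standing in for the ReferenceContainer iterator; it is not YaCy code, and all names are hypothetical.

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class HashListWriter {

    private static final byte[] CRLF = {'\r', '\n'};

    // Writes one hash per line. try-with-resources closes the stream even
    // when a write throws; close() also flushes, for both stream types.
    public static void write(final File root, final String targetName,
                             final boolean zip, final Iterator<byte[]> hashes)
            throws IOException {
        final File file = new File(root, targetName + (zip ? ".zip" : ".txt"));
        try (final OutputStream bos = zip
                ? new ZipOutputStream(new FileOutputStream(file))
                : new BufferedOutputStream(new FileOutputStream(file))) {
            if (bos instanceof ZipOutputStream) {
                ((ZipOutputStream) bos).putNextEntry(new ZipEntry(targetName + ".txt"));
            }
            while (hashes.hasNext()) {
                bos.write(hashes.next());
                bos.write(CRLF);
            }
        }
    }
}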


    public LoaderDispatcher(final Switchboard sb) {
        this.sb = sb;
        this.supportedProtocols = new HashSet<String>(Arrays.asList(new String[]{"http","https","ftp","smb","file"}));

        // initiate loader objects
        this.log = new Log("LOADER");
        this.httpLoader = new HTTPLoader(sb, this.log);
        this.ftpLoader = new FTPLoader(sb, this.log);
        this.smbLoader = new SMBLoader(sb, this.log);
        this.fileLoader = new FileLoader(sb, this.log);
        this.loaderSteering = new HashMap<String, Semaphore>();
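
The wiring pattern here: the dispatcher creates one Log named "LOADER" and hands the same instance to every protocol loader, so HTTP, FTP, SMB and file loads all report under a single appender. A stripped-down sketch of that idea, with a hypothetical LoaderStub standing in for the real loader classes:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import net.yacy.kelondro.logging.Log;

public class DispatcherSketch {

    // hypothetical stand-in for HTTPLoader, FTPLoader, SMBLoader, FileLoader
    static final class LoaderStub {
        private final Log log;
        LoaderStub(final Log log) { this.log = log; }
        void load(final String url) { this.log.logInfo("loading " + url); }
    }

    private final Set<String> supportedProtocols =
            new HashSet<String>(Arrays.asList("http", "https", "ftp", "smb", "file"));
    private final Log log = new Log("LOADER"); // one appender shared by all loaders
    private final LoaderStub httpLoader = new LoaderStub(this.log);
    private final LoaderStub ftpLoader = new LoaderStub(this.log);

    public void demo(final String url) {
        if (this.supportedProtocols.contains("http")) this.httpLoader.load(url);
    }
}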

        final serverObjects prop = new serverObjects();
        if ((post == null) || (env == null) || !Protocol.authentifyRequest(post, env)) {
            return prop;
        }

        final Log log = sb.getLog();

        //int proxyPrefetchDepth = Integer.parseInt(env.getConfig("proxyPrefetchDepth", "0"));
        //int crawlingDepth = Integer.parseInt(env.getConfig("crawlingDepth", "0"));

        // request values
        final String iam        = post.get("iam", "");      // seed hash of requester
        final String youare     = post.get("youare", "");    // seed hash of the target peer, needed for network stability
        //String process    = post.get("process", "");  // process type
        final String key        = post.get("key", "");      // transmission key
        //String receivedUrlhash    = post.get("urlhash", "");  // the url hash that has been crawled
        final String result     = post.get("result", "");   // the result; either "ok" or "fail"
        final String reason     = post.get("reason", "");   // the reason for that result
        //String words      = post.get("wordh", "");    // priority word hashes
        final String propStr    = crypt.simpleDecode(post.get("lurlEntry", ""), key);

        /*
         the result can have one of the following values:
         negative cases, no retry
           unavailable - the resource is not available (a broken link); not found or interrupted
           exception   - an exception occurred
           robot       - a robots.txt file denied crawling of that resource

         negative cases, retry possible
           rejected    - the peer refused to load the resource
           dequeue     - the peer was too busy and rejected the crawl

         positive cases with crawling
           fill        - the resource was loaded and processed
           update      - the resource was already in the database but was re-loaded and processed

         positive cases without crawling
           known       - the resource is already in the database, believed to be fresh, and was not reloaded
           stale       - the resource was reloaded but not processed because the source had no changes
        */

        final Seed otherPeer = sb.peers.get(iam);
        final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));

        if ((sb.peers.mySeed() == null) || (!(sb.peers.mySeed().hash.equals(youare)))) {
            // no yacy connection / unknown peers
            prop.put("delay", "3600");
            return prop;
        }

        if (propStr == null) {
            // error with url / wrong key
            prop.put("delay", "3600");
            return prop;
        }

        if (sb.isRobinsonMode() && !sb.isInMyCluster(otherPeer)) {
            // we reject URLs that come from outside our cluster
            prop.put("delay", "9999");
            return prop;
        }

        // generating a new loaded URL entry
        final URIMetadataRow entry = URIMetadataRow.importEntry(propStr);
        if (entry == null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (entry null) from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }

        final URIMetadataRow.Components metadata = entry.metadata();
        if (metadata.url() == null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }

        // check if the entry is in our network domain
        final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(metadata.url());
        if (urlRejectReason != null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "9999");
            return prop;
        }

        if ("fill".equals(result)) try {
            // put new entry into database
            sb.indexSegments.urlMetadata(Segments.Process.RECEIPTS).store(entry);
            ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
            sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
            if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + metadata.url().toNormalform(false, true));

            // ready for more
            prop.put("delay", "10");
            return prop;
        } catch (final IOException e) {
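
Notice that every log call in this handler sits behind a level guard (isWarning(), isInfo()). The messages concatenate hashes, peer names and property dumps, and the guard reduces all of that work to a single boolean check when the level is disabled. The pattern in isolation, assuming only the accessors used above:

import net.yacy.kelondro.logging.Log;

public class GuardedLogging {

    // The guard makes a disabled warning level cost one boolean check;
    // the concatenation below is never evaluated in that case.
    public static void reportBadReceipt(final Log log, final String iam, final String propStr) {
        if (log.isWarning()) {
            log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT from peer " + iam
                    + "\n\tURL properties: " + propStr);
        }
    }
}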

public class termlist_p {

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {

        final Log log = new Log("TERMLIST");
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;
        Segment segment = null;
        final boolean delete = post != null && post.containsKey("delete");
        final long mincount = post == null ? 10000 : post.getLong("mincount", 10000);
        if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
            segment = sb.indexSegments.segment(post.get("segment"));
        }
        if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
        final Iterator<Rating<byte[]>> i = segment.termIndex().referenceCountIterator(null, false);
        Rating<byte[]> e;
        int c = 0, termnumber = 0;
        byte[] termhash, maxterm = null;
        long count, mem, maxcount = 0, totalmemory = 0;
        String hstring;
        final Row referenceRow = segment.termIndex().referenceRow();
        final int rowsize = referenceRow.objectsize;
        final ArrayList<byte[]> deleteterms = new ArrayList<byte[]>();
        long over1000 = 0, over10000 = 0, over100000 = 0, over1000000 = 0, over10000000 = 0, over100000000 = 0;
        while (i.hasNext()) {
            e = i.next();
            termnumber++;
            count = e.getScore();
            if (count >= 1000) over1000++;
            if (count >= 10000) over10000++;
            if (count >= 100000) over100000++;
            if (count >= 1000000) over1000000++;
            if (count >= 10000000) over10000000++;
            if (count >= 100000000) over100000000++;
            if (count > maxcount) {
                maxcount = count;
                maxterm = e.getObject();
            }
            if (count < mincount) continue;
            termhash = e.getObject();
            if (delete) deleteterms.add(termhash);
            hstring = ASCII.String(termhash);
            mem = 20 + count * rowsize;
            prop.put("terms_" + c + "_termhash", hstring);
            prop.put("terms_" + c + "_count", count);
            prop.put("terms_" + c + "_memory", mem);
            //log.logWarning("termhash: " + hstring + " | count: " + count + " | memory: " + mem);
            c++;
            totalmemory += mem;
        }
        if (delete) {
            for (final byte[] t: deleteterms) {
                try {
                    segment.termIndex().delete(t);
                } catch (final IOException e1) {
                    log.logWarning("Error deleting " + ASCII.String(t), e1);
                    e1.printStackTrace();
                }
            }
        }
        prop.put("terms", c);
        prop.put("maxterm", maxterm == null ? "" : ASCII.String(maxterm));
        prop.put("maxcount", maxcount);
        prop.put("maxcountmemory", 20 + maxcount * rowsize);
        prop.put("termnumber", termnumber);
        prop.put("totalmemory", totalmemory);
        prop.put("over1000", over1000);
        prop.put("over10000", over10000);
        prop.put("over100000", over100000);
        prop.put("over1000000", over1000000);
        prop.put("over10000000", over10000000);
        prop.put("over100000000", over100000000);

        log.logWarning("finished termlist_p -> terms: " + c);
        log.logWarning("maxterm: "+ (maxterm == null ? "" : ASCII.String(maxterm)));
        log.logWarning("maxcount:  " + maxcount);
        log.logWarning("termnumber: " + termnumber);
        log.logWarning("totalmemory: " + totalmemory);
        // return rewrite properties
        return prop;
    }
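
A detail worth copying from this handler: terms are never deleted while referenceCountIterator is running. The hashes are collected into deleteterms and removed only after the scan, so the index is not mutated under its own iterator, and per-term failures are logged via logWarning(String, Throwable) without aborting the batch. The same two-pass shape, sketched with an ordinary Map standing in for the term index (names hypothetical):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class DeferredDelete {

    // pass 1 scans read-only and records the keys; pass 2 mutates after the scan
    public static void deleteAtLeast(final Map<String, Long> counts, final long mincount) {
        final List<String> doomed = new ArrayList<String>();
        for (final Map.Entry<String, Long> e : counts.entrySet()) {
            if (e.getValue() >= mincount) doomed.add(e.getKey());
        }
        for (final String key : doomed) {
            counts.remove(key); // safe: the entrySet iterator has finished
        }
    }
}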


        thread.start();
        return thread;
    }

    public static WorkflowThread oneTimeJob(final Runnable thread, final long startupDelay) {
        final Log log = new Log(thread.getClass().getName() + "/run");
        log.setLevel(java.util.logging.Level.INFO);
        return oneTimeJob(thread, "run", log, startupDelay);
    }
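
oneTimeJob derives the appender name from the job's class, so every one-off worker logs under its own label without manual bookkeeping. The naming convention in isolation, a sketch assuming only the constructor and setLevel call shown above:

import java.util.logging.Level;

import net.yacy.kelondro.logging.Log;

public class JobLogging {

    // e.g. a job of class de.anomic.crawler.CrawlStacker logs under
    // "de.anomic.crawler.CrawlStacker/run"
    public static Log logFor(final Runnable job) {
        final Log log = new Log(job.getClass().getName() + "/run");
        log.setLevel(Level.INFO);
        return log;
    }
}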

    public static void minimizeUrlDB(final File dataHome, final File appHome, final String networkName) {
        // run with "java -classpath classes yacy -minimizeUrlDB"
        try {Log.configureLogging(dataHome, appHome, new File(dataHome, "DATA/LOG/yacy.logging"));} catch (final Exception e) {}
        final File indexPrimaryRoot = new File(dataHome, "DATA/INDEX");
        final File indexRoot2 = new File(dataHome, "DATA/INDEX2");
        final Log log = new Log("URL-CLEANUP");
        try {
            log.logInfo("STARTING URL CLEANUP");

            // db containing all currently loaded URLs
            final MetadataRepository currentUrlDB = new MetadataRepository(new File(new File(indexPrimaryRoot, networkName), "TEXT"), "text.urlmd", false, false);

            // db used to hold all needed URLs
            final MetadataRepository minimizedUrlDB = new MetadataRepository(new File(new File(indexRoot2, networkName), "TEXT"), "text.urlmd", false, false);

            final int cacheMem = (int)(MemoryControl.maxMemory() - MemoryControl.total());
            if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");

            final Segment wordIndex = new Segment(
                    log,
                    new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
                    10000,
                    (long) Integer.MAX_VALUE, false, false);
            final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().referenceContainerIterator("AAAAAAAAAAAA".getBytes(), false, false);

            long urlCounter = 0, wordCounter = 0;
            long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0;
            String wordChunkStartHash = "AAAAAAAAAAAA", wordChunkEndHash;

            while (indexContainerIterator.hasNext()) {
                ReferenceContainer<WordReference> wordIdxContainer = null;
                try {
                    wordCounter++;
                    wordIdxContainer = indexContainerIterator.next();

                    // the combined container will fit, read the container
                    final Iterator<WordReference> wordIdxEntries = wordIdxContainer.entries();
                    Reference iEntry;
                    while (wordIdxEntries.hasNext()) {
                        iEntry = wordIdxEntries.next();
                        final byte[] urlHash = iEntry.urlhash();
                        if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
                            final URIMetadataRow urlEntry = currentUrlDB.load(urlHash);
                            urlCounter++;
                            minimizedUrlDB.store(urlEntry);
                            if (urlCounter % 500 == 0) {
                                log.logInfo(urlCounter + " URLs found so far.");
                            }
                        } catch (final IOException e) {}
                    }

                    if (wordCounter % 500 == 0) {
                        wordChunkEndHash = ASCII.String(wordIdxContainer.getTermHash());
                        wordChunkEnd = System.currentTimeMillis();
                        final long duration = wordChunkEnd - wordChunkStart;
                        log.logInfo(wordCounter + " words scanned " +
                                "[" + wordChunkStartHash + " .. " + wordChunkEndHash + "]\n" +
                                "Rate: " + (500 * 1000 / duration) + " words/s" +
                                " | Free memory: " + MemoryControl.free() +
                                " | Total memory: " + MemoryControl.total());
                        wordChunkStart = wordChunkEnd;
                        wordChunkStartHash = wordChunkEndHash;
                    }

                    // all entries have been read; drop the reference so the container can be collected
                    wordIdxContainer = null;

                } catch (final Exception e) {
                    log.logSevere("Exception", e);
                } finally {
                    wordIdxContainer = null;
                }
            }
            log.logInfo("current LURL DB contains " + currentUrlDB.size() + " entries.");
            log.logInfo("minimized LURL DB contains " + minimizedUrlDB.size() + " entries.");

            currentUrlDB.close();
            minimizedUrlDB.close();
            wordIndex.close();

            // TODO: rename the minimized UrlDB to the name of the previous UrlDB

            log.logInfo("FINISHED URL CLEANUP, WAIT FOR DUMP");
            log.logInfo("You can now backup your old URL DB and rename minimized/urlHash.db to urlHash.db");

            log.logInfo("TERMINATED URL CLEANUP");
        } catch (final Exception e) {
            log.logSevere("Exception: " + e.getMessage(), e);
        } catch (final Error e) {
            log.logSevere("Error: " + e.getMessage(), e);
        }
    }
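
One caveat in the progress block above: the rate is computed as 500 * 1000 / duration, which throws ArithmeticException when a 500-word chunk completes within the same millisecond. A defensive variant of that progress line, as a sketch rather than the original code:

import net.yacy.kelondro.logging.Log;

public class ChunkProgress {

    // logs throughput for a chunk of chunkSize items that took durationMs
    public static void logChunk(final Log log, final long wordCounter,
                                final int chunkSize, final long durationMs) {
        final long safeMs = Math.max(1L, durationMs); // guard against 0 ms chunks
        final long wordsPerSecond = chunkSize * 1000L / safeMs;
        log.logInfo(wordCounter + " words scanned, " + wordsPerSecond + " words/s");
    }
}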


    public  NoticedURL noticeURL;
    public  ZURL errorURL, delegatedURL;

    public CrawlQueues(final Switchboard sb, final File queuePath) {
        this.sb = sb;
        this.log = new Log("CRAWLER");
        this.workers = new ConcurrentHashMap<Integer, Loader>();
        this.remoteCrawlProviderHashes = new ArrayList<String>();

        // start crawling management
        this.log.logConfig("Starting Crawling Management");
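
CrawlQueues follows the same convention as LoaderDispatcher above: the Log is the first thing the constructor creates, so every later initialization step, beginning with the logConfig line, already has somewhere to report. Reduced to its shape, with simplified field types and hypothetical names:

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import net.yacy.kelondro.logging.Log;

public class QueueSketch {

    private final Log log;
    private final ConcurrentMap<Integer, Runnable> workers; // simplified worker type

    public QueueSketch() {
        this.log = new Log("CRAWLER"); // created first ...
        this.workers = new ConcurrentHashMap<Integer, Runnable>();
        this.log.logConfig("Starting Crawling Management"); // ... so setup can report
    }
}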

        this.denyHost = (blockAttack) ? new ConcurrentHashMap<String, String>() : null;
        this.handlerPrototype = handlerPrototype;
        this.switchboard = switchboard;

        // initialize logger
        this.log = new Log("SERVER");

        // init the ssl socket factory
        this.sslSocketFactory = initSSLFactory();

        // init session parameter
