Package com.scaleunlimited.cascading

Examples of com.scaleunlimited.cascading.LoggingFlowProcess


    @SuppressWarnings("unchecked")
    @Override
    public void prepare(FlowProcess flowProcess,
                        OperationCall<NullContext> operationCall) {
        super.prepare(flowProcess, operationCall);
        _flowProcess = new LoggingFlowProcess(flowProcess);
        _flowProcess.addReporter(new LoggingFlowReporter());
    }
View Full Code Here


    public void prepare(FlowProcess flowProcess, cascading.operation.OperationCall<NullContext> operationCall) {
        _executor = new ThreadedExecutor(_fetcher.getMaxThreads(), COMMAND_TIMEOUT);
       
        // FUTURE KKr - use Cascading process vs creating our own, once it
        // supports logging in local mode, and a setStatus() call.
        _flowProcess = new LoggingFlowProcess(flowProcess);
        _flowProcess.addReporter(new LoggingFlowReporter());
    }
View Full Code Here

    @SuppressWarnings("unchecked")
    @Override
    public void prepare(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        super.prepare(flowProcess, operationCall);
       
        _flowProcess = new LoggingFlowProcess(flowProcess);
        _flowProcess.addReporter(new LoggingFlowReporter());

        _executor = new ThreadedExecutor(_maxThreads, COMMAND_TIMEOUT);
    }
View Full Code Here

    }

    @SuppressWarnings("rawtypes")
    @Override
    public void run() {
        LoggingFlowProcess process = _fetchMgr.getProcess();
        process.increment(FetchCounters.DOMAINS_PROCESSING, 1);
        final long minPageFetchInterval = _httpFetcher.getFetcherPolicy().getMinPageFetchInterval();
       
        try {
            // TODO KKr - when fetching the last item, send a Connection: close
            // header to let the server know it doesn't need to keep the socket open.
            Iterator<ScoredUrlDatum> iter = _items.iterator();
            while (!Thread.interrupted() && iter.hasNext()) {
                ScoredUrlDatum item = iter.next();
                FetchedDatum result = new FetchedDatum(item);
               
                // We use status as an extra field on the end of of FetchedDatum that lets
                // us generate a full status pipe, and also a content pipe that only has
                // entries which were fetched. By keying off the type (string == OK,
                // BaseFetchException == bad) the FetchPipe can do this magic.
                Comparable status = null;

                long fetchStartTime = System.currentTimeMillis();
               
                try {
                    process.increment(FetchCounters.URLS_FETCHING, 1);
                    result = _httpFetcher.get(item);
                    long deltaTime = System.currentTimeMillis() - fetchStartTime;

                    process.increment(FetchCounters.FETCHED_TIME, (int)deltaTime);
                    process.increment(FetchCounters.URLS_FETCHED, 1);
                    process.increment(FetchCounters.FETCHED_BYTES, result.getContentLength());
                    process.setStatus(Level.SLF4J_TRACE, "Fetched " + result);

                    status = UrlStatus.FETCHED.toString();
                   
                    // TODO - check keep-alive response (if present), and close the connection/delay
                    // for some amount of time if we exceed this limit.
                } catch (AbortedFetchException e) {
                    LOGGER.info("Aborted while fetching " + item.getUrl() + " due to " + e.getAbortReason());
                    if (e.getAbortReason() == AbortedFetchReason.INTERRUPTED) {
                        process.increment(FetchCounters.URLS_SKIPPED, 1);
                       
                        // Make sure our loop terminates.
                        Thread.currentThread().interrupt();
                    } else {
                        process.increment(FetchCounters.URLS_FAILED, 1);
                    }
                   
                    status = (Comparable)e;
                } catch (BaseFetchException e) {
                    LOGGER.info("Fetch exception while fetching " + item.getUrl(), e);
                    process.increment(FetchCounters.URLS_FAILED, 1);

                    // We can do this because each of the concrete subclasses of BaseFetchException implements
                    // WritableComparable
                    status = (Comparable)e;
                } catch (Exception e) {
                    LOGGER.warn("Unexpected exception while fetching " + item.getUrl(), e);

                    process.increment(FetchCounters.URLS_FAILED, 1);
                    status = new IOFetchException(item.getUrl(), new IOException(e));
                } finally {
                    process.decrement(FetchCounters.URLS_FETCHING, 1);

                    Tuple tuple = result.getTuple();
                    tuple.add(status);
                    _fetchMgr.collect(tuple);

                    // Figure out how long it's been since the start of the request.
                    long fetchInterval = System.currentTimeMillis() - fetchStartTime;

                    // We want to avoid fetching faster than a max acceptable rate. Note that we always do
                    // this, even if there's not another page, so that this setting will have impact even
                    // if the next fetch set is ready right away.
                    if (fetchInterval < minPageFetchInterval) {
                        long delay = minPageFetchInterval - fetchInterval;
                        LOGGER.trace(String.format("FetchTask: sleeping for %dms", delay));

                        try {
                            Thread.sleep(delay);
                        } catch (InterruptedException e) {
                            LOGGER.warn("FetchTask interrupted!");
                            Thread.currentThread().interrupt();
                            continue;
                        }
                    }
                }
            }
           
            // While we still have entries, we need to write them out to avoid losing them.
            while (iter.hasNext()) {
                ScoredUrlDatum item = iter.next();
                FetchedDatum result = new FetchedDatum(item);
                process.increment(FetchCounters.URLS_SKIPPED, 1);
                AbortedFetchException status = new AbortedFetchException(item.getUrl(), AbortedFetchReason.INTERRUPTED);
               
                Tuple tuple = result.getTuple();
                tuple.add(status);
               _fetchMgr.collect(tuple);
            }
        } catch (Throwable t) {
            LOGGER.error("Exception while fetching", t);
        } finally {
            _fetchMgr.finished(_ref);
            process.decrement(FetchCounters.DOMAINS_PROCESSING, 1);
        }
    }
View Full Code Here

    @SuppressWarnings({"unchecked" })
    @Override
    public void prepare(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        super.prepare(flowProcess, operationCall);

        _flowProcess = new LoggingFlowProcess(flowProcess);
        _flowProcess.addReporter(new LoggingFlowReporter());

        _executor = new ThreadedExecutor(_fetcher.getMaxThreads(), _fetcher.getFetcherPolicy().getRequestTimeout());

        _refLock = new Object();
View Full Code Here

        @SuppressWarnings("unchecked")
        @Override
        public void prepare(FlowProcess flowProcess,
                            OperationCall<NullContext> operationCall) {
            super.prepare(flowProcess, operationCall);
            _flowProcess = new LoggingFlowProcess(flowProcess);
            _flowProcess.addReporter(new LoggingFlowReporter());
        }
View Full Code Here

TOP

Related Classes of com.scaleunlimited.cascading.LoggingFlowProcess

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.