Package org.archive.util

Examples of org.archive.util.Recorder


     */
    @Override
    protected void innerProcess(CrawlURI curi) throws InterruptedException {
        curi.setFetchBeginTime(System.currentTimeMillis());
        ClientFTP client = new ClientFTP();
        Recorder recorder = curi.getRecorder();
       
        try {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("attempting to fetch ftp uri: " + curi);
            }
View Full Code Here


    public void testBasics() throws InterruptedException, IOException {
        CrawlURI curi1 = new CrawlURI(UURIFactory.getInstance("http://example.org/1"));
        // without Recorder, CrawlURI#getContentLength() returns zero, which makes
        // loader().shouldProcess() return false.
        Recorder rec = new Recorder(getTmpDir(), "rec");
        curi1.setRecorder(rec);
        // give Recorder some content so that getContentLength() returns non-zero.
        InputStream is = rec.inputWrap(new ByteArrayInputStream("HTTP/1.0 200 OK\r\n\r\ntext.".getBytes()));
        is.read(new byte[1024]);
        is.close();
       
        assertFalse(loader().shouldProcess(curi1));
        assertFalse(storer().shouldProcess(curi1));
View Full Code Here

    }
   

    protected Recorder getRecorder() throws IOException {
        if (Recorder.getHttpRecorder() == null) {
            Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(),
                    getClass().getName(), 16 * 1024, 512 * 1024);
            Recorder.setHttpRecorder(httpRecorder);
        }

        return Recorder.getHttpRecorder();
View Full Code Here

            id = "recording-http-connection-" + Long.toString(COUNTER.getAndIncrement());
        }

        @Override
        protected InputStream getSocketInputStream(final Socket socket) throws IOException {
            Recorder recorder = Recorder.getHttpRecorder();
            if (recorder != null) {   // XXX || (isSecure() && isProxied())) {
                return recorder.inputWrap(super.getSocketInputStream(socket));
            } else {
                return super.getSocketInputStream(socket);
            }
        }
View Full Code Here

            }
        }

        @Override
        protected OutputStream getSocketOutputStream(final Socket socket) throws IOException {
            Recorder recorder = Recorder.getHttpRecorder();
            if (recorder != null) {   // XXX || (isSecure() && isProxied())) {
                return recorder.outputWrap(super.getSocketOutputStream(socket));
            } else {
                return super.getSocketOutputStream(socket);
            }
        }
View Full Code Here

            /*
             * Need to do this to avoid "java.io.IOException: RIS already open"
             * on urls that are retried within httpcomponents. Exercised by
             * FetchHTTPTests.testNoResponse()
             */
            Recorder recorder = Recorder.getHttpRecorder();
            if (recorder != null) {
                recorder.close();
                recorder.closeRecorders();
            }
        }
View Full Code Here

     *
     * @throws Exception   just in case
     */
    public void testZeroContent() throws Exception {
        CrawlURI uri = defaultURI();
        Recorder recorder = createRecorder("");
        uri.setContentType("text/plain");
        uri.setRecorder(recorder);
        extractor.process(uri);
        assertEquals(0, uri.getOutLinks().size());
        assertNoSideEffects(uri);
View Full Code Here

    }
   
    public static Recorder createRecorder(String content, String charset)
            throws IOException {
        File temp = File.createTempFile("test", ".tmp");
        Recorder recorder = new Recorder(temp, 1024, 1024);
        byte[] b = content.getBytes(charset);
        ByteArrayInputStream bais = new ByteArrayInputStream(b);
        InputStream is = recorder.inputWrap(bais);
        recorder.markContentBegin();
        for (int x = is.read(); x >= 0; x = is.read());
        is.close();
        return recorder;
    }
View Full Code Here

        UURI testUuri = UURIFactory.getInstance(urlStr);
        CrawlURI testUri = new CrawlURI(testUuri, null, null, LinkContext.NAVLINK_MISC);
       

        File temp = File.createTempFile("test", ".tmp");
        Recorder recorder = new Recorder(temp, 1024, 1024);
        InputStream is = recorder.inputWrap(ExtractorPDFContentTest.class.getClassLoader().getResourceAsStream(resourceFileName));
        recorder.markContentBegin();
        for(int x = is.read(); x>=0; x=is.read());
        is.close();
       

        testUri.setContentType("application/pdf");
        testUri.setFetchStatus(200);
        testUri.setRecorder(recorder);
        testUri.setContentSize(recorder.getResponseContentLength());
        return testUri;
    }
View Full Code Here

    protected void recordDNS(final CrawlURI curi, final Record[] rrecordSet)
            throws IOException {
        final byte[] dnsRecord = getDNSRecord(curi.getFetchBeginTime(),
                rrecordSet);

        Recorder rec = curi.getRecorder();
        // Shall we get a digest on the content downloaded?
        boolean digestContent = getDigestContent();
        String algorithm = null;
        if (digestContent) {
            algorithm = getDigestAlgorithm();
            rec.getRecordedInput().setDigest(algorithm);
        } else {
            rec.getRecordedInput().setDigest((MessageDigest)null);
        }
        InputStream is = curi.getRecorder().inputWrap(
                new ByteArrayInputStream(dnsRecord));

        if (digestContent) {
            rec.getRecordedInput().startDigest();
        }

        // Reading from the wrapped stream, behind the scenes, will write
        // files into scratch space
        try {
            byte[] buf = new byte[256];
            while (is.read(buf) != -1) {
                continue;
            }
        } finally {
            is.close();
            rec.closeRecorders();
        }
        curi.setContentSize(dnsRecord.length);

        if (digestContent) {
            curi.setContentDigest(algorithm,
                rec.getRecordedInput().getDigestValue());
        }
    }
View Full Code Here

TOP

Related Classes of org.archive.util.Recorder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.