Examples of ReplayInputStream


Examples of org.archive.io.ReplayInputStream

    public void testGapError() throws IOException {
      ARCWriter writer = createArcWithOneRecord("testGapError", true);
        String content = getContent();
        // Make a 'weird' RIS that returns bad 'remaining' length
        // awhen remaining should be 0
        ReplayInputStream ris = new ReplayInputStream(content.getBytes(),
                content.length(), null) {
            public long remaining() {
                return (super.remaining()==0) ? -1 : super.remaining();
            }
        };
View Full Code Here

Examples of org.archive.io.ReplayInputStream

    protected ProcessResult innerProcessResult(CrawlURI puri) {
        CrawlURI curi = (CrawlURI)puri;
       
        long recordLength = getRecordedSize(curi);
       
        ReplayInputStream ris = null;
        try {
            if (shouldWrite(curi)) {
                ris = curi.getRecorder().getRecordedInput()
                        .getReplayInputStream();
                return write(curi, recordLength, ris, getHostAddress(curi));
View Full Code Here

Examples of org.archive.io.ReplayInputStream

     * @param curi CrawlURI to process.
     */
    protected boolean innerExtract(CrawlURI curi){
        int links = 0;
        InputStream contentStream = null;
        ReplayInputStream documentStream = null;
        SeekReader docReader = null;

        // Get the doc as a repositionable reader
        try
        {
            contentStream = curi.getRecorder().getContentReplayInputStream();
            if (contentStream==null) {
                // TODO: note problem
                return false;
            }
            documentStream = new ReplayInputStream(contentStream);
          
           
            docReader = Doc.getText(documentStream);
        } catch(Exception e){
            curi.getNonFatalFailures().add(e);
            return false;
        } finally {
            IOUtils.closeQuietly(contentStream);
        }

        CharSequence cs = new SeekReaderCharSequence(docReader, 0);
        Matcher m = PATTERN.matcher(cs);
        while (m.find()) {
            links++;
            addLink(curi, m.group(1));
        }
        documentStream.destroy();
        logger.fine(curi + " has " + links + " links.");
        return true;
    }
View Full Code Here

Examples of org.archive.io.ReplayInputStream

      }
    }

    // now write the content, or a fake record:
    ARCWriter writer = null;
    ReplayInputStream replayIS = null;
    try {
      writer = cache.getWriter();
      if(gotUrl) {

        RecordingInputStream ris = recorder.getRecordedInput();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.