Package org.archive.format.cdx

Examples of org.archive.format.cdx.CDXLine


   
    // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags,
    // length, offset, filename.
    FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
    testCDXServer.cdxLines = new CDXLine[] {
        new CDXLine(CDXLINE1, fmt)
    };
   
    SearchResults sr = cut.query(wbr);
   
    assertEquals(1,  sr.getReturnedCount());
View Full Code Here


   
    // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags,
    // length, offset, filename.
    FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
    testCDXServer.cdxLines = new CDXLine[] {
        new CDXLine(CDXLINE1, fmt)
    };

   
    cut.setBaseStatusRegexp("");
    {
View Full Code Here

   
    // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags,
    // length, offset, filename.
    FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
    testCDXServer.cdxLines = new CDXLine[] {
        new CDXLine(CDXLINE2, fmt)
    };
   
    SearchResults sr = cut.query(wbr);
   
    assertEquals(1, testCDXServer.capturedArgs.size());
View Full Code Here

    StringWriter sw = new StringWriter();
    EasyMock.expect(response.getWriter()).andReturn(new PrintWriter(sw));
   
    FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
    testCDXServer.cdxLines = new CDXLine[] {
        new CDXLine(CDXLINE1, fmt)
    };

    EasyMock.replay(request, response);
    cut.handleRequest(request, response);
   
View Full Code Here

    wbr.setRequestUrl("http://example.com/");
    wbr.setMementoTimemapFormat("memento");
   
    FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
    testCDXServer.cdxLines = new CDXLine[] {
        new CDXLine(CDXLINE1, fmt)
    };
   
    EasyMock.replay(request, response);
    boolean r = cut.renderMementoTimemap(wbr, request, response);
   
View Full Code Here

    }
   
    protected void handleLine(DupeTrack counter, CDXLine line, boolean isDupe) {
      OrigLineDupeTrack origLineDupeTrack = (OrigLineDupeTrack)counter;
     
      CDXLine origLine = null;
     
      boolean currIsRevisit = isRevisit(line);
     
      if ((origLineDupeTrack.line == null) && !currIsRevisit) {
        origLineDupeTrack.line = line;
View Full Code Here

    if (query.collapse != null && (query.collapse.length > 0)) {
      collapser = new CollapseFieldFilter(query.collapse, parseFormat);
    }

    //CDXLine prev = null;
    CDXLine line = null;

    //boolean prevUrlAllowed = true;
   
    FieldSplitFormat outputFields = null;
   
    if (!authChecker.isAllCdxFieldAccessAllowed(authToken)) {
      outputFields = this.authChecker.getPublicCdxFormat();
    }
   
    if (!query.fl.isEmpty()) {
      if (outputFields == null) {
        outputFields = parseFormat;
      }
      try {
        outputFields = outputFields.createSubset(URLDecoder.decode(query.fl, "UTF-8"));
      } catch (UnsupportedEncodingException e) {

      }
    } else if (outputFields != null) {
      outputFields = parseFormat.createSubset(outputFields);
    }

    outputProcessor.begin();

    int writeCount = 0;
    long allCount = 0;
   
    int writeLimit = query.limit;

    while (cdx.hasNext() && ((writeLimit == 0) || (writeCount < writeLimit)) && (allCount < readLimit) && !responseWriter.isAborted()) {
     
      String rawLine = cdx.next();
      allCount++;

      if (query.offset > 0) {
        --query.offset;
        continue;
      }

//      prev = line;
     
      //line = new CDXLine(rawLine, parseFormat);
      line = this.cdxLineFactory.createStandardCDXLine(rawLine, parseFormat);
     
      //TODO: better way to handle this special case?
      if (line.getMimeType().equals("alexa/dat")) {
        continue;
      }
     
      // Additional access check, per capture
      if (accessChecker != null) {
        if (!accessChecker.includeCapture(line)) {
          continue;
        }
      }

//      if (!authChecker.isAllUrlAccessAllowed(authToken)) {
//        if ((query.matchType != MatchType.exact) && ((prev == null) || !line.getUrlKey().equals(prev.getUrlKey()))) {
//          prevUrlAllowed = authChecker.isUrlAllowed(line.getOriginalUrl(), authToken);
//        }
//
//        if (!prevUrlAllowed) {
//          continue;
//        }
//      }
//     
//      if (!authChecker.isCaptureAllowed(line, authToken)) {
//        continue;
//      }
//     
      outputProcessor.trackLine(line);

      // Timestamp Range Filtering
      String timestamp = line.getTimestamp();

      if (!query.from.isEmpty() && (timestamp.compareTo(query.from) < 0)) {
        continue;
      }

      if (!query.to.isEmpty() && (timestamp.compareTo(query.to) > 0) && !timestamp.startsWith(query.to)) {
        if (query.matchType == MatchType.exact) {
          break;
        } else {
          continue;
        }
      }

      // Check regex matcher if it exists
      if ((filterMatcher != null) && !filterMatcher.include(line)) {
        continue;
      }

      // Check collapser
      if ((collapser != null) && !collapser.include(line)) {
        continue;
      }

      // Filter to only include output fields
      if (outputFields != null) {
        line = new CDXLine(line, outputFields);
      }
     
      writeCount += outputProcessor.writeLine(line);

      if (Thread.interrupted()) {
        break;
      }
    }

    if (query.showResumeKey && (line != null) && (writeLimit > 0) && (writeCount >= writeLimit)) {
      StringBuilder sb = new StringBuilder();
      sb.append(line.getUrlKey());
      sb.append(' ');
      sb.append(UrlSurtRangeComputer.incLastChar(line.getTimestamp()));
      String resumeKey;
      try {
        resumeKey = URLEncoder.encode(sb.toString(), "UTF-8");
        outputProcessor.writeResumeKey(resumeKey);
      } catch (UnsupportedEncodingException e) {
View Full Code Here

 

  @Override
    public void writeResumeKey(String resumeKey) {
    writeHeaderAndFirstLine(prevLine, FIRST_LAST, LAST);
    CDXLine lastLine = prevLine;
    prevLine = null;
   
    String timestamp = lastLine.getTimestamp();
   
    Date date = ArchiveUtils.getDate(timestamp, null);
   
    if (date == null) {
      return;
View Full Code Here

               
      resultWriter.begin();
     
      while (iter.hasNext() && !resultWriter.isAborted()) {
        String rawLine = iter.next();
        CDXLine line = cdxLineFactory.createStandardCDXLine(rawLine, StandardCDXLineFactory.cdx11);
        resultWriter.writeLine(line);
      }
     
      resultWriter.end();
      iter.close();
View Full Code Here

TOP

Related Classes of org.archive.format.cdx.CDXLine

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.