Package com.twpnn.demo.webreaper.model

Examples of com.twpnn.demo.webreaper.model.DownloadContent


 
  @Before
  public void preparingObject(){
    mockDownloadLst = new ArrayList<DownloadContent>();
   
    DownloadContent dc1 = new DownloadContent();
    dc1.setName("site A");
    dc1.setTime(1.222222);
   
    DownloadContent dc2 = new DownloadContent();
    dc2.setName("site B");
    dc2.setTime(0.0022222);
   
    DownloadContent dc3 = new DownloadContent();
    dc3.setName("site C");
    dc3.setTime(0.000222);
   
    mockDownloadLst.add(dc1);
    mockDownloadLst.add(dc2);
    mockDownloadLst.add(dc3);
  }
View Full Code Here


     * 1st : download main page and get its document objects which will be used for fetching its links and scripts later
     * */
    UrlFileDownloader downloadMgr = new UrlFileDownloader(homepageUrl, downloadDirectory);
    Document targetUrlDocument= downloadMgr.getUrlDocument(homepageUrl);
   
    DownloadContent mainPageDownloadContent = downloadMgr.getMainUrlDownloadContent(homepageUrl);
    List<DownloadContent> mainDownloadLst = new ArrayList<DownloadContent>();
    mainDownloadLst.add(mainPageDownloadContent);
    // download a main page
    logger.info("download main page time : " + mainPageDownloadContent.getTime());
    // response status
    logger.info("response status code : " + mainPageDownloadContent.getStatus());
   
    /*
     * 2nd : download main page's links and scripts.
     * */
    List<DownloadContent> completeDownloadLst = downloadMgr.getUrlDownloadContents(targetUrlDocument);
   
    completeDownloadLst = UrlSorting.sort(completeDownloadLst);
   
    logger.info("######### Result ( "+ completeDownloadLst.size()+ " )##############");
   
    List<DownloadContent> incompleteDownloadLst = new ArrayList<DownloadContent>();
   
    Iterator<DownloadContent> i = completeDownloadLst.iterator();
    while (i.hasNext()) {
      DownloadContent dw = i.next();
      if(null == dw.getTime()){
        incompleteDownloadLst.add(dw);
        i.remove();
      }else {
        logger.info( "<" + dw.getType().toString() + "> : " + dw.getName() + ", with time : " + dw.getTime() + " , with status code : " + dw.getStatus());
      }
     
    }
   
    logger.info("######### Not complete ( "+ incompleteDownloadLst.size()+ " ) ##############");
View Full Code Here

    return response;
  }

  public DownloadContent getMainUrlDownloadContent(String url) {
    DownloadContent mainUrlDownloadContent = downloadFile(url, ContentType.MAIN);
    return mainUrlDownloadContent;
  }
View Full Code Here

    Elements media = document.select("[src]");
    Elements imports = document.select("link[href]");
    Elements pages = document.select("a[href]");

    logOutputFormat("\nTotal Media: (%d)", media.size());
    DownloadContent downloadContent = null;
    for (Element src : media) {
      downloadContent = downloadFile(src.absUrl("src"), ContentType.MEDIA);
      if(null != downloadContent) {
        if (src.tagName().equals("img")) {
          logOutputFormat(" * %s: <%s> %sx%s (%s) , %.3f%n", src.tagName(),  src.attr("abs:src"), src.attr("width"), src.attr("height"), trim(src.attr("alt"), 20),  downloadContent.getTime());
        } else {
          logOutputFormat(" * %s: <%s> , %.3f%n ", src.tagName(), src.attr("abs:src"), downloadContent.getTime());
        }
      } else {
        logOutputFormat("%s: <%s>", "There is a problem downloading file : ", src.attr("abs:src"));
        downloadContent = new DownloadContent();
        downloadContent.setErrorMsg("Cannot download file from this URL");
      }
      downloadLst.add(downloadContent);
    }

    logOutputFormat("\nTotal Imports: (%d)", imports.size());
    for (Element link : imports) {
      downloadContent = downloadFile(link.attr("abs:href"), ContentType.IMPORT);
      if(null != downloadContent) {
        logOutputFormat(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel"));
      } else {
        logOutputFormat("%s: <%s>", "There is a problem downloading file : ", link.attr("abs:href"));
        downloadContent = new DownloadContent();
        downloadContent.setErrorMsg("Cannot download file from this URL");
      }
      downloadLst.add(downloadContent);
    }

    logOutputFormat("\nTotal Pages: (%d)", pages.size());
    for (Element page : pages) {
      //response = getConnectionResponse(link.attr("abs:href"));
      downloadContent = downloadFile(page.attr("abs:href"), ContentType.PAGE);
      if(null != downloadContent ){
        logOutputFormat(" * a: <%s>  (%s)", page.attr("abs:href"), trim(page.text(), 35));
      } else {
        downloadContent = new DownloadContent();
        downloadContent.setTime(-1.00);
        downloadContent.setErrorMsg("Cannot download file from this URL");
      }
      downloadLst.add(downloadContent);
    }
    return downloadLst;
  }
View Full Code Here

    }
    return downloadLst;
  }

  private DownloadContent downloadFile(String url, ContentType type) {
    DownloadContent downloadContent = new DownloadContent();
    Long start = System.nanoTime();
    Long end = null;
    Double totalTime = null;

    // check whether directory is existing or not
    File file = new File(prepareDirectory(type));
    if (!file.exists())
      file.mkdirs();

    HttpClient client = HttpClientBuilder.create().build();
    HttpGet request = new HttpGet(url);
    HttpResponse response;
    InputStream is = null;
    FileOutputStream fos = null;
    int responseCode = 0;
    try {
      response = client.execute(request);
      HttpEntity entity = response.getEntity();
      if (null != response && null != entity) {
        is = entity.getContent();
        responseCode = response.getStatusLine().getStatusCode();
        String filename = trimFileName(url, type);
        file = new File(filename);
        logger.info("file name : " + file.getName());
        fos = new FileOutputStream(file);
        int inByte;
        while ((inByte = is.read()) != -1)
          fos.write(inByte);

        logger.info("response code : " + responseCode);

        end = System.nanoTime();
        long elapsedTime = end - start;
        totalTime = (double) elapsedTime / 1000000000.0;
       
        downloadContent.setName(url);
        downloadContent.setType(type);
        downloadContent.setStatus(responseCode);
        downloadContent.setTime(totalTime);
        is.close();
        fos.close();
       
      }
    } catch (Exception e){
      logger.error("caused by, " + e.getMessage());
      downloadContent.setName(url);
      downloadContent.setType(type);
      downloadContent.setStatus(responseCode);
      downloadContent.setErrorMsg("caused by, " + e.getMessage());
    }

    return downloadContent;
  }
View Full Code Here

TOP

Related Classes of com.twpnn.demo.webreaper.model.DownloadContent

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.