Examples of WebUrl

  • edu.uci.ics.crawler4j.url.WebURL
    @author Yasser Ganjisaffar
  • org.apache.manifoldcf.crawler.connectors.rss.WebURL
    Replacement class for java.net.URI, which is broken in many ways.
  • org.apache.manifoldcf.crawler.connectors.webcrawler.WebURL
    Replacement class for java.net.URI, which is broken in many ways.

  • Examples of ch.entwine.weblounge.common.url.WebUrl

       *          the http response
       */
      private void serveHTML(Action action, WebloungeRequest request,
          WebloungeResponse response) {

        WebUrl url = request.getUrl();
        Site site = request.getSite();

        // Load the target page used to render the action
        Page page = null;
        try {
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

        // Cache the action urls
        StringBuffer flavors = new StringBuffer();
        synchronized (urlCache) {
          for (RequestFlavor flavor : action.getFlavors()) {
            WebUrl actionUrl = new WebUrlImpl(action.getSite(), action.getPath(), Resource.LIVE, flavor);
            String normalizedUrl = actionUrl.normalize(false, false, true);
            urlCache.put(normalizedUrl, pool);
            if (flavors.length() > 0)
              flavors.append(",");
            flavors.append(flavor.toString().toLowerCase());
            logger.trace("Caching action '{}' for url {}", action, normalizedUrl);
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

       *
       * @see ch.entwine.weblounge.dispatcher.RequestHandler#service(ch.entwine.weblounge.common.request.WebloungeRequest,
       *      ch.entwine.weblounge.common.request.WebloungeResponse)
       */
      public boolean service(WebloungeRequest request, WebloungeResponse response) {
        WebUrl url = request.getUrl();
        String path = url.getPath();

        // Is the request intended for this handler?
        if (!URI_PREFIX.equals(path)) {
          logger.debug("Skipping request for {}, request path does not start with {}", URI_PREFIX);
          return false;
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

      private static final String ACTION_MOUNTPOINT = "/my/action/";

      @Before
      public void setUp() {
        Site site = EasyMock.createNiceMock(Site.class);
        WebUrl url = new WebUrlImpl(site, REQUEST_PATH);

        request = EasyMock.createNiceMock(WebloungeRequest.class);
        EasyMock.expect(request.getUrl()).andReturn(url).anyTimes();

        action = EasyMock.createNiceMock(Action.class);
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

        result = new SearchResultImpl(query, hitCount, documentCount);
        result.setSearchTime(searchTime);

        Object source = new Object();
        String id = "4bb19980-8f98-4873-a813-71b5dfac22af";
        WebUrl url = new WebUrlImpl(site, "/");
        for (int i = 0; i < limit; i++) {
          double relevance = Math.random();
          SearchResultItemImpl item = new SearchResultItemImpl(id, site, url, relevance, source);
          result.addResultItem(item);
        }
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

        // resource version
        long version = (Long) metadataMap.get(VERSION).getValues().get(0);

        // path
        String path = null;
        WebUrl url = null;
        if (metadataMap.get(PATH) != null) {
          try {
            path = (String) metadataMap.get(PATH).getValues().get(0);
            url = new WebUrlImpl(site, path);
          } catch (IllegalArgumentException e) {
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

        String uuid = UUID.randomUUID().toString();
        if (!StringUtils.isBlank(path)) {
          try {
            if (!path.startsWith("/"))
              path = "/" + path;
            WebUrl url = new WebUrlImpl(site, path);
            path = url.getPath();
          } catch (IllegalArgumentException e) {
            logger.warn("Tried to create a page with an invalid path '{}': {}", path, e.getMessage());
            throw new WebApplicationException(Status.BAD_REQUEST);
          }
        } else {
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

        // resource version
        long version = (Long) metadataMap.get(VERSION).getValues().get(0);

        // FIXME Add exception handling
        ResourceURI uri = new PageURIImpl(site, path, id, version);
        WebUrl url = new WebUrlImpl(site, path);

        PageSearchResultItemImpl result = new PageSearchResultItemImpl(uri, url, relevance, site, metadata);

        if (metadataMap.get(XML) != null)
          result.setResourceXml((String) metadataMap.get(XML).getValues().get(0));
    View Full Code Here

    Examples of ch.entwine.weblounge.common.url.WebUrl

        // resource version
        long version = (Long) metadataMap.get(VERSION).getValues().get(0);

        // path
        String path = null;
        WebUrl url = null;
        if (metadataMap.get(PATH) != null) {
          try {
            path = (String) metadataMap.get(PATH).getValues().get(0);
            url = new WebUrlImpl(site, path);
          } catch (IllegalArgumentException e) {
    View Full Code Here

    Examples of edu.uci.ics.crawler4j.url.WebURL

              movedToUrl = URLCanonicalizer.getCanonicalURL(movedToUrl);
              int newdocid = DocIDServer.getDocID(movedToUrl);
              if (newdocid > 0) {
                return PageFetchStatus.RedirectedPageIsSeen;
              } else {
                WebURL webURL = new WebURL();
                webURL.setURL(movedToUrl);
                webURL.setParentDocid(curURL.getParentDocid());
                webURL.setDepth((short) (curURL.getDepth()));
                webURL.setDocid(-1);
                if (shouldVisit(webURL) && RobotstxtServer.allows(webURL)) {
                  webURL.setDocid(DocIDServer.getNewDocID(movedToUrl))
                  Frontier.schedule(webURL);
                }
              }
            }
            return PageFetchStatus.Moved;
          } else if (statusCode == PageFetchStatus.PageTooBig) {
            logger.error("Page was bigger than max allowed size: " + curURL.getURL());
          }
          return statusCode;
        }

        try {
          if (!page.isBinary()) {
            htmlParser.parse(page.getHTML(), curURL.getURL());
            page.setText(htmlParser.getText());
            page.setTitle(htmlParser.getTitle());

            if (page.getText() == null) {
              return PageFetchStatus.NotInTextFormat;
            }

            Iterator<String> it = htmlParser.getLinks().iterator();
            List<WebURL> toSchedule = new ArrayList<WebURL>();
            List<WebURL> toList = new ArrayList<WebURL>();
            while (it.hasNext()) {
              String url = it.next();
              if (url != null) {
                int newdocid = DocIDServer.getDocID(url);
                if (newdocid > 0) {
                  if (newdocid != docid) {
                    WebURL webURL = new WebURL();
                    webURL.setURL(url);
                    webURL.setDocid(newdocid);
                    toList.add(webURL);
                  }
                } else {
                  WebURL webURL = new WebURL();
                  webURL.setURL(url);
                  webURL.setDocid(-1);
                  webURL.setParentDocid(docid);
                  webURL.setDepth((short) (curURL.getDepth() + 1));             
                  if (shouldVisit(webURL) && RobotstxtServer.allows(webURL)) {
                    if (MAX_CRAWL_DEPTH == -1 || curURL.getDepth() < MAX_CRAWL_DEPTH) {
                      webURL.setDocid(DocIDServer.getNewDocID(url));
                      toSchedule.add(webURL);
                      toList.add(webURL);
                    }
                  }
                }
    View Full Code Here
    TOP
    Copyright © 2018 www.massapi.com. All rights reserved.
    All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.