Package de.anomic.crawler.retrieval

Examples of de.anomic.crawler.retrieval.Response.url()


            }
        } else if (viewMode.equals("parsed") || viewMode.equals("sentences"|| viewMode.equals("words") || viewMode.equals("links")) {
            // parsing the resource content
            Document document = null;
            try {
                document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                if (document == null) {
                    prop.put("error", "5");
                    prop.put("error_errorText", "Unknown error");
                    prop.put("viewMode", VIEW_MODE_NO_TEXT);
                    return prop;
View Full Code Here


                            Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()),
                            sb.crawler.defaultProxyProfile
                    );
                    final String storeError = response.shallStoreCacheForProxy();
                    final boolean storeHTCache = response.profile().storeHTCache();
                    final String supportError = TextParser.supports(response.url(), response.getMimeType());
                    if (
                            /*
                             * Now we store the response into the htcache directory if
                             * a) the response is cacheable AND
                             */
 
View Full Code Here

                        if (sizeBeforeDelete == -1) {
                            // totally fresh file
                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
View Full Code Here

                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
                        } else if (cacheArray != null && sizeBeforeDelete == cacheArray.length) {
                            // before we came here we deleted a cache entry
                            cacheArray = null;
View Full Code Here

                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REF_FAIL_HIT");
                        } else {
                            // before we came here we deleted a cache entry
                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
View Full Code Here

                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
                        }
                    } else {
                        // no caching
View Full Code Here

        Log.logException(e1);
      }
      return null;
    }
    try {
      return Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
    } catch (final Failure e) {
      Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to a parser failure for url: "+url);
      return null;
    }
  }
View Full Code Here

  public void loadDocument(final LoaderDispatcher loader) throws IOException, Failure {
    if(this.document == null) {
      Response response = null;
      response = loader.load(loader.request(this.uri, true, false), CacheStrategy.IFEXIST, Long.MAX_VALUE, true);
      this.document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
    }
  }

  public EnumMap<METADATA, String> getMetadata() {
    final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);
View Full Code Here

        /* ===========================================================================
         * PARSE RESOURCE
         * =========================================================================== */
        Document document = null;
        try {
            document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
        } catch (final Parser.Failure e) {
            init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
            return;
        }
        if (document == null) {
View Full Code Here

            final String storeError = response.shallStoreCacheForCrawler();
            if (storeError == null) {
                try {
                    Cache.store(url, response.getResponseHeader(), response.getContent());
                } catch (final IOException e) {
                    this.log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
                }
            } else {
                this.log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError);
            }
            return response;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.