Package de.anomic.crawler.retrieval

Examples of de.anomic.crawler.retrieval.Response.url()


                    Cache.store(url, response.getResponseHeader(), response.getContent());
                } catch (final IOException e) {
                    this.log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
                }
            } else {
                this.log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError);
            }
            return response;
        }

        throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
View Full Code Here


            }
        } else if (viewMode.equals("parsed") || viewMode.equals("sentences"|| viewMode.equals("words") || viewMode.equals("links")) {
            // parsing the resource content
            Document document = null;
            try {
                document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                if (document == null) {
                    prop.put("error", "5");
                    prop.put("error_errorText", "Unknown error");
                    prop.put("viewMode", VIEW_MODE_NO_TEXT);
                    return prop;
View Full Code Here

                            Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()),
                            sb.crawler.defaultProxyProfile
                    );
                    final String storeError = response.shallStoreCacheForProxy();
                    final boolean storeHTCache = response.profile().storeHTCache();
                    final String supportError = TextParser.supports(response.url(), response.getMimeType());
                    if (
                            /*
                             * Now we store the response into the htcache directory if
                             * a) the response is cacheable AND
                             */
 
View Full Code Here

                        if (sizeBeforeDelete == -1) {
                            // totally fresh file
                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (final IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
View Full Code Here

                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (final IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
                        } else if (cacheArray != null && sizeBeforeDelete == cacheArray.length) {
                            // before we came here we deleted a cache entry
                            cacheArray = null;
View Full Code Here

                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REF_FAIL_HIT");
                        } else {
                            // before we came here we deleted a cache entry
                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (final IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
View Full Code Here

                            response.setContent(cacheArray);
                            try {
                                Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                                sb.toIndexer(response);
                            } catch (final IOException e) {
                                log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
                            }
                            conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
                        }
                    } else {
                        // no caching
View Full Code Here

  public Document loadDocument(final LoaderDispatcher loader) throws IOException, Failure {
    if(this.document == null) {
      Response response = null;
      response = loader.load(loader.request(this.uri, true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, true);
      this.document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
    }
    return this.document;
 

  public EnumMap<METADATA, String> getMetadata() {
View Full Code Here

            final String storeError = response.shallStoreCacheForCrawler();
            if (storeError == null) {
                try {
                    Cache.store(url, response.getResponseHeader(), response.getContent());
                } catch (final IOException e) {
                    this.log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
                }
            } else {
                this.log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError);
            }
            return response;
View Full Code Here

                    Cache.store(url, response.getResponseHeader(), response.getContent());
                } catch (final IOException e) {
                    this.log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
                }
            } else {
                this.log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError);
            }
            return response;
        }

        throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.