Package bixo.datum

Source Code of bixo.datum.FetchedDatum

/*
* Copyright 2009-2013 Scale Unlimited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package bixo.datum;

import java.io.Serializable;
import java.security.InvalidParameterException;

import com.scaleunlimited.cascading.Payload;
import com.scaleunlimited.cascading.PayloadDatum;


import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

@SuppressWarnings("serial")
public class FetchedDatum extends PayloadDatum implements Serializable {
   
    public static final String URL_FN = fieldName(FetchedDatum.class, "url");
    public static final String NEW_BASE_URL_FN = fieldName(FetchedDatum.class, "newBaseUrl");
    public static final String FETCHED_URL_FN = fieldName(FetchedDatum.class, "fetchedUrl");
    public static final String FETCH_TIME_FN = fieldName(FetchedDatum.class, "fetchTime");
    public static final String CONTENT_FN = fieldName(FetchedDatum.class, "content");
    public static final String CONTENT_TYPE_FN = fieldName(FetchedDatum.class, "contentType");
    public static final String RESPONSE_RATE_FN = fieldName(FetchedDatum.class, "responseRate");
    public static final String NUM_REDIRECTS_FN = fieldName(FetchedDatum.class, "numRedirects");
    public static final String HOST_ADDRESS_FN = fieldName(FetchedDatum.class, "hostAddress");
    public static final String HTTP_HEADERS_FN = fieldName(FetchedDatum.class, "httpHeaders");

    public static final Fields FIELDS = new Fields(URL_FN, NEW_BASE_URL_FN,
                    FETCHED_URL_FN, FETCH_TIME_FN, CONTENT_FN, CONTENT_TYPE_FN,
                    RESPONSE_RATE_FN, NUM_REDIRECTS_FN, HOST_ADDRESS_FN,
                    HTTP_HEADERS_FN).append(getSuperFields(FetchedDatum.class));

    public FetchedDatum(Tuple tuple) {
        super(FIELDS, tuple);
    }
   
    public FetchedDatum(TupleEntry tupleEntry) {
        super(tupleEntry);
        validateFields(tupleEntry, FIELDS);
    }
   
    public FetchedDatum(String baseUrl, String fetchedUrl, long fetchTime, HttpHeaders headers,
                    ContentBytes content, String contentType, int responseRate) {
        super(FIELDS);

        setUrl(baseUrl);
        setFetchedUrl(fetchedUrl);
        setFetchTime(fetchTime);
        setContent(content);
        setContentType(contentType);
        setResponseRate(responseRate);
        setHeaders(headers);
       
        setNumRedirects(0);
        setNewBaseUrl(null);
    }

    /**
     * Create place-holder FetchedDatum from the data used to attempt the fetch.
     *
     * @param url
     *            - Base & redirected url
     * @param payload
     *            - User supplied payload
     */
    public FetchedDatum(String url, Payload payload) {
        this(url, url, 0, new HttpHeaders(), new ContentBytes(), "", 0);
        setPayload(payload);
    }

    /**
     * Create place-holder FetchedDatum from the data used to attempt the fetch.
     *
     * @param scoredDatum
     *            Valid datum with url/metadata needed to create FetchedDatum
     */
    public FetchedDatum(final ScoredUrlDatum scoredDatum) {
        // Note: Here we share the payload between the ScoredUrlDatum and the
        // FetchedDatum we're constructing, but we assume noone is modifying
        // this data within the subassembly.
        this(scoredDatum.getUrl(), scoredDatum.getPayload());
    }

    /**
     * Return the original base URL.
     *
     * @return original URL we tried to fetch
     */
    public String getUrl() {
       
        return _tupleEntry.getString(URL_FN);
    }

    public void setUrl(String baseUrl) {
        if (baseUrl == null) {
            throw new InvalidParameterException("baseUrl cannot be null");
        }

        _tupleEntry.setString(URL_FN, baseUrl);
    }
   
    public String getNewBaseUrl() {
        return _tupleEntry.getString(NEW_BASE_URL_FN);
    }

    public void setNewBaseUrl(String newBaseUrl) {
        _tupleEntry.setString(NEW_BASE_URL_FN, newBaseUrl);
    }

    public String getFetchedUrl() {
        return _tupleEntry.getString(FETCHED_URL_FN);
    }

    public void setFetchedUrl(String fetchedUrl) {
        if (fetchedUrl == null) {
            throw new InvalidParameterException("fetchedUrl cannot be null");
        }

        _tupleEntry.setString(FETCHED_URL_FN, fetchedUrl);
    }
   
    public long getFetchTime() {
        return _tupleEntry.getLong(FETCH_TIME_FN);
    }
   
    public void setFetchTime(long fetchTime) {
        _tupleEntry.setLong(FETCH_TIME_FN, fetchTime);
    }

    public byte[] getContentBytes() {
        return ((ContentBytes)_tupleEntry.getObject(CONTENT_FN)).getBytes();
    }
   
    public int getContentLength() {
        return ((ContentBytes)_tupleEntry.getObject(CONTENT_FN)).getLength();
    }
   
    public void setContent(ContentBytes content) {
        if (content == null) {
            throw new InvalidParameterException("content cannot be null");
        }

        _tupleEntry.setObject(CONTENT_FN, content);
    }
   
    public String getContentType() {
        return _tupleEntry.getString(CONTENT_TYPE_FN);
    }

    public void setContentType(String contentType) {
        if (contentType == null) {
            throw new InvalidParameterException("contentType cannot be null");
        }

        _tupleEntry.setString(CONTENT_TYPE_FN, contentType);
    }
   
    public int getResponseRate() {
        return _tupleEntry.getInteger(RESPONSE_RATE_FN);
    }

    public void setResponseRate(int responseRate) {
        _tupleEntry.setInteger(RESPONSE_RATE_FN, responseRate);
    }
   
    public int getNumRedirects() {
        return _tupleEntry.getInteger(NUM_REDIRECTS_FN);
    }

    public void setNumRedirects(int numRedirects) {
        _tupleEntry.setInteger(NUM_REDIRECTS_FN, numRedirects);
    }

    public String getHostAddress() {
        return _tupleEntry.getString(HOST_ADDRESS_FN);
    }

    public void setHostAddress(String hostAddress) {
        _tupleEntry.setString(HOST_ADDRESS_FN, hostAddress);
    }

    public HttpHeaders getHeaders() {
        return new HttpHeaders((Tuple)_tupleEntry.getObject(HTTP_HEADERS_FN));
    }

    public void setHeaders(HttpHeaders headers) {
        if (headers == null) {
            throw new InvalidParameterException("headers cannot be null");
        }

        _tupleEntry.setObject(HTTP_HEADERS_FN, headers.toTuple());
    }

    @Override
    public String toString() {
        StringBuilder result = new StringBuilder("[base URL] ");
        result.append(getUrl());
        if (getNewBaseUrl() != null) {
            result.append(" | [perm redir URL] ");
            result.append(getNewBaseUrl());
        }

        if (!getUrl().equals(getFetchedUrl())) {
            result.append(" | [final URL] ");
            result.append(getFetchedUrl());
        }

        HttpHeaders headers = getHeaders();
        for (String headerName : headers.getNames()) {
            result.append(" | [header] ");
            result.append(headerName);
            result.append(": ");
            result.append(headers.getFirst(headerName));
        }

        return result.toString();
    }

}
TOP

Related Classes of bixo.datum.FetchedDatum

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.