Package bixo.datum

Source Code of bixo.datum.ParsedDatum

/*
* Copyright 2009-2013 Scale Unlimited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package bixo.datum;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import com.scaleunlimited.cascading.PayloadDatum;

import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;


@SuppressWarnings("serial")
public class ParsedDatum extends PayloadDatum {
   
    public static final String URL_FN = fieldName(ParsedDatum.class, "url");
    public static final String HOST_ADDRESS_FN = fieldName(ParsedDatum.class, "hostAddress");
    public static final String PARSED_TEXT_FN = fieldName(ParsedDatum.class, "parsedText");
    public static final String LANGUAGE_FN = fieldName(ParsedDatum.class, "language");
    public static final String TITLE_FN = fieldName(ParsedDatum.class, "title");
    public static final String OUTLINKS_FN = fieldName(ParsedDatum.class, "outLinks");
    public static final String PARSED_META_FN = fieldName(ParsedDatum.class, "parsedMeta");

    public static final Fields FIELDS = new Fields(URL_FN, HOST_ADDRESS_FN, PARSED_TEXT_FN, LANGUAGE_FN,
                    TITLE_FN, OUTLINKS_FN, PARSED_META_FN).append(getSuperFields(ParsedDatum.class));

    /**
     * No argument constructor for use with FutureTask
     */
    public ParsedDatum() {
        super(FIELDS);
    }
   
    public ParsedDatum(TupleEntry tupleEntry) {
        super(tupleEntry);
        validateFields(tupleEntry, FIELDS);
    }
   
    public ParsedDatum(String url, String hostAddress, String parsedText, String language, String title, Outlink[] outlinks, Map<String, String> parsedMeta) {
        super(FIELDS);
       
        setUrl(url);
        setHostAddress(hostAddress);
        setParsedText(parsedText);
        setLanguage(language);
        setTitle(title);
        setOutlinks(outlinks);
        setParsedMeta(parsedMeta);
    }

    public String getUrl() {
        return _tupleEntry.getString(URL_FN);
    }

    public void setUrl(String url) {
        _tupleEntry.setString(URL_FN, url);
    }

    public String getHostAddress() {
        return _tupleEntry.getString(HOST_ADDRESS_FN);
    }

    public void setHostAddress(String hostAddress) {
        _tupleEntry.setString(HOST_ADDRESS_FN, hostAddress);
    }

    public String getParsedText() {
        return _tupleEntry.getString(PARSED_TEXT_FN);
    }

    public void setParsedText(String parsedText) {
        _tupleEntry.setString(PARSED_TEXT_FN, parsedText);
    }

    public String getLanguage() {
        return _tupleEntry.getString(LANGUAGE_FN);
    }

    public void setLanguage(String language) {
        _tupleEntry.setString(LANGUAGE_FN, language);
    }

    public String getTitle() {
        return _tupleEntry.getString(TITLE_FN);
    }

    public void setTitle(String title) {
        _tupleEntry.setString(TITLE_FN, title);
    }

    public Outlink[] getOutlinks() {
        return convertTupleToOutlinks((Tuple)_tupleEntry.getObject(OUTLINKS_FN));
    }

    public void setOutlinks(Outlink[] outlinks) {
        _tupleEntry.setObject(OUTLINKS_FN, convertOutlinksToTuple(outlinks));
    }

    public Map<String, String> getParsedMeta() {
        return convertTupleToMap((Tuple)_tupleEntry.getObject(PARSED_META_FN));
    }

    public void setParsedMeta(Map<String, String> parsedMeta) {
        _tupleEntry.setObject(PARSED_META_FN, convertMapToTuple(parsedMeta));
    }

    private Tuple convertOutlinksToTuple(Outlink[] outLinks) {
        Tuple tuple = new Tuple();
        for (Outlink outlink : outLinks) {
            tuple.add(outlink.getToUrl());
            tuple.add(outlink.getAnchor());
            tuple.add(outlink.getRelAttributes());
        }
       
        return tuple;
    }

    private Outlink[] convertTupleToOutlinks(Tuple tuple) {
        int numOutlinks = tuple.size() / 3;
        Outlink[] result = new Outlink[numOutlinks];
       
        for (int i = 0; i < numOutlinks; i++) {
            int tupleOffset = i * 3;
            result[i] = new Outlink(tuple.getString(tupleOffset), tuple.getString(tupleOffset + 1), tuple.getString(tupleOffset + 2));
        }
       
        return result;
    }
   
    private Tuple convertMapToTuple(Map<String, String> map) {
        Tuple result = new Tuple();
        if (map != null) {
            for (Entry<String, String> entry : map.entrySet()) {
                result.add(entry.getKey());
                result.add(entry.getValue());
            }
        }
       
        return result;
    }
   
    private Map<String, String> convertTupleToMap(Tuple tuple) {
        Map<String, String> result = new HashMap<String, String>();
        Iterator<Object> iter = tuple.iterator();
        while (iter.hasNext()) {
            String key = (String)iter.next();
            String value = (String)iter.next();
            result.put(key, value);
        }
       
        return result;
    }

    public static Fields getParsedTextField() {
        return new Fields(ParsedDatum.PARSED_TEXT_FN);
    }

}
TOP

Related Classes of bixo.datum.ParsedDatum

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.