Package io.lumify.dbpedia.mapreduce.model

Source Code of io.lumify.dbpedia.mapreduce.model.LineData

package io.lumify.dbpedia.mapreduce.model;

import io.lumify.core.exception.LumifyException;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LineData {
    private static final Pattern LINE_PATTERN = Pattern.compile("^<(.*?)> <(.*?)> (.*) \\.$");

    private final String pageUrl;
    private final String propertyIri;
    private final String valueRaw;
    private final Value value;
    private final String pageTitle;

    public LineData(String pageUrl, String pageTitle, String propertyIri, String valueRaw, Value value) {
        this.pageUrl = pageUrl;
        this.pageTitle = pageTitle;
        this.propertyIri = propertyIri;
        this.valueRaw = valueRaw;
        this.value = value;
    }

    public String getPageTitle() {
        return pageTitle;
    }

    public String getPropertyIri() {
        return propertyIri;
    }

    public Value getValue() {
        return value;
    }

    // <http://dbpedia.org/resource/Autism> <http://dbpedia.org/ontology/diseasesdb> "1142"@en .
    public static LineData parse(String line) {
        Matcher m = LINE_PATTERN.matcher(line);
        if (!m.matches()) {
            throw new LumifyException("Could not find match for line: " + line);
        }

        String pageUrl = m.group(1);
        String propertyIri = m.group(2);
        String valueRaw = m.group(3);
        Value value = Value.parse(valueRaw);
        String pageTitle = parsePageTitleFromPageUrl(pageUrl);
        return new LineData(pageUrl, pageTitle, propertyIri, valueRaw, value);
    }

    public static String parsePageTitleFromPageUrl(String pageUrl) {
        int lastSlash = pageUrl.lastIndexOf('/');
        if (lastSlash < 0) {
            throw new LumifyException("Could not parse page title from page url: " + pageUrl);
        }
        String pageTitle = pageUrl.substring(lastSlash + 1);
        pageTitle = pageTitle.replace('_', ' ');
        return pageTitle;
    }
}
TOP

Related Classes of io.lumify.dbpedia.mapreduce.model.LineData

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.