Package io.lumify.gdelt

Source Code of io.lumify.gdelt.GDELTMapper

package io.lumify.gdelt;

import com.altamiracorp.bigtable.model.accumulo.AccumuloSession;
import io.lumify.core.config.Configuration;
import io.lumify.core.config.HashMapConfigurationLoader;
import io.lumify.core.model.audit.Audit;
import io.lumify.core.model.audit.AuditAction;
import io.lumify.core.model.properties.types.LumifyProperty;
import io.lumify.core.model.user.UserRepository;
import io.lumify.core.user.SystemUser;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import io.lumify.core.version.VersionService;
import io.lumify.securegraph.model.audit.SecureGraphAuditRepository;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.securegraph.*;
import org.securegraph.accumulo.AccumuloAuthorizations;
import org.securegraph.accumulo.AccumuloGraph;
import org.securegraph.accumulo.mapreduce.ElementMapper;
import org.securegraph.accumulo.mapreduce.SecureGraphMRUtils;
import org.securegraph.id.IdGenerator;
import org.securegraph.type.GeoPoint;
import org.securegraph.util.MapUtils;

import java.io.IOException;
import java.text.ParseException;
import java.util.Map;

public class GDELTMapper extends ElementMapper<LongWritable, Text, Text, Mutation> {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(GDELTMapper.class);

    private GDELTParser parser;
    private AccumuloGraph graph;
    private Visibility visibility;
    private AccumuloAuthorizations authorizations;
    private SystemUser user;
    private SecureGraphAuditRepository auditRepository;
    private UserRepository userRepository;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Map configurationMap = SecureGraphMRUtils.toMap(context.getConfiguration());
        this.parser = new GDELTParser();
        this.graph = (AccumuloGraph) new GraphFactory().createGraph(MapUtils.getAllWithPrefix(configurationMap, "graph"));
        this.visibility = new Visibility("");
        this.authorizations = new AccumuloAuthorizations();
        this.user = new SystemUser(null);
        VersionService versionService = new VersionService();
        Configuration configuration = new HashMapConfigurationLoader(configurationMap).createConfiguration();
        this.auditRepository = new SecureGraphAuditRepository(null, versionService, configuration, null, userRepository);
    }

    @Override
    protected void map(LongWritable filePosition, Text line, Context context) {
        try {
            safeMap(filePosition, line, context);
        } catch (Exception ex) {
            context.getCounter(GDELTImportCounters.ERRORS).increment(1);
            LOGGER.error("failed mapping " + filePosition, ex);
        }
    }

    private void safeMap(LongWritable filePosition, Text line, Context context) throws IOException, InterruptedException, ParseException {
        GDELTEvent event = parser.parseLine(line.toString());
        Vertex eventVertex = importEvent(context, event);
        importActor1(context, event, eventVertex);
        importActor2(context, event, eventVertex);
    }

    private Vertex importEvent(Context context, GDELTEvent event) throws IOException, InterruptedException {
        context.getCounter(GDELTImportCounters.EVENTS_ATTEMPTED).increment(1);

        // event vertex
        VertexBuilder eventVertexBuilder = prepareVertex(generateEventId(event), visibility);
        GDELTProperties.CONCEPT_TYPE.setProperty(eventVertexBuilder, GDELTConstants.EVENT_CONCEPT_URI, visibility);
        GDELTProperties.GLOBAL_EVENT_ID.setProperty(eventVertexBuilder, event.getGlobalEventId(), visibility);
        GDELTProperties.EVENT_DATE_OF_OCCURRENCE.setProperty(eventVertexBuilder, event.getDateOfOccurrence(), visibility);
        GDELTProperties.EVENT_CODE.setProperty(eventVertexBuilder, event.getEventCode(), visibility);
        setOptionalProperty(GDELTProperties.EVENT_IS_ROOT_EVENT, eventVertexBuilder, event.isRootEvent());
        setOptionalProperty(GDELTProperties.EVENT_BASE_CODE, eventVertexBuilder, event.getEventBaseCode());
        setOptionalProperty(GDELTProperties.EVENT_ROOT_CODE, eventVertexBuilder, event.getEventRootCode());
        setOptionalProperty(GDELTProperties.EVENT_QUAD_CLASS, eventVertexBuilder, event.getQuadClass());
        setOptionalProperty(GDELTProperties.EVENT_GOLDSTEIN_SCALE, eventVertexBuilder, event.getGoldsteinScale());
        setOptionalProperty(GDELTProperties.EVENT_NUM_MENTIONS, eventVertexBuilder, event.getNumMentions());
        setOptionalProperty(GDELTProperties.EVENT_NUM_SOURCES, eventVertexBuilder, event.getNumSources());
        setOptionalProperty(GDELTProperties.EVENT_NUM_ARTICLES, eventVertexBuilder, event.getNumArticles());
        setOptionalProperty(GDELTProperties.EVENT_AVG_TONE, eventVertexBuilder, event.getAverageTone());
        setOptionalProperty(GDELTProperties.EVENT_DATE_ADDED, eventVertexBuilder, event.getDateAdded());
        setOptionalProperty(GDELTProperties.EVENT_SOURCE_URL, eventVertexBuilder, event.getSourceUrl());

        setGeolocationProperty(event, eventVertexBuilder);
        Vertex eventVertex = eventVertexBuilder.save(authorizations);

        // audit event
        Text eventKey = new Text(Audit.TABLE_NAME);
        Audit auditEvent = auditRepository.createAudit(AuditAction.CREATE, eventVertex.getId(), "GDELT MR", "", user, visibility);
        context.write(eventKey, AccumuloSession.createMutationFromRow(auditEvent));

        context.getCounter(GDELTImportCounters.EVENTS_PROCESSED).increment(1);
        return eventVertex;
    }

    private void setGeolocationProperty(GDELTEvent event, VertexBuilder eventVertexBuilder) {
        if (event.getActionGeoFullName() != null) {
            GeoPoint point = new GeoPoint(event.getActionGeoLatitude(), event.getActionGeoLongitude(), event.getActionGeoFullName());
            GDELTProperties.EVENT_GEOLOCATION.setProperty(eventVertexBuilder, point, visibility);
        }
    }

    private void importActor1(Context context, GDELTEvent event, Vertex eventVertex) throws IOException, InterruptedException {
        GDELTActor actor = event.getActor1();

        if (actor.getCode() == null) {
            context.getCounter(GDELTImportCounters.ACTORS_SKIPPED).increment(1);
            return;
        }

        // actor 1 vertex
        Vertex actorVertex = importActor(actor, context);

        // audit actor 1
        Text key = new Text(Audit.TABLE_NAME);
        Audit auditActor = auditRepository.createAudit(AuditAction.CREATE, actorVertex.getId(), "GDELT MR", "", user, visibility);
        context.write(key, AccumuloSession.createMutationFromRow(auditActor));

        context.getCounter(GDELTImportCounters.EDGES_ATTEMPTED).increment(1);

        // actor 1 to event edge
        EdgeBuilder actor1EdgeBuilder = prepareEdge(generateActor1ToEventEdgeId(actorVertex, eventVertex), actorVertex, eventVertex, GDELTProperties.ACTOR1_TO_EVENT_EDGE, visibility);
        Edge actor1ToEventEdge = actor1EdgeBuilder.save(authorizations);

        // audit actor 1 to event edge
        Text actor1ToEventEdgeKey = new Text(Audit.TABLE_NAME);
        Audit auditActor1ToEventEdge = auditRepository.createAudit(AuditAction.CREATE, actor1ToEventEdge.getId(), "GDELT MR", "", user, visibility);
        context.write(actor1ToEventEdgeKey, AccumuloSession.createMutationFromRow(auditActor1ToEventEdge));

        context.getCounter(GDELTImportCounters.EDGES_PROCESSED).increment(1);
    }

    private void importActor2(Context context, GDELTEvent event, Vertex eventVertex) throws IOException, InterruptedException {
        GDELTActor actor = event.getActor2();

        if (actor.getCode() == null) {
            context.getCounter(GDELTImportCounters.ACTORS_SKIPPED).increment(1);
            return;
        }

        // actor 2
        Vertex actorVertex = importActor(actor, context);

        // audit actor 2
        Text key = new Text(Audit.TABLE_NAME);
        Audit auditActor = auditRepository.createAudit(AuditAction.CREATE, actorVertex.getId(), "GDELT MR", "", user, visibility);
        context.write(key, AccumuloSession.createMutationFromRow(auditActor));

        context.getCounter(GDELTImportCounters.EDGES_ATTEMPTED).increment(1);

        // event to actor 2 edge
        EdgeBuilder eventToActor2EdgeBuilder = prepareEdge(generateEventToActor2EdgeId(actorVertex, eventVertex), eventVertex, actorVertex, GDELTProperties.EVENT_TO_ACTOR2_EDGE, visibility);
        Edge eventToActor2Edge = eventToActor2EdgeBuilder.save(authorizations);

        // audit event to actor 2 edge
        Text eventToActor2EdgeKey = new Text(Audit.TABLE_NAME);
        Audit auditEventToActor2Edge = auditRepository.createAudit(AuditAction.CREATE, eventToActor2Edge.getId(), "GDELT MR", "", user, visibility);
        context.write(eventToActor2EdgeKey, AccumuloSession.createMutationFromRow(auditEventToActor2Edge));

        context.getCounter(GDELTImportCounters.EDGES_PROCESSED).increment(1);
    }

    private Vertex importActor(GDELTActor actor, Context context) {
        context.getCounter(GDELTImportCounters.ACTORS_ATTEMPTED).increment(1);

        VertexBuilder vertexBuilder = prepareVertex(generateActorId(actor), visibility);
        GDELTProperties.CONCEPT_TYPE.setProperty(vertexBuilder, GDELTConstants.ACTOR_CONCEPT_URI, visibility);
        GDELTProperties.ACTOR_CODE.setProperty(vertexBuilder, actor.getCode(), visibility);
        setOptionalProperty(GDELTProperties.ACTOR_NAME, vertexBuilder, actor.getName());
        setOptionalProperty(GDELTProperties.ACTOR_COUNTRY_CODE, vertexBuilder, actor.getCountryCode());
        setOptionalProperty(GDELTProperties.ACTOR_KNOWN_GROUP_CODE, vertexBuilder, actor.getKnownGroupCode());
        setOptionalProperty(GDELTProperties.ACTOR_ETHNIC_CODE, vertexBuilder, actor.getEthnicCode());
        addOptionPropertyValue(GDELTProperties.ACTOR_RELIGION_CODE, vertexBuilder, "1", actor.getReligion1Code());
        addOptionPropertyValue(GDELTProperties.ACTOR_RELIGION_CODE, vertexBuilder, "2", actor.getReligion2Code());
        addOptionPropertyValue(GDELTProperties.ACTOR_TYPE_CODE, vertexBuilder, "1", actor.getType1Code());
        addOptionPropertyValue(GDELTProperties.ACTOR_TYPE_CODE, vertexBuilder, "2", actor.getType2Code());
        addOptionPropertyValue(GDELTProperties.ACTOR_TYPE_CODE, vertexBuilder, "3", actor.getType3Code());

        context.getCounter(GDELTImportCounters.ACTORS_PROCESSED).increment(1);

        return vertexBuilder.save(authorizations);
    }

    private void setOptionalProperty(LumifyProperty property, VertexBuilder vertexBuilder, Object propertyValue) {
        if (propertyValue != null) {
            property.setProperty(vertexBuilder, propertyValue, visibility);
        }
    }

    private void addOptionPropertyValue(LumifyProperty property, VertexBuilder vertexBuilder, String propertyKey, Object propertyValue) {
        if (propertyValue != null) {
            property.addPropertyValue(vertexBuilder, propertyKey, propertyValue, visibility);
        }
    }

    private String generateEventId(GDELTEvent event) {
        return "GDELTEvent_" + event.getGlobalEventId();
    }

    private String generateActorId(GDELTActor actor) {
        return "GDELTActor_" + actor.getId();
    }


    private String generateActor1ToEventEdgeId(Vertex actor1Vertex, Vertex eventVertex) {
        return "GDELTActor1ToEvent_" + actor1Vertex.getId() + eventVertex.getId();
    }

    private String generateEventToActor2EdgeId(Vertex actor2Vertex, Vertex eventVertex) {
        return "GDELTEventToActor2_" + eventVertex.getId() + actor2Vertex.getId();
    }

    @Override
    protected void saveDataMutation(Context context, Text tableName, Mutation mutation) throws IOException, InterruptedException {
        context.write(tableName, mutation);
    }

    @Override
    protected void saveEdgeMutation(Context context, Text tableName, Mutation mutation) throws IOException, InterruptedException {
        context.write(tableName, mutation);
    }

    @Override
    protected void saveVertexMutation(Context context, Text tableName, Mutation mutation) throws IOException, InterruptedException {
        context.write(tableName, mutation);
    }

    @Override
    public IdGenerator getIdGenerator() {
        return this.graph.getIdGenerator();
    }
}
TOP

Related Classes of io.lumify.gdelt.GDELTMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.