Package io.lumify.translate

Source Code of io.lumify.translate.TranslateGraphPropertyWorker

package io.lumify.translate;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import com.google.common.io.Files;
import com.google.inject.Inject;
import io.lumify.core.exception.LumifyException;
import io.lumify.core.ingest.graphProperty.GraphPropertyWorkData;
import io.lumify.core.ingest.graphProperty.GraphPropertyWorker;
import io.lumify.core.ingest.graphProperty.GraphPropertyWorkerPrepareData;
import io.lumify.core.model.properties.LumifyProperties;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import org.apache.commons.io.IOUtils;
import org.json.JSONObject;
import org.securegraph.Element;
import org.securegraph.Property;
import org.securegraph.mutation.ExistingElementMutation;
import org.securegraph.property.StreamingPropertyValue;

import java.io.*;
import java.util.Map;

public class TranslateGraphPropertyWorker extends GraphPropertyWorker {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(TranslateGraphPropertyWorker.class);
    private Translator translator;

    @Override
    public void prepare(GraphPropertyWorkerPrepareData workerPrepareData) throws Exception {
        super.prepare(workerPrepareData);

        File profileDirectory = createTempProfileDirectory();
        DetectorFactory.loadProfile(profileDirectory);
    }

    @Override
    public void execute(InputStream in, GraphPropertyWorkData data) throws Exception {
        String text = IOUtils.toString(in, "UTF-8");
        if (text.length() < 50) {
            LOGGER.debug("Skipping language detection because the text is too short. (length: %d)", text.length());
            return;
        }

        String language;
        try {
            language = detectLanguage(text);
            if (language == null) {
                return;
            }
        } catch (Throwable ex) {
            LOGGER.warn("Could not detect language", ex);
            return;
        }

        ExistingElementMutation m = data.getElement().prepareMutation()
                .alterPropertyMetadata(data.getProperty(), LumifyProperties.META_DATA_LANGUAGE, language);

        boolean translated = false;
        String translatedTextPropertyKey = data.getProperty().getKey() + "#en";
        if (!language.equals("en") && !hasTranslatedProperty(data, translatedTextPropertyKey)) {
            LOGGER.debug("translating text of property: %s", data.getProperty().toString());
            String translatedText = translator.translate(text, language, data);
            if (translatedText != null && translatedText.length() > 0) {
                Object translatedTextValue;
                if (data.getProperty().getValue() instanceof StreamingPropertyValue) {
                    translatedTextValue = new StreamingPropertyValue(new ByteArrayInputStream(translatedText.getBytes()), String.class);
                } else {
                    translatedTextValue = translatedText;
                }
                Map<String, Object> metadata = data.createPropertyMetadata();
                metadata.put(LumifyProperties.META_DATA_LANGUAGE, "en");
                String description = (String) data.getProperty().getMetadata().get(LumifyProperties.META_DATA_TEXT_DESCRIPTION);
                if (description == null || description.length() == 0) {
                    description = "Text";
                }
                metadata.put(LumifyProperties.META_DATA_TEXT_DESCRIPTION, description + " (en)");
                metadata.put(LumifyProperties.META_DATA_MIME_TYPE, "text/plain");
                m.addPropertyValue(translatedTextPropertyKey, data.getProperty().getName(), translatedTextValue, metadata, data.getProperty().getVisibility());
                translated = true;
            }
        }

        m.save(getAuthorizations());

        if (translated) {
            getGraph().flush();
            getWorkQueueRepository().pushGraphPropertyQueue(data.getElement(), translatedTextPropertyKey, data.getProperty().getName(), data.getWorkspaceId(), data.getVisibilitySource());
        }
    }

    public boolean hasTranslatedProperty(GraphPropertyWorkData data, String translatedTextPropertyKey) {
        return data.getElement().getProperty(translatedTextPropertyKey, data.getProperty().getName()) != null;
    }

    private String detectLanguage(String text) throws LangDetectException, IOException {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        String lang = detector.detect();
        if (lang.length() == 0) {
            return null;
        }
        return lang;
    }

    @Override
    public boolean isHandled(Element element, Property property) {
        return isTextProperty(property);
    }

    public File createTempProfileDirectory() throws IOException {
        File tempDirectory = Files.createTempDir();
        tempDirectory.deleteOnExit();
        String[] filesList = getProfileFilesList();
        for (String profileFileName : filesList) {
            LOGGER.info("Copying langdetect profile file: %s", profileFileName);
            try {
                copyProfileFile(profileFileName, tempDirectory);
            } catch (Exception ex) {
                throw new LumifyException("Could not load profile file '" + profileFileName + "' to '" + tempDirectory + "'");
            }
        }
        LOGGER.info("created profile directory: %s", tempDirectory);
        return tempDirectory;
    }

    public void copyProfileFile(String profileFileName, File tempDirectory) throws IOException {
        File profileFile = new File(tempDirectory, profileFileName);
        String profileFileString = getFileAsString(profileFileName);
        new JSONObject(profileFileString).length(); // validate the json
        OutputStream profileFileOut = new FileOutputStream(profileFile);
        try {
            profileFileOut.write(profileFileString.getBytes("UTF-8"));
        } finally {
            profileFileOut.close();
        }
        profileFile.deleteOnExit();
    }

    public String getFileAsString(String profileFileName) throws IOException {
        String profileFileString;
        InputStream profileFileIn = TranslateGraphPropertyWorker.class.getResourceAsStream(profileFileName);
        try {
            profileFileString = IOUtils.toString(profileFileIn, "UTF-8");
        } finally {
            profileFileIn.close();
        }
        return profileFileString;
    }

    public String[] getProfileFilesList() throws IOException {
        String filesListContents = IOUtils.toString(TranslateGraphPropertyWorker.class.getResourceAsStream("files.list"), "UTF-8");
        return filesListContents.split("\n");
    }

    @Inject
    public void setTranslator(Translator translator) {
        this.translator = translator;
    }
}
TOP

Related Classes of io.lumify.translate.TranslateGraphPropertyWorker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.