// Package de.pdf_scrutinizer
//
// Source Code of de.pdf_scrutinizer.Scrutinizer

/*
* PDF Scrutinizer, a library for detecting and analyzing malicious PDF documents.
* Copyright 2013  Florian Schmitt <florian@florianschmitt.de>, Fraunhofer FKIE
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package de.pdf_scrutinizer;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.util.Date;
import java.util.List;

import de.pdf_scrutinizer.data.AnalysisResult;
import de.pdf_scrutinizer.data.AnalysisResult.Classification;
import de.pdf_scrutinizer.document.DocumentAdapter;
import de.pdf_scrutinizer.dynamic_heuristics.DynamicHeuristics;
import de.pdf_scrutinizer.emulation.InterpreterEmulation;
import de.pdf_scrutinizer.emulation.SimpleInterpreterEmulation;
import de.pdf_scrutinizer.emulation.StandaloneInterpreterEmulation;
import de.pdf_scrutinizer.exposures.DocumentExposureScanTask;
import de.pdf_scrutinizer.static_heuristics.StaticAnalysis;
import de.pdf_scrutinizer.utils.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class Scrutinizer {
    public enum Intensity {
        lousy, intense
    }

    private static Log log = LogFactory.getLog(Scrutinizer.class);
    private Intensity intensity = Intensity.lousy;
    private String nestedFilename = "";
    private String hash;
    private String resultFolder = "result";
    private File rootPDF;
    private File jscode;
    // private boolean nestedPDF = false;
    private AnalysisResult analysisresult;
    private Output output;
    private Benchmark benchmark = new Benchmark();
    private DocumentAdapter documentAdapter;
    private InterpreterEmulation interpreterEmulation;
    private DynamicHeuristics dynamicHeuristics = new DynamicHeuristics(this);
    private StaticAnalysis staticAnalysis = new StaticAnalysis(this);

    public Scrutinizer() {
        System.setProperty("rhino.regexp.engine", "java.util.regex");
    }

    public AnalysisResult analyze() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return AnalysisResult.error();
        }

        log.info("PDF Scrutinizer: Starting analysis");

        getBenchmark().scrutinizerStart();
        analysisresult = new AnalysisResult(this.rootPDF.getName(), hash);

        if (output == null) {
            output = new OutputToFiles(this, hash, resultFolder);
        }

        // load the pdf document using the DocumentAdapter.
        documentAdapter = new DocumentAdapter(this, this.rootPDF);

        if (documentAdapter == null || !documentAdapter.IsDocLoaded()) {
            log.error("no document loaded");
            return AnalysisResult.error();
        }

        // The document exposure scan task scans the document for
        // non-JavaScript-based attacks.
        // Since the document only need to be read by this task, it can run in a
        // separate thread.
        DocumentExposureScanTask exposureScanTask = new DocumentExposureScanTask(
                this, documentAdapter.getDocument());
        Thread documentExposureScanThread = new Thread(exposureScanTask);
        documentExposureScanThread.start();

        analysisresult.classification = Classification.benign;

        log.info("document information:" + documentAdapter.getDocInfo());

        // search for embedded files
        List<String[]> files = documentAdapter.getEmbeddedFiles();
        if (files.size() > 0) {
            for (String[] str : files) {
                if (str[0] != null && str[0].equals("application/pdf")) {
                    log.info("nested PDF found");
                    log.info("the embedded PDF will be extracted and analyzed afterwards");
                    // nestedPDF = true;
                    nestedFilename = this.rootPDF.getName() + ".nest";

                    BufferedWriter writer = null;
                    try {
                        writer = new BufferedWriter(new OutputStreamWriter(
                                new FileOutputStream(nestedFilename)));
                        writer.write(str[1]);
                    } catch (FileNotFoundException e) {
                        log.warn(e.getMessage(), e);
                    } catch (IOException e) {
                        log.warn(e.getMessage(), e);
                    } finally {
                        try {
                            if (writer != null)
                                writer.close();
                        } catch (IOException e) {
                            log.warn(e.getMessage(), e);
                        }
                    }
                }

                getOutput().saveEmbedFile(str);
            }
        }
        analysisresult.embeddedFiles = files;

        // try to find JavaScript code events and execute them, using the
        // interpreter emulation.
        List<String> codes = documentAdapter.getCodeEvents();
        if (codes.size() > 0) {
            analysisresult.codeFound = true;
            analysisresult.codes = codes;
            getOutput().saveExtractedcode(codes);
            getInterpreterEmulation().execute(codes);
        }

        // wait for document exposures scan to finish, since that thread can
        // affect the analysis result
        try {
            documentExposureScanThread.join();
        } catch (InterruptedException e) {
            log.error(
                    "error while waiting for document exposures thread to finish: "
                            + e.getMessage(), e);
        }

        // close the pdf document
        try {
            documentAdapter.getDocument().close();
        } catch (IOException e) {
            log.error("could not close document" + e.getMessage(), e);
        }

        analysisresult.analysisEnd = new Date();
        getBenchmark().scrutinizerStop();
        getOutput().saveAnalysisResult(analysisresult);

        return analysisresult;
        // FIXME: Problem: there can be multiple nested PDF's
        // if (nestedPDF) {
        // new PDF_Scrutinizer(nestedFilename, justDetect).start();
        // }
    }

    public void runJSCode() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return;
        }
        analysisresult = new AnalysisResult(rootPDF.getName(), hash);
        setInterpreterEmulation(new SimpleInterpreterEmulation(this));
        getInterpreterEmulation().execute(Input.readFile(rootPDF));
    }

    public void runJSCodeAndPDF() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return;
        }

        // load the pdf document using the DocumentAdapter.
        documentAdapter = new DocumentAdapter(this, this.rootPDF);

        if (documentAdapter == null || !documentAdapter.IsDocLoaded()) {
            log.error("no document loaded");
            return;
        }

        analysisresult = new AnalysisResult(rootPDF.getName(), hash);
        setInterpreterEmulation(new SimpleInterpreterEmulation(this));
        getInterpreterEmulation().execute(Input.readFile(jscode));
        analysisresult.analysisEnd = new Date();
    }

    public String setRootDocument(File file) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException(String.format(
                    "document '%s' does not exist", file.getName()));
        }
        this.rootPDF = file;
        this.hash = Hashes.getHashOfFile(file);
        log.info(String.format("loaded '%s' [md5=%s] ", rootPDF.getName(), hash));
        return this.hash;
    }

    public void setJSCode(File file) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException(String.format(
                    "js code '%s' does not exist", file.getName()));
        }
        this.jscode = file;
        log.info(String.format("loaded js code '%s' ", jscode.getName()));
    }

    public void setURL(URL url) {
        File downloadedfile = Input.downloadFile(url);
        log.info(String.format("downloaded %s from %s",
                downloadedfile.getName(), url));
        try {
            setRootDocument(downloadedfile);
        } catch (FileNotFoundException e) {
            log.error(e.getMessage());
        }
    }

    public Intensity getIntensity() {
        return intensity;
    }

    public void setIntensity(Intensity intensity) {
        this.intensity = intensity;
    }

    private InterpreterEmulation getInterpreterEmulation() {
        return (interpreterEmulation == null) ? new StandaloneInterpreterEmulation(
                this) : interpreterEmulation;
    }

    public void setInterpreterEmulation(
            InterpreterEmulation interpreterEmulation) {
        this.interpreterEmulation = interpreterEmulation;
    }

    public AnalysisResult getAnalysisResult() {
        return (analysisresult == null) ? AnalysisResult.error()
                : analysisresult;
    }

    public Output getOutput() {
        return (output == null) ? new OutputNull() : output;
    }

    public void setOutput(Output output) {
        this.output = output;
    }

    public Benchmark getBenchmark() {
        return benchmark;
    }

    public DocumentAdapter getDocumentAdapter() {
        return documentAdapter;
    }

    public DynamicHeuristics getDynamicHeuristics() {
        return dynamicHeuristics;
    }

    public StaticAnalysis getStaticAnalysis() {
        return staticAnalysis;
    }

    public String getHash() {
        return hash;
    }
}
// TOP
//
// Related Classes of de.pdf_scrutinizer.Scrutinizer
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.