Source Code of de.pdf_scrutinizer.Scrutinizer

/*
 * PDF Scrutinizer, a library for detecting and analyzing malicious PDF documents.
 * Copyright 2013  Florian Schmitt <florian@florianschmitt.de>, Fraunhofer FKIE
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package de.pdf_scrutinizer;


import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.util.Date;
import java.util.List;


import de.pdf_scrutinizer.data.AnalysisResult;
import de.pdf_scrutinizer.data.AnalysisResult.Classification;
import de.pdf_scrutinizer.document.DocumentAdapter;
import de.pdf_scrutinizer.dynamic_heuristics.DynamicHeuristics;
import de.pdf_scrutinizer.emulation.InterpreterEmulation;
import de.pdf_scrutinizer.emulation.SimpleInterpreterEmulation;
import de.pdf_scrutinizer.emulation.StandaloneInterpreterEmulation;
import de.pdf_scrutinizer.exposures.DocumentExposureScanTask;
import de.pdf_scrutinizer.static_heuristics.StaticAnalysis;
import de.pdf_scrutinizer.utils.*;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


public class Scrutinizer {
    public enum Intensity {
        lousy, intense
    }


    private static Log log = LogFactory.getLog(Scrutinizer.class);
    private Intensity intensity = Intensity.lousy;
    private String nestedFilename = "";
    private String hash;
    private String resultFolder = "result";
    private File rootPDF;
    private File jscode;
    // private boolean nestedPDF = false;
    private AnalysisResult analysisresult;
    private Output output;
    private Benchmark benchmark = new Benchmark();
    private DocumentAdapter documentAdapter;
    private InterpreterEmulation interpreterEmulation;
    private DynamicHeuristics dynamicHeuristics = new DynamicHeuristics(this);
    private StaticAnalysis staticAnalysis = new StaticAnalysis(this);


    public Scrutinizer() {
        System.setProperty("rhino.regexp.engine", "java.util.regex");
    }


    public AnalysisResult analyze() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return AnalysisResult.error();
        }


        log.info("PDF Scrutinizer: Starting analysis");


        getBenchmark().scrutinizerStart();
        analysisresult = new AnalysisResult(this.rootPDF.getName(), hash);


        if (output == null) {
            output = new OutputToFiles(this, hash, resultFolder);
        }


        // load the pdf document using the DocumentAdapter.
        documentAdapter = new DocumentAdapter(this, this.rootPDF);


        if (documentAdapter == null || !documentAdapter.IsDocLoaded()) {
            log.error("no document loaded");
            return AnalysisResult.error();
        }


        // The document exposure scan task scans the document for
        // non-JavaScript-based attacks.
        // Since the document only need to be read by this task, it can run in a
        // separate thread.
        DocumentExposureScanTask exposureScanTask = new DocumentExposureScanTask(
                this, documentAdapter.getDocument());
        Thread documentExposureScanThread = new Thread(exposureScanTask);
        documentExposureScanThread.start();


        analysisresult.classification = Classification.benign;


        log.info("document information:" + documentAdapter.getDocInfo());


        // search for embedded files
        List<String[]> files = documentAdapter.getEmbeddedFiles();
        if (files.size() > 0) {
            for (String[] str : files) {
                if (str[0] != null && str[0].equals("application/pdf")) {
                    log.info("nested PDF found");
                    log.info("the embedded PDF will be extracted and analyzed afterwards");
                    // nestedPDF = true;
                    nestedFilename = this.rootPDF.getName() + ".nest";


                    BufferedWriter writer = null;
                    try {
                        writer = new BufferedWriter(new OutputStreamWriter(
                                new FileOutputStream(nestedFilename)));
                        writer.write(str[1]);
                    } catch (FileNotFoundException e) {
                        log.warn(e.getMessage(), e);
                    } catch (IOException e) {
                        log.warn(e.getMessage(), e);
                    } finally {
                        try {
                            if (writer != null)
                                writer.close();
                        } catch (IOException e) {
                            log.warn(e.getMessage(), e);
                        }
                    }
                }


                getOutput().saveEmbedFile(str);
            }
        }
        analysisresult.embeddedFiles = files;


        // try to find JavaScript code events and execute them, using the
        // interpreter emulation.
        List<String> codes = documentAdapter.getCodeEvents();
        if (codes.size() > 0) {
            analysisresult.codeFound = true;
            analysisresult.codes = codes;
            getOutput().saveExtractedcode(codes);
            getInterpreterEmulation().execute(codes);
        }


        // wait for document exposures scan to finish, since that thread can
        // affect the analysis result
        try {
            documentExposureScanThread.join();
        } catch (InterruptedException e) {
            log.error(
                    "error while waiting for document exposures thread to finish: "
                            + e.getMessage(), e);
        }


        // close the pdf document
        try {
            documentAdapter.getDocument().close();
        } catch (IOException e) {
            log.error("could not close document" + e.getMessage(), e);
        }


        analysisresult.analysisEnd = new Date();
        getBenchmark().scrutinizerStop();
        getOutput().saveAnalysisResult(analysisresult);


        return analysisresult;
        // FIXME: Problem: there can be multiple nested PDF's
        // if (nestedPDF) {
        // new PDF_Scrutinizer(nestedFilename, justDetect).start();
        // }
    }


    public void runJSCode() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return;
        }
        analysisresult = new AnalysisResult(rootPDF.getName(), hash);
        setInterpreterEmulation(new SimpleInterpreterEmulation(this));
        getInterpreterEmulation().execute(Input.readFile(rootPDF));
    }


    public void runJSCodeAndPDF() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return;
        }


        // load the pdf document using the DocumentAdapter.
        documentAdapter = new DocumentAdapter(this, this.rootPDF);


        if (documentAdapter == null || !documentAdapter.IsDocLoaded()) {
            log.error("no document loaded");
            return;
        }


        analysisresult = new AnalysisResult(rootPDF.getName(), hash);
        setInterpreterEmulation(new SimpleInterpreterEmulation(this));
        getInterpreterEmulation().execute(Input.readFile(jscode));
        analysisresult.analysisEnd = new Date();
    }


    public String setRootDocument(File file) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException(String.format(
                    "document '%s' does not exist", file.getName()));
        }
        this.rootPDF = file;
        this.hash = Hashes.getHashOfFile(file);
        log.info(String.format("loaded '%s' [md5=%s] ", rootPDF.getName(), hash));
        return this.hash;
    }


    public void setJSCode(File file) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException(String.format(
                    "js code '%s' does not exist", file.getName()));
        }
        this.jscode = file;
        log.info(String.format("loaded js code '%s' ", jscode.getName()));
    }


    public void setURL(URL url) {
        File downloadedfile = Input.downloadFile(url);
        log.info(String.format("downloaded %s from %s",
                downloadedfile.getName(), url));
        try {
            setRootDocument(downloadedfile);
        } catch (FileNotFoundException e) {
            log.error(e.getMessage());
        }
    }


    public Intensity getIntensity() {
        return intensity;
    }


    public void setIntensity(Intensity intensity) {
        this.intensity = intensity;
    }


    private InterpreterEmulation getInterpreterEmulation() {
        return (interpreterEmulation == null) ? new StandaloneInterpreterEmulation(
                this) : interpreterEmulation;
    }


    public void setInterpreterEmulation(
            InterpreterEmulation interpreterEmulation) {
        this.interpreterEmulation = interpreterEmulation;
    }


    public AnalysisResult getAnalysisResult() {
        return (analysisresult == null) ? AnalysisResult.error()
                : analysisresult;
    }


    public Output getOutput() {
        return (output == null) ? new OutputNull() : output;
    }


    public void setOutput(Output output) {
        this.output = output;
    }


    public Benchmark getBenchmark() {
        return benchmark;
    }


    public DocumentAdapter getDocumentAdapter() {
        return documentAdapter;
    }


    public DynamicHeuristics getDynamicHeuristics() {
        return dynamicHeuristics;
    }


    public StaticAnalysis getStaticAnalysis() {
        return staticAnalysis;
    }


    public String getHash() {
        return hash;
    }
}
Source Code of de.pdf_scrutinizer.Scrutinizer

Related Classes of de.pdf_scrutinizer.Scrutinizer