/*
* PDF Scrutinizer, a library for detecting and analyzing malicious PDF documents.
* Copyright 2013 Florian Schmitt <florian@florianschmitt.de>, Fraunhofer FKIE
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.pdf_scrutinizer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.util.Date;
import java.util.List;
import de.pdf_scrutinizer.data.AnalysisResult;
import de.pdf_scrutinizer.data.AnalysisResult.Classification;
import de.pdf_scrutinizer.document.DocumentAdapter;
import de.pdf_scrutinizer.dynamic_heuristics.DynamicHeuristics;
import de.pdf_scrutinizer.emulation.InterpreterEmulation;
import de.pdf_scrutinizer.emulation.SimpleInterpreterEmulation;
import de.pdf_scrutinizer.emulation.StandaloneInterpreterEmulation;
import de.pdf_scrutinizer.exposures.DocumentExposureScanTask;
import de.pdf_scrutinizer.static_heuristics.StaticAnalysis;
import de.pdf_scrutinizer.utils.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Central coordinator of a PDF Scrutinizer run.
 *
 * <p>An instance is configured with a root document ({@link #setRootDocument(File)} or
 * {@link #setURL(URL)}) and optionally a JavaScript file ({@link #setJSCode(File)}), and
 * is then driven through one of the entry points {@link #analyze()}, {@link #runJSCode()}
 * or {@link #runJSCodeAndPDF()}. The collaborating components (output, benchmark,
 * document adapter, heuristics) are reachable through the getters of this class.
 *
 * <p>The class keeps mutable per-run state in its fields and performs no synchronization
 * of its own; NOTE(review): it looks intended for one analysis at a time per instance.
 */
public class Scrutinizer {
    /** Analysis intensity selectable by the caller; this class only stores the value. */
    public enum Intensity {
        lousy, intense
    }

    private static final Log log = LogFactory.getLog(Scrutinizer.class);

    private Intensity intensity = Intensity.lousy;
    // Name of the most recently extracted nested PDF; written by analyze(), not read here.
    private String nestedFilename = "";
    private String hash;
    private String resultFolder = "result";
    private File rootPDF;
    private File jscode;
    private AnalysisResult analysisresult;
    private Output output;
    private final Benchmark benchmark = new Benchmark();
    private DocumentAdapter documentAdapter;
    private InterpreterEmulation interpreterEmulation;
    private final DynamicHeuristics dynamicHeuristics = new DynamicHeuristics(this);
    private final StaticAnalysis staticAnalysis = new StaticAnalysis(this);

    /**
     * Creates a scrutinizer and sets the {@code rhino.regexp.engine} system property to
     * {@code java.util.regex}. Note that this is a JVM-wide setting.
     */
    public Scrutinizer() {
        System.setProperty("rhino.regexp.engine", "java.util.regex");
    }

    /**
     * Runs the full analysis of the root document: starts the document exposure scan in a
     * separate thread, extracts embedded files, executes the JavaScript code events found
     * in the document, and stores the result through the configured {@link Output}.
     *
     * <p>If no {@link Output} was set, an {@link OutputToFiles} writing to the result
     * folder is installed. The exposure scan thread is always joined and the document is
     * always closed before this method returns or propagates an exception.
     *
     * @return the analysis result, or {@link AnalysisResult#error()} when no root document
     *         was set or the document could not be loaded
     */
    public AnalysisResult analyze() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return AnalysisResult.error();
        }

        log.info("PDF Scrutinizer: Starting analysis");
        getBenchmark().scrutinizerStart();

        analysisresult = new AnalysisResult(this.rootPDF.getName(), hash);
        if (output == null) {
            output = new OutputToFiles(this, hash, resultFolder);
        }

        // load the pdf document using the DocumentAdapter.
        documentAdapter = new DocumentAdapter(this, this.rootPDF);
        if (!documentAdapter.IsDocLoaded()) {
            log.error("no document loaded");
            return AnalysisResult.error();
        }

        // The document exposure scan task scans the document for
        // non-JavaScript-based attacks. Since the document only needs to be
        // read by this task, it can run in a separate thread.
        DocumentExposureScanTask exposureScanTask = new DocumentExposureScanTask(
                this, documentAdapter.getDocument());
        Thread documentExposureScanThread = new Thread(exposureScanTask);

        // Establish the baseline verdict BEFORE the scan thread starts: that thread
        // can affect the analysis result, so resetting the classification after
        // start() could overwrite a verdict it has already recorded.
        analysisresult.classification = Classification.benign;
        documentExposureScanThread.start();

        try {
            log.info("document information:" + documentAdapter.getDocInfo());

            // search for embedded files
            List<String[]> files = documentAdapter.getEmbeddedFiles();
            for (String[] embedded : files) {
                if ("application/pdf".equals(embedded[0])) {
                    extractNestedPdf(embedded);
                }
                getOutput().saveEmbedFile(embedded);
            }
            analysisresult.embeddedFiles = files;

            // try to find JavaScript code events and execute them, using the
            // interpreter emulation.
            List<String> codes = documentAdapter.getCodeEvents();
            if (!codes.isEmpty()) {
                analysisresult.codeFound = true;
                analysisresult.codes = codes;
                getOutput().saveExtractedcode(codes);
                getInterpreterEmulation().execute(codes);
            }
        } finally {
            // wait for document exposures scan to finish, since that thread can
            // affect the analysis result; only then is it safe to close the document.
            awaitExposureScan(documentExposureScanThread);
            closeDocument();
        }

        analysisresult.analysisEnd = new Date();
        getBenchmark().scrutinizerStop();
        getOutput().saveAnalysisResult(analysisresult);

        // TODO: nested PDFs are only extracted, not analyzed. A document can contain
        // several nested PDFs, so a single follow-up analysis would not be sufficient.
        return analysisresult;
    }

    /**
     * Writes the content of an embedded PDF to {@code <root document name>.nest} in the
     * current working directory and remembers that name in {@code nestedFilename}.
     * I/O failures are logged as warnings and do not abort the analysis.
     *
     * @param embedded embedded file entry; index 0 is compared against the MIME type by
     *                 the caller, index 1 is written out as the file content
     */
    private void extractNestedPdf(String[] embedded) {
        log.info("nested PDF found");
        log.info("the embedded PDF will be extracted and analyzed afterwards");
        nestedFilename = this.rootPDF.getName() + ".nest";

        BufferedWriter writer = null;
        try {
            // NOTE(review): the content is written through the platform default charset;
            // confirm how DocumentAdapter decodes embedded files before relying on the
            // extracted file being byte-identical to the embedded stream.
            writer = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(nestedFilename)));
            writer.write(embedded[1]);
        } catch (IOException e) {
            // also covers FileNotFoundException, which is an IOException
            log.warn(e.getMessage(), e);
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (IOException e) {
                log.warn(e.getMessage(), e);
            }
        }
    }

    /**
     * Blocks until the exposure scan thread has finished. If the waiting thread is
     * interrupted, the failure is logged and the interrupt status is restored so that
     * callers further up the stack can still observe it.
     */
    private void awaitExposureScan(Thread scanThread) {
        try {
            scanThread.join();
        } catch (InterruptedException e) {
            log.error(
                    "error while waiting for document exposures thread to finish: "
                            + e.getMessage(), e);
            Thread.currentThread().interrupt();
        }
    }

    /** Closes the document held by the current document adapter, logging any failure. */
    private void closeDocument() {
        try {
            documentAdapter.getDocument().close();
        } catch (IOException e) {
            log.error("could not close document: " + e.getMessage(), e);
        }
    }

    /**
     * Treats the root document as a JavaScript file and executes its content with a
     * {@link SimpleInterpreterEmulation}. Does nothing but log an error when no root
     * document was set.
     */
    public void runJSCode() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return;
        }

        analysisresult = new AnalysisResult(rootPDF.getName(), hash);
        setInterpreterEmulation(new SimpleInterpreterEmulation(this));
        getInterpreterEmulation().execute(Input.readFile(rootPDF));
        // record the end time like the other entry points do
        analysisresult.analysisEnd = new Date();
    }

    /**
     * Loads the root document and executes the separately supplied JavaScript file
     * (see {@link #setJSCode(File)}) with a {@link SimpleInterpreterEmulation}.
     * Logs an error and returns when the root document or the JavaScript file is
     * missing, or when the document cannot be loaded. The document is closed once the
     * code has been executed.
     */
    public void runJSCodeAndPDF() {
        if (rootPDF == null) {
            log.error("no document loaded");
            return;
        }
        if (jscode == null) {
            log.error("no js code loaded");
            return;
        }

        // load the pdf document using the DocumentAdapter.
        documentAdapter = new DocumentAdapter(this, this.rootPDF);
        if (!documentAdapter.IsDocLoaded()) {
            log.error("no document loaded");
            return;
        }

        analysisresult = new AnalysisResult(rootPDF.getName(), hash);
        setInterpreterEmulation(new SimpleInterpreterEmulation(this));
        try {
            getInterpreterEmulation().execute(Input.readFile(jscode));
        } finally {
            // mirror analyze(): do not leave the document open after the run
            closeDocument();
        }
        analysisresult.analysisEnd = new Date();
    }

    /**
     * Sets the document to analyze and computes its hash.
     *
     * @param file the document; must exist
     * @return the hash computed by {@code Hashes.getHashOfFile}
     * @throws FileNotFoundException if {@code file} does not exist
     */
    public String setRootDocument(File file) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException(String.format(
                    "document '%s' does not exist", file.getName()));
        }

        this.rootPDF = file;
        this.hash = Hashes.getHashOfFile(file);
        log.info(String.format("loaded '%s' [md5=%s] ", rootPDF.getName(), hash));
        return this.hash;
    }

    /**
     * Sets the JavaScript file used by {@link #runJSCodeAndPDF()}.
     *
     * @param file the JavaScript file; must exist
     * @throws FileNotFoundException if {@code file} does not exist
     */
    public void setJSCode(File file) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException(String.format(
                    "js code '%s' does not exist", file.getName()));
        }

        this.jscode = file;
        log.info(String.format("loaded js code '%s' ", jscode.getName()));
    }

    /**
     * Downloads the document behind {@code url} and installs it as the root document.
     * Failures are logged; no exception is propagated for a missing download.
     *
     * @param url location of the document to download
     */
    public void setURL(URL url) {
        File downloadedfile = Input.downloadFile(url);
        if (downloadedfile == null) {
            // defensive: avoid a NullPointerException should the download yield no file
            log.error(String.format("could not download document from %s", url));
            return;
        }

        log.info(String.format("downloaded %s from %s",
                downloadedfile.getName(), url));
        try {
            setRootDocument(downloadedfile);
        } catch (FileNotFoundException e) {
            log.error(e.getMessage(), e);
        }
    }

    /** @return the configured analysis intensity */
    public Intensity getIntensity() {
        return intensity;
    }

    /** @param intensity the analysis intensity to use */
    public void setIntensity(Intensity intensity) {
        this.intensity = intensity;
    }

    /**
     * Returns the configured interpreter emulation, or a fresh
     * {@link StandaloneInterpreterEmulation} when none was set. The fallback instance is
     * not stored, so every call without a configured emulation creates a new one.
     */
    private InterpreterEmulation getInterpreterEmulation() {
        if (interpreterEmulation != null) {
            return interpreterEmulation;
        }
        return new StandaloneInterpreterEmulation(this);
    }

    /** @param interpreterEmulation the emulation used to execute JavaScript code */
    public void setInterpreterEmulation(
            InterpreterEmulation interpreterEmulation) {
        this.interpreterEmulation = interpreterEmulation;
    }

    /**
     * @return the result of the current or last run, or {@link AnalysisResult#error()}
     *         when no run has created a result yet
     */
    public AnalysisResult getAnalysisResult() {
        if (analysisresult == null) {
            return AnalysisResult.error();
        }
        return analysisresult;
    }

    /**
     * @return the configured output, or a new {@link OutputNull} when none is set, so
     *         callers never receive {@code null}
     */
    public Output getOutput() {
        if (output == null) {
            return new OutputNull();
        }
        return output;
    }

    /** @param output the output that receives extracted data and results */
    public void setOutput(Output output) {
        this.output = output;
    }

    /** @return the benchmark instance owned by this scrutinizer */
    public Benchmark getBenchmark() {
        return benchmark;
    }

    /**
     * @return the adapter of the most recently loaded document, or {@code null} when no
     *         document has been loaded yet
     */
    public DocumentAdapter getDocumentAdapter() {
        return documentAdapter;
    }

    /** @return the dynamic heuristics bound to this scrutinizer */
    public DynamicHeuristics getDynamicHeuristics() {
        return dynamicHeuristics;
    }

    /** @return the static analysis bound to this scrutinizer */
    public StaticAnalysis getStaticAnalysis() {
        return staticAnalysis;
    }

    /** @return the hash of the root document, or {@code null} when none was set */
    public String getHash() {
        return hash;
    }
}