Package org.jakstab

Source Code of org.jakstab.Program

/*
* Program.java - This file is part of the Jakstab project.
* Copyright 2007-2012 Johannes Kinder <jk@jakstab.org>
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, see <http://www.gnu.org/licenses/>.
*/

package org.jakstab;

import java.io.File;
import java.io.IOException;
import java.util.*;

import org.jakstab.util.Logger;
import org.jakstab.asm.*;
import org.jakstab.cfa.CFAEdge;
import org.jakstab.cfa.Location;
import org.jakstab.disasm.DisassemblyException;
import org.jakstab.loader.*;
import org.jakstab.loader.elf.ELFModule;
import org.jakstab.loader.pe.*;
import org.jakstab.rtl.expressions.SetOfVariables;
import org.jakstab.rtl.statements.RTLHalt;
import org.jakstab.rtl.statements.RTLStatement;
import org.jakstab.rtl.statements.StatementSequence;
import org.jakstab.ssl.Architecture;
import org.jakstab.util.FastSet;


/**
* There is one singleton Program object for all modules currently under analysis.
* It stores all non-analysis information about the analyzed programs, including
* statements, the current control flow graph, and symbols.
*
* @author Johannes Kinder
*/
public final class Program {
 
  @SuppressWarnings("unused")
  private final static Logger logger = Logger.getLogger(Program.class);
  private static Program programInstance;

  /**
   * Get the singleton Program object.
   *
   * @return the singleton instance of the Program class
   */
  public static Program getProgram() {
    return programInstance;
  }
 
  /**
   * Initially creates the Program object.
   * 
   * @param arch An Architecture object with architecture specific information
   * @return the new singleton instance of the Program class
   */
  public static Program createProgram(Architecture arch) {
    //assert programInstance == null;
    programInstance = new Program(arch);
    return programInstance;
  }

  private final Architecture arch;
  private Location start;
  private Map<Location, RTLStatement> statementMap;
  private Map<AbsoluteAddress, Instruction> assemblyMap;
  private ExecutableImage mainModule;
  private List<ExecutableImage> modules;
  private Set<CFAEdge> cfa;
  private final Map<String, ExportedSymbol> exportedSymbols;
  private final Set<UnresolvedSymbol> unresolvedSymbols;
  private Set<Location> unresolvedBranches;
  private StubProvider stubLibrary;
  private Harness harness;
 
  public enum TargetOS {WINDOWS, LINUX, UNKNOWN};
  private TargetOS targetOS;
 
  private Program(Architecture arch) {
    this.arch = arch;
    this.targetOS = TargetOS.UNKNOWN;

    modules = new LinkedList<ExecutableImage>();
    assemblyMap = new TreeMap<AbsoluteAddress, Instruction>();
    statementMap = new HashMap<Location, RTLStatement>(2000);
    cfa = new FastSet<CFAEdge>();
    exportedSymbols = new HashMap<String, ExportedSymbol>();
    unresolvedSymbols = new FastSet<UnresolvedSymbol>();
   
    unresolvedBranches = new FastSet<Location>();
  }
 
  /**
   * Loads the module containing the main function. This function should be called last
   * for correct symbol resolution.
   *
   * @param moduleFile the file to load
   * @return the ExecutableImage class for the loaded module
   * @throws IOException
   * @throws BinaryParseException
   */
  public ExecutableImage loadMainModule(File moduleFile) throws IOException, BinaryParseException {
    ExecutableImage module = loadModule(moduleFile);
    mainModule = module;
    setEntryAddress(module.getEntryPoint());
    installStubs();
    return module;
  }
 
  /**
   * Loads a secondary (library or stub) module for analysis. Automatically determines
   * the correct file type.
   * 
   * @param moduleFile the file to load
   * @return the ExecutableImage class for the loaded module
   * @throws IOException
   * @throws BinaryParseException
   */
  public ExecutableImage loadModule(File moduleFile) throws IOException, BinaryParseException {
    // First try to load it as a PE file, then object file, ELF and finally raw binary code
    // The right thing to do would be some smart IDing of the file type, but
    // this exception chaining works for now...   
    ExecutableImage module = null;
    try {
      module = new PEModule(moduleFile, getArchitecture());
      targetOS = TargetOS.WINDOWS;
    } catch (BinaryParseException e) {
      try {
        module = new ObjectFile(moduleFile, getArchitecture());
      } catch (BinaryParseException e2) {
        try {
          module = new ELFModule(moduleFile, getArchitecture());
          targetOS = TargetOS.LINUX;
        } catch (BinaryParseException e3) {
          module = new RawModule(moduleFile, getArchitecture());
        }
      }
    }
   
    for (ExecutableImage existingModule : modules) {
      if (existingModule.getMaxAddress().getValue() >= module.getMinAddress().getValue() &&
          existingModule.getMinAddress().getValue() <= module.getMaxAddress().getValue()) {
        throw new RuntimeException("Virtual addresses of modules overlap!");
      }
    }
   
   
    modules.add(module);
    unresolvedSymbols.addAll(module.getUnresolvedSymbols());
    for (ExportedSymbol symbol : module.getExportedSymbols()) {
      exportedSymbols.put(removeDecoration(symbol.getName()), symbol);
    }
    resolveSymbols();
    return module;
  }
 
  private String removeDecoration(String s) {
    if (s.charAt(0) == '@' || s.charAt(0) == '_')
      s = s.substring(1);
    int i = s.indexOf('@');
    if (i >= 0)
      s = s.substring(0, i);
    return s;
  }
 
  /**
   * Resolves symbols between the loaded modules.
   */
  private void resolveSymbols() {
    Iterator<UnresolvedSymbol> sIter = unresolvedSymbols.iterator();
    while (sIter.hasNext()) {
      UnresolvedSymbol unresolvedSymbol = sIter.next();
      ExportedSymbol symbol = exportedSymbols.get(removeDecoration(unresolvedSymbol.getName()));
     
      if (symbol != null) {
        logger.debug("Resolving symbol " + unresolvedSymbol.getName());
        unresolvedSymbol.resolve(symbol.getAddress());
        sIter.remove();
      }
    }
  }
 
  /**
   * Returns the address of the given procedure within the given library. Procedures
   * present within the analyzed modules are given precedence over stub functions.
   *
   * @param library
   * @param procedure
   * @return the virtual address of the procedure
   */
  public AbsoluteAddress getProcAddress(String library, String procedure) {
    ExportedSymbol expSymbol = exportedSymbols.get(procedure);
    if (expSymbol != null) {
      return expSymbol.getAddress();
    } else {
      return stubLibrary.resolveSymbol(library, procedure);
    }
  }
 
  /**
   * For all unresolved symbols, install simple stubs.
   */
  public void installStubs() {
    if (mainModule instanceof AbstractCOFFModule) {
      stubLibrary = new Win32StubLibrary(arch);
    } else if (mainModule instanceof ELFModule){
      stubLibrary = new LinuxStubLibrary(arch);
    }
   
    Iterator<UnresolvedSymbol> sIter = unresolvedSymbols.iterator();
    while (sIter.hasNext()) {
      UnresolvedSymbol unresolvedSymbol = sIter.next();
      AbsoluteAddress address = stubLibrary.resolveSymbol(unresolvedSymbol.getFromLibrary(), unresolvedSymbol.getName());
      if (address != null) {
        //logger.debug("Installing stack height stub for " + unresolvedSymbol.getName());
        unresolvedSymbol.resolve(address);
        sIter.remove();
      }
    }
   
    if (!unresolvedSymbols.isEmpty())
      logger.warn("Unresolved symbols remaining: " + unresolvedSymbols);
  }
 
  /**
   * Install a harness that sets up the symbolic environment before calling main
   * and provides a return point with a termination statement.
   *
   * @param harness the harness object to install
   */
  public void installHarness(Harness harness) {
    this.harness = harness;
    harness.install(this);
  }
 
  /**
   * Set the program entry point to the given label.
   * @param label the new entry point
   */
  public void setStart(Location label) {
    this.start = label;
  }

  /**
   * Set the program entry point to the given address.
   * @param entryAddress the new entry address
   */
  public void setEntryAddress(AbsoluteAddress entryAddress) {
    setStart(new Location(entryAddress));
  }
 
  /**
   * Get the main module. 
   * @return the main module
   */
  public ExecutableImage getMainModule() {
    return mainModule;
  }
 
  /**
   * Get the module that contains the specified virtual address at runtime.
   * @param a a virtual address
   * @return the module to which the given virtual address belongs.
   */
  public ExecutableImage getModule(AbsoluteAddress a) {
    for (ExecutableImage module : modules) {
      if (module.getFilePointer(a) >= 0)
        return module;
    }
    return null;
  }
 
  public Iterator<AbsoluteAddress> codeAddressIterator() {
    return getMainModule().codeBytesIterator();
  }
 
  /**
   * Get all statements in the Program.
   *
   * @return a collection of all statements in all loaded modules.
   */
  public Collection<RTLStatement> getStatements() {
    return statementMap.values();
  }

  /**
   * Get the statement at a specific label. If there is no statement stored, attempts
   * to disassemble the instruction at the label's virtual address. If the address is
   * outside of the file area, logs an error and returns a Halt statement by default.
   *
   * @param label The label for which to get the statement
   * @return The statement object at label.
   */
  public final RTLStatement getStatement(Location label) {
    if (!statementMap.containsKey(label)) {
      AbsoluteAddress address = label.getAddress();
      Instruction instr = getInstruction(address);
      // If we did not get an instruction, add an artificial Halt for recovery
      if (instr == null) {
        RTLHalt halt = new RTLHalt();
        halt.setLabel(label);
        putStatement(halt);
        logger.error("ERROR: Replacing unknown instruction with HALT.");
        if (Options.debug.getValue())
          throw new DisassemblyException("Disassembly failed at " + address);
      } else {
        StatementSequence seq = arch.getRTLEquivalent(address, instr);
        for (RTLStatement s : seq) {
          putStatement(s);
        }
        assert statementMap.containsKey(label) : "Disassembly did not produce label: " + label;
      }
    }
    return statementMap.get(label);
  }
 
  /**
   * Stores a statement in the program. If a statement already exists with the same
   * label, it is replaced.
   *
   * @param stmt The statement to be stored. Has to contain a proper label.
   */
  public final void putStatement(RTLStatement stmt) {
    RTLStatement existing = statementMap.get(stmt.getLabel());
    if (existing != null) {
      if (existing.equals(stmt)) return;
      logger.debug("Replacing statement at " + stmt.getLabel());
    }
    statementMap.put(stmt.getLabel(), stmt);
  }
 
  public boolean containsLabel(Location label) {
    return statementMap.containsKey(label);
  }

  public final int getStatementCount() {
    return statementMap.size();
  }

  public final int getInstructionCount() {
    return assemblyMap.size();
  }
 
  public Harness getHarness() {
    return harness;
  }
 
  /**
   * Gets the assembly instruction at the specified virtual address.
   * @param address a virtual address
   * @return the assembly instruction at the specified address
   */
  public final Instruction getInstruction(AbsoluteAddress address) {

    Instruction instr = assemblyMap.get(address);
    if (instr != null) {
      return instr;
    } else {
      // No real instructions in prologue/epilogue
      if (harness.contains(address) || address.getValue() >= StubProvider.STUB_BASE)
        return null;
     
      ExecutableImage module = getModule(address);

      long fp = -1;
      if (module == null) {
        logger.error("No module for address " + address + ". Cannot disassemble instruction!");
      } else {
        fp = module.getFilePointer(address);
        // Also check whether fp is out of the int range, since the X86Disassembler actually
        // performs this cast in its implementation.
        if (fp < 0 || (int)fp < 0) {
          logger.error("Requested instruction outside of file area: " + address);
        } else {
          if (!module.isCodeArea(address)) {
            logger.error("Requested instruction outside code section: " + address);
            return null;
          }
          instr = module.getDisassembler().decodeInstruction(fp);
          if (instr == null) {
            logger.error("Instruction could not be disassembled at: " + address);
          }
        }
      }

      if (instr != null)
        putInstruction(address, instr);
      return instr;
    }
  }

  /**
   * Stores an assembly instruction at the given address, overwriting
   * any existing instruction.
   * @param addr the virtual address to save the instruction at
   * @param instr the assembly instruction
   * @return true if there was no instruction stored for that address before, false otherwise.
   */
  public final boolean putInstruction(AbsoluteAddress addr, Instruction instr) {
    //logger.info(addr + " " + instr.toString(addr.getValue(), new DummySymbolFinder()));
    return assemblyMap.put(addr, instr) == null;
  }
 
  /**
   * Get the string representation of the assembly instruction at the given address.
   * @param addr a virtual address
   * @return a string representation of the assembly code at the given address
   */
  public String getInstructionString(AbsoluteAddress addr) {
    Instruction instr = getInstruction(addr);
    if (instr == null) return "NON_EXISTENT";
    return instr.toString(addr.getValue(), symbolFinder(addr));
  }
 
  public String getSymbolFor(Location label) {
    SymbolFinder symFinder = symbolFinder(label.getAddress());
    if (symFinder.hasSymbolFor(label.getAddress())) {
      return symFinder.getSymbolFor(label.getAddress());
    } else {
      return label.toString();
    }
  }
 
  public String getSymbolFor(AbsoluteAddress addr) {
    return symbolFinder(addr).getSymbolFor(addr);
  }
 
  private SymbolFinder symbolFinder(AbsoluteAddress addr) {
    if (addr.getValue() >= StubProvider.STUB_BASE)
      return stubLibrary.getSymbolFinder();
   
    ExecutableImage module = getModule(addr);
    return (module == null) ? new DummySymbolFinder() : module.getSymbolFinder();
  }
 
  public Set<Location> getUnresolvedBranches() {
    return unresolvedBranches;
  }

  public void setUnresolvedBranches(Set<Location> unresolvedBranches) {
    this.unresolvedBranches = unresolvedBranches;
  }

  /**
   * @return the assemblyMap
   */
  public final Map<AbsoluteAddress, Instruction> getAssemblyMap() {
    return assemblyMap;
  }

  public TargetOS getTargetOS() {
    return targetOS;
  }

  /**
   * Returns all variables used in the program. At the current state of
   * the implementation, this includes only registers and flags.
   *
   * @return A set containing all variables used in this program.
   */
  public SetOfVariables getUsedVariables() {
    SetOfVariables result = new SetOfVariables();
    for (CFAEdge edge : cfa)
      result.addAll(((RTLStatement)edge.getTransformer()).getUsedVariables());
    return result;
  }

 
  public Architecture getArchitecture() {
    return arch;
  }
 
  public Collection<ExportedSymbol> getSymbols() {
    return exportedSymbols.values();
  }
 
  public Set<CFAEdge> getCFA() {
    return Collections.unmodifiableSet(cfa);
  }

  public void setCFA(Set<CFAEdge> cfa) {
    this.cfa = cfa;
  }

  public Location getStart() {
    return start;
  }
 
  public int countIndirectBranches() {
    int res = 0;
    for (Map.Entry<AbsoluteAddress, Instruction> entry : assemblyMap.entrySet()) {
      Instruction instr = entry.getValue();
     
      if (instr instanceof BranchInstruction) {
        BranchInstruction branch = (BranchInstruction)instr;
        if (branch.isIndirect()) {
          // if branch target is not a memory operand pointing into a static data area of the binary (imports)
          if (branch.getBranchDestination() instanceof MemoryOperand) {
            MemoryOperand memOp = (MemoryOperand)branch.getBranchDestination();
            // Import calls have only displacement
            if (memOp.getBase() == null && memOp.getIndex() == null) {
              AbsoluteAddress disp = new AbsoluteAddress(memOp.getDisplacement());
              // Check whether displacement points into import table
              ExecutableImage module = getModule(disp);
              if (module instanceof PEModule && ((PEModule)module).getImportTable().containsKey(disp))
                continue;
            }
          }
          res++;
          //logger.verbose(entry.getKey() + "\t" + getInstructionString(entry.getKey()));
        }
      }
    }
    return res;
  }
}
TOP

Related Classes of org.jakstab.Program

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.