Coverage for /Syzygy/core/disassembler.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
80.5% | 107 / 133 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // Implementation of disassembler.
  16    :  #include "syzygy/core/disassembler.h"
  17    :  
  18    :  #include "base/logging.h"
  19    :  #include "base/stringprintf.h"
  20    :  
  21    :  namespace core {
  22    :  
  23    :  Disassembler::Disassembler(const uint8* code,  // Ctor: records the code buffer to walk.
  24    :                             size_t code_size,  // Size of the buffer, compared against byte counts in Walk().
  25    :                             AbsoluteAddress code_addr,  // Address corresponding to code[0].
  26    :                             const InstructionCallback& on_instruction)  // Optional external per-instruction callback.
  27    :      : code_(code),
  28    :        code_size_(code_size),
  29    :        code_addr_(code_addr),
  30    :        on_instruction_(on_instruction),
  31  E :        disassembled_bytes_(0) {  // Nothing has been disassembled yet.
  32  E :  }  // This overload queues no entry points itself.
  33    :  
  34    :  Disassembler::Disassembler(const uint8* code,  // Ctor: as the 4-argument overload, plus initial entry points.
  35    :                             size_t code_size,
  36    :                             AbsoluteAddress code_addr,
  37    :                             const AddressSet& entry_points,  // Addresses to start disassembling from.
  38    :                             const InstructionCallback& on_instruction)
  39    :      : code_(code),
  40    :        code_size_(code_size),
  41    :        code_addr_(code_addr),
  42    :        on_instruction_(on_instruction),
  43  E :        disassembled_bytes_(0) {
  44    :  
  45  E :    AddressSet::const_iterator it = entry_points.begin();
  46  E :    for (; it != entry_points.end(); ++it)  // Queue every entry point for Walk().
  47  E :      Unvisited(*it);  // Unvisited() DCHECKs that the address lies inside the block.
  48  E :  }
  49    :  
  50  E :  Disassembler::~Disassembler() {  // Empty body: no explicit cleanup is performed here.
  51  E :  }
  52    :  
  53    :  Disassembler::CallbackDirective Disassembler::OnInstruction(  // Called by NotifyOnInstruction() per instruction.
  54  E :      AbsoluteAddress addr, const _DInst& inst) {
  55  E :    return kDirectiveContinue;  // This implementation ignores its arguments and lets the walk proceed.
  56  E :  }
  57    :  
  58    :  Disassembler::CallbackDirective Disassembler::OnBranchInstruction(  // Called by Walk() for each branch instruction.
  59  E :      AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {  // Walk() leaves dest unassigned if it couldn't compute it.
  60  E :    return kDirectiveContinue;  // This implementation ignores its arguments and lets the walk proceed.
  61  E :  }
  62    :  
  63    :  Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(  // Called by Walk() before each instruction run.
  64  E :      AbsoluteAddress start_address) {
  65  E :    return kDirectiveContinue;  // This implementation ignores its argument and lets the walk proceed.
  66  E :  }
  67    :  
  68    :  Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(  // Called by Walk() after each instruction run.
  69  E :      AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
  70  E :    return kDirectiveContinue;  // This implementation ignores its arguments and lets the walk proceed.
  71  E :  }
  72    :  
  73  E :  Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {  // Called once by Walk() after the work list drains.
  74  E :    return kDirectiveContinue;  // This implementation does nothing and lets Walk() report its result.
  75  E :  }
  76    :  
  77  E :  Disassembler::WalkResult Disassembler::Walk() {  // Disassembles instruction runs from every queued address.
  78    :    // Initialize our disassembly state.
  79  E :    _CodeInfo code = {};
  80  E :    code.dt = Decode32Bits;
  81  E :    code.features = DF_NONE;
  82    :  
  83    :    // Set once we hit a branch (or SYS*) whose destination we can't follow.
  84  E :    bool incomplete_branches = false;
  85    :  
  86  E :    while (!unvisited_.empty()) {  // Branches discovered below may add more addresses to this work list.
  87  E :      AddressSet::iterator it = unvisited_.begin();
  88  E :      AbsoluteAddress addr(*it);
  89  E :      unvisited_.erase(it);
  90    :  
  91    :      // Unvisited addresses must be within the code block we're currently
  92    :      // disassembling.
  93  E :      DCHECK_LE(code_addr_, addr);
  94  E :      DCHECK_GT(code_addr_ + code_size_, addr);
  95    :  
  96    :      // Notify of the beginning of a new instruction run.
  97  E :      if (OnStartInstructionRun(addr) == kDirectiveAbort)
  98  i :        return kWalkError;
  99    :  
 100    :      // This continues disassembly along a contiguous instruction run until we
 101    :      // run out of code, jump somewhere else, or are requested to terminate the
 102    :      // path by the OnInstruction callback. We call notification methods to
 103    :      // notify of the start of a run, the end of a run and when branch
 104    :      // instructions with computable destination addresses are hit.
 105  E :      bool terminate = false;
 106  E :      ControlFlowFlag control_flow = kControlFlowTerminates;
 107  E :      _DInst inst = {};
 108  E :      for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {  // addr advances only if the body doesn't break.
 109  E :        code.codeOffset = addr.value();
 110  E :        code.codeLen = code_size_ - (addr - code_addr_);  // Bytes left between addr and the end of the buffer.
 111  E :        code.code = code_ + (addr - code_addr_);
 112  E :        if (code.codeLen == 0)
 113  i :          break;  // Ran off the end of the code buffer.
 114    :  
 115  E :        bool conditional_branch_handled = false;  // Despite the name, set for any branch with an O_PC operand.
 116    :  
 117  E :        unsigned int decoded = 0;
 118  E :        _DecodeResult result = distorm_decompose(&code, &inst, 1, &decoded);  // Decode at most one instruction.
 119    :  
 120  E :        if (decoded == 0) {
 121  i :          LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
 122    :  
 123    :          // Dump the next few bytes. The longest X86 instruction possible is 15
 124    :          // bytes according to distorm.
 125  i :          int max_bytes = code.codeLen;
 126  i :          if (max_bytes > 15)
 127  i :            max_bytes = 15;
 128  i :          std::string dump;
 129  i :          for (int i = 0; i < max_bytes; ++i) {
 130  i :            dump += base::StringPrintf(" 0x%02X", code.code[i]);
 131  i :          }
 132  i :          LOG(ERROR) << ".text =" << dump
 133    :                     << (max_bytes < code.codeLen ? " ..." : ".");  // "..." marks a truncated dump.
 134  i :          return kWalkError;
 135    :        }
 136    :  
 137  E :        CHECK_EQ(1U, decoded);
 138  E :        CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);
 139    :  
 140    :        // Try to visit this instruction.
 141  E :        VisitedSpace::Range range(addr, inst.size);
 142  E :        if (!visited_.Insert(range, 0)) {
 143    :          // If the collision is a repeat of a previously disassembled
 144    :          // instruction at a different offset then something went wrong.
 145  E :          if (!visited_.ContainsExactly(range)) {
 146  i :            LOG(ERROR) << "Two disassembled instructions overlap.";
 147  i :            return kWalkError;
 148    :          }
 149  E :          break;  // NOTE(review): addr is not advanced on this path, yet OnEndInstructionRun below still gets addr - inst.size -- confirm intended.
 150    :        }
 151    :  
 152    :        // Tally the code bytes we just disassembled.
 153  E :        disassembled_bytes_ += inst.size;
 154    :  
 155    :        // Invoke the callback and terminate if need be.
 156  E :        switch (NotifyOnInstruction(addr, inst)) {
 157    :          case kDirectiveTerminateWalk:
 158  E :            return kWalkTerminated;  // Returns at once; the end-of-run and completion hooks are not called.
 159    :  
 160    :          case kDirectiveAbort:
 161  i :            return kWalkError;
 162    :  
 163    :          case kDirectiveTerminatePath:
 164  E :            terminate = true;  // Falls through to default, which only breaks.
 165    :  
 166    :          default:
 167    :            break;
 168    :        }
 169    :  
 170  E :        uint8 fc = META_GET_FC(inst.meta);  // Flow-control class of the decoded instruction.
 171  E :        switch (fc) {
 172    :          case FC_NONE:
 173    :          case FC_CALL:
 174    :          case FC_CMOV:
 175    :            // Do nothing with these flow control types.
 176  E :            break;
 177    :  
 178    :          case FC_RET:
 179    :            // It's a RET instruction, we're done with this branch.
 180  E :            terminate = true;
 181  E :            break;
 182    :  
 183    :          case FC_SYS:
 184  i :            incomplete_branches = true;
 185  i :            terminate = true;
 186  i :            NOTREACHED() << "Unexpected SYS* instruction encountered";
 187  i :            break;
 188    :  
 189    :          case FC_CND_BRANCH:
 190    :            // Conditional branch, schedule a visit to the branch-not-taken
 191    :            // basic block.
 192  E :            Unvisited(addr + inst.size);  // NOTE(review): Unvisited() DCHECKs IsInBlock(); a conditional branch ending the block would trip it -- confirm.
 193    :            // And fall through to visit branch target.
 194    :  
 195    :          case FC_UNC_BRANCH: {
 196  E :              terminate = true;  // The basic block ends here.
 197  E :              AbsoluteAddress dest;  // Presumably default-constructs to 0, which is tested below as "unknown" -- TODO confirm.
 198  E :              switch (inst.ops[0].type) {
 199    :                case O_REG:
 200    :                case O_MEM:
 201    :                  // Computed branch, we can't chase this.
 202  E :                  break;
 203    :  
 204    :                case O_SMEM:
 205    :                  // Branch to a register, can't chase this.
 206  i :                  break;
 207    :  
 208    :                case O_DISP:
 209    :                  // Indirect address, this may be e.g. a jump to an import.
 210    :                  // TODO(siggi): validate that this is so.
 211  E :                  DCHECK_EQ(32, inst.ops[0].size);
 212  E :                  break;
 213    :  
 214    :                case O_PC:
 215    :                  // PC relative address.
 216  E :                  dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);  // Relative to the end of this instruction.
 217  E :                  conditional_branch_handled = true;
 218  E :                  break;
 219    :  
 220    :                default:
 221  i :                  NOTREACHED() << "Unexpected branch destination type";
 222    :                  break;
 223    :              }
 224    :  
 225    :              // Make sure to visit the branch destination.
 226  E :              if (dest != AbsoluteAddress(0)) {
 227  E :                if (IsInBlock(dest))  // Destinations outside this block are not queued.
 228  E :                  Unvisited(dest);
 229    :              }
 230    :  
 231    :              // Notify of the branch; dest is left unassigned when it could not be computed.
 232  E :              if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
 233  i :                return kWalkError;
 234    :  
 235  E :              if (dest == AbsoluteAddress(0)) {
 236    :                // We couldn't compute the destination, if not handled,
 237    :                // we may have incomplete coverage for the function.
 238    :                incomplete_branches =
 239  E :                    incomplete_branches || !conditional_branch_handled;
 240    :              }
 241    :            }
 242  E :            break;
 243    :  
 244    :          case FC_INT:
 245    :            // We encounter int3 inline in functions sometimes.
 246  E :            break;
 247    :  
 248    :          default:
 249  i :            NOTREACHED() << "Unexpected instruction type encountered";
 250  i :            terminate = true;
 251    :            break;
 252    :        }
 253    :  
 254    :        // If the next instruction is flagged as a disassembly start point, we
 255    :        // should end this run of instructions (basic-block) and let it be picked
 256    :        // up on the next iteration.
 257  E :        if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
 258  E :          control_flow = kControlFlowContinues;  // The only place a run is reported as continuing.
 259  E :          terminate = true;
 260    :        }
 261  E :      }
 262    :  
 263    :      // Notify that we are terminating an instruction run. Note that we have to
 264    :      // back up the address by the last instruction size.
 265    :      if (OnEndInstructionRun(addr - inst.size,
 266    :                              inst,
 267  E :                              control_flow) == kDirectiveAbort)
 268  i :        return kWalkError;
 269  E :    }
 270    :  
 271    :    // Notify when we've completed disassembly.
 272  E :    if (OnDisassemblyComplete() == kDirectiveAbort)
 273  i :      return kWalkError;
 274    :  
 275    :    // If we covered every byte in the function, we don't
 276    :    // care that we didn't chase all computed branches.
 277  E :    if (incomplete_branches && disassembled_bytes_ == code_size_)
 278  E :      return kWalkSuccess;
 279    :  
 280    :    // Otherwise we return success only in case of no computed branches.
 281  E :    return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
 282  E :  }
 283    :  
 284  E :  bool Disassembler::Unvisited(AbsoluteAddress addr) {  // Queues addr for disassembly; true only if newly queued.
 285  E :    DCHECK(IsInBlock(addr));  // Callers must only pass addresses inside the code block.
 286    :  
 287  E :    if (visited_.Intersects(addr))  // Already covered by a disassembled instruction.
 288  E :      return false;
 289    :  
 290  E :    return unvisited_.insert(addr).second;  // False if addr was already queued.
 291  E :  }
 292    :  
 293    :  Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(  // Runs the local hook, then the external callback.
 294  E :      AbsoluteAddress addr, const _DInst& inst) {
 295    :    // Invoke our local callback.
 296  E :    CallbackDirective directive = OnInstruction(addr, inst);
 297    :  
 298    :    // Invoke the external callback only if the local one said to continue and a callback is set.
 299  E :    if (directive == kDirectiveContinue && !on_instruction_.is_null())
 300  E :      directive = on_instruction_.Run(*this, inst);  // The external callback gets *this and inst, not addr.
 301    :  
 302  E :    return directive;  // The external callback's directive, if it ran, replaces the local one.
 303  E :  }
 304    :  
 305  E :  bool Disassembler::IsInBlock(AbsoluteAddress addr) const {  // True if addr falls inside the code block.
 306    :    return addr >= code_addr_ &&  // Not before the start of the block...
 307  E :        static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;  // ...and its offset + 1 does not exceed code_size_.
 308  E :  }
 309    :  
 310    :  }  // namespace core

Coverage information generated Thu Sep 06 11:30:46 2012.