Coverage for /Syzygy/core/disassembler.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
72.7%    | 93 / 128 / 0                                                | C++      | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // Implementation of disassembler.
  16    :  #include "syzygy/core/disassembler.h"
  17    :  
  18    :  #include "base/logging.h"
  19    :  #include "base/strings/stringprintf.h"
  20    :  #include "syzygy/core/disassembler_util.h"
  21    :  
  22    :  namespace core {
  23    :  
  24    :  Disassembler::Disassembler(const uint8* code,
  25    :                             size_t code_size,
  26    :                             AbsoluteAddress code_addr,
  27    :                             const InstructionCallback& on_instruction)
  28    :      : code_(code),
  29    :        code_size_(code_size),
  30    :        code_addr_(code_addr),
  31    :        on_instruction_(on_instruction),
  32  E :        disassembled_bytes_(0) {
  33  E :  }
  34    :  
  35    :  Disassembler::Disassembler(const uint8* code,
  36    :                             size_t code_size,
  37    :                             AbsoluteAddress code_addr,
  38    :                             const AddressSet& entry_points,
  39    :                             const InstructionCallback& on_instruction)
  40    :      : code_(code),
  41    :        code_size_(code_size),
  42    :        code_addr_(code_addr),
  43    :        on_instruction_(on_instruction),
  44    :        disassembled_bytes_(0) {
  45    :  
  46    :    AddressSet::const_iterator it = entry_points.begin();
  47    :    for (; it != entry_points.end(); ++it)
  48    :      Unvisited(*it);
  49    :  }
  50    :  
  51  E :  Disassembler::~Disassembler() {
  52  E :  }
  53    :  
  54    :  Disassembler::CallbackDirective Disassembler::OnInstruction(
  55  E :      AbsoluteAddress addr, const _DInst& inst) {
  56  E :    return kDirectiveContinue;
  57  E :  }
  58    :  
  59    :  Disassembler::CallbackDirective Disassembler::OnBranchInstruction(
  60  E :      AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
  61  E :    return kDirectiveContinue;
  62  E :  }
  63    :  
  64    :  Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(
  65  E :      AbsoluteAddress start_address) {
  66  E :    return kDirectiveContinue;
  67  E :  }
  68    :  
  69    :  Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(
  70  E :      AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
  71  E :    return kDirectiveContinue;
  72  E :  }
  73    :  
  74  E :  Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {
  75  E :    return kDirectiveContinue;
  76  E :  }
  77    :  
  78  E :  Disassembler::WalkResult Disassembler::Walk() {
  79    :    // Initialize our disassembly state.
  80  E :    _CodeInfo code = {};
  81  E :    code.dt = Decode32Bits;
  82  E :    code.features = DF_NONE;
  83    :  
  84    :    // This is to keep track of whether we cover the entire function.
  85  E :    bool incomplete_branches = false;
  86    :  
  87  E :    while (!unvisited_.empty()) {
  88  E :      AddressSet::iterator it = unvisited_.begin();
  89  E :      AbsoluteAddress addr(*it);
  90  E :      unvisited_.erase(it);
  91    :  
  92    :      // Unvisited addresses must be within the code block we're currently
  93    :      // disassembling.
  94  E :      DCHECK_LE(code_addr_, addr);
  95  E :      DCHECK_GT(code_addr_ + code_size_, addr);
  96    :  
  97    :      // Notify of the beginning of a new instruction run.
  98  E :      if (OnStartInstructionRun(addr) == kDirectiveAbort)
  99  i :        return kWalkError;
 100    :  
 101    :      // This continues disassembly along a contiguous instruction run until we
 102    :      // run out of code, jump somewhere else, or are requested to terminate the
 103    :      // path by the OnInstruction callback. We call notification methods to
 104    :      // notify of the start of a run, the end of a run and when branch
 105    :      // instructions with computable destination addresses are hit.
 106  E :      bool terminate = false;
 107  E :      ControlFlowFlag control_flow = kControlFlowTerminates;
 108  E :      _DInst inst = {};
 109  E :      for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {
 110  E :        code.codeOffset = addr.value();
 111  E :        code.codeLen = code_size_ - (addr - code_addr_);
 112  E :        code.code = code_ + (addr - code_addr_);
 113  E :        if (code.codeLen == 0)
 114  i :          break;
 115    :  
 116  E :        bool conditional_branch_handled = false;
 117    :  
 118  E :        unsigned int decoded = 0;
 119  E :        _DecodeResult result = DistormDecompose(&code, &inst, 1, &decoded);
 120    :  
 121  E :        if (decoded == 0) {
 122  i :          LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
 123    :  
 124    :          // Dump the next few bytes. The longest X86 instruction possible is 15
 125    :          // bytes according to distorm.
 126  i :          int max_bytes = code.codeLen;
 127  i :          if (max_bytes > 15)
 128  i :            max_bytes = 15;
 129  i :          std::string dump;
 130  i :          for (int i = 0; i < max_bytes; ++i) {
 131  i :            dump += base::StringPrintf(" 0x%02X", code.code[i]);
 132  i :          }
 133  i :          LOG(ERROR) << ".text =" << dump
 134    :                     << (max_bytes < code.codeLen ? " ..." : ".");
 135  i :          return kWalkError;
 136    :        }
 137    :  
 138  E :        CHECK_EQ(1U, decoded);
 139  E :        CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);
 140    :  
 141    :        // Try to visit this instruction.
 142  E :        VisitedSpace::Range range(addr, inst.size);
 143  E :        if (!visited_.Insert(range, 0)) {
 144    :          // If the collision is a repeat of a previously disassembled
 145    :          // instruction at a different offset then something went wrong.
 146  i :          if (!visited_.ContainsExactly(range)) {
 147  i :            LOG(ERROR) << "Two disassembled instructions overlap.";
 148  i :            return kWalkError;
 149    :          }
 150  i :          break;
 151    :        }
 152    :  
 153    :        // Tally the code bytes we just disassembled.
 154  E :        disassembled_bytes_ += inst.size;
 155    :  
 156    :        // Invoke the callback and terminate if need be.
 157  E :        switch (NotifyOnInstruction(addr, inst)) {
 158    :          case kDirectiveTerminateWalk:
 159  E :            return kWalkTerminated;
 160    :  
 161    :          case kDirectiveAbort:
 162  i :            return kWalkError;
 163    :  
 164    :          case kDirectiveTerminatePath:
 165  E :            terminate = true;
 166    :            break;
 167    :        }
 168    :  
 169  E :        uint8 fc = META_GET_FC(inst.meta);
 170  E :        switch (fc) {
 171    :          case FC_NONE:
 172    :          case FC_CALL:
 173    :          case FC_CMOV:
 174    :            // Do nothing with these flow control types.
 175  E :            break;
 176    :  
 177    :          case FC_RET:
 178    :            // It's a RET instruction, we're done with this branch.
 179  E :            terminate = true;
 180  E :            break;
 181    :  
 182    :          case FC_SYS:
 183  i :            incomplete_branches = true;
 184  i :            terminate = true;
 185  i :            NOTREACHED() << "Unexpected SYS* instruction encountered";
 186  i :            break;
 187    :  
 188    :          case FC_CND_BRANCH:
 189    :            // Conditional branch, schedule a visit to the branch-not-taken
 190    :            // basic block.
 191  E :            Unvisited(addr + inst.size);
 192    :            // And fall through to visit branch target.
 193    :  
 194    :          case FC_UNC_BRANCH: {
 195  E :              terminate = true;  // The basic block ends here.
 196  E :              AbsoluteAddress dest;
 197  E :              switch (inst.ops[0].type) {
 198    :                case O_REG:
 199    :                case O_MEM:
 200    :                  // Computed branch, we can't chase this.
 201  i :                  break;
 202    :  
 203    :                case O_SMEM:
 204    :                  // Branch to a register, can't chase this.
 205  i :                  break;
 206    :  
 207    :                case O_DISP:
 208    :                  // Indirect address, this may be e.g. a jump to an import.
 209    :                  // TODO(siggi): validate that this is so.
 210  i :                  DCHECK_EQ(32, inst.ops[0].size);
 211  i :                  break;
 212    :  
 213    :                case O_PC:
 214    :                  // PC relative address.
 215  E :                  dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);
 216  E :                  conditional_branch_handled = true;
 217  E :                  break;
 218    :  
 219    :                default:
 220  i :                  NOTREACHED() << "Unexpected branch destination type";
 221    :                  break;
 222    :              }
 223    :  
 224    :              // Make sure to visit the branch destination.
 225  E :              if (dest != AbsoluteAddress(0)) {
 226  E :                if (IsInBlock(dest))
 227  E :                  Unvisited(dest);
 228    :              }
 229    :  
 230    :              // Notify of a newly-discovered branch destination.
 231  E :              if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
 232  i :                return kWalkError;
 233    :  
 234  E :              if (dest == AbsoluteAddress(0)) {
 235    :                // We couldn't compute the destination, if not handled,
 236    :                // we may have incomplete coverage for the function.
 237    :                incomplete_branches =
 238  i :                    incomplete_branches || !conditional_branch_handled;
 239    :              }
 240    :            }
 241  E :            break;
 242    :  
 243    :          case FC_INT:
 244    :            // We encounter int3 inline in functions sometimes.
 245  i :            break;
 246    :  
 247    :          default:
 248  i :            NOTREACHED() << "Unexpected instruction type encountered";
 249  i :            terminate = true;
 250    :            break;
 251    :        }
 252    :  
 253    :        // If the next instruction is flagged as a disassembly start point, we
 254    :        // should end this run of instructions (basic-block) and let it be picked
 255    :        // up on the next iteration.
 256  E :        if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
 257  E :          control_flow = kControlFlowContinues;
 258  E :          terminate = true;
 259    :        }
 260  E :      }
 261    :  
 262    :      // Notify that we are terminating an instruction run. Note that we have to
 263    :      // back up the address by the last instruction size.
 264    :      if (OnEndInstructionRun(addr - inst.size,
 265    :                              inst,
 266  E :                              control_flow) == kDirectiveAbort)
 267  i :        return kWalkError;
 268  E :    }
 269    :  
 270    :    // Notify when we've completed disassembly.
 271  E :    if (OnDisassemblyComplete() == kDirectiveAbort)
 272  i :      return kWalkError;
 273    :  
 274    :    // If we covered every byte in the function, we don't
 275    :    // care that we didn't chase all computed branches.
 276  E :    if (incomplete_branches && disassembled_bytes_ == code_size_)
 277  i :      return kWalkSuccess;
 278    :  
 279    :    // Otherwise we return success only in case of no computed branches.
 280  E :    return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
 281  E :  }
 282    :  
 283  E :  bool Disassembler::Unvisited(AbsoluteAddress addr) {
 284  E :    DCHECK(IsInBlock(addr));
 285    :  
 286  E :    if (visited_.Intersects(addr))
 287  i :      return false;
 288    :  
 289  E :    return unvisited_.insert(addr).second;
 290  E :  }
 291    :  
 292    :  Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(
 293  E :      AbsoluteAddress addr, const _DInst& inst) {
 294    :    // Invoke our local callback.
 295  E :    CallbackDirective directive = OnInstruction(addr, inst);
 296    :  
 297    :    // Invoke the external callback if we're not already aborted.
 298  E :    if (directive == kDirectiveContinue && !on_instruction_.is_null())
 299  E :      directive = on_instruction_.Run(*this, inst);
 300    :  
 301  E :    return directive;
 302  E :  }
 303    :  
 304  E :  bool Disassembler::IsInBlock(AbsoluteAddress addr) const {
 305    :    return addr >= code_addr_ &&
 306  E :        static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;
 307  E :  }
 308    :  
 309    :  }  // namespace core

Coverage information generated Thu Jan 14 17:40:38 2016.