Coverage for /Syzygy/core/disassembler.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
73.5%    | 97 / 132 / 0                                                | C++      | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // Implementation of disassembler.
  16    :  #include "syzygy/core/disassembler.h"
  17    :  
  18    :  #include "base/logging.h"
  19    :  #include "base/strings/stringprintf.h"
  20    :  #include "syzygy/core/disassembler_util.h"
  21    :  
  22    :  namespace core {
  23    :  
  24    :  Disassembler::Disassembler(const uint8_t* code,  // Constructs a walker with no queued start addresses.
  25    :                             size_t code_size,
  26    :                             AbsoluteAddress code_addr,
  27    :                             const InstructionCallback& on_instruction)
  28  E :      : code_(code),  // First byte of the code buffer; Walk() indexes it by (addr - code_addr_).
  29  E :        code_size_(code_size),  // Length of the code buffer in bytes.
  30  E :        code_addr_(code_addr),  // Address that corresponds to code_[0].
  31  E :        on_instruction_(on_instruction),  // External per-instruction callback; skipped when is_null().
  32  E :        disassembled_bytes_(0) {  // Running tally of instruction bytes decoded by Walk().
  33  E :  }
  34    :  
  35    :  Disassembler::Disassembler(const uint8_t* code,  // Constructs a walker and queues every entry point.
  36    :                             size_t code_size,
  37    :                             AbsoluteAddress code_addr,
  38    :                             const AddressSet& entry_points,
  39    :                             const InstructionCallback& on_instruction)
  40    :      : code_(code),  // First byte of the code buffer; Walk() indexes it by (addr - code_addr_).
  41    :        code_size_(code_size),  // Length of the code buffer in bytes.
  42    :        code_addr_(code_addr),  // Address that corresponds to code_[0].
  43    :        on_instruction_(on_instruction),  // External per-instruction callback; skipped when is_null().
  44    :        disassembled_bytes_(0) {  // Running tally of instruction bytes decoded by Walk().
  45    :    AddressSet::const_iterator it = entry_points.begin();
  46    :    for (; it != entry_points.end(); ++it)
  47    :      Unvisited(*it);  // Unvisited() DCHECKs IsInBlock(), so entry points must lie inside the code block.
  48    :  }
  49    :  
  50  E :  Disassembler::~Disassembler() {  // Nothing to release explicitly; the body is empty.
  51  E :  }
  52    :  
  53    :  Disassembler::CallbackDirective Disassembler::OnInstruction(  // Local per-instruction hook, called first by NotifyOnInstruction().
  54  E :      AbsoluteAddress addr, const _DInst& inst) {
  55  E :    return kDirectiveContinue;  // This implementation ignores its arguments and never interrupts the walk.
  56  E :  }
  57    :  
  58    :  Disassembler::CallbackDirective Disassembler::OnBranchInstruction(  // Hook Walk() calls for each branch; dest is AbsoluteAddress(0) when unknown.
  59  E :      AbsoluteAddress addr, const _DInst& inst, AbsoluteAddress dest) {
  60  E :    return kDirectiveContinue;  // This implementation ignores its arguments and never interrupts the walk.
  61  E :  }
  62    :  
  63    :  Disassembler::CallbackDirective Disassembler::OnStartInstructionRun(  // Hook Walk() calls before disassembling from each queued address.
  64  E :      AbsoluteAddress start_address) {
  65  E :    return kDirectiveContinue;  // This implementation ignores its argument and never interrupts the walk.
  66  E :  }
  67    :  
  68    :  Disassembler::CallbackDirective Disassembler::OnEndInstructionRun(  // Hook Walk() calls after each contiguous instruction run ends.
  69  E :      AbsoluteAddress addr, const _DInst& inst, ControlFlowFlag control_flow) {
  70  E :    return kDirectiveContinue;  // This implementation ignores its arguments and never interrupts the walk.
  71  E :  }
  72    :  
  73  E :  Disassembler::CallbackDirective Disassembler::OnDisassemblyComplete() {  // Hook Walk() calls once no queued addresses remain.
  74  E :    return kDirectiveContinue;  // This implementation never turns a finished walk into an error.
  75  E :  }
  76    :  
  77  E :  Disassembler::WalkResult Disassembler::Walk() {  // Disassembles from every queued address until unvisited_ is empty.
  78    :    // Initialize the decoder state that is reused for every DistormDecompose call.
  79  E :    _CodeInfo code = {};
  80  E :    code.dt = Decode32Bits;
  81  E :    code.features = DF_NONE;
  82    :  
  83    :    // Set when an FC_SYS is hit or a branch destination could not be computed.
  84  E :    bool incomplete_branches = false;
  85    :  
  86  E :    while (!unvisited_.empty()) {
  87  E :      AddressSet::iterator it = unvisited_.begin();  // Take the first queued address...
  88  E :      AbsoluteAddress addr(*it);
  89  E :      unvisited_.erase(it);  // ...and dequeue it before walking from it.
  90    :  
  91    :      // Unvisited addresses must be within the code block we're currently
  92    :      // disassembling.
  93  E :      DCHECK_LE(code_addr_, addr);
  94  E :      DCHECK_GT(code_addr_ + code_size_, addr);
  95    :  
  96    :      // Notify of the beginning of a new instruction run.
  97  E :      if (OnStartInstructionRun(addr) == kDirectiveAbort)
  98  i :        return kWalkError;
  99    :  
 100    :      // This continues disassembly along a contiguous instruction run until we
 101    :      // run out of code, jump somewhere else, or are requested to terminate the
 102    :      // path by the OnInstruction callback. We call notification methods to
 103    :      // notify of the start of a run, the end of a run and when branch
 104    :      // instructions with computable destination addresses are hit.
 105  E :      bool terminate = false;
 106  E :      ControlFlowFlag control_flow = kControlFlowTerminates;  // Only changed when the run stops at another queued address.
 107  E :      _DInst inst = {};  // Declared outside the loop: the last decoded instruction is reported when the run ends.
 108  E :      for (; addr != AbsoluteAddress(0) && !terminate; addr += inst.size) {
 109  E :        code.codeOffset = addr.value();
 110  E :        code.codeLen = code_size_ - (addr - code_addr_);  // Bytes left between addr and the end of the buffer.
 111  E :        code.code = code_ + (addr - code_addr_);
 112  E :        if (code.codeLen == 0)  // addr has reached the end of the code buffer.
 113  i :          break;
 114    :  
 115  E :        bool conditional_branch_handled = false;  // Set only on the O_PC branch path below.
 116    :  
 117  E :        unsigned int decoded = 0;
 118  E :        _DecodeResult result = DistormDecompose(&code, &inst, 1, &decoded);  // Ask for at most one instruction.
 119    :  
 120  E :        if (decoded == 0) {
 121  i :          LOG(ERROR) << "Unable to decode instruction at " << addr << ".";
 122    :  
 123    :          // Dump the next few bytes. The longest X86 instruction possible is 15
 124    :          // bytes according to distorm.
 125  i :          int max_bytes = code.codeLen;
 126  i :          if (max_bytes > 15)
 127  i :            max_bytes = 15;
 128  i :          std::string dump;
 129  i :          for (int i = 0; i < max_bytes; ++i) {
 130  i :            dump += base::StringPrintf(" 0x%02X", code.code[i]);
 131  i :          }
 132  i :          LOG(ERROR) << ".text =" << dump
 133    :                     << (max_bytes < code.codeLen ? " ..." : ".");
 134  i :          return kWalkError;
 135    :        }
 136    :  
 137  E :        CHECK_EQ(1U, decoded);
 138  E :        CHECK(result == DECRES_MEMORYERR || result == DECRES_SUCCESS);  // NOTE(review): MEMORYERR presumably means more code remained than the one-slot output holds - confirm against distorm.
 139    :  
 140    :        // Try to visit this instruction.
 141  E :        VisitedSpace::Range range(addr, inst.size);
 142  E :        if (!visited_.Insert(range, 0)) {
 143    :          // If the collision is not an exact repeat of a previously disassembled
 144    :          // instruction, i.e. it overlaps one at a different offset, something went wrong.
 145  i :          if (!visited_.ContainsExactly(range)) {
 146  i :            LOG(ERROR) << "Two disassembled instructions overlap.";
 147  i :            return kWalkError;
 148    :          }
 149  i :          break;  // Exact repeat: this run has joined code that was already visited.
 150    :        }
 151    :  
 152    :        // Tally the code bytes we just disassembled.
 153  E :        disassembled_bytes_ += inst.size;
 154    :  
 155    :        // Invoke the callback and terminate if need be.
 156  E :        switch (NotifyOnInstruction(addr, inst)) {  // No default case: any other directive continues the run.
 157    :          case kDirectiveTerminateWalk:
 158  E :            return kWalkTerminated;
 159    :  
 160    :          case kDirectiveAbort:
 161  i :            return kWalkError;
 162    :  
 163    :          case kDirectiveTerminatePath:
 164  E :            terminate = true;
 165    :            break;
 166    :        }
 167    :  
 168  E :        uint8_t fc = META_GET_FC(inst.meta);  // Flow-control class of the decoded instruction.
 169  E :        switch (fc) {
 170    :          case FC_NONE:
 171    :          case FC_CALL:
 172    :          case FC_CMOV:
 173    :            // Do nothing with these flow control types.
 174  E :            break;
 175    :  
 176    :          case FC_RET:
 177    :            // It's a RET instruction, we're done with this branch.
 178  E :            terminate = true;
 179  E :            break;
 180    :  
 181    :          case FC_SYS:
 182  i :            incomplete_branches = true;
 183  i :            terminate = true;
 184  i :            NOTREACHED() << "Unexpected SYS* instruction encountered";
 185  i :            break;
 186    :  
 187    :          case FC_CND_BRANCH:
 188    :            // Conditional branch, schedule a visit to the branch-not-taken
 189    :            // basic block.
 190  E :            Unvisited(addr + inst.size);  // NOTE(review): Unvisited() DCHECKs IsInBlock(); a conditional branch ending the block would trip it - confirm.
 191    :            // And fall through to visit branch target.
 192    :  
 193    :          case FC_UNC_BRANCH: {
 194  E :              terminate = true;  // The basic block ends here.
 195  E :              AbsoluteAddress dest;  // Compared against AbsoluteAddress(0) below to mean "destination unknown".
 196  E :              switch (inst.ops[0].type) {
 197    :                case O_REG:
 198    :                case O_MEM:
 199    :                  // Computed branch, we can't chase this.
 200  i :                  break;
 201    :  
 202    :                case O_SMEM:
 203    :                  // Branch to a register, can't chase this.
 204  i :                  break;
 205    :  
 206    :                case O_DISP:
 207    :                  // Indirect address, this may be e.g. a jump to an import.
 208    :                  // TODO(siggi): validate that this is so.
 209  i :                  DCHECK_EQ(32, inst.ops[0].size);
 210  i :                  break;
 211    :  
 212    :                case O_PC:
 213    :                  // PC relative address: the target is relative to the end of this instruction.
 214  E :                  dest = addr + static_cast<size_t>(inst.size + inst.imm.addr);
 215  E :                  conditional_branch_handled = true;
 216  E :                  break;
 217    :  
 218    :                default:
 219  i :                  NOTREACHED() << "Unexpected branch destination type";
 220    :                  break;
 221    :              }
 222    :  
 223    :              // Make sure to visit the branch destination, but only if it lies in this block.
 224  E :              if (dest != AbsoluteAddress(0)) {
 225  E :                if (IsInBlock(dest))
 226  E :                  Unvisited(dest);
 227    :              }
 228    :  
 229    :              // Notify of the branch; dest is AbsoluteAddress(0) when it could not be computed.
 230  E :              if (OnBranchInstruction(addr, inst, dest) == kDirectiveAbort)
 231  i :                return kWalkError;
 232    :  
 233  E :              if (dest == AbsoluteAddress(0)) {
 234    :                // We couldn't compute the destination, if not handled,
 235    :                // we may have incomplete coverage for the function.
 236  i :                incomplete_branches =
 237    :                    incomplete_branches || !conditional_branch_handled;
 238    :              }
 239    :            }
 240  E :            break;
 241    :  
 242    :          case FC_INT:
 243    :            // We encounter int3 inline in functions sometimes.
 244  i :            break;
 245    :  
 246    :          default:
 247  i :            NOTREACHED() << "Unexpected instruction type encountered";
 248  i :            terminate = true;
 249    :            break;
 250    :        }
 251    :  
 252    :        // If the next instruction is flagged as a disassembly start point, we
 253    :        // should end this run of instructions (basic-block) and let it be picked
 254    :        // up on the next iteration.
 255  E :        if (unvisited_.count(addr + inst.size) != 0 && !terminate) {
 256  E :          control_flow = kControlFlowContinues;
 257  E :          terminate = true;
 258    :        }
 259  E :      }
 260    :  
 261    :      // Notify that we are terminating an instruction run. Note that we have to
 262    :      // back up the address by the last instruction size.
 263    :      if (OnEndInstructionRun(addr - inst.size,  // NOTE(review): after a `break` exit addr was not advanced, yet inst.size is still subtracted - confirm intended.
 264    :                              inst,
 265  E :                              control_flow) == kDirectiveAbort)
 266  i :        return kWalkError;
 267  E :    }
 268    :  
 269    :    // Notify when we've completed disassembly.
 270  E :    if (OnDisassemblyComplete() == kDirectiveAbort)
 271  i :      return kWalkError;
 272    :  
 273    :    // If we covered every byte in the function, we don't
 274    :    // care that we didn't chase all computed branches.
 275  E :    if (incomplete_branches && disassembled_bytes_ == code_size_)
 276  i :      return kWalkSuccess;
 277    :  
 278    :    // Otherwise we return success only in case of no computed branches.
 279  E :    return incomplete_branches ? kWalkIncomplete : kWalkSuccess;
 280  E :  }
 281    :  
 282  E :  bool Disassembler::Unvisited(AbsoluteAddress addr) {  // Queues addr as a disassembly start point; returns whether it was newly queued.
 283  E :    DCHECK(IsInBlock(addr));  // Callers must only queue addresses inside the code block.
 284    :  
 285  E :    if (visited_.Intersects(addr))  // Already covered by a disassembled instruction; nothing to queue.
 286  i :      return false;
 287    :  
 288  E :    return unvisited_.insert(addr).second;  // Presumably std::set-style insert: .second is false if addr was already queued - confirm AddressSet type.
 289  E :  }
 290    :  
 291    :  Disassembler::CallbackDirective Disassembler::NotifyOnInstruction(  // Runs the local hook, then the external callback; returns the resulting directive.
 292  E :      AbsoluteAddress addr, const _DInst& inst) {
 293    :    // Invoke our local callback.
 294  E :    CallbackDirective directive = OnInstruction(addr, inst);
 295    :  
 296    :    // Invoke the external callback only if the local one said to continue and a callback was supplied.
 297  E :    if (directive == kDirectiveContinue && !on_instruction_.is_null())
 298  E :      directive = on_instruction_.Run(*this, inst);  // The external callback receives this disassembler and the instruction, not addr.
 299    :  
 300  E :    return directive;
 301  E :  }
 302    :  
 303  E :  bool Disassembler::IsInBlock(AbsoluteAddress addr) const {  // True when addr falls within [code_addr_, code_addr_ + code_size_).
 304  E :    return addr >= code_addr_ &&
 305    :        static_cast<size_t>(addr - code_addr_) + 1 <= code_size_;  // I.e. the byte offset of addr is strictly less than code_size_.
 306  E :  }
 307    :  
 308    :  }  // namespace core

Coverage information generated Fri Jul 29 11:00:21 2016.