Coverage for /Syzygy/core/disassembler_util.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
89.3% | 150 / 168 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/core/disassembler_util.h"
  16    :  
  17    :  #include "base/logging.h"
  18    :  #include "base/stringprintf.h"
  19    :  #include "mnemonics.h"  // NOLINT
  20    :  
  21    :  namespace core {
  22    :  
  23    :  namespace {
  24    :  
  25    :  // Handles two improperly decoded 3-byte VEX (0xC4) encodings by synthesizing
  26    :  // a single placeholder instruction of known length. Returns true if handled;
  27    :  // if this returns false then no output parameter will have been changed.
  28    :  bool HandleBadDecode(_CodeInfo* ci,
  29    :                       _DInst result[],
  30    :                       unsigned int max_instructions,
  31    :                       unsigned int* used_instructions_count,
  32  E :                       _DecodeResult* ret) {
  33  E :    DCHECK_NE(reinterpret_cast<_CodeInfo*>(NULL), ci);
  34  E :    DCHECK_LE(1u, max_instructions);  // result[0] is written below.
  35  E :    DCHECK_NE(reinterpret_cast<unsigned int*>(NULL), used_instructions_count);
  36  E :    DCHECK_NE(reinterpret_cast<_DecodeResult*>(NULL), ret);
  37    :  
  38  E :    size_t size = 0;  // Remains 0 unless a known byte pattern matches.
  39    :  
  40    :    // 3-byte VEX encoded instructions.
  41  E :    if (ci->code[0] == 0xC4) {  // NOTE(review): code[0..2] are read (and a size of 5/6 reported) without checking ci->codeLen -- confirm callers guarantee enough bytes.
  42    :      // vpermq: matched on bytes C4 E3 FD and treated as 6 bytes long.
  43  E :      if (ci->code[1] == 0xE3 && ci->code[2] == 0xFD) {
  44  E :        size = 6;
  45  E :      } else if (ci->code[1] == 0xE2 && ci->code[2] == 0x4D) {
  46    :        // vpermd: matched on bytes C4 E2 4D and treated as 5 bytes long.
  47  E :        size = 5;
  48    :      }
  49    :    }
  50    :  
  51  E :    if (size == 0)
  52  i :      return false;  // Unrecognized pattern; nothing has been written yet.
  53    :  
  54    :    // We set the bare minimum properties that are required for any
  55    :    // subsequent processing that we perform.
  56    :  
  57  E :    *used_instructions_count = 1;
  58    :  
  59  E :    ::memset(result, 0, sizeof(result[0]));  // Clears only the first entry.
  60  E :    result[0].addr = ci->codeOffset;
  61  E :    result[0].size = size;
  62    :  
  63  E :    DCHECK_EQ(FC_NONE, META_GET_FC(result[0].meta));  // Zeroed state must read as "no flow control"...
  64  E :    DCHECK_EQ(O_NONE, result[0].ops[0].type);  // ...and as "no operands".
  65  E :    DCHECK_EQ(O_NONE, result[0].ops[1].type);
  66  E :    DCHECK_EQ(O_NONE, result[0].ops[2].type);
  67  E :    DCHECK_EQ(O_NONE, result[0].ops[3].type);
  68    :  
  69  E :    *ret = DECRES_SUCCESS;  // Overrides the caller's failed decode result.
  70    :  
  71  E :    return true;
  72  E :  }
  73    :  
  74    :  }  // namespace
  75    :  
  76    :  _DecodeResult DistormDecompose(_CodeInfo* ci,  // Wraps distorm_decompose() and patches known distorm @229 bugs in its output.
  77    :                                 _DInst result[],
  78    :                                 unsigned int max_instructions,
  79  E :                                 unsigned int* used_instructions_count) {
  80    :    _DecodeResult ret =
  81  E :        distorm_decompose(ci, result, max_instructions, used_instructions_count);
  82    :  
  83    :    // Distorm @229 has a bug where it has problems decoding some AVX
  84    :    // instructions. The encoding is described in detail here:
  85    :    //   http://en.wikipedia.org/wiki/VEX_prefix
  86    :    // An issue has been filed here:
  87    :    //   https://code.google.com/p/distorm/issues/detail?id=77
  88    :    // This is a workaround until the bug is fixed. We only care about the case
  89    :    // where decoding failed and produced no instructions at all.
  90  E :    if (ret != DECRES_SUCCESS && *used_instructions_count == 0) {
  91    :      if (HandleBadDecode(ci, result, max_instructions, used_instructions_count,
  92  E :                          &ret)) {
  93  E :        return ret;  // Synthesized instruction; skips the per-opcode fix-ups below.
  94    :      }
  95    :    }
  96    :  
  97  E :    for (unsigned int i = 0; i < *used_instructions_count; ++i) {
  98    :      // Distorm @229 has a bug where the access size for I_FNSTCW and I_FLDCW
  99    :      // destination operand is 0 instead of 16. I've filed issue
 100    :      // http://code.google.com/p/distorm/issues/detail?id=58 to have this fixed.
 101    :      // In the meantime this is a workaround to have the correct operand size.
 102  E :      switch (result[i].opcode) {
 103    :        case I_FNSTCW:
 104    :        case I_FLDCW:
 105    :          // If result[i].ops[0].size is not zero that means that distorm has been
 106    :          // fixed and that this workaround is not needed anymore.
 107  E :          DCHECK(result[i].ops[0].size == 0);
 108  E :          result[i].ops[0].size = 16;
 109  E :          break;
 110    :        case I_FST:
 111    :        case I_FSTP:
 112    :        case I_FIST:
 113    :        case I_FISTP:
 114    :          // Distorm @229 has a bug: the flags do not reflect the memory store.
 115    :          // https://code.google.com/p/distorm/issues/detail?id=70
 116    :          // If FLAG_DST_WR is set that means that distorm has been fixed.
 117  E :          DCHECK_EQ(0, result[i].flags & FLAG_DST_WR);
 118  E :          result[i].flags |= FLAG_DST_WR;
 119    :          break;
 120    :        default:
 121    :          break;
 122    :      }
 123  E :    }
 124  E :    return ret;  // The original distorm result; only |result| entries were patched.
 125  E :  }
 126    :  
 127    :  bool DecodeOneInstruction(  // Decodes at most one instruction from |buffer| in 32-bit mode; false if none was decoded.
 128  E :      uint32 address, const uint8* buffer, size_t length, _DInst* instruction) {
 129  E :    DCHECK(buffer != NULL);
 130  E :    DCHECK(instruction != NULL);
 131    :  
 132  E :    _CodeInfo code = {};
 133  E :    code.dt = Decode32Bits;
 134  E :    code.features = DF_NONE;
 135  E :    code.codeOffset = address;  // Address reported for the decoded instruction.
 136  E :    code.codeLen = length;
 137  E :    code.code = buffer;
 138    :  
 139  E :    unsigned int decoded = 0;
 140  E :    ::memset(instruction, 0, sizeof(*instruction));
 141  E :    _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);  // Result array holds exactly one entry.
 142    :  
 143  E :    if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)  // MEMORYERR is tolerated; presumably also reported when the 1-entry array fills up -- confirm against distorm.
 144  i :      return false;
 145    :  
 146    :    // It's possible for the decode to stop on a single partially valid
 147    :    // instruction (i.e., a valid prefix of an instruction, waiting on more
 148    :    // data), in which case it will return MEMORYERR (wants more data) and a
 149    :    // decoded instruction count of zero.
 150  E :    if (decoded == 0)
 151  i :      return false;
 152    :  
 153  E :    DCHECK_GE(length, instruction->size);  // The instruction must fit in the buffer...
 154  E :    DCHECK_LT(0, instruction->size);  // ...and be non-empty.
 155    :  
 156  E :    return true;
 157  E :  }
 158    :  
 159    :  bool DecodeOneInstruction(  // Overload without an address; forwards to the overload above.
 160  E :      const uint8* buffer, size_t length, _DInst* instruction) {
 161  E :    DCHECK(buffer != NULL);
 162  E :    DCHECK(instruction != NULL);
 163  E :    if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))  // Uses 0x10000000 as a fixed stand-in address.
 164  i :      return false;
 165  E :    return true;
 166  E :  }
 167    :  
 168    :  bool InstructionToString(  // Writes "<hex bytes> <mnemonic> <operands>" for |instruction| into |buffer|.
 169    :      const _DInst& instruction,
 170    :      const uint8_t* data,
 171    :      int code_length,
 172  E :      std::string* buffer) {
 173  E :    DCHECK(data != NULL);
 174  E :    DCHECK(buffer != NULL);
 175    :  
 176  E :    _CodeInfo code = {};
 177  E :    code.codeOffset = 0;
 178  E :    code.code = data;
 179  E :    code.codeLen = code_length;
 180  E :    code.dt = Decode32Bits;
 181  E :    _DecodedInst decoded = {};
 182  E :    _DInst dinst = instruction;  // Work on a copy so the caller's instruction is untouched.
 183    :  
 184  E :    dinst.addr = 0;  // Same value as code.codeOffset; presumably so distorm reads the bytes from the start of |data| -- confirm.
 185  E :    distorm_format64(&code, &dinst, &decoded);
 186    :  
 187    :    *buffer = base::StringPrintf("%-14s %s %s",
 188    :                                 decoded.instructionHex.p,
 189    :                                 decoded.mnemonic.p,
 190  E :                                 decoded.operands.p);
 191  E :    return true;  // No failure path exists; this always returns true.
 192  E :  }
 193    :  
 194  E :  bool IsNop(const _DInst& instruction) {  // True only for the NOP-equivalent forms enumerated in the cases below.
 195  E :    switch (instruction.opcode) {
 196    :      default:
 197    :        // Only the sequences recognized below qualify as NOP instructions.
 198  E :        return false;
 199    :  
 200    :      case I_XCHG:
 201    :        // This handles the 1-byte NOP sequence.
 202    :        //     1-byte: xchg eax, eax.
 203    :        return instruction.ops[0].type == O_REG &&
 204    :            instruction.ops[0].index == RM_AX &&
 205    :            instruction.ops[1].type == O_REG &&
 206  i :            instruction.ops[1].index == RM_AX;  // NOTE(review): RM_AX looks like a distorm register-mask constant, not an R_* operand index -- confirm this really matches eax.
 207    :  
 208    :      case I_NOP:
 209    :        // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
 210    :        //     2-byte: 66 NOP
 211    :        //     4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
 212    :        //     5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
 213    :        //     7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
 214    :        //     8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
 215    :        //     9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
 216  E :        return true;
 217    :  
 218    :      case I_LEA:
 219    :        // This handles the 3 and 6 byte NOP sequences.
 220    :        //     3-byte: LEA REG, 0 (REG) (8-bit displacement)
 221    :        //     6-byte: LEA REG, 0 (REG) (32-bit displacement)
 222    :        return instruction.ops[0].type == O_REG &&
 223    :            instruction.ops[1].type == O_SMEM &&
 224    :            instruction.ops[0].index == instruction.ops[1].index &&
 225  E :            instruction.disp == 0;  // Same register on both sides with a zero displacement.
 226    :  
 227    :      case I_MOV:
 228    :        // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
 229    :        return instruction.ops[0].type == O_REG &&
 230    :            instruction.ops[1].type == O_REG &&
 231  E :            instruction.ops[0].index == instruction.ops[1].index;  // Source and destination are the same register.
 232    :    }
 233  E :  }
 234    :  
 235  E :  bool IsCall(const _DInst& instruction) {  // True if the flow-control class in |instruction.meta| is FC_CALL.
 236  E :    return META_GET_FC(instruction.meta) == FC_CALL;
 237  E :  }
 238    :  
 239  E :  bool IsReturn(const _DInst& instruction) {  // True if the flow-control class in |instruction.meta| is FC_RET.
 240  E :    return META_GET_FC(instruction.meta) == FC_RET;
 241  E :  }
 242    :  
 243  E :  bool IsSystemCall(const _DInst& instruction) {  // True if the flow-control class in |instruction.meta| is FC_SYS.
 244  E :    return META_GET_FC(instruction.meta) == FC_SYS;
 245  E :  }
 246    :  
 247  E :  bool IsConditionalBranch(const _DInst& instruction) {  // True if the flow-control class is FC_CND_BRANCH.
 248  E :    return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
 249  E :  }
 250    :  
 251  E :  bool IsUnconditionalBranch(const _DInst& instruction) {  // True if the flow-control class is FC_UNC_BRANCH.
 252  E :    return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
 253  E :  }
 254    :  
 255  E :  bool IsBranch(const _DInst& instruction) {  // True for either a conditional or an unconditional branch.
 256  E :    return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
 257  E :  }
 258    :  
 259  E :  bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {  // True if operand |operand_index| has type O_PC.
 260  E :    DCHECK_LE(0, operand_index);  // |operand_index| must lie within instruction.ops.
 261  E :    DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
 262  E :    return instruction.ops[operand_index].type == O_PC;
 263  E :  }
 264    :  
 265  E :  bool IsControlFlow(const _DInst& instruction) {
 266    :    // For the purposes of Syzygy we treat all of the control-flow-altering
 267    :    // instructions EXCEPT call as true control flow (branch, return, syscall).
 268    :    return IsBranch(instruction) ||
 269    :        IsReturn(instruction) ||
 270  E :        IsSystemCall(instruction);  // Interrupts (FC_INT) are not tested for here either.
 271  E :  }
 272    :  
 273  E :  bool IsImplicitControlFlow(const _DInst& instruction) {  // True if the instruction redirects control without a PC-relative target.
 274    :    // Control flow jumps implicitly out of the block for RET and SYS.
 275  E :    if (IsReturn(instruction) || IsSystemCall(instruction))
 276  E :      return true;
 277    :  
 278    :    // Control flow is implicit for non PC-relative jumps (i.e., explicit
 279    :    // branches where the target is computed, stored in a register, stored
 280    :    // in a memory location, or otherwise indirect).
 281    :    if (IsUnconditionalBranch(instruction) &&
 282  E :        !HasPcRelativeOperand(instruction, 0)) {  // Only the first operand is inspected.
 283  E :      return true;
 284    :    }
 285    :  
 286    :    // Otherwise it's not implicit control flow.
 287  E :    return false;
 288  E :  }
 289    :  
 290  E :  bool IsInterrupt(const _DInst& instruction) {  // True if the flow-control class in |instruction.meta| is FC_INT.
 291  E :    return META_GET_FC(instruction.meta) == FC_INT;
 292  E :  }
 293    :  
 294  E :  bool IsDebugInterrupt(const _DInst& instruction) {  // True only for an interrupt that is 1 byte long with opcode I_INT_3.
 295    :    return IsInterrupt(instruction) && instruction.size == 1 &&
 296  E :        instruction.opcode == I_INT_3;
 297  E :  }
 298    :  
 299  E :  _RegisterType GetRegisterType(const Register& reg) {  // Convenience overload; looks the type up by reg.id().
 300  E :    return GetRegisterType(reg.id());
 301  E :  }
 302    :  
 303  E :  _RegisterType GetRegisterType(RegisterId reg_id) {  // Returns the distorm R_* constant stored for |reg_id| in the table below.
 304    :    static const _RegisterType kRegisterTypesById[kRegisterMax] = {  // Indexed directly by RegisterId; assumes the enum is ordered 8-, 16-, then 32-bit -- TODO confirm.
 305    :      R_AL,  R_CL,  R_DL,  R_BL,  R_AH,  R_CH,  R_DH,  R_BH,  // 8-bit.
 306    :      R_AX,  R_CX,  R_DX,  R_BX,  R_SP,  R_BP,  R_SI,  R_DI,  // 16-bit.
 307    :      R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI  // 32-bit.
 308    :    };
 309  E :    DCHECK_LE(kRegisterMin, reg_id);  // |reg_id| must be in [kRegisterMin, kRegisterMax).
 310  E :    DCHECK_GT(kRegisterMax, reg_id);
 311  E :    return kRegisterTypesById[reg_id];
 312  E :  }
 313    :  
 314  E :  RegisterId GetRegisterId(uint32 distorm_reg_type) {  // Maps a distorm R_* constant to a RegisterId; kRegisterNone if it is not listed.
 315  E :    switch (distorm_reg_type) {
 316    :      // 8-bit registers.
 317  E :      case R_AL: return kRegisterAl;
 318  i :      case R_CL: return kRegisterCl;
 319  i :      case R_DL: return kRegisterDl;
 320  i :      case R_BL: return kRegisterBl;
 321  i :      case R_AH: return kRegisterAh;
 322  i :      case R_CH: return kRegisterCh;
 323  i :      case R_DH: return kRegisterDh;
 324  E :      case R_BH: return kRegisterBh;
 325    :  
 326    :      // 16-bit registers.
 327  i :      case R_AX: return kRegisterAx;
 328  E :      case R_CX: return kRegisterCx;
 329  i :      case R_DX: return kRegisterDx;
 330  i :      case R_BX: return kRegisterBx;
 331  E :      case R_SP: return kRegisterSp;
 332  i :      case R_BP: return kRegisterBp;
 333  i :      case R_SI: return kRegisterSi;
 334  i :      case R_DI: return kRegisterDi;
 335    :  
 336    :      // 32-bit registers.
 337  E :      case R_EAX: return kRegisterEax;
 338  E :      case R_ECX: return kRegisterEcx;
 339  E :      case R_EDX: return kRegisterEdx;
 340  E :      case R_EBX: return kRegisterEbx;
 341  E :      case R_ESP: return kRegisterEsp;
 342  E :      case R_EBP: return kRegisterEbp;
 343  E :      case R_ESI: return kRegisterEsi;
 344  E :      case R_EDI: return kRegisterEdi;
 345    :  
 346  i :      default: return kRegisterNone;  // Any register type not listed above.
 347    :    }
 348  E :  }
 349    :  
 350  E :  const Register& GetRegister(uint32 distorm_reg_type) {  // Returns Register::Get() for the id mapped from |distorm_reg_type|.
 351  E :    return Register::Get(GetRegisterId(distorm_reg_type));
 352  E :  }
 353    :  
 354    :  }  // namespace core

Coverage information generated Wed Dec 11 11:34:16 2013.