Coverage for /Syzygy/core/disassembler_util.cc

Coverage | Lines executed / instrumented / missing | Language | Group
89.5% | 162 / 181 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/core/disassembler_util.h"
  16    :  
  17    :  #include "base/logging.h"
  18    :  #include "base/strings/stringprintf.h"
  19    :  #include "mnemonics.h"  // NOLINT
  20    :  
  21    :  namespace core {
  22    :  
  23    :  namespace {
  24    :  
  25    :  // Return the size of a 3-byte VEX encoded instruction.
  26    :  //
  27    :  // The layout of these instructions is as follows, starting with a byte with
  28    :  // value 0xC4:
  29    :  //     - First byte:
  30    :  //         +---+---+---+---+---+---+---+---+
  31    :  //         | 1   1   0   0   0   1   0   0 |
  32    :  //         +---+---+---+---+---+---+---+---+
  33    :  //     - Second byte:
  34    :  //         +---+---+---+---+---+---+---+---+
  35    :  //         |~R |~X |~B |     map_select    |
  36    :  //         +---+---+---+---+---+---+---+---+
  37    :  //     - Third byte:
  38    :  //         +---+---+---+---+---+---+---+---+
  39    :  //         |W/E|     ~vvvv     | L |   pp  |
  40    :  //         +---+---+---+---+---+---+---+---+
  41    :  //     - Fourth byte: The opcode for this instruction.
  42    :  //
  43    :  // |map_select| Indicates the opcode map that should be used for this
  44    :  // instruction.
  45    :  //
  46    :  // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
  47    :  // for more details.
  48  E :  size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
  49  E :    DCHECK_EQ(0xC4, ci->code[0]);
  50    :    // Switch case based on the opcode map used by this instruction.
  51  E :    switch (ci->code[1] & 0x1F) {
  52    :      case 0x01: {
  53  i :        switch (ci->code[3]) {
  54  i :          case 0x1D: return 5;  // vpermd
  55    :          default: break;
  56    :        }
  57  i :        break;
  58    :      }
  59    :      case 0x02: {
  60  E :        switch (ci->code[3]) {
  61  E :          case 0x13: return 5;  // vcvtph2ps
  62  E :          case 0x36: return 5;  // vpermd
  63  E :          case 0x5A: return 6;  // vbroadcasti128
  64  E :          case 0x78: return 5;  // vpbroadcastb
  65    :          default: break;
  66    :        }
  67  i :        break;
  68    :      }
  69    :      case 0x03: {
  70  E :        switch (ci->code[3]) {
  71  E :          case 0x00: return 6;  // vpermq
  72  E :          case 0x1D: return 6;  // vcvtps2ph
  73  E :          case 0x38: return 7;  // vinserti128
  74  E :          case 0x39: return 6;  // vextracti128
  75    :          default: break;
  76    :        }
  77    :        break;
  78    :      }
  79    :      default:
  80    :        break;
  81    :    }
  82  i :    return 0;
  83  E :  }
  84    :  
  85    :  // Handle improperly decoded instructions. Returns true if an instruction was
  86    :  // handled, false otherwise. If this returns false then none of the output
  87    :  // parameters will have been changed.
  88    :  bool HandleBadDecode(_CodeInfo* ci,
  89    :                       _DInst result[],
  90    :                       unsigned int max_instructions,
  91    :                       unsigned int* used_instructions_count,
  92  E :                       _DecodeResult* ret) {
  93  E :    DCHECK_NE(reinterpret_cast<_CodeInfo*>(NULL), ci);
  94  E :    DCHECK_LE(1u, max_instructions);
  95  E :    DCHECK_NE(reinterpret_cast<unsigned int*>(NULL), used_instructions_count);
  96  E :    DCHECK_NE(reinterpret_cast<_DecodeResult*>(NULL), ret);
  97    :  
  98  E :    size_t size = 0;
  99    :  
 100  E :    if (ci->code[0] == 0xC4)
 101  E :      size = Get3ByteVexEncodedInstructionSize(ci);
 102    :  
 103  E :    if (size == 0)
 104  E :      return false;
 105    :  
 106    :    // We set the bare minimum properties that are required for any
 107    :    // subsequent processing that we perform.
 108    :  
 109  E :    *used_instructions_count = 1;
 110    :  
 111  E :    ::memset(result, 0, sizeof(result[0]));
 112  E :    result[0].addr = ci->codeOffset;
 113  E :    result[0].size = static_cast<uint8_t>(size);
 114    :  
 115  E :    DCHECK_EQ(FC_NONE, META_GET_FC(result[0].meta));
 116  E :    DCHECK_EQ(O_NONE, result[0].ops[0].type);
 117  E :    DCHECK_EQ(O_NONE, result[0].ops[1].type);
 118  E :    DCHECK_EQ(O_NONE, result[0].ops[2].type);
 119  E :    DCHECK_EQ(O_NONE, result[0].ops[3].type);
 120    :  
 121  E :    *ret = DECRES_SUCCESS;
 122    :  
 123  E :    return true;
 124  E :  }
 125    :  
 126    :  }  // namespace
 127    :  
 128    :  _DecodeResult DistormDecompose(_CodeInfo* ci,
 129    :                                 _DInst result[],
 130    :                                 unsigned int max_instructions,
 131  E :                                 unsigned int* used_instructions_count) {
 132    :    _DecodeResult ret =
 133  E :        distorm_decompose(ci, result, max_instructions, used_instructions_count);
 134    :  
 135    :    // Distorm @ac277fb has a bug where it has problems decoding some AVX
 136    :    // instructions. The encoding is described in detail here:
 137    :    //   http://en.wikipedia.org/wiki/VEX_prefix
 138    :    // An issue has been filed here:
 139    :    //   https://code.google.com/p/distorm/issues/detail?id=77
 140    :    // This is a workaround until the bug is fixed. We only care about the case
 141    :    // where decoding failed.
 142  E :    if (ret != DECRES_SUCCESS && *used_instructions_count == 0) {
 143  E :      if (HandleBadDecode(ci, result, max_instructions, used_instructions_count,
 144    :                          &ret)) {
 145  E :        return ret;
 146    :      }
 147    :    }
 148    :  
 149  E :    for (unsigned int i = 0; i < *used_instructions_count; ++i) {
 150  E :      switch (result[i].opcode) {
 151    :        // Distorm @ac277fb has a bug where the access size for I_FXRSTOR and
 152    :        // I_FXSAVE destination operand is 0 instead of 64. I've filed
 153    :        // https://github.com/gdabah/distorm/issues/96 to have this fixed.
 154    :        // In the meantime this is a workaround to have the correct operand size.
 155    :        case I_FXRSTOR:
 156    :        case I_FXSAVE:
 157  E :          DCHECK_EQ(0U, result[i].ops[0].size);
 158  E :          result[i].ops[0].size = 64;
 159    :          break;
 160    :        default:
 161    :          break;
 162    :      }
 163  E :    }
 164    :  
 165  E :    return ret;
 166  E :  }
 167    :  
 168    :  bool DecodeOneInstruction(uint32_t address,
 169    :                            const uint8_t* buffer,
 170    :                            size_t length,
 171  E :                            _DInst* instruction) {
 172  E :    DCHECK(buffer != NULL);
 173  E :    DCHECK(instruction != NULL);
 174    :  
 175  E :    _CodeInfo code = {};
 176  E :    code.dt = Decode32Bits;
 177  E :    code.features = DF_NONE;
 178  E :    code.codeOffset = address;
 179  E :    code.codeLen = length;
 180  E :    code.code = buffer;
 181    :  
 182  E :    unsigned int decoded = 0;
 183  E :    ::memset(instruction, 0, sizeof(*instruction));
 184  E :    _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
 185    :  
 186  E :    if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
 187  i :      return false;
 188    :  
 189    :    // The decode can stop after consuming only a partially valid instruction
 190    :    // (i.e., a valid prefix of an instruction that still needs more data). In
 191    :    // that case it returns MEMORYERR (wants more data) and a decoded
 192    :    // instruction count of zero.
 193  E :    if (decoded == 0)
 194  E :      return false;
 195    :  
 196  E :    DCHECK_GE(length, instruction->size);
 197  E :    DCHECK_LT(0, instruction->size);
 198    :  
 199  E :    return true;
 200  E :  }
 201    :  
 202    :  bool DecodeOneInstruction(const uint8_t* buffer,
 203    :                            size_t length,
 204  E :                            _DInst* instruction) {
 205  E :    DCHECK(buffer != NULL);
 206  E :    DCHECK(instruction != NULL);
 207  E :    if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
 208  E :      return false;
 209  E :    return true;
 210  E :  }
 211    :  
 212    :  bool InstructionToString(
 213    :      const _DInst& instruction,
 214    :      const uint8_t* data,
 215    :      int code_length,
 216  E :      std::string* buffer) {
 217  E :    DCHECK(data != NULL);
 218  E :    DCHECK(buffer != NULL);
 219    :  
 220  E :    _CodeInfo code = {};
 221  E :    code.codeOffset = 0;
 222  E :    code.code = data;
 223  E :    code.codeLen = code_length;
 224  E :    code.dt = Decode32Bits;
 225  E :    _DecodedInst decoded = {};
 226  E :    _DInst dinst = instruction;
 227    :  
 228  E :    dinst.addr = 0;
 229  E :    distorm_format64(&code, &dinst, &decoded);
 230    :  
 231  E :    *buffer = base::StringPrintf("%-14s %s %s",
 232    :                                 decoded.instructionHex.p,
 233    :                                 decoded.mnemonic.p,
 234    :                                 decoded.operands.p);
 235  E :    return true;
 236  E :  }
 237    :  
 238  E :  bool IsNop(const _DInst& instruction) {
 239  E :    switch (instruction.opcode) {
 240    :      default:
 241    :        // Only the sequences recognized below qualify as NOP instructions.
 242  E :        return false;
 243    :  
 244    :      case I_XCHG:
 245    :        // This handles the 1-byte NOP sequence.
 246    :        //     1-byte: xchg eax, eax.
 247  E :        return instruction.ops[0].type == O_REG &&
 248    :            instruction.ops[0].index == RM_AX &&
 249    :            instruction.ops[1].type == O_REG &&
 250    :            instruction.ops[1].index == RM_AX;
 251    :  
 252    :      case I_NOP:
 253    :        // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
 254    :        //     2-byte: 66 NOP
 255    :        //     4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
 256    :        //     5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
 257    :        //     7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
 258    :        //     8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
 259    :        //     9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
 260  E :        return true;
 261    :  
 262    :      case I_LEA:
 263    :        // This handles the 3 and 6 byte NOP sequences.
 264    :        //     3-byte: LEA REG, 0 (REG) (8-bit displacement)
 265    :        //     6-byte: LEA REG, 0 (REG) (32-bit displacement)
 266  E :        return instruction.ops[0].type == O_REG &&
 267    :            instruction.ops[1].type == O_SMEM &&
 268    :            instruction.ops[0].index == instruction.ops[1].index &&
 269    :            instruction.disp == 0;
 270    :  
 271    :      case I_MOV:
 272    :        // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
 273  E :        return instruction.ops[0].type == O_REG &&
 274    :            instruction.ops[1].type == O_REG &&
 275    :            instruction.ops[0].index == instruction.ops[1].index;
 276    :    }
 277  E :  }
 278    :  
 279  E :  bool IsCall(const _DInst& instruction) {
 280  E :    return META_GET_FC(instruction.meta) == FC_CALL;
 281  E :  }
 282    :  
 283  E :  bool IsReturn(const _DInst& instruction) {
 284  E :    return META_GET_FC(instruction.meta) == FC_RET;
 285  E :  }
 286    :  
 287  E :  bool IsSystemCall(const _DInst& instruction) {
 288  E :    return META_GET_FC(instruction.meta) == FC_SYS;
 289  E :  }
 290    :  
 291  E :  bool IsConditionalBranch(const _DInst& instruction) {
 292  E :    return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
 293  E :  }
 294    :  
 295  E :  bool IsUnconditionalBranch(const _DInst& instruction) {
 296  E :    return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
 297  E :  }
 298    :  
 299  E :  bool IsBranch(const _DInst& instruction) {
 300  E :    return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
 301  E :  }
 302    :  
 303  E :  bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
 304  E :    DCHECK_LE(0, operand_index);
 305  E :    DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
 306  E :    return instruction.ops[operand_index].type == O_PC;
 307  E :  }
 308    :  
 309  E :  bool IsControlFlow(const _DInst& instruction) {
 310    :    // For the purposes of Syzygy we treat all of the control flow altering
 311    :    // instructions EXCEPT for call as true control flow.
 312  E :    return IsBranch(instruction) ||
 313    :        IsReturn(instruction) ||
 314    :        IsSystemCall(instruction);
 315  E :  }
 316    :  
 317  E :  bool IsImplicitControlFlow(const _DInst& instruction) {
 318    :    // Control flow jumps implicitly out of the block for RET and SYS
 319  E :    if (IsReturn(instruction) || IsSystemCall(instruction))
 320  E :      return true;
 321    :  
 322    :    // Control flow is implicit for non PC-relative jumps (i.e., explicit
 323    :    // branches where the target is computed, stored in a register, stored
 324    :    // in a memory location, or otherwise indirect).
 325  E :    if (IsUnconditionalBranch(instruction) &&
 326    :        !HasPcRelativeOperand(instruction, 0)) {
 327  E :      return true;
 328    :    }
 329    :  
 330    :    // Otherwise it's not implicit control flow.
 331  E :    return false;
 332  E :  }
 333    :  
 334  E :  bool IsInterrupt(const _DInst& instruction) {
 335  E :    return META_GET_FC(instruction.meta) == FC_INT;
 336  E :  }
 337    :  
 338  E :  bool IsDebugInterrupt(const _DInst& instruction) {
 339  E :    return IsInterrupt(instruction) && instruction.size == 1 &&
 340    :        instruction.opcode == I_INT_3;
 341  E :  }
 342    :  
 343  E :  _RegisterType GetRegisterType(const Register& reg) {
 344  E :    return GetRegisterType(reg.id());
 345  E :  }
 346    :  
 347  E :  _RegisterType GetRegisterType(RegisterId reg_id) {
 348    :    static const _RegisterType kRegisterTypesById[assm::kRegisterMax] = {
 349    :      R_AL,  R_CL,  R_DL,  R_BL,  R_AH,  R_CH,  R_DH,  R_BH,  // 8-bit.
 350    :      R_AX,  R_CX,  R_DX,  R_BX,  R_SP,  R_BP,  R_SI,  R_DI,  // 16-bit.
 351    :      R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI  // 32-bit.
 352    :    };
 353  E :    DCHECK_LE(assm::kRegisterMin, reg_id);
 354  E :    DCHECK_GT(assm::kRegisterMax, reg_id);
 355  E :    return kRegisterTypesById[reg_id];
 356  E :  }
 357    :  
 358  E :  RegisterId GetRegisterId(uint32_t distorm_reg_type) {
 359  E :    switch (distorm_reg_type) {
 360    :      // 8-bit registers.
 361  E :      case R_AL: return assm::kRegisterAl;
 362  i :      case R_CL: return assm::kRegisterCl;
 363  i :      case R_DL: return assm::kRegisterDl;
 364  i :      case R_BL: return assm::kRegisterBl;
 365  i :      case R_AH: return assm::kRegisterAh;
 366  i :      case R_CH: return assm::kRegisterCh;
 367  i :      case R_DH: return assm::kRegisterDh;
 368  E :      case R_BH: return assm::kRegisterBh;
 369    :  
 370    :      // 16-bit registers.
 371  i :      case R_AX: return assm::kRegisterAx;
 372  E :      case R_CX: return assm::kRegisterCx;
 373  i :      case R_DX: return assm::kRegisterDx;
 374  i :      case R_BX: return assm::kRegisterBx;
 375  E :      case R_SP: return assm::kRegisterSp;
 376  i :      case R_BP: return assm::kRegisterBp;
 377  i :      case R_SI: return assm::kRegisterSi;
 378  i :      case R_DI: return assm::kRegisterDi;
 379    :  
 380    :      // 32-bit registers.
 381  E :      case R_EAX: return assm::kRegisterEax;
 382  E :      case R_ECX: return assm::kRegisterEcx;
 383  E :      case R_EDX: return assm::kRegisterEdx;
 384  E :      case R_EBX: return assm::kRegisterEbx;
 385  E :      case R_ESP: return assm::kRegisterEsp;
 386  E :      case R_EBP: return assm::kRegisterEbp;
 387  E :      case R_ESI: return assm::kRegisterEsi;
 388  E :      case R_EDI: return assm::kRegisterEdi;
 389    :  
 390  i :      default: return assm::kRegisterNone;
 391    :    }
 392  E :  }
 393    :  
 394  E :  const Register& GetRegister(uint32_t distorm_reg_type) {
 395  E :    return Register::Get(GetRegisterId(distorm_reg_type));
 396  E :  }
 397    :  
 398    :  }  // namespace core

Coverage information generated Fri Jul 29 11:00:21 2016.