Coverage for /Syzygy/core/disassembler_util.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
87.4% | 152 / 174 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  
  15    :  #include "syzygy/core/disassembler_util.h"
  16    :  
  17    :  #include "base/logging.h"
  18    :  #include "base/strings/stringprintf.h"
  19    :  #include "mnemonics.h"  // NOLINT
  20    :  
  21    :  namespace core {
  22    :  
  23    :  namespace {
  24    :  
  25    :  // Returns the size of a 3-byte VEX encoded instruction, or 0 if unrecognized.
  26    :  //
  27    :  // The layout of these instructions is as follows, starting with a byte with
  28    :  // value 0xC4:
  29    :  //     - First byte:
  30    :  //         +---+---+---+---+---+---+---+---+
  31    :  //         | 1   1   0   0   0   1   0   0 |
  32    :  //         +---+---+---+---+---+---+---+---+
  33    :  //     - Second byte:
  34    :  //         +---+---+---+---+---+---+---+---+
  35    :  //         |~R |~X |~B |     map_select    |
  36    :  //         +---+---+---+---+---+---+---+---+
  37    :  //     - Third byte:
  38    :  //         +---+---+---+---+---+---+---+---+
  39    :  //         |W/E|     ~vvvv     | L |   pp  |
  40    :  //         +---+---+---+---+---+---+---+---+
  41    :  //     - Fourth byte: The opcode for this instruction.
  42    :  //
  43    :  // |map_select| indicates the opcode map that should be used for this
  44    :  // instruction.
  45    :  //
  46    :  // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
  47    :  // for more details.
  48  E :  size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
  49  E :    DCHECK_EQ(0xC4, ci->code[0]);
  50    :    // Switch case based on the opcode map used by this instruction.
  51  E :    switch (ci->code[1] & 0x1F) {
  52    :      case 0x01: {
  53  i :        switch (ci->code[3]) {
  54  i :          case 0x1D: return 5;  // vpermd
  55    :          default: break;
  56    :        }
  57  i :        break;
  58    :      }
  59    :      case 0x02: {
  60  E :        switch (ci->code[3]) {
  61  E :          case 0x36: return 5;  // vpermd
  62  E :          case 0x5A: return 6;  // vbroadcasti128
  63  E :          case 0x78: return 5;  // vpbroadcastb
  64    :          default: break;
  65    :        }
  66  i :        break;
  67    :      }
  68    :      case 0x03: {
  69  E :        switch (ci->code[3]) {
  70  E :          case 0x00: return 6;  // vpermq
  71  E :          case 0x38: return 7;  // vinserti128
  72  E :          case 0x39: return 6;  // vextracti128
  73    :          default: break;
  74    :        }
  75    :        break;
  76    :      }
  77    :      default:
  78    :        break;
  79    :    }
  80  i :    return 0;
  81  E :  }
  82    :  
  83    :  // Handle improperly decoded instructions. Returns true if an instruction was
  84    :  // handled, false otherwise. If this returns false then none of the output
  85    :  // parameters will have been changed.
  86    :  bool HandleBadDecode(_CodeInfo* ci,
  87    :                       _DInst result[],
  88    :                       unsigned int max_instructions,
  89    :                       unsigned int* used_instructions_count,
  90  E :                       _DecodeResult* ret) {
  91  E :    DCHECK_NE(reinterpret_cast<_CodeInfo*>(NULL), ci);
  92  E :    DCHECK_LE(1u, max_instructions);
  93  E :    DCHECK_NE(reinterpret_cast<unsigned int*>(NULL), used_instructions_count);
  94  E :    DCHECK_NE(reinterpret_cast<_DecodeResult*>(NULL), ret);
  95    :  
  96  E :    size_t size = 0;
  97    :  
  98  E :    if (ci->code[0] == 0xC4)
  99  E :      size = Get3ByteVexEncodedInstructionSize(ci);
 100    :  
 101  E :    if (size == 0)
 102  i :      return false;
 103    :  
 104    :    // We set the bare minimum properties that are required for any
 105    :    // subsequent processing that we perform.
 106    :  
 107  E :    *used_instructions_count = 1;
 108    :  
 109  E :    ::memset(result, 0, sizeof(result[0]));
 110  E :    result[0].addr = ci->codeOffset;
 111  E :    result[0].size = size;
 112    :  
 113  E :    DCHECK_EQ(FC_NONE, META_GET_FC(result[0].meta));
 114  E :    DCHECK_EQ(O_NONE, result[0].ops[0].type);
 115  E :    DCHECK_EQ(O_NONE, result[0].ops[1].type);
 116  E :    DCHECK_EQ(O_NONE, result[0].ops[2].type);
 117  E :    DCHECK_EQ(O_NONE, result[0].ops[3].type);
 118    :  
 119  E :    *ret = DECRES_SUCCESS;
 120    :  
 121  E :    return true;
 122  E :  }
 123    :  
 124    :  }  // namespace
 125    :  
 126    :  _DecodeResult DistormDecompose(_CodeInfo* ci,
 127    :                                 _DInst result[],
 128    :                                 unsigned int max_instructions,
 129  E :                                 unsigned int* used_instructions_count) {
 130    :    _DecodeResult ret =
 131  E :        distorm_decompose(ci, result, max_instructions, used_instructions_count);
 132    :  
 133    :    // Distorm @229 has a bug where it has problems decoding some AVX
 134    :    // instructions. The encoding is described in detail here:
 135    :    //   http://en.wikipedia.org/wiki/VEX_prefix
 136    :    // An issue has been filed here:
 137    :    //   https://code.google.com/p/distorm/issues/detail?id=77
 138    :    // This is a workaround until the bug is fixed. We only care about the case
 139    :    // where decoding failed.
 140  E :    if (ret != DECRES_SUCCESS && *used_instructions_count == 0) {
 141    :      if (HandleBadDecode(ci, result, max_instructions, used_instructions_count,
 142  E :                          &ret)) {
 143  E :        return ret;
 144    :      }
 145    :    }
 146  E :    return ret;
 147  E :  }
 148    :  
 149    :  bool DecodeOneInstruction(
 150  E :      uint32 address, const uint8* buffer, size_t length, _DInst* instruction) {
 151  E :    DCHECK(buffer != NULL);
 152  E :    DCHECK(instruction != NULL);
 153    :  
 154  E :    _CodeInfo code = {};
 155  E :    code.dt = Decode32Bits;
 156  E :    code.features = DF_NONE;
 157  E :    code.codeOffset = address;
 158  E :    code.codeLen = length;
 159  E :    code.code = buffer;
 160    :  
 161  E :    unsigned int decoded = 0;
 162  E :    ::memset(instruction, 0, sizeof(*instruction));
 163  E :    _DecodeResult result = DistormDecompose(&code, instruction, 1, &decoded);
 164    :  
 165  E :    if (result != DECRES_MEMORYERR && result != DECRES_SUCCESS)
 166  i :      return false;
 167    :  
 168    :    // The decoder can stop after consuming only a valid prefix of an
 169    :    // instruction (i.e., it is waiting on more data), in which case it
 170    :    // returns MEMORYERR (wants more data) and a decoded instruction count of
 171    :    // zero.
 172  E :    if (decoded == 0)
 173  i :      return false;
 174    :  
 175  E :    DCHECK_GE(length, instruction->size);
 176  E :    DCHECK_LT(0, instruction->size);
 177    :  
 178  E :    return true;
 179  E :  }
 180    :  
 181    :  bool DecodeOneInstruction(
 182  E :      const uint8* buffer, size_t length, _DInst* instruction) {
 183  E :    DCHECK(buffer != NULL);
 184  E :    DCHECK(instruction != NULL);
 185  E :    if (!DecodeOneInstruction(0x10000000, buffer, length, instruction))
 186  i :      return false;
 187  E :    return true;
 188  E :  }
 189    :  
 190    :  bool InstructionToString(
 191    :      const _DInst& instruction,
 192    :      const uint8_t* data,
 193    :      int code_length,
 194  E :      std::string* buffer) {
 195  E :    DCHECK(data != NULL);
 196  E :    DCHECK(buffer != NULL);
 197    :  
 198  E :    _CodeInfo code = {};
 199  E :    code.codeOffset = 0;
 200  E :    code.code = data;
 201  E :    code.codeLen = code_length;
 202  E :    code.dt = Decode32Bits;
 203  E :    _DecodedInst decoded = {};
 204  E :    _DInst dinst = instruction;
 205    :  
 206  E :    dinst.addr = 0;
 207  E :    distorm_format64(&code, &dinst, &decoded);
 208    :  
 209    :    *buffer = base::StringPrintf("%-14s %s %s",
 210    :                                 decoded.instructionHex.p,
 211    :                                 decoded.mnemonic.p,
 212  E :                                 decoded.operands.p);
 213  E :    return true;
 214  E :  }
 215    :  
 216  E :  bool IsNop(const _DInst& instruction) {
 217  E :    switch (instruction.opcode) {
 218    :      default:
 219    :        // Only the sequences recognized below qualify as NOP instructions.
 220  E :        return false;
 221    :  
 222    :      case I_XCHG:
 223    :        // This handles the 1-byte NOP sequence.
 224    :        //     1-byte: xchg eax, eax.
 225    :        return instruction.ops[0].type == O_REG &&
 226    :            instruction.ops[0].index == RM_AX &&
 227    :            instruction.ops[1].type == O_REG &&
 228  E :            instruction.ops[1].index == RM_AX;
 229    :  
 230    :      case I_NOP:
 231    :        // This handles the 2, 4, 5, 7, 8 and 9 byte NOP sequences.
 232    :        //     2-byte: 66 NOP
 233    :        //     4-byte: NOP DWORD PTR [EAX + 0] (8-bit displacement)
 234    :        //     5-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (8-bit displacement)
 235    :        //     7-byte: NOP DWORD PTR [EAX + 0] (32-bit displacement)
 236    :        //     8-byte: NOP DWORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
 237    :        //     9-byte: NOP WORD PTR [EAX + EAX*1 + 0] (32-bit displacement)
 238  E :        return true;
 239    :  
 240    :      case I_LEA:
 241    :        // This handles the 3 and 6 byte NOP sequences.
 242    :        //     3-byte: LEA REG, 0 (REG) (8-bit displacement)
 243    :        //     6-byte: LEA REG, 0 (REG) (32-bit displacement)
 244    :        return instruction.ops[0].type == O_REG &&
 245    :            instruction.ops[1].type == O_SMEM &&
 246    :            instruction.ops[0].index == instruction.ops[1].index &&
 247  E :            instruction.disp == 0;
 248    :  
 249    :      case I_MOV:
 250    :        // Not documented in the Intel manuals, but we see "mov reg, reg" a lot.
 251    :        return instruction.ops[0].type == O_REG &&
 252    :            instruction.ops[1].type == O_REG &&
 253  E :            instruction.ops[0].index == instruction.ops[1].index;
 254    :    }
 255  E :  }
 256    :  
 257  E :  bool IsCall(const _DInst& instruction) {
 258  E :    return META_GET_FC(instruction.meta) == FC_CALL;
 259  E :  }
 260    :  
 261  E :  bool IsReturn(const _DInst& instruction) {
 262  E :    return META_GET_FC(instruction.meta) == FC_RET;
 263  E :  }
 264    :  
 265  E :  bool IsSystemCall(const _DInst& instruction) {
 266  E :    return META_GET_FC(instruction.meta) == FC_SYS;
 267  E :  }
 268    :  
 269  E :  bool IsConditionalBranch(const _DInst& instruction) {
 270  E :    return META_GET_FC(instruction.meta) == FC_CND_BRANCH;
 271  E :  }
 272    :  
 273  E :  bool IsUnconditionalBranch(const _DInst& instruction) {
 274  E :    return META_GET_FC(instruction.meta) == FC_UNC_BRANCH;
 275  E :  }
 276    :  
 277  E :  bool IsBranch(const _DInst& instruction) {
 278  E :    return IsConditionalBranch(instruction) || IsUnconditionalBranch(instruction);
 279  E :  }
 280    :  
 281  E :  bool HasPcRelativeOperand(const _DInst& instruction, int operand_index) {
 282  E :    DCHECK_LE(0, operand_index);
 283  E :    DCHECK_LT(operand_index, static_cast<int>(arraysize(instruction.ops)));
 284  E :    return instruction.ops[operand_index].type == O_PC;
 285  E :  }
 286    :  
 287  E :  bool IsControlFlow(const _DInst& instruction) {
 288    :    // For the purposes of Syzygy we treat all control-flow-altering
 289    :    // instructions EXCEPT call as true control flow.
 290    :    return IsBranch(instruction) ||
 291    :        IsReturn(instruction) ||
 292  E :        IsSystemCall(instruction);
 293  E :  }
 294    :  
 295  E :  bool IsImplicitControlFlow(const _DInst& instruction) {
 296    :    // Control flow jumps implicitly out of the block for RET and SYS
 297  E :    if (IsReturn(instruction) || IsSystemCall(instruction))
 298  E :      return true;
 299    :  
 300    :    // Control flow is implicit for non PC-relative jumps (i.e., explicit
 301    :    // branches where the target is computed, stored in a register, stored
 302    :    // in a memory location, or otherwise indirect).
 303    :    if (IsUnconditionalBranch(instruction) &&
 304  E :        !HasPcRelativeOperand(instruction, 0)) {
 305  E :      return true;
 306    :    }
 307    :  
 308    :    // Otherwise it's not implicit control flow.
 309  E :    return false;
 310  E :  }
 311    :  
 312  E :  bool IsInterrupt(const _DInst& instruction) {
 313  E :    return META_GET_FC(instruction.meta) == FC_INT;
 314  E :  }
 315    :  
 316  E :  bool IsDebugInterrupt(const _DInst& instruction) {
 317    :    return IsInterrupt(instruction) && instruction.size == 1 &&
 318  E :        instruction.opcode == I_INT_3;
 319  E :  }
 320    :  
 321  E :  _RegisterType GetRegisterType(const Register& reg) {
 322  E :    return GetRegisterType(reg.id());
 323  E :  }
 324    :  
 325  E :  _RegisterType GetRegisterType(RegisterId reg_id) {
 326    :    static const _RegisterType kRegisterTypesById[assm::kRegisterMax] = {
 327    :      R_AL,  R_CL,  R_DL,  R_BL,  R_AH,  R_CH,  R_DH,  R_BH,  // 8-bit.
 328    :      R_AX,  R_CX,  R_DX,  R_BX,  R_SP,  R_BP,  R_SI,  R_DI,  // 16-bit.
 329    :      R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI  // 32-bit.
 330    :    };
 331  E :    DCHECK_LE(assm::kRegisterMin, reg_id);
 332  E :    DCHECK_GT(assm::kRegisterMax, reg_id);
 333  E :    return kRegisterTypesById[reg_id];
 334  E :  }
 335    :  
 336  E :  RegisterId GetRegisterId(uint32 distorm_reg_type) {
 337  E :    switch (distorm_reg_type) {
 338    :      // 8-bit registers.
 339  E :      case R_AL: return assm::kRegisterAl;
 340  i :      case R_CL: return assm::kRegisterCl;
 341  i :      case R_DL: return assm::kRegisterDl;
 342  i :      case R_BL: return assm::kRegisterBl;
 343  i :      case R_AH: return assm::kRegisterAh;
 344  i :      case R_CH: return assm::kRegisterCh;
 345  i :      case R_DH: return assm::kRegisterDh;
 346  E :      case R_BH: return assm::kRegisterBh;
 347    :  
 348    :      // 16-bit registers.
 349  i :      case R_AX: return assm::kRegisterAx;
 350  E :      case R_CX: return assm::kRegisterCx;
 351  i :      case R_DX: return assm::kRegisterDx;
 352  i :      case R_BX: return assm::kRegisterBx;
 353  E :      case R_SP: return assm::kRegisterSp;
 354  i :      case R_BP: return assm::kRegisterBp;
 355  i :      case R_SI: return assm::kRegisterSi;
 356  i :      case R_DI: return assm::kRegisterDi;
 357    :  
 358    :      // 32-bit registers.
 359  E :      case R_EAX: return assm::kRegisterEax;
 360  E :      case R_ECX: return assm::kRegisterEcx;
 361  E :      case R_EDX: return assm::kRegisterEdx;
 362  E :      case R_EBX: return assm::kRegisterEbx;
 363  E :      case R_ESP: return assm::kRegisterEsp;
 364  E :      case R_EBP: return assm::kRegisterEbp;
 365  E :      case R_ESI: return assm::kRegisterEsi;
 366  E :      case R_EDI: return assm::kRegisterEdi;
 367    :  
 368  i :      default: return assm::kRegisterNone;
 369    :    }
 370  E :  }
 371    :  
 372  E :  const Register& GetRegister(uint32 distorm_reg_type) {
 373  E :    return Register::Get(GetRegisterId(distorm_reg_type));
 374  E :  }
 375    :  
 376    :  }  // namespace core

Coverage information generated Thu Jan 14 17:40:38 2016.