1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : #include "syzygy/pe/decompose_image_to_text_app.h"
16 :
17 : #include "pcrecpp.h"
18 : #include "syzygy/block_graph/basic_block_decomposer.h"
19 : #include "syzygy/pe/decomposer.h"
20 : #include "syzygy/pe/pe_file.h"
21 : #include "syzygy/pe/pe_transform_policy.h"
22 :
23 : #include "distorm.h" // NOLINT
24 :
25 : namespace pe {
26 :
27 : using block_graph::BasicBlockDecomposer;
28 : using core::RelativeAddress;
29 : using pe::Decomposer;
30 : using pe::ImageLayout;
31 : using pe::PEFile;
32 : using pe::PETransformPolicy;
33 :
34 : namespace {
35 :
// printf-style template for the usage message printed by PrintUsage(). It
// consumes a single argument: the program name as a wide string (%ls).
const char kUsageFormatStr[] =
    "Usage: %ls [options]\n"
    "\n"
    " A tool that decomposes a given image file, and dumps it as a\n"
    " human-readable textual description.\n"
    "\n"
    "Available options\n"
    " --basic-blocks\n"
    " Breaks each function down to basic blocks and dumps it at that\n"
    " level.\n"
    " --image=<image file>\n"
    " The image file to decompose. This option is required.\n"
    " --block-pattern=<regexp>\n"
    " Only dump blocks whose full name matches regexp.\n";
49 :
50 : using block_graph::BlockGraph;
51 : using block_graph::BasicBlock;
52 : using block_graph::BasicCodeBlock;
53 : using block_graph::BasicDataBlock;
54 : using block_graph::BasicEndBlock;
55 : using block_graph::BasicBlockReference;
56 :
57 E : void DumpReference(const BasicBlockReference& ref, FILE* out) {
58 E : DCHECK(out != NULL);
59 :
60 E : switch (ref.referred_type()) {
61 : case BasicBlockReference::REFERRED_TYPE_BLOCK: {
62 E : const BlockGraph::Block* block = ref.block();
63 E : if (ref.base() == 0) {
64 E : ::fprintf(out, " ; (%s", block->name().c_str());
65 E : } else if (ref.base() < 0) {
66 i : ::fprintf(out, " ; (%s%d", block->name().c_str(), ref.base());
67 i : } else {
68 E : BlockGraph::Label label;
69 E : if (block->GetLabel(ref.base(), &label)) {
70 E : ::fprintf(out, " ; (%s:%s",
71 : block->name().c_str(),
72 : label.ToString().c_str());
73 E : } else {
74 E : ::fprintf(out, " ; (%s+%d", block->name().c_str(), ref.base());
75 : }
76 E : }
77 E : if (ref.offset() == ref.base()) {
78 E : ::fprintf(out, ")");
79 E : } else {
80 E : ::fprintf(out, ", +indirect offset:%d)", ref.offset() - ref.base());
81 : }
82 : }
83 E : break;
84 :
85 : case BasicBlockReference::REFERRED_TYPE_BASIC_BLOCK: {
86 E : const BasicBlock* bb = ref.basic_block();
87 E : DCHECK_EQ(0, ref.base());
88 E : DCHECK_EQ(0, ref.offset());
89 :
90 E : ::fprintf(out, " ; (%s)", bb->name().c_str());
91 : }
92 E : break;
93 :
94 : case BasicBlockReference::REFERRED_TYPE_UNKNOWN:
95 : default:
96 i : NOTREACHED() << "All references should be typed.";
97 : break;
98 : }
99 E : }
100 :
// Writes the |size| bytes starting at |data| to |out| as consecutive
// two-digit lowercase hexadecimal values, with no separators and no trailing
// newline.
void HexDump(const uint8_t* data, size_t size, FILE* out) {
  const uint8_t* const end = data + size;
  for (const uint8_t* cursor = data; cursor != end; ++cursor)
    ::fprintf(out, "%02x", *cursor);
}
105 :
106 : } // namespace
107 :
108 :
109 : DecomposeImageToTextApp::DecomposeImageToTextApp()
110 E : : application::AppImplBase("Image To Text Decomposer"),
111 E : dump_basic_blocks_(false),
112 E : num_refs_(0) {
113 E : }
114 :
115 : void DecomposeImageToTextApp::PrintUsage(const base::FilePath& program,
116 E : const base::StringPiece& message) {
117 E : if (!message.empty()) {
118 E : ::fwrite(message.data(), 1, message.length(), out());
119 E : ::fprintf(out(), "\n\n");
120 : }
121 :
122 E : ::fprintf(out(), kUsageFormatStr, program.BaseName().value().c_str());
123 E : }
124 :
125 : bool DecomposeImageToTextApp::ParseCommandLine(
126 E : const base::CommandLine* cmd_line) {
127 E : image_path_ = cmd_line->GetSwitchValuePath("image");
128 E : if (image_path_.empty()) {
129 E : PrintUsage(cmd_line->GetProgram(),
130 : "You must provide the path to an image file.");
131 E : return false;
132 : }
133 :
134 E : dump_basic_blocks_ = cmd_line->HasSwitch("basic-blocks");
135 E : regexp_ = cmd_line->GetSwitchValueASCII("block-pattern");
136 E : if (!regexp_.empty()) {
137 i : pcrecpp::RE re(regexp_);
138 i : if (!re.error().empty()) {
139 i : PrintUsage(cmd_line->GetProgram(), "Invalid regular expression.");
140 i : return false;
141 : }
142 i : }
143 :
144 E : return true;
145 E : }
146 :
147 E : int DecomposeImageToTextApp::Run() {
148 E : DCHECK(!image_path_.empty());
149 :
150 E : if (!DumpImageToText(image_path_))
151 i : return 1;
152 :
153 E : return 0;
154 E : }
155 :
156 : void DecomposeImageToTextApp::DumpAddressSpaceToText(
157 E : const BlockGraph::AddressSpace& address_space) {
158 : BlockGraph::AddressSpace::RangeMap::const_iterator block_it(
159 E : address_space.address_space_impl().ranges().begin());
160 : BlockGraph::AddressSpace::RangeMap::const_iterator block_end(
161 E : address_space.address_space_impl().ranges().end());
162 :
163 E : pcrecpp::RE re(regexp_);
164 E : DCHECK(regexp_.empty() || re.error().empty());
165 :
166 E : for (; block_it != block_end; ++block_it) {
167 E : const BlockGraph::Block* block = block_it->second;
168 E : if (regexp_.empty() || re.FullMatch(block->name())) {
169 E : RelativeAddress addr = block_it->first.start();
170 E : DumpBlockToText(addr, block);
171 : }
172 E : }
173 E : }
174 :
175 : void DecomposeImageToTextApp::DumpSubGraphToText(
176 E : BasicBlockSubGraph& subgraph) {
177 : typedef BasicBlockSubGraph::BlockDescription BlockDescription;
178 : typedef BasicBlockSubGraph::BasicBlockOrdering BasicBlockOrdering;
179 : typedef block_graph::BasicBlock BasicBlock;
180 : typedef block_graph::BasicBlockReference BasicBlockReference;
181 :
182 : // Post-decomposition we have a single description only.
183 E : DCHECK_EQ(1U, subgraph.block_descriptions().size());
184 E : DCHECK(subgraph.original_block() != NULL);
185 :
186 E : const BlockGraph::Block* block = subgraph.original_block();
187 E : const BlockDescription& descr = subgraph.block_descriptions().front();
188 E : BasicBlockOrdering::const_iterator bb_it(descr.basic_block_order.begin());
189 E : for (; bb_it != descr.basic_block_order.end(); ++bb_it) {
190 E : const BasicBlock* bb = *bb_it;
191 E : DCHECK(bb != NULL);
192 :
193 : // Print the BB's name for an identifying label.
194 E : ::fprintf(out(), "%s:\n", bb->name().c_str());
195 :
196 E : switch (bb->type()) {
197 : case BasicBlock::BASIC_CODE_BLOCK:
198 E : DumpCodeBBToText(block, BasicCodeBlock::Cast(bb));
199 E : break;
200 :
201 : case BasicBlock::BASIC_DATA_BLOCK:
202 E : DumpDataBBToText(block, BasicDataBlock::Cast(bb));
203 E : break;
204 :
205 : case BasicBlock::BASIC_END_BLOCK:
206 E : DumpEndBBToText(block, BasicEndBlock::Cast(bb));
207 E : break;
208 :
209 : default:
210 i : NOTREACHED();
211 : break;
212 : }
213 E : }
214 E : }
215 :
216 : void DecomposeImageToTextApp::DumpCodeBBToText(
217 E : const BlockGraph::Block* block, const BasicCodeBlock* bb) {
218 : BasicBlock::Instructions::const_iterator instr_it(
219 E : bb->instructions().begin());
220 E : BasicBlock::Offset instr_offs = bb->offset();
221 E : for (; instr_it != bb->instructions().end(); ++instr_it) {
222 E : const block_graph::Instruction& instr = *instr_it;
223 :
224 E : _CodeInfo code = {};
225 E : code.codeOffset = 0;
226 E : code.code = instr.data();
227 E : code.codeLen = instr.size();
228 E : code.dt = Decode32Bits;
229 E : _DecodedInst decoded = {};
230 E : _DInst dinst = instr.representation();
231 :
232 E : dinst.addr = 0;
233 E : distorm_format(&code, &dinst, &decoded);
234 E : ::fprintf(out(), " +%04X: %-14s %s %s",
235 : instr_offs,
236 : decoded.instructionHex.p,
237 : decoded.mnemonic.p,
238 : decoded.operands.p);
239 :
240 : BasicBlock::BasicBlockReferenceMap::const_iterator ref_it(
241 E : instr_it->references().begin());
242 E : for (; ref_it != instr_it->references().end(); ++ref_it) {
243 E : DumpReference(ref_it->second, out());
244 E : }
245 E : ::fprintf(out(), "\n");
246 E : instr_offs += instr.size();
247 E : }
248 :
249 E : BasicBlock::Successors::const_iterator succ_it(bb->successors().begin());
250 E : for (; succ_it != bb->successors().end(); ++succ_it) {
251 E : const block_graph::Successor& succ = *succ_it;
252 :
253 : // Shortcut alert! As we know the blocks are in-order right after
254 : // decomposition, we can get away with just disassembling the (sole)
255 : // successor that has a size.
256 : // The other successor, if any, will be fall-through.
257 E : if (succ.instruction_size()) {
258 E : _CodeInfo code = {};
259 E : code.codeOffset = 0;
260 E : code.code = block->data() + bb->offset() + bb->GetInstructionSize();
261 E : code.codeLen = succ.instruction_size();
262 E : code.dt = Decode32Bits;
263 E : _DecodedInst decoded = {};
264 E : _DInst instr = {};
265 :
266 E : unsigned int count = 0;
267 E : distorm_decompose64(&code, &instr, 1, &count);
268 E : instr.addr = 0;
269 E : distorm_format(&code, &instr, &decoded);
270 E : ::fprintf(out(), " +%04X: %-14s %s %s",
271 : instr_offs,
272 : decoded.instructionHex.p,
273 : decoded.mnemonic.p,
274 : decoded.operands.p);
275 :
276 E : DumpReference(succ.reference(), out());
277 E : ::fprintf(out(), "\n");
278 E : instr_offs += succ.instruction_size();
279 : }
280 E : }
281 E : }
282 :
283 : void DecomposeImageToTextApp::DumpDataBBToText(
284 E : const BlockGraph::Block* block, const BasicDataBlock* bb) {
285 : // Here we proceed by dumping a hex chunk up to the next reference, then
286 : // the reference and so on.
287 E : size_t curr_start = 0;
288 :
289 E : while (curr_start < bb->size()) {
290 : BasicBlock::BasicBlockReferenceMap::const_iterator it(
291 E : bb->references().lower_bound(curr_start));
292 :
293 E : size_t next_chunk_end = bb->size();
294 E : if (it != bb->references().end())
295 E : next_chunk_end = it->first;
296 E : if (next_chunk_end == curr_start) {
297 : // We're on a reference, dump it and it's reference.
298 E : switch (it->second.size()) {
299 : case 1:
300 i : ::fprintf(out(), " DB ");
301 i : break;
302 : case 2:
303 i : ::fprintf(out(), " DW ");
304 i : break;
305 : case 4:
306 E : ::fprintf(out(), " DD ");
307 E : break;
308 : default:
309 i : NOTREACHED();
310 : break;
311 : }
312 E : HexDump(bb->data() + curr_start, it->second.size(), out());
313 E : DumpReference(it->second, out());
314 E : ::fprintf(out(), "\n");
315 :
316 E : curr_start += it->second.size();
317 E : } else {
318 E : if (next_chunk_end - curr_start > 16)
319 E : next_chunk_end = curr_start + 16;
320 :
321 E : ::fprintf(out(), " DB ");
322 E : HexDump(bb->data() + curr_start, next_chunk_end - curr_start, out());
323 E : ::fprintf(out(), "\n");
324 :
325 E : curr_start = next_chunk_end;
326 : }
327 E : }
328 E : }
329 :
330 : void DecomposeImageToTextApp::DumpEndBBToText(
331 E : const BlockGraph::Block* block, const BasicEndBlock* bb) {
332 :
333 : // Dump the references of the basic end block.
334 E : for (const auto& entry : bb->references()) {
335 i : ::fprintf(out(), " end basic-block reference with offset %d: ",
336 : entry.first);
337 i : DumpReference(entry.second, out());
338 i : ::fprintf(out(), "\n");
339 i : }
340 E : }
341 :
342 : void DecomposeImageToTextApp::DumpBlockToText(
343 E : core::RelativeAddress addr, const BlockGraph::Block* block) {
344 E : ::fprintf(out(), "0x%08X(%d): %s\n %s\n", addr.value(), block->size(),
345 : block->name().c_str(),
346 : block_graph::BlockGraph::BlockAttributesToString(
347 : block->attributes()).c_str());
348 :
349 E : pe::PETransformPolicy policy;
350 :
351 : // Allow inline assembly for dumping.
352 E : policy.set_allow_inline_assembly(true);
353 :
354 : // Attempt basic block decomposition if BB-dumping is requested.
355 : // Note that on success we return early from here.
356 E : if (dump_basic_blocks_) {
357 E : if (policy.BlockIsSafeToBasicBlockDecompose(block)) {
358 E : BasicBlockSubGraph subgraph;
359 E : BasicBlockDecomposer decomposer(block, &subgraph);
360 :
361 E : if (decomposer.Decompose()) {
362 E : DumpSubGraphToText(subgraph);
363 E : return;
364 : }
365 : // Fall through on failure to decompose.
366 i : ::fprintf(out(), " Basic-block decomposition failure.\n");
367 i : } else {
368 E : ::fprintf(out(), " Unsafe to basic-block decompose.\n");
369 : }
370 : }
371 :
372 : BlockGraph::Block::LabelMap::const_iterator
373 E : label_it(block->labels().begin());
374 E : for (; label_it != block->labels().end(); ++label_it) {
375 E : ::fprintf(out(), "\t+0x%04X: %s\n",
376 : label_it->first,
377 : label_it->second.ToString().c_str());
378 E : }
379 :
380 : BlockGraph::Block::ReferenceMap::const_iterator ref_it(
381 E : block->references().begin());
382 E : for (; ref_it != block->references().end(); ++ref_it) {
383 E : ++num_refs_;
384 E : const BlockGraph::Reference& ref = ref_it->second;
385 E : if (ref.offset() == 0) {
386 E : ::fprintf(out(), "\t+0x%04X->%s(%d)\n",
387 : ref_it->first,
388 : ref.referenced()->name().c_str(),
389 : ref.size());
390 E : } else {
391 : // See if there's a label at the destination's offset, and if so
392 : // use that in preference to a raw numeric offset.
393 : BlockGraph::Block::LabelMap::const_iterator label =
394 E : ref.referenced()->labels().find(ref.offset());
395 E : if (label != ref.referenced()->labels().end()) {
396 E : ::fprintf(out(), "\t+0x%04X->%s:%s[%d]\n",
397 : ref_it->first,
398 : ref.referenced()->name().c_str(),
399 : label->second.ToString().c_str(),
400 : ref.size());
401 E : } else {
402 E : ::fprintf(out(), "\t+0x%04X->%s+0x%04X(%d)\n",
403 : ref_it->first,
404 : ref.referenced()->name().c_str(),
405 : ref.offset(),
406 : ref.size());
407 : }
408 : }
409 E : }
410 E : }
411 :
412 : bool DecomposeImageToTextApp::DumpImageToText(
413 E : const base::FilePath& image_path) {
414 : // Load the image file.
415 E : PEFile image_file;
416 E : if (!image_file.Init(image_path)) {
417 i : LOG(ERROR) << "Unable to initialize image " << image_path.value();
418 i : return false;
419 : }
420 :
421 E : BlockGraph block_graph;
422 E : ImageLayout image_layout(&block_graph);
423 :
424 : // And decompose it to an ImageLayout.
425 E : Decomposer decomposer(image_file);
426 E : if (!decomposer.Decompose(&image_layout)) {
427 i : LOG(ERROR) << "Unable to decompose image \""
428 : << image_path.value() << "\".";
429 i : return false;
430 : }
431 :
432 E : num_refs_ = 0;
433 E : DumpAddressSpaceToText(image_layout.blocks);
434 :
435 E : ::fprintf(out(), "Discovered: %d blocks\nand %d references.\n",
436 : block_graph.blocks().size(),
437 : num_refs_);
438 :
439 E : return true;
440 E : }
441 :
442 : } // namespace pe
|