1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the basic-block entry counting agent library.
16 :
17 : #include "syzygy/agent/basic_block_entry/basic_block_entry.h"
18 :
19 : #include "base/at_exit.h"
20 : #include "base/command_line.h"
21 : #include "base/environment.h"
22 : #include "base/lazy_instance.h"
23 : #include "base/stringprintf.h"
24 : #include "base/utf_string_conversions.h"
25 : #include "base/memory/scoped_ptr.h"
26 : #include "sawbuck/common/com_utils.h"
27 : #include "syzygy/agent/common/process_utils.h"
28 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
29 : #include "syzygy/common/logging.h"
30 : #include "syzygy/trace/protocol/call_trace_defs.h"
31 :
32 : // Save caller-save registers (eax, ecx, edx) and flags (eflags).
// The flags are captured in eax before it is pushed a second time: lahf
// copies SF/ZF/AF/PF/CF into ah and seto stores the overflow flag (0 or 1)
// in al. The first push of eax preserves its original value. Four dwords are
// pushed in total; BBENTRY_RESTORE_REGISTERS pops them in the reverse order.
33 : #define BBENTRY_SAVE_REGISTERS \
34 : __asm push eax \
35 : __asm lahf \
36 : __asm seto al \
37 : __asm push eax \
38 : __asm push ecx \
39 : __asm push edx
40 :
41 : // Restore caller-save registers (eax, ecx, edx) and flags (eflags).
// After edx and ecx are popped, the saved flags word is popped into eax.
// add al, 0x7f re-creates the overflow flag from the 0/1 value held in al
// (1 + 0x7f overflows a signed byte, 0 + 0x7f does not); sahf then reloads
// SF/ZF/AF/PF/CF from ah, and the final pop restores eax itself.
42 : #define BBENTRY_RESTORE_REGISTERS \
43 : __asm pop edx \
44 : __asm pop ecx \
45 : __asm pop eax \
46 : __asm add al, 0x7f \
47 : __asm sahf \
48 : __asm pop eax
49 :
// C-linkage wrapper around BasicBlockEntry::GetRawFrequencyData().
// @param data the frequency data forwarded to the agent; must not be NULL
//     (DCHECKed).
// @returns the pointer returned by BasicBlockEntry::GetRawFrequencyData().
50 : extern "C" uint32* _stdcall GetRawFrequencyData(
51 E : ::common::IndexedFrequencyData* data) {
52 E : DCHECK(data != NULL);
53 E : return agent::basic_block_entry::BasicBlockEntry::GetRawFrequencyData(data);
54 E : }
55 :
// Naked entry stub invoked by instrumented basic blocks. It saves eax, ecx,
// edx and the flags, passes a pointer to the caller-built stack frame
// (ret_addr, module_data, bb_id) to BasicBlockEntryHook, restores the saved
// state and returns while popping the two pushed arguments.
// NOTE(review): nothing here pops the hook argument after the call, so the
// hook is presumably declared callee-cleanup (e.g. __stdcall) in the header;
// confirm against the declaration.
56 i : extern "C" void __declspec(naked) _basic_block_enter() {
57 : __asm {
58 : // This is expected to be called via instrumentation that looks like:
59 : // push bb_id
60 : // push module_data
61 : // call [_basic_block_enter]
62 : //
63 : // Stack: ... bb_id, module_data, ret_addr.
64 :
65 : // Stash volatile registers.
66 i : BBENTRY_SAVE_REGISTERS
67 :
68 : // Stack: ... bb_id, module_data, ret_addr, [4x register]
69 :
70 : // Push the original esp value onto the stack as the entry-hook data.
71 : // This gives the entry-hook a pointer to ret_addr, module_data and bb_id.
72 i : lea eax, DWORD PTR[esp + 0x10]
73 i : push eax
74 :
75 : // Stack: ... bb_id, module_data, ret_addr, [4x register], &ret_addr.
76 i : call agent::basic_block_entry::BasicBlockEntry::BasicBlockEntryHook
77 :
78 : // Stack: ... bb_id, module_data, ret_addr, [4x register].
79 :
80 : // Restore volatile registers.
81 i : BBENTRY_RESTORE_REGISTERS
82 :
83 : // Stack: ... bb_id, module_data, ret_addr.
84 :
85 : // Return to the address pushed by our caller, popping off the bb_id and
86 : // module_data values from the stack.
87 i : ret 8
88 :
89 : // Stack: ...
90 : }
91 : }
92 :
// Naked entry stub reached through a DllMain thunk. It saves eax, ecx, edx
// and the flags, passes a pointer to the stack frame starting at `function`
// to DllMainEntryHook, restores the saved state, and then uses `ret 4` to
// jump to `function` while discarding module_data.
// NOTE(review): nothing here pops the hook argument after the call, so the
// hook is presumably declared callee-cleanup (e.g. __stdcall) in the header;
// confirm against the declaration.
93 i : extern "C" void __declspec(naked) _indirect_penter_dllmain() {
94 : __asm {
95 : // This is expected to be called via a thunk that looks like:
96 : // push module_data
97 : // push function
98 : // jmp [_indirect_penter_dllmain]
99 : //
100 : // Stack: ... reserved, reason, module, ret_addr, module_data, function.
101 :
102 : // Stash volatile registers.
103 i : BBENTRY_SAVE_REGISTERS
104 :
105 : // Stack: ... reserved, reason, module, ret_addr, module_data, function,
106 : // [4x register].
107 :
108 : // Push the original esp value onto the stack as the entry-hook data.
109 : // This gives the dll entry-hook a pointer to function, module_data,
110 : // ret_addr, module, reason and reserved.
111 i : lea eax, DWORD PTR[esp + 0x10]
112 i : push eax
113 :
114 : // Stack: ... reserved, reason, module, ret_addr, module_data, function,
115 : // [4x register], &function.
116 :
117 i : call agent::basic_block_entry::BasicBlockEntry::DllMainEntryHook
118 :
119 : // Stack: ... reserved, reason, module, ret_addr, module_data, function,
120 : // [4x register].
121 :
122 : // Restore volatile registers.
123 i : BBENTRY_RESTORE_REGISTERS
124 :
125 : // Stack: ... reserved, reason, module, ret_addr, module_data, function.
126 :
127 : // Return to the thunked function, popping module_data off the stack as
128 : // we go.
129 i : ret 4
130 :
131 : // Stack: ... reserved, reason, module, ret_addr.
132 : }
133 : }
134 :
// Naked entry stub reached through an EXE entry-point thunk. It saves eax,
// ecx, edx and the flags, passes a pointer to the stack frame starting at
// `function` to ExeMainEntryHook, restores the saved state, and then uses
// `ret 4` to jump to `function` while discarding module_data.
// NOTE(review): nothing here pops the hook argument after the call, so the
// hook is presumably declared callee-cleanup (e.g. __stdcall) in the header;
// confirm against the declaration.
135 i : extern "C" void __declspec(naked) _indirect_penter_exemain() {
136 : __asm {
137 : // This is expected to be called via a thunk that looks like:
138 : // push module_data
139 : // push function
140 : // jmp [_indirect_penter_exemain]
141 : //
142 : // Stack: ... ret_addr, module_data, function.
143 :
144 : // Stash volatile registers.
145 i : BBENTRY_SAVE_REGISTERS
146 :
147 : // Stack: ... ret_addr, module_data, function, [4x register].
148 :
149 : // Push the original esp value onto the stack as the entry-hook data.
150 : // This gives the exe entry-hook a pointer to function, module_data,
151 : // and ret_addr.
152 i : lea eax, DWORD PTR[esp + 0x10]
153 i : push eax
154 :
155 : // Stack: ... ret_addr, module_data, function, [4x register], &function.
156 :
157 i : call agent::basic_block_entry::BasicBlockEntry::ExeMainEntryHook
158 :
159 : // Stack: ... ret_addr, module_data, function, [4x register].
160 :
161 : // Restore volatile registers.
162 i : BBENTRY_RESTORE_REGISTERS
163 :
164 : // Stack: ... ret_addr, module_data, function.
165 :
166 : // Return to the thunked function, popping module_data off the stack as
167 : // we go.
168 i : ret 4
169 :
170 : // Stack: ... ret_addr.
171 : }
172 : }
173 :
// Entry point of the agent DLL itself.
// On DLL_PROCESS_ATTACH it creates the AtExitManager and initializes the
// command line and logging; on DLL_PROCESS_DETACH it deletes the
// AtExitManager. Thread attach/detach notifications are ignored.
// @returns TRUE in all cases.
174 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
175 : // Our AtExit manager required by base.
176 : static base::AtExitManager* at_exit = NULL;
177 :
178 E : switch (reason) {
179 : case DLL_PROCESS_ATTACH:
180 E : DCHECK(at_exit == NULL);
181 E : at_exit = new base::AtExitManager();
182 :
183 E : CommandLine::Init(0, NULL);
184 E : common::InitLoggingForDll(L"basic_block_entry");
185 E : LOG(INFO) << "Initialized basic-block entry counting agent library.";
186 E : break;
187 :
188 : case DLL_THREAD_ATTACH:
189 i : break;
190 :
191 : case DLL_THREAD_DETACH:
192 i : break;
193 :
194 : case DLL_PROCESS_DETACH:
195 E : DCHECK(at_exit != NULL);
196 E : delete at_exit;
197 E : at_exit = NULL;
198 E : break;
199 :
200 : default:
201 i : NOTREACHED();
202 : break;
203 : }
204 :
205 E : return TRUE;
206 E : }
207 :
208 : namespace agent {
209 : namespace basic_block_entry {
210 :
211 : namespace {
212 :
213 : using ::common::IndexedFrequencyData;
214 : using ::common::kBasicBlockEntryAgentId;
215 : using ::common::kBasicBlockFrequencyDataVersion;
216 : using agent::common::ScopedLastErrorKeeper;
217 : using trace::client::TraceFileSegment;
218 :
219 : // All tracing runs through this object.
// It is wrapped in a base::LazyInstance; BasicBlockEntry::Instance() hands
// out its Pointer().
220 : base::LazyInstance<BasicBlockEntry> static_bbentry_instance =
221 : LAZY_INSTANCE_INITIALIZER;
222 :
223 : // Get the address of the module containing @p addr. We do this by querying
224 : // for the allocation that contains @p addr. This must lie within the
225 : // instrumented module, and be part of the single allocation in which the
226 : // image of the module lies. The base of the module will be the base address
227 : // of the allocation.
// @returns the module handle, or NULL (after logging an error) if the query
//     fails or the allocation base does not carry a valid PE magic number.
228 : // TODO(rogerm): Move to agent::common.
229 E : HMODULE GetModuleForAddr(const void* addr) {
230 E : MEMORY_BASIC_INFORMATION mem_info = {};
231 :
232 : // Look up the allocation in which addr is located.
233 E : if (::VirtualQuery(addr, &mem_info, sizeof(mem_info)) == 0) {
234 i : DWORD error = ::GetLastError();
235 i : LOG(ERROR) << "VirtualQuery failed: " << com::LogWe(error) << ".";
236 i : return NULL;
237 : }
238 :
239 : // Check that the allocation base has a valid PE header magic number.
240 E : base::win::PEImage image(reinterpret_cast<HMODULE>(mem_info.AllocationBase));
241 E : if (!image.VerifyMagic()) {
// NOTE(review): the "%08X" conversion below is handed a pointer. That only
// matches where pointers are 32 bits wide (this file's inline asm is 32-bit
// x86); consider "%p" or an explicit cast.
242 i : LOG(ERROR) << "Invalid module found for "
243 : << base::StringPrintf("0x%08X", addr) << ".";
244 i : return NULL;
245 : }
246 :
247 : // Then it's a module.
248 E : return image.module();
249 E : }
250 :
251 : } // namespace
252 :
253 : // The BasicBlockEntryHook parameters.
// The field order mirrors the stack as seen by _basic_block_enter: the hook
// receives the address of ret_addr, and module_data and the basic-block id
// occupy the next two (higher-address) slots. The size assertion below pins
// the struct to those three 4-byte slots.
254 : struct BasicBlockEntry::BasicBlockEntryFrame {
255 : const void* ret_addr;
256 : IndexedFrequencyData* module_data;
257 : uint32 basic_block_id;
258 : };
259 : COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::BasicBlockEntryFrame, 12);
260 :
261 : // The DllMainEntryHook parameters.
// The field order mirrors the stack as seen by _indirect_penter_dllmain: the
// hook receives the address of `function`, followed (at increasing
// addresses) by module_data, the return address and the three DllMain
// arguments. The size assertion below pins the struct to six 4-byte slots.
262 : struct BasicBlockEntry::DllMainEntryFrame {
263 : FuncAddr function;
264 : IndexedFrequencyData* module_data;
265 : const void* ret_addr;
266 : HMODULE module;
267 : DWORD reason;
268 : DWORD reserved;
269 : };
270 : COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::DllMainEntryFrame, 24);
271 :
272 : // The ExeMainEntryHook parameters.
// The field order mirrors the stack as seen by _indirect_penter_exemain: the
// hook receives the address of `function`, followed (at increasing
// addresses) by module_data and the return address. The size assertion below
// pins the struct to three 4-byte slots.
273 : struct BasicBlockEntry::ExeMainEntryFrame {
274 : FuncAddr function;
275 : IndexedFrequencyData* module_data;
276 : const void* ret_addr;
277 : };
278 : COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::ExeMainEntryFrame, 12);
279 :
280 : // The per-thread-per-instrumented-module state managed by this agent.
281 : class BasicBlockEntry::ThreadState : public agent::common::ThreadStateBase {
282 : public:
283 : // Initialize a ThreadState instance.
// @param agent the agent that owns this state; must not be NULL.
// @param buffer the initial frequency counter array; must not be NULL.
284 : ThreadState(BasicBlockEntry* agent, void* buffer);
285 :
286 : // Destroy a ThreadState instance. Any outstanding trace-file segment is
// returned to the agent's session, unless the session is disabled.
287 : ~ThreadState();
288 :
289 : // @name Accessors.
290 : // @{
291 E : uint32* frequency_data() { return frequency_data_; }
292 E : TraceFileSegment* segment() { return &segment_; }
293 : TraceIndexedFrequencyData* trace_data() { return trace_data_; }
294 : // @}
295 :
296 : // @name Mutators.
297 : // @{
298 : void set_frequency_data(void* buffer);
299 : void set_trace_data(TraceIndexedFrequencyData* trace_data);
300 : // @}
301 :
302 : // A helper to return a ThreadState pointer given a TLS index.
303 : static ThreadState* Get(DWORD tls_index);
304 :
305 : // A helper to assign a ThreadState pointer to a TLS index.
306 : void Assign(DWORD tls_index);
307 :
308 : // Saturating increment of the frequency record for @p basic_block_id. Note
309 : // that in Release mode, no range checking is performed on basic_block_id.
310 : void Increment(uint32 basic_block_id);
311 :
312 : protected:
313 : // As a shortcut, this points to the beginning of the array of basic-block
314 : // entry frequency values. With tracing enabled, this is equivalent to:
315 : // reinterpret_cast<uint32*>(this->trace_data_->frequency_data)
316 : // If tracing is not enabled, this will be set to point to a static
317 : // allocation of IndexedFrequencyData::frequency_data.
318 : uint32* frequency_data_;
319 :
320 : // The basic-block entry agent this thread state belongs to.
321 : BasicBlockEntry* agent_;
322 :
323 : // The thread's current trace-file segment, if any.
324 : trace::client::TraceFileSegment segment_;
325 :
326 : // The basic-block frequency record we're populating. This will point into
327 : // the associated trace file segment's buffer.
328 : TraceIndexedFrequencyData* trace_data_;
329 :
330 : private:
331 : DISALLOW_COPY_AND_ASSIGN(ThreadState);
332 : };
333 :
// Constructs the state with |buffer| as the initial frequency array and no
// trace record attached. Both arguments must be non-NULL (DCHECKed).
// NOTE(review): the initializer list names agent_ before frequency_data_,
// but the class declares frequency_data_ first and members are initialized
// in declaration order. No initializer depends on another, so this only
// risks a reorder warning.
334 : BasicBlockEntry::ThreadState::ThreadState(BasicBlockEntry* agent, void* buffer)
335 : : agent_(agent),
336 : frequency_data_(static_cast<uint32*>(buffer)),
337 E : trace_data_(NULL) {
338 E : DCHECK(agent != NULL);
339 E : DCHECK(buffer != NULL);
340 E : }
341 :
// Releases the trace-file segment held by this thread state, if any.
342 i : BasicBlockEntry::ThreadState::~ThreadState() {
343 : // If we have an outstanding buffer (non-NULL write pointer) and the
// session is not disabled, hand the buffer back to the session now.
344 i : if (segment_.write_ptr != NULL && !agent_->session_.IsDisabled())
345 i : agent_->session_.ReturnBuffer(&segment_);
346 i : }
347 :
// Points the frequency counter array at |buffer|, reinterpreted as an array
// of uint32 counters. |buffer| must not be NULL (DCHECKed).
348 E : void BasicBlockEntry::ThreadState::set_frequency_data(void* buffer) {
349 E : DCHECK(buffer != NULL);
350 E : frequency_data_ = static_cast<uint32*>(buffer);
351 E : }
352 :
// Records the trace record this thread state is populating. |trace_data|
// must not be NULL (DCHECKed).
353 : void BasicBlockEntry::ThreadState::set_trace_data(
354 : TraceIndexedFrequencyData* trace_data) {
355 : DCHECK(trace_data != NULL);
356 : trace_data_ = trace_data;
357 : }
358 :
// Returns the value stored in TLS slot |tls_index| for the calling thread,
// cast to a ThreadState pointer. |tls_index| must be a valid index
// (DCHECKed against TLS_OUT_OF_INDEXES). Callers in this file treat a NULL
// result as "no state created yet for this thread".
359 : BasicBlockEntry::ThreadState* BasicBlockEntry::ThreadState::Get(
360 E : DWORD tls_index) {
361 E : DCHECK_NE(TLS_OUT_OF_INDEXES, tls_index);
362 E : return static_cast<ThreadState*>(::TlsGetValue(tls_index));
363 E : }
364 :
// Stores this ThreadState in TLS slot |tls_index| for the calling thread.
// |tls_index| must be a valid index (DCHECKed against TLS_OUT_OF_INDEXES).
// The result of TlsSetValue is not checked.
365 E : void BasicBlockEntry::ThreadState::Assign(DWORD tls_index) {
366 E : DCHECK_NE(TLS_OUT_OF_INDEXES, tls_index);
367 E : ::TlsSetValue(tls_index, this);
368 E : }
369 :
// Increments the counter for |basic_block_id|, saturating at the maximum
// uint32 value (~0U) instead of wrapping. The index is range-checked only by
// a DCHECK, and only when a trace record has been attached via
// set_trace_data().
370 E : inline void BasicBlockEntry::ThreadState::Increment(uint32 basic_block_id) {
371 E : DCHECK(frequency_data_ != NULL);
372 E : DCHECK(trace_data_ == NULL || basic_block_id < trace_data_->num_entries);
373 E : uint32& element = frequency_data_[basic_block_id];
374 E : if (element != ~0U)
375 E : ++element;
376 E : }
377 :
// Returns the process-wide agent instance held by static_bbentry_instance.
378 E : BasicBlockEntry* BasicBlockEntry::Instance() {
379 E : return static_bbentry_instance.Pointer();
380 E : }
381 :
// Attempts to open the RPC trace session used by this agent.
382 E : BasicBlockEntry::BasicBlockEntry() {
383 : // Create a session. We immediately return the buffer that gets allocated
384 : // to us. The client will perform thread-local buffer management on an as-
385 : // needed basis.
// If session initialization fails nothing is returned; the rest of this
// file checks session_.IsDisabled() before using the session.
386 E : trace::client::TraceFileSegment dummy_segment;
387 E : if (trace::client::InitializeRpcSession(&session_, &dummy_segment))
388 E : CHECK(session_.ReturnBuffer(&dummy_segment));
389 E : }
390 :
// Nothing is released explicitly here; the body is intentionally empty.
391 E : BasicBlockEntry::~BasicBlockEntry() {
392 E : }
393 :
// Returns the calling thread's frequency counter array for the module
// described by |data|, creating the thread state on first use.
// @param data the module's frequency data; must not be NULL (DCHECKed). Its
//     tls_index is used to look up the per-thread state.
// @returns the thread state's frequency_data() pointer.
394 E : uint32* BasicBlockEntry::GetRawFrequencyData(IndexedFrequencyData* data) {
395 E : DCHECK(data != NULL);
396 E : ThreadState* state = ThreadState::Get(data->tls_index);
397 E : if (state == NULL)
398 E : state = Instance()->CreateThreadState(data);
399 E : return state->frequency_data();
400 E : }
401 :
// Hook called from _basic_block_enter for every instrumented basic-block
// entry. Looks up (or lazily creates) the calling thread's state for the
// module and increments the counter for the entered basic block.
// @param entry_frame points at the stack frame captured by the stub; must
//     not be NULL, and its module_data must not be NULL (both DCHECKed).
402 E : void BasicBlockEntry::BasicBlockEntryHook(BasicBlockEntryFrame* entry_frame) {
// Presumably preserves the thread's last-error value across this hook;
// confirm against ScopedLastErrorKeeper's definition.
403 E : ScopedLastErrorKeeper scoped_last_error_keeper;
404 E : DCHECK(entry_frame != NULL);
405 E : DCHECK(entry_frame->module_data != NULL);
406 : DCHECK_GT(entry_frame->module_data->num_entries,
407 E : entry_frame->basic_block_id);
408 :
409 : // TODO(rogerm): Consider extracting a fast path for state != NULL? Inline it
410 : // during instrumentation? Move it into the _basic_block_enter function?
411 E : ThreadState* state = ThreadState::Get(entry_frame->module_data->tls_index);
412 E : if (state == NULL)
413 E : state = Instance()->CreateThreadState(entry_frame->module_data);
414 E : state->Increment(entry_frame->basic_block_id);
415 E : }
416 :
// Hook called from _indirect_penter_dllmain before the instrumented module's
// DllMain runs. Dispatches on the DllMain reason found in the captured
// frame: process attach initializes the module, process/thread detach retire
// the calling thread's state, and thread attach is deliberately ignored.
// @param entry_frame points at the stack frame captured by the stub; must
//     not be NULL (DCHECKed).
417 E : void BasicBlockEntry::DllMainEntryHook(DllMainEntryFrame* entry_frame) {
418 E : ScopedLastErrorKeeper scoped_last_error_keeper;
419 E : DCHECK(entry_frame != NULL);
420 E : switch (entry_frame->reason) {
421 : case DLL_PROCESS_ATTACH:
422 E : Instance()->OnProcessAttach(entry_frame->module_data);
423 E : break;
424 :
425 : case DLL_THREAD_ATTACH:
426 : // We don't handle this event because the thread may never actually
427 : // call into an instrumented module, so we don't want to allocate
428 : // resources needlessly. Further, we won't get this event for threads
429 : // that were created before the agent was loaded. On first use of
430 : // an instrumented basic-block in a given thread, any thread specific
431 : // resources will be allocated.
432 i : break;
433 :
// Process detach is handled like a detach of the calling thread.
434 : case DLL_PROCESS_DETACH:
435 : case DLL_THREAD_DETACH:
436 E : Instance()->OnThreadDetach(entry_frame->module_data);
437 E : break;
438 :
439 : default:
440 i : NOTREACHED();
441 : }
442 E : }
443 :
// Hook called from _indirect_penter_exemain before the instrumented
// executable's entry point runs. Performs the same module initialization as
// a DLL process attach.
// @param entry_frame points at the stack frame captured by the stub; must
//     not be NULL (DCHECKed).
444 E : void BasicBlockEntry::ExeMainEntryHook(ExeMainEntryFrame* entry_frame) {
445 E : ScopedLastErrorKeeper scoped_last_error_keeper;
446 E : DCHECK(entry_frame != NULL);
447 E : Instance()->OnProcessAttach(entry_frame->module_data);
448 E : }
449 :
// Logs the module containing |addr| to the trace session. Every step is
// guarded by CHECK, so a failure to allocate the buffer, resolve the module,
// log it or return the buffer is fatal.
// @param addr an address inside the module to register; must not be NULL
//     (DCHECKed).
450 E : void BasicBlockEntry::RegisterModule(const void* addr) {
451 E : DCHECK(addr != NULL);
452 :
453 : // Allocate a segment for the module information.
454 E : trace::client::TraceFileSegment module_info_segment;
455 E : CHECK(session_.AllocateBuffer(&module_info_segment));
456 :
457 : // Log the module. This is required in order to associate basic-block
458 : // frequency with a module and PDB file during post-processing.
459 E : HMODULE module = GetModuleForAddr(addr);
460 E : CHECK(module != NULL);
461 E : CHECK(agent::common::LogModule(module, &session_, &module_info_segment));
462 :
463 : // Commit the module information.
464 E : CHECK(session_.ReturnBuffer(&module_info_segment));
465 E : }
466 :
// One-time initialization of an instrumented module: validates the agent id
// and data version, allocates the module's TLS slot and, when the session is
// enabled, registers the module with the trace session. Repeated calls for
// the same module return early.
// @param module_data the module's frequency data; must not be NULL
//     (DCHECKed).
467 E : void BasicBlockEntry::OnProcessAttach(IndexedFrequencyData* module_data) {
468 E : DCHECK(module_data != NULL);
469 :
470 : // CHECK-fail if the agent id (magic number) does not match.
471 E : CHECK_EQ(kBasicBlockEntryAgentId, module_data->agent_id);
472 :
473 : // CHECK-fail if the version does not match.
474 E : CHECK_EQ(kBasicBlockFrequencyDataVersion, module_data->version);
475 :
476 : // We allow for this hook to be called multiple times. We expect the first
477 : // time to occur under the loader lock, so we don't need to worry about
478 : // concurrency for this check.
479 E : if (module_data->initialization_attempted)
480 E : return;
481 :
482 : // Flag the module as initialized.
483 E : module_data->initialization_attempted = 1U;
484 :
485 : // We expect this to be executed exactly once for each module.
486 E : CHECK_EQ(TLS_OUT_OF_INDEXES, module_data->tls_index);
487 E : module_data->tls_index = ::TlsAlloc();
488 E : CHECK_NE(TLS_OUT_OF_INDEXES, module_data->tls_index);
489 :
490 : // Register this module with the call_trace if the session is not disabled.
491 : // Note that we expect module_data to be statically defined within the
492 : // module of interest, so we can use its address to lookup the module.
493 E : if (!session_.IsDisabled())
494 E : RegisterModule(module_data);
495 E : }
496 :
// Marks the calling thread's state for |module_data| (if one exists) for
// death in the thread state manager. DllMainEntryHook calls this for both
// DLL_THREAD_DETACH and DLL_PROCESS_DETACH.
// @param module_data the module's frequency data; must not be NULL, must
//     already have been initialized and must hold a valid TLS index (all
//     DCHECKed).
// NOTE(review): the TLS slot itself is not cleared here.
497 E : void BasicBlockEntry::OnThreadDetach(IndexedFrequencyData* module_data) {
498 E : DCHECK(module_data != NULL);
499 E : DCHECK_EQ(1U, module_data->initialization_attempted);
500 E : DCHECK_NE(TLS_OUT_OF_INDEXES, module_data->tls_index);
501 :
502 E : ThreadState* state = ThreadState::Get(module_data->tls_index);
503 E : if (state != NULL)
504 E : thread_state_manager_.MarkForDeath(state);
505 E : }
506 :
// Creates, registers and returns the calling thread's state for the module
// described by |module_data|. The state initially counts into the module's
// own frequency_data array. When the session is enabled and the module has
// at least one entry, a trace segment is allocated and the counters are
// redirected into a TraceIndexedFrequencyData record inside it.
// @param module_data the module's frequency data; must not be NULL
//     (DCHECKed).
// @returns the newly created thread state (never NULL; failures CHECK).
507 : BasicBlockEntry::ThreadState* BasicBlockEntry::CreateThreadState(
508 E : IndexedFrequencyData* module_data) {
509 E : DCHECK(module_data != NULL);
510 :
511 : // Create the thread-local state for this thread. By default, just point the
512 : // counter array to the statically allocated fall-back area.
513 E : ThreadState* state = new ThreadState(this, module_data->frequency_data);
514 E : CHECK(state != NULL);
515 :
516 : // Associate the thread_state with the current thread.
517 E : state->Assign(module_data->tls_index);
518 :
519 : // Register the thread state with the thread state manager.
520 E : thread_state_manager_.Register(state);
521 :
522 : // If we're not actually tracing, then we're done.
523 E : if (session_.IsDisabled())
524 E : return state;
525 :
526 : // Nothing to allocate? We're done!
527 E : if (module_data->num_entries == 0) {
528 i : LOG(WARNING) << "Module contains no instrumented basic blocks, not "
529 : << "allocating basic-block trace data segment.";
530 i : return state;
531 : }
532 :
533 : // Determine the size of the basic block frequency table.
// NOTE(review): this multiplication is not checked for overflow.
534 E : size_t data_size = module_data->num_entries * sizeof(uint32);
535 :
536 : // Determine the size of the basic block frequency record.
// The "- 1" presumably accounts for a one-byte frequency_data placeholder
// already included in sizeof(TraceIndexedFrequencyData); TODO confirm
// against the struct definition.
537 E : size_t record_size = sizeof(TraceIndexedFrequencyData) + data_size - 1;
538 :
539 : // Determine the size of the buffer we need. We need room for the basic block
540 : // frequency struct plus a single RecordPrefix header.
541 E : size_t segment_size = sizeof(RecordPrefix) + record_size;
542 :
543 : // Allocate the actual segment for the basic block entry data.
544 E : CHECK(session_.AllocateBuffer(segment_size, state->segment()));
545 :
546 : // Ensure it's big enough to allocate the basic-block frequency data
547 : // we want. This automatically accounts for the RecordPrefix overhead.
548 E : CHECK(state->segment()->CanAllocate(record_size));
549 :
550 : // Allocate the basic-block frequency data. We will leave this allocated and
551 : // let it get flushed during tear-down of the call-trace client.
552 : TraceIndexedFrequencyData* trace_data =
553 : reinterpret_cast<TraceIndexedFrequencyData*>(
554 : state->segment()->AllocateTraceRecordImpl(TRACE_INDEXED_FREQUENCY,
555 E : record_size));
556 E : DCHECK(trace_data != NULL);
557 :
558 : // Initialize the basic block frequency data struct.
559 E : HMODULE module = GetModuleForAddr(module_data);
560 E : CHECK(module != NULL);
561 E : const base::win::PEImage image(module);
562 E : const IMAGE_NT_HEADERS* nt_headers = image.GetNTHeaders();
563 E : trace_data->data_type = TraceIndexedFrequencyData::BASIC_BLOCK;
564 E : trace_data->module_base_addr = reinterpret_cast<ModuleAddr>(image.module());
565 E : trace_data->module_base_size = nt_headers->OptionalHeader.SizeOfImage;
566 E : trace_data->module_checksum = nt_headers->OptionalHeader.CheckSum;
567 E : trace_data->module_time_date_stamp = nt_headers->FileHeader.TimeDateStamp;
568 E : trace_data->frequency_size = sizeof(uint32);
569 E : trace_data->num_entries = module_data->num_entries;
570 :
571 : // Hook up the newly allocated buffer to the call-trace instrumentation.
// NOTE(review): only the counter pointer is redirected; set_trace_data() is
// not called here, so the state's trace_data_ stays NULL and the range
// DCHECK in ThreadState::Increment() is not exercised. Confirm whether that
// is intended.
572 E : state->set_frequency_data(trace_data->frequency_data);
573 :
574 E : return state;
575 E : }
576 :
577 : } // namespace basic_block_entry
578 : } // namespace agent
|