1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the basic-block entry counting agent library.
16 : //
17 : // The operation of this module is in two parts: instrumentation and agent.
18 : // Both parts work together to gather metrics on the execution of a module.
19 : //
20 : // * Instrumentation
21 : // The instrumenter is responsible for injecting probes within the
22 : // instrumented module to call entry-points in the agent. There are two
23 : // kinds of supported instrumentation: basic block entry count and branch
24 : // profiling.
25 : //
// Instrumentation for basic block entry count:
//     BB1: [code]       --->   BB1: push bb_id
//          call func                push module_data
//          jz BB2                   call [increment_hook]
//                                   [code]
//                                   call func
//                                   jz BB2
33 : //
// Instrumentation for branch profiling:
//     BB1: [code]       --->   BB1: push bb_id
//          call func                push module_data
//          jz BB2                   call [entry_hook]
//                                   [code]
//                                   call func
//                                   push bb_id
//                                   push module_data
//                                   call [leave_hook]
//                                   jz BB2
44 : //
45 : // Using the last block id produced by an entry_hook to determine the
46 : // previous executed basic block won't work. As an example, the call to
47 : // 'func' will move the control flow to another function and modify the last
// executed basic block. The leave hook must be called at the end of the basic
49 : // block, before following control flow to any other basic blocks.
50 : //
51 : // The calling convention is callee clean-up. The callee is responsible for
52 : // cleaning up any values on the stack. This calling convention is chosen
53 : // to keep the application code size as low as possible.
54 : //
55 : // * Agent
56 : // The agent is responsible for allocating a trace segment and collecting
// metrics. The trace segment will be dumped to a file for post-processing.
58 : //
59 : // There are two mechanisms to collect metrics:
60 : // - Basic mode: In the basic mode, the hook acquires a lock and updates a
61 : // process-wide segment shared by all threads. In this mode, no events can
62 : // be lost.
63 : // - Buffered mode: A per-thread buffer is used to collect execution
64 : // information. A batch commit is done when the buffer is full. In this
65 : // mode, under a non-standard execution (crash, force exit, ...) pending
66 : // events may be lost.
67 : //
68 : // The agent keeps a ThreadState for each running thread. The thread state
69 : // is accessible through a TLS mechanism and contains information needed by
70 : // the hook (pointer to trace segment, buffer, lock, ...).
71 : //
72 : // There are two mechanisms to keep a reference to the thread state:
73 : // - TLS: The default mechanism uses the standard windows TLS API to keep
74 : // a per-thread reference to the thread state. The TLS index is allocated
75 : // and kept inside the module data information in the instrumented image.
76 : // - FS-Slot: This mechanism uses application specific slot available through
77 : // the FS segment (fs:[0x700] Reserved for user application).
78 : // See: http://en.wikipedia.org/wiki/Win32_Thread_Information_Block.
79 : // There is no API to check whether another module is using this slot, thus
80 : // this mechanism must be used in a controlled environment.
81 :
82 : #include "syzygy/agent/basic_block_entry/basic_block_entry.h"
83 :
84 : #include "base/at_exit.h"
85 : #include "base/command_line.h"
86 : #include "base/environment.h"
87 : #include "base/lazy_instance.h"
88 : #include "base/memory/scoped_ptr.h"
89 : #include "base/strings/stringprintf.h"
90 : #include "base/strings/utf_string_conversions.h"
91 : #include "syzygy/agent/common/agent.h"
92 : #include "syzygy/agent/common/process_utils.h"
93 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
94 : #include "syzygy/common/com_utils.h"
95 : #include "syzygy/common/indexed_frequency_data.h"
96 : #include "syzygy/common/logging.h"
97 : #include "syzygy/trace/protocol/call_trace_defs.h"
98 :
// Compiler intrinsics used by this file to read and write a 32-bit value at a
// given offset within the FS segment. They are declared by hand here and then
// requested in intrinsic form through the pragma below.
unsigned long __readfsdword(unsigned long);
void __writefsdword(unsigned long, unsigned long);
#pragma intrinsic(__readfsdword, __writefsdword)
102 :
// Save caller-save registers (eax, ecx, edx) and flags (eflags).
//
// Four dwords are pushed, in this order: the original eax, a copy of eax whose
// ah holds the flags loaded by 'lahf' and whose al holds the overflow flag
// captured by 'seto', then ecx and edx. The overflow flag is captured
// separately because it is later rebuilt arithmetically by
// BBPROBE_RESTORE_REGISTERS rather than through 'sahf'. The two macros must
// stay exact mirrors of each other.
#define BBPROBE_SAVE_REGISTERS \
  __asm push eax \
  __asm lahf \
  __asm seto al \
  __asm push eax \
  __asm push ecx \
  __asm push edx
111 :
// Restore caller-save registers (eax, ecx, edx) and flags (eflags).
//
// Pops the four dwords pushed by BBPROBE_SAVE_REGISTERS in reverse order. The
// saved al is 0 or 1 (from 'seto'); adding 0x7f to it yields 0x7f or 0x80, so
// the signed addition overflows exactly when the saved overflow flag was set.
// 'sahf' then reloads the remaining saved flags from ah, and the final pop
// brings back the original eax.
#define BBPROBE_RESTORE_REGISTERS \
  __asm pop edx \
  __asm pop ecx \
  __asm pop eax \
  __asm add al, 0x7f \
  __asm sahf \
  __asm pop eax
120 :
// Defines a naked, extern "C" entry point named @p function_name that
// forwards to the static method BasicBlockEntry::@p handler.
//
// The stub saves the volatile registers and flags, passes the handler a single
// argument (the address of the stack slot that was on top when the stub was
// entered), restores the saved state and returns with 'ret stack_size' so that
// @p stack_size bytes of caller-pushed arguments are removed.
#define BBPROBE_REDIRECT_CALL(function_name, handler, stack_size) \
  extern "C" void __declspec(naked) function_name() { \
    /* Stash volatile registers. */ \
    BBPROBE_SAVE_REGISTERS \
    \
    /* Stack: ... basic_block_id, module_data, ret_addr, [4x register] */ \
    \
    /* Push the original esp value onto the stack as the entry-hook data. */ \
    /* This gives the entry-hook a pointer to ret_addr, module_data and */ \
    /* basic block id. The four saved dwords occupy 0x10 bytes. */ \
    __asm lea eax, DWORD PTR[esp + 0x10] \
    __asm push eax \
    \
    /* Stack: ..., basic_block_id, module_data, ret_addr, [4x register], */ \
    /* &ret_addr. */ \
    __asm call agent::basic_block_entry::BasicBlockEntry::handler \
    /* Stack: ... basic_block_id, module_data, ret_addr, [4x register]. */ \
    \
    /* Restore volatile registers. */ \
    BBPROBE_RESTORE_REGISTERS \
    __asm ret stack_size \
  }
143 :
// Defines the entry point 'function_name_s<slot>' for the FS-slot flavour of
// a hook, together with a static __fastcall trampoline that calls
// BasicBlockEntry::@p handler<slot> with the single argument of type @p type.
//
// The naked stub saves the volatile registers and flags, loads the
// caller-pushed argument into ecx (it sits above the four saved dwords and the
// return address, hence the 0x14 offset), calls the trampoline, restores the
// saved state and returns with 'ret 4' to drop the argument.
#define BBPROBE_REDIRECT_CALL_SLOT(function_name, handler, type, slot) \
  static void __fastcall safe ## function_name ## _s ## slot(type index) { \
    agent::basic_block_entry::BasicBlockEntry::handler<slot>(index); \
  } \
  extern "C" void __declspec(naked) function_name ## _s ## slot() { \
    /* Stash volatile registers. */ \
    BBPROBE_SAVE_REGISTERS \
    /* Call handler */ \
    __asm mov ecx, DWORD PTR[esp + 0x14] \
    __asm call safe ## function_name ## _s ## slot \
    /* Restore volatile registers. */ \
    BBPROBE_RESTORE_REGISTERS \
    /* Return and remove index from stack. */ \
    __asm ret 4 \
  }
159 :
// Two-argument probes. Each is expected to be called via instrumentation that
// looks like:
//     push basic_block_id
//     push module_data
//     call [function_name]
// The generated stubs return with 'ret 8', removing both pushed arguments.
BBPROBE_REDIRECT_CALL(_branch_enter, BranchEnterHook, 8)
BBPROBE_REDIRECT_CALL(_branch_enter_buffered, BranchEnterBufferedHook, 8)
BBPROBE_REDIRECT_CALL(_branch_exit, BranchExitHook, 8)
BBPROBE_REDIRECT_CALL(_increment_indexed_freq_data,
                      IncrementIndexedFreqDataHook,
                      8)
170 :
// Function-entry probes, one per FS slot (_function_enter_s1 through
// _function_enter_s4). Each is expected to be called via instrumentation that
// looks like:
//     push module_data
//     call [function_name]
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           1)
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           2)
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           3)
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           4)
190 :
// FS-slot flavours of the branch probes, one per slot (suffix _s1 to _s4).
// Only the basic block id is passed; each is expected to be called via
// instrumentation that looks like:
//     push basic_block_id
//     call [function_name]
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 1)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 2)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 3)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 4)

// Buffered variant of the branch-enter probe.
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 1)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 2)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 3)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 4)

// Branch-exit probe.
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 1)
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 2)
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 3)
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 4)
212 :
// Module entry-point probes. Each is expected to be reached via a thunk that
// looks like:
//     push module_data
//     push function
//     jmp [function_name]
// Because the thunk jumps rather than calls, 'function' is on top of the stack
// when the stub is entered. The stub's final 'ret 4' therefore transfers
// control to 'function' and discards module_data.
BBPROBE_REDIRECT_CALL(_indirect_penter_dllmain, DllMainEntryHook, 4)
BBPROBE_REDIRECT_CALL(_indirect_penter_exemain, ExeMainEntryHook, 4)
219 :
220 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
221 : // Our AtExit manager required by base.
222 : static base::AtExitManager* at_exit = NULL;
223 :
224 E : agent::common::InitializeCrt();
225 :
226 E : switch (reason) {
227 : case DLL_PROCESS_ATTACH:
228 E : DCHECK(at_exit == NULL);
229 E : at_exit = new base::AtExitManager();
230 :
231 E : CommandLine::Init(0, NULL);
232 E : common::InitLoggingForDll(L"basic_block_entry");
233 E : LOG(INFO) << "Initialized basic-block entry counting agent library.";
234 E : break;
235 :
236 : case DLL_THREAD_ATTACH:
237 E : break;
238 :
239 : case DLL_THREAD_DETACH:
240 E : break;
241 :
242 : case DLL_PROCESS_DETACH:
243 E : CommandLine::Reset();
244 E : DCHECK(at_exit != NULL);
245 E : delete at_exit;
246 E : at_exit = NULL;
247 E : break;
248 :
249 : default:
250 i : NOTREACHED();
251 : break;
252 : }
253 :
254 E : return TRUE;
255 E : }
256 :
257 : namespace agent {
258 : namespace basic_block_entry {
259 :
260 : namespace {
261 :
262 : using ::common::IndexedFrequencyData;
263 : using agent::common::ScopedLastErrorKeeper;
264 : using trace::client::TraceFileSegment;
265 :
// Offset within the FS segment of the first slot used to hold a ThreadState
// pointer; slot N (1-based) lives at kUserApplicationSlot + 4 * (N - 1).
const uint32 kUserApplicationSlot = 0x700;
// Number of FS slots; entry points are instantiated above for slots 1 to 4.
const uint32 kNumSlots = 4U;
// Sentinel meaning "no previous basic block".
const uint32 kInvalidBasicBlockId = ~0U;
269 :
// The indexed_frequency_data for the bbentry instrumentation mode has 1 column.
// The frequency buffer is reinterpreted as an array of these records, indexed
// by basic block id.
struct BBEntryFrequency {
  // Saturating count of entries into the basic block.
  uint32 frequency;
};
274 :
// The indexed_frequency_data for the branch instrumentation mode has 3 columns.
// The frequency buffer is reinterpreted as an array of these records, indexed
// by basic block id. All counters saturate.
struct BranchFrequency {
  // Number of times the basic block was entered.
  uint32 frequency;
  // Number of times the block was left through a taken branch, i.e. the next
  // block entered was not the one with the following id.
  uint32 branch_taken;
  // Number of times the simulated predictor guessed the branch wrong.
  uint32 mispredicted;
};
281 :
// An entry in the basic block id buffer: one deferred call to
// ThreadState::Enter.
struct BranchBufferEntry {
  // The basic block that was entered.
  uint32 basic_block_id;
  // The block recorded as last executed at that time, or kInvalidBasicBlockId.
  uint32 last_basic_block_id;
};
287 :
// All tracing runs through this object. It is handed out by
// BasicBlockEntry::Instance().
base::LazyInstance<BasicBlockEntry> static_bbentry_instance =
    LAZY_INSTANCE_INITIALIZER;
291 :
292 : // Increment and saturate a 32-bit value.
293 E : inline uint32 IncrementAndSaturate(uint32 value) {
294 E : if (value != ~0U)
295 E : ++value;
296 E : return value;
297 E : }
298 :
299 : // Get the address of the module containing @p addr. We do this by querying
300 : // for the allocation that contains @p addr. This must lie within the
301 : // instrumented module, and be part of the single allocation in which the
302 : // image of the module lies. The base of the module will be the base address
303 : // of the allocation.
304 : // TODO(rogerm): Move to agent::common.
305 E : HMODULE GetModuleForAddr(const void* addr) {
306 E : MEMORY_BASIC_INFORMATION mem_info = {};
307 :
308 : // Lookup up the allocation in which addr is located.
309 E : if (::VirtualQuery(addr, &mem_info, sizeof(mem_info)) == 0) {
310 i : DWORD error = ::GetLastError();
311 i : LOG(ERROR) << "VirtualQuery failed: " << ::common::LogWe(error) << ".";
312 i : return NULL;
313 : }
314 :
315 : // Check that the allocation base has a valid PE header magic number.
316 E : base::win::PEImage image(reinterpret_cast<HMODULE>(mem_info.AllocationBase));
317 E : if (!image.VerifyMagic()) {
318 i : LOG(ERROR) << "Invalid module found for "
319 : << base::StringPrintf("0x%08X", addr) << ".";
320 i : return NULL;
321 : }
322 :
323 : // Then it's a module.
324 E : return image.module();
325 E : }
326 :
327 : // Returns true if @p version is the expected version for @p datatype_id.
328 : bool DatatypeVersionIsValid(uint32 data_type,
329 : uint32 agent_id,
330 : uint32 version,
331 : uint32 frequency_size,
332 E : uint32 num_columns) {
333 : // We can only handle this if it looks right.
334 E : const size_t kIntSize = sizeof(int);
335 E : if (data_type == IndexedFrequencyData::BRANCH) {
336 : if (agent_id != ::common::kBasicBlockEntryAgentId ||
337 : version != ::common::kBranchFrequencyDataVersion ||
338 : frequency_size != kIntSize ||
339 E : num_columns != 3U) {
340 i : LOG(ERROR) << "Unexpected values in the branch data structures.";
341 i : return false;
342 E : }
343 E : } else if (data_type == IndexedFrequencyData::BASIC_BLOCK_ENTRY) {
344 : if (agent_id != ::common::kBasicBlockEntryAgentId ||
345 : version != ::common::kBasicBlockFrequencyDataVersion ||
346 : frequency_size != kIntSize ||
347 E : num_columns != 1U) {
348 i : LOG(ERROR) << "Unexpected values in the basic block data structures.";
349 i : return false;
350 : }
351 E : } else {
352 i : LOG(ERROR) << "Unexpected entry kind.";
353 i : return false;
354 : }
355 :
356 E : return true;
357 E : }
358 :
359 : } // namespace
360 :
// The IncrementIndexedFreqDataHook parameters. This mirrors the stack as seen
// by the two-argument probes, from the top of the stack upwards: the return
// address pushed by the call, then module_data, then the basic block id.
struct BasicBlockEntry::IncrementIndexedFreqDataFrame {
  const void* ret_addr;
  IndexedFrequencyData* module_data;
  uint32 index;
};
// Three 32-bit fields; the layout must match what the instrumentation pushes.
COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::IncrementIndexedFreqDataFrame,
                              12);
369 :
// The DllMainEntryHook parameters. This mirrors the stack as left by the
// entry-point thunk, from the top of the stack upwards: the two values pushed
// by the thunk (function, then module_data) followed by the return address.
// NOTE(review): module/reason/reserved are presumably the arguments of the
// original DllMain call, as their names and types suggest -- confirm.
struct BasicBlockEntry::DllMainEntryFrame {
  FuncAddr function;
  IndexedFrequencyData* module_data;
  const void* ret_addr;
  HMODULE module;
  DWORD reason;
  DWORD reserved;
};
COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::DllMainEntryFrame, 24);
380 :
// The ExeMainEntryHook parameters. This mirrors the stack as left by the
// entry-point thunk, from the top of the stack upwards: the two values pushed
// by the thunk (function, then module_data) followed by the return address.
struct BasicBlockEntry::ExeMainEntryFrame {
  FuncAddr function;
  IndexedFrequencyData* module_data;
  const void* ret_addr;
};
COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::ExeMainEntryFrame, 12);
388 :
389 : // The per-thread-per-instrumented-module state managed by this agent.
390 : class BasicBlockEntry::ThreadState : public agent::common::ThreadStateBase {
391 : public:
392 : // Initialize a ThreadState instance.
393 : // @param module_data Module information injected in the instrumented
394 : // application.
395 : // @param lock Lock associated with the @p frequency_data.
396 : // @param frequency_data Buffer to commit counters update.
397 : ThreadState(IndexedFrequencyData* module_data,
398 : base::Lock* lock,
399 : void* frequency_data);
400 :
401 : // Destroy a ThreadState instance.
402 : ~ThreadState();
403 :
404 : // Allocate space to buffer basic block ids.
405 : void AllocateBasicBlockIdBuffer();
406 :
407 : // Allocate temporary space to simulate a branch predictor.
408 : void AllocatePredictorCache();
409 :
410 : // Saturation increment the frequency record for @p index. Note that in
411 : // Release mode, no range checking is performed on index.
412 : // @param basic_block_id the basic block index.
413 : void Increment(uint32 basic_block_id);
414 :
415 : // Update state and frequency when a jump enters the basic block @p index
416 : // coming from the basic block @last.
417 : // @param basic_block_id the basic block index.
418 : // @param last_basic_block_id the originating basic block index from which we
419 : // enter @p basic_block_id.
420 : void Enter(uint32 basic_block_id, uint32 last_basic_block_id);
421 :
422 : // Update state and frequency when a jump leaves the basic block @p index.
423 : // @param basic_block_id the basic block index.
424 : void Leave(uint32 basic_block_id);
425 :
426 : // Push a basic block id in the basic block ids buffer, to be processed later.
427 : // @param basic_block_id the basic block index.
428 : // @returns true when the buffer is full and there is no room for an other
429 : // entry, false otherwise.
430 : bool Push(uint32 basic_block_id);
431 :
432 : // Flush pending values in the basic block ids buffer.
433 : void Flush();
434 :
435 : // Return the id of the most recent basic block executed.
436 E : uint32 last_basic_block_id() { return last_basic_block_id_; }
437 :
438 : // Reset the most recent basic block executed.
439 : void reset_last_basic_block_id();
440 :
441 : // Return the lock associated with 'trace_data_' for atomic update.
442 E : base::Lock* trace_lock() { return trace_lock_; }
443 :
444 : // For a given basic block id, returns the corresponding BBEntryFrequency.
445 : // @param basic_block_id the basic block index.
446 : // @returns the bbentry frequency entry for a given basic block id.
447 : BBEntryFrequency& GetBBEntryFrequency(uint32 basic_block_id);
448 :
449 : // For a given basic block id, returns the corresponding BranchFrequency.
450 : // @param basic_block_id the basic block index.
451 : // @returns the branch frequency entry for a given basic block id.
452 : BranchFrequency& GetBranchFrequency(uint32 basic_block_id);
453 :
454 : // Retrieve the indexed_frequency_data specific fields for this agent.
455 : // @returns a pointer to the specific fields.
456 E : const BasicBlockIndexedFrequencyData* GetBasicBlockData() const {
457 : return
458 E : reinterpret_cast<const BasicBlockIndexedFrequencyData*>(module_data_);
459 E : }
460 :
461 : protected:
462 : // As a shortcut, this points to the beginning of the array of basic-block
463 : // entry frequency values. With tracing enabled, this is equivalent to:
464 : // reinterpret_cast<uint32*>(this->trace_data->frequency_data)
465 : // If tracing is not enabled, this will be set to point to a static
466 : // allocation of IndexedFrequencyData::frequency_data.
467 : uint32* frequency_data_; // Under trace_lock_.
468 :
469 : // Module information this thread state is gathering information on.
470 : const IndexedFrequencyData* module_data_;
471 :
472 : // Lock corresponding to 'frequency_data_'.
473 : base::Lock* trace_lock_;
474 :
475 : // Buffer used to queue basic block ids for later processing in batches.
476 : std::vector<BranchBufferEntry> basic_block_id_buffer_;
477 :
478 : // Current offset of the next available entry in the basic block id buffer.
479 : uint32 basic_block_id_buffer_offset_;
480 :
481 : // The branch predictor state (2-bit saturating counter).
482 : std::vector<uint8> predictor_data_;
483 :
484 : // The last basic block id executed.
485 : uint32 last_basic_block_id_;
486 :
487 : private:
488 : DISALLOW_COPY_AND_ASSIGN(ThreadState);
489 : };
490 :
// Stores the module data, lock and counter buffer this state works on. The
// id buffer and predictor cache start out empty and no basic block is recorded
// as last executed.
BasicBlockEntry::ThreadState::ThreadState(IndexedFrequencyData* module_data,
                                          base::Lock* lock,
                                          void* frequency_data)
    : frequency_data_(static_cast<uint32*>(frequency_data)),
      module_data_(module_data),
      trace_lock_(lock),
      basic_block_id_buffer_offset_(0),
      last_basic_block_id_(kInvalidBasicBlockId) {
}
500 :
501 E : BasicBlockEntry::ThreadState::~ThreadState() {
502 E : if (!basic_block_id_buffer_.empty())
503 E : Flush();
504 :
505 E : uint32 slot = GetBasicBlockData()->fs_slot;
506 E : if (slot != 0) {
507 E : uint32 address = kUserApplicationSlot + 4 * (slot - 1);
508 E : __writefsdword(address, 0);
509 : }
510 E : }
511 :
512 E : void BasicBlockEntry::ThreadState::AllocateBasicBlockIdBuffer() {
513 E : DCHECK(basic_block_id_buffer_.empty());
514 E : basic_block_id_buffer_.resize(kBufferSize * sizeof(BranchBufferEntry));
515 E : }
516 :
// Allocates the kPredictorCacheSize value-initialized (zero) predictor states
// used by Enter(). Must be called at most once, while the cache is still
// empty.
void BasicBlockEntry::ThreadState::AllocatePredictorCache() {
  DCHECK(predictor_data_.empty());
  predictor_data_.resize(kPredictorCacheSize);
}
521 :
// Marks that there is no previous basic block, so that the next Enter() is not
// attributed to a branch.
void BasicBlockEntry::ThreadState::reset_last_basic_block_id() {
  last_basic_block_id_ = kInvalidBasicBlockId;
}
525 :
526 : BBEntryFrequency& BasicBlockEntry::ThreadState::GetBBEntryFrequency(
527 E : uint32 basic_block_id) {
528 E : DCHECK(frequency_data_ != NULL);
529 : BBEntryFrequency* frequencies =
530 E : reinterpret_cast<BBEntryFrequency*>(frequency_data_);
531 E : BBEntryFrequency& entry = frequencies[basic_block_id];
532 E : return entry;
533 E : }
534 :
535 : BranchFrequency& BasicBlockEntry::ThreadState::GetBranchFrequency(
536 E : uint32 basic_block_id) {
537 E : DCHECK(frequency_data_ != NULL);
538 : BranchFrequency* frequencies =
539 E : reinterpret_cast<BranchFrequency*>(frequency_data_);
540 E : BranchFrequency& entry = frequencies[basic_block_id];
541 E : return entry;
542 E : }
543 :
544 E : inline void BasicBlockEntry::ThreadState::Increment(uint32 basic_block_id) {
545 E : DCHECK(frequency_data_ != NULL);
546 E : DCHECK(module_data_ != NULL);
547 E : DCHECK_LT(basic_block_id, module_data_->num_entries);
548 :
549 : // Retrieve information for the basic block.
550 E : BBEntryFrequency& entry = GetBBEntryFrequency(basic_block_id);
551 E : entry.frequency = IncrementAndSaturate(entry.frequency);
552 E : }
553 :
554 : void BasicBlockEntry::ThreadState::Enter(
555 E : uint32 basic_block_id, uint32 last_basic_block_id) {
556 E : DCHECK(frequency_data_ != NULL);
557 E : DCHECK(module_data_ != NULL);
558 E : DCHECK_LT(basic_block_id, module_data_->num_entries);
559 :
560 : // Retrieve information for the current basic block.
561 E : BranchFrequency& current = GetBranchFrequency(basic_block_id);
562 :
563 : // Count the execution of this basic block.
564 E : if (current.frequency != kInvalidBasicBlockId)
565 E : current.frequency = IncrementAndSaturate(current.frequency);
566 :
567 : // Check if entering from a jump or something else (call).
568 E : if (last_basic_block_id == kInvalidBasicBlockId)
569 E : return;
570 :
571 : // Retrieve information for the previous basic block.
572 E : BranchFrequency& previous = GetBranchFrequency(last_basic_block_id);
573 :
574 : // If last jump was taken, count the branch taken in the previous basic block.
575 E : bool taken = (basic_block_id != last_basic_block_id + 1);
576 E : if (taken) {
577 E : if (previous.branch_taken != kInvalidBasicBlockId)
578 E : previous.branch_taken = IncrementAndSaturate(previous.branch_taken);
579 : }
580 :
581 : // Simulate the branch predictor.
582 : // see: http://en.wikipedia.org/wiki/Branch_predictor
583 : // states:
584 : // 0: Strongly not taken
585 : // 1: Weakly not taken
586 : // 2: Weakly taken
587 : // 3: Strongly taken
588 E : if (predictor_data_.empty())
589 i : return;
590 E : DCHECK(predictor_data_.size() == kPredictorCacheSize);
591 E : if (last_basic_block_id != kInvalidBasicBlockId) {
592 E : size_t offset = last_basic_block_id % kPredictorCacheSize;
593 E : uint8& state = predictor_data_[offset];
594 E : if (taken) {
595 E : if (state < 2)
596 E : previous.mispredicted = IncrementAndSaturate(previous.mispredicted);
597 E : if (state < 3)
598 E : ++state;
599 E : } else {
600 E : if (state > 1)
601 E : previous.mispredicted = IncrementAndSaturate(previous.mispredicted);
602 E : if (state != 0)
603 E : --state;
604 : }
605 : }
606 E : }
607 :
// Remembers @p basic_block_id as the last basic block executed, so that a
// later entry can be attributed to a branch out of it. The id is only
// range-checked in debug builds.
inline void BasicBlockEntry::ThreadState::Leave(uint32 basic_block_id) {
  DCHECK(module_data_ != NULL);
  DCHECK_LT(basic_block_id, module_data_->num_entries);

  last_basic_block_id_ = basic_block_id;
}
614 :
615 E : bool BasicBlockEntry::ThreadState::Push(uint32 basic_block_id) {
616 E : DCHECK(module_data_ != NULL);
617 E : DCHECK(basic_block_id < module_data_->num_entries);
618 :
619 E : uint32 last_offset = basic_block_id_buffer_offset_;
620 E : DCHECK_LT(last_offset, basic_block_id_buffer_.size());
621 :
622 E : BranchBufferEntry* entry = &basic_block_id_buffer_[last_offset];
623 E : entry->basic_block_id = basic_block_id;
624 E : entry->last_basic_block_id = last_basic_block_id_;
625 :
626 E : ++basic_block_id_buffer_offset_;
627 :
628 E : return basic_block_id_buffer_offset_ == kBufferSize;
629 E : }
630 :
631 E : void BasicBlockEntry::ThreadState::Flush() {
632 E : uint32 last_offset = basic_block_id_buffer_offset_;
633 :
634 E : for (size_t offset = 0; offset < last_offset; ++offset) {
635 E : BranchBufferEntry* entry = &basic_block_id_buffer_[offset];
636 E : Enter(entry->basic_block_id, entry->last_basic_block_id);
637 E : }
638 :
639 : // Reset buffer.
640 E : basic_block_id_buffer_offset_ = 0;
641 E : }
642 :
// Returns the process-wide agent instance held by static_bbentry_instance.
BasicBlockEntry* BasicBlockEntry::Instance() {
  return static_bbentry_instance.Pointer();
}
646 :
// Value-initializes the slot registry and opens the RPC session (and its
// initial segment) with the call-trace service.
BasicBlockEntry::BasicBlockEntry() : registered_slots_() {
  // Create a session.
  trace::client::InitializeRpcSession(&session_, &segment_);
}
651 :
// Nothing to do explicitly; members clean up after themselves.
BasicBlockEntry::~BasicBlockEntry() {
}
654 :
655 E : bool BasicBlockEntry::InitializeFrequencyData(IndexedFrequencyData* data) {
656 E : DCHECK(data != NULL);
657 :
658 : // Nothing to allocate? We're done!
659 E : if (data->num_entries == 0) {
660 i : LOG(WARNING) << "Module contains no instrumented basic blocks, not "
661 : << "allocating data segment.";
662 i : return true;
663 : }
664 :
665 : // Determine the size of the basic block frequency table.
666 E : DCHECK_LT(0U, data->frequency_size);
667 E : DCHECK_LT(0U, data->num_columns);
668 : size_t data_size = data->num_entries * data->frequency_size *
669 E : data->num_columns;
670 :
671 : // Determine the size of the basic block frequency record.
672 E : size_t record_size = sizeof(TraceIndexedFrequencyData) + data_size - 1;
673 :
674 : // Determine the size of the buffer we need. We need room for the basic block
675 : // frequency struct plus a single RecordPrefix header.
676 E : size_t segment_size = sizeof(RecordPrefix) + record_size;
677 :
678 : // Allocate the actual segment for the frequency data.
679 E : if (!session_.AllocateBuffer(segment_size, &segment_)) {
680 i : LOG(ERROR) << "Failed to allocate frequency data segment.";
681 i : return false;
682 : }
683 :
684 : // Ensure it's big enough to allocate the basic-block frequency data we want.
685 : // This automatically accounts for the RecordPrefix overhead.
686 E : if (!segment_.CanAllocate(record_size)) {
687 i : LOG(ERROR) << "Returned frequency data segment smaller than expected.";
688 i : return false;
689 : }
690 :
691 : // Allocate the basic-block frequency data. We will leave this allocated and
692 : // let it get flushed during tear-down of the call-trace client.
693 : TraceIndexedFrequencyData* trace_data =
694 : reinterpret_cast<TraceIndexedFrequencyData*>(
695 : segment_.AllocateTraceRecordImpl(TRACE_INDEXED_FREQUENCY,
696 E : record_size));
697 E : DCHECK(trace_data != NULL);
698 :
699 : // Initialize the basic block frequency data struct.
700 E : HMODULE module = GetModuleForAddr(data);
701 E : CHECK(module != NULL);
702 E : const base::win::PEImage image(module);
703 E : const IMAGE_NT_HEADERS* nt_headers = image.GetNTHeaders();
704 E : trace_data->data_type = data->data_type;
705 E : trace_data->module_base_addr = reinterpret_cast<ModuleAddr>(image.module());
706 E : trace_data->module_base_size = nt_headers->OptionalHeader.SizeOfImage;
707 E : trace_data->module_checksum = nt_headers->OptionalHeader.CheckSum;
708 E : trace_data->module_time_date_stamp = nt_headers->FileHeader.TimeDateStamp;
709 E : trace_data->frequency_size = data->frequency_size;
710 E : trace_data->num_entries = data->num_entries;
711 E : trace_data->num_columns = data->num_columns;
712 :
713 : // Hook up the newly allocated buffer to the call-trace instrumentation.
714 : data->frequency_data =
715 E : reinterpret_cast<uint32*>(&trace_data->frequency_data[0]);
716 :
717 E : return true;
718 E : }
719 :
720 : BasicBlockEntry::ThreadState* BasicBlockEntry::CreateThreadState(
721 E : IndexedFrequencyData* module_data) {
722 E : DCHECK(module_data != NULL);
723 E : CHECK_NE(IndexedFrequencyData::INVALID_DATA_TYPE, module_data->data_type);
724 :
725 : // Get a pointer to the extended indexed frequency data.
726 : BasicBlockIndexedFrequencyData* basicblock_data =
727 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
728 :
729 : // Create the thread-local state for this thread. By default, just point the
730 : // counter array to the statically allocated fall-back area.
731 : ThreadState* state =
732 E : new ThreadState(module_data, &lock_, module_data->frequency_data);
733 E : CHECK(state != NULL);
734 :
735 : // Register the thread state with the thread state manager.
736 E : thread_state_manager_.Register(state);
737 :
738 : // Store the thread state in the TLS slot.
739 E : DCHECK_NE(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
740 E : ::TlsSetValue(basicblock_data->tls_index, state);
741 :
742 : // If we're not actually tracing, then we're done.
743 E : if (session_.IsDisabled())
744 E : return state;
745 :
746 E : uint32 slot = basicblock_data->fs_slot;
747 E : if (slot != 0) {
748 E : uint32 address = kUserApplicationSlot + 4 * (slot - 1);
749 : // Sanity check: The slot must be available (not used by an other tool).
750 E : DWORD content = __readfsdword(address);
751 E : CHECK_EQ(content, 0U);
752 : // Put the current state to the TLS slot.
753 E : __writefsdword(address, reinterpret_cast<unsigned long>(state));
754 : }
755 :
756 : // Nothing to allocate? We're done!
757 E : if (module_data->num_entries == 0) {
758 i : LOG(WARNING) << "Module contains no instrumented basic blocks.";
759 i : return state;
760 : }
761 :
762 : // Allocate space used by branch instrumentation.
763 E : if (module_data->data_type == ::common::IndexedFrequencyData::BRANCH)
764 E : state->AllocatePredictorCache();
765 :
766 : // Allocate buffer to which basic block id are pushed before being committed.
767 E : state->AllocateBasicBlockIdBuffer();
768 :
769 E : return state;
770 E : }
771 :
772 : inline BasicBlockEntry::ThreadState* BasicBlockEntry::GetThreadState(
773 E : IndexedFrequencyData* module_data) {
774 E : DCHECK(module_data != NULL);
775 E : ScopedLastErrorKeeper scoped_last_error_keeper;
776 :
777 : // Get a pointer to the extended indexed frequency data.
778 : BasicBlockIndexedFrequencyData* basicblock_data =
779 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
780 :
781 E : DWORD tls_index = basicblock_data->tls_index;
782 E : DCHECK_NE(TLS_OUT_OF_INDEXES, tls_index);
783 E : ThreadState* state = static_cast<ThreadState*>(::TlsGetValue(tls_index));
784 E : return state;
785 E : }
786 :
787 : template<int S>
788 E : inline BasicBlockEntry::ThreadState* BasicBlockEntry::GetThreadStateSlot() {
789 E : uint32 address = kUserApplicationSlot + 4 * (S - 1);
790 E : DWORD content = __readfsdword(address);
791 E : return reinterpret_cast<BasicBlockEntry::ThreadState*>(content);
792 E : }
793 :
794 : void WINAPI BasicBlockEntry::IncrementIndexedFreqDataHook(
795 E : IncrementIndexedFreqDataFrame* entry_frame) {
796 E : DCHECK(entry_frame != NULL);
797 E : DCHECK(entry_frame->module_data != NULL);
798 : DCHECK_GT(entry_frame->module_data->num_entries,
799 E : entry_frame->index);
800 :
801 E : ThreadState* state = GetThreadState(entry_frame->module_data);
802 E : if (state == NULL) {
803 E : ScopedLastErrorKeeper scoped_last_error_keeper;
804 E : state = Instance()->CreateThreadState(entry_frame->module_data);
805 E : }
806 :
807 E : base::AutoLock scoped_lock(*state->trace_lock());
808 E : state->Increment(entry_frame->index);
809 E : }
810 :
811 : void WINAPI BasicBlockEntry::BranchEnterHook(
812 E : IncrementIndexedFreqDataFrame* entry_frame) {
813 E : DCHECK(entry_frame != NULL);
814 E : DCHECK(entry_frame->module_data != NULL);
815 : DCHECK_GT(entry_frame->module_data->num_entries,
816 E : entry_frame->index);
817 E : ThreadState* state = GetThreadState(entry_frame->module_data);
818 E : if (state == NULL) {
819 E : ScopedLastErrorKeeper scoped_last_error_keeper;
820 E : state = Instance()->CreateThreadState(entry_frame->module_data);
821 E : }
822 :
823 E : base::AutoLock scoped_lock(*state->trace_lock());
824 E : uint32 last_basic_block_id = state->last_basic_block_id();
825 E : state->Enter(entry_frame->index, last_basic_block_id);
826 E : state->reset_last_basic_block_id();
827 E : }
828 :
829 : void WINAPI BasicBlockEntry::BranchEnterBufferedHook(
830 E : IncrementIndexedFreqDataFrame* entry_frame) {
831 E : DCHECK(entry_frame != NULL);
832 E : DCHECK(entry_frame->module_data != NULL);
833 : DCHECK_GT(entry_frame->module_data->num_entries,
834 E : entry_frame->index);
835 E : ThreadState* state = GetThreadState(entry_frame->module_data);
836 E : if (state == NULL) {
837 E : ScopedLastErrorKeeper scoped_last_error_keeper;
838 E : state = Instance()->CreateThreadState(entry_frame->module_data);
839 E : }
840 :
841 E : if (state->Push(entry_frame->index)) {
842 E : base::AutoLock scoped_lock(*state->trace_lock());
843 E : state->Flush();
844 E : }
845 E : state->reset_last_basic_block_id();
846 E : }
847 :
848 : template<int S>
849 : void __fastcall BasicBlockEntry::FunctionEnterHookSlot(
850 E : IndexedFrequencyData* module_data) {
851 E : DCHECK(module_data != NULL);
852 :
853 : // Check if ThreadState is already created.
854 E : ThreadState* state = GetThreadStateSlot<S>();
855 E : if (state != NULL)
856 E : return;
857 :
858 : // Get or create the ThreadState.
859 E : state = GetThreadState(module_data);
860 E : if (state == NULL) {
861 E : ScopedLastErrorKeeper scoped_last_error_keeper;
862 E : state = Instance()->CreateThreadState(module_data);
863 E : }
864 E : }
865 :
866 : template<int S>
867 E : void __fastcall BasicBlockEntry::BranchEnterHookSlot(uint32 index) {
868 E : ThreadState* state = GetThreadStateSlot<S>();
869 E : if (state == NULL)
870 i : return;
871 :
872 E : base::AutoLock scoped_lock(*state->trace_lock());
873 E : uint32 last_basic_block_id = state->last_basic_block_id();
874 E : state->Enter(index, last_basic_block_id);
875 E : state->reset_last_basic_block_id();
876 E : }
877 :
878 : template<int S>
879 E : void __fastcall BasicBlockEntry::BranchEnterBufferedHookSlot(uint32 index) {
880 E : ThreadState* state = GetThreadStateSlot<S>();
881 E : if (state == NULL)
882 i : return;
883 :
884 E : if (state->Push(index)) {
885 E : base::AutoLock scoped_lock(*state->trace_lock());
886 E : state->Flush();
887 E : }
888 E : state->reset_last_basic_block_id();
889 E : }
890 :
891 : template<int S>
892 E : void __fastcall BasicBlockEntry::BranchExitHookSlot(uint32 index) {
893 E : ThreadState* state = GetThreadStateSlot<S>();
894 E : if (state == NULL)
895 i : return;
896 :
897 E : state->Leave(index);
898 E : }
899 :
900 : inline void WINAPI BasicBlockEntry::BranchExitHook(
901 E : IncrementIndexedFreqDataFrame* entry_frame) {
902 E : DCHECK(entry_frame != NULL);
903 E : DCHECK(entry_frame->module_data != NULL);
904 : DCHECK_GT(entry_frame->module_data->num_entries,
905 E : entry_frame->index);
906 :
907 E : ThreadState* state = GetThreadState(entry_frame->module_data);
908 E : if (state == NULL)
909 i : return;
910 :
911 E : state->Leave(entry_frame->index);
912 E : }
913 :
914 E : void WINAPI BasicBlockEntry::DllMainEntryHook(DllMainEntryFrame* entry_frame) {
915 E : ScopedLastErrorKeeper scoped_last_error_keeper;
916 E : DCHECK(entry_frame != NULL);
917 E : switch (entry_frame->reason) {
918 : case DLL_PROCESS_ATTACH:
919 E : Instance()->OnProcessAttach(entry_frame->module_data);
920 E : break;
921 :
922 : case DLL_THREAD_ATTACH:
923 : // We don't handle this event because the thread may never actually
924 : // call into an instrumented module, so we don't want to allocate
925 : // resources needlessly. Further, we won't get this event for thread
926 : // that were created before the agent was loaded. On first use of
927 : // an instrumented basic-block in a given thread, any thread specific
928 : // resources will be allocated.
929 E : break;
930 :
931 : case DLL_PROCESS_DETACH:
932 : case DLL_THREAD_DETACH:
933 E : Instance()->OnThreadDetach(entry_frame->module_data);
934 E : break;
935 :
936 : default:
937 i : NOTREACHED();
938 : }
939 E : }
940 :
941 E : void WINAPI BasicBlockEntry::ExeMainEntryHook(ExeMainEntryFrame* entry_frame) {
942 E : ScopedLastErrorKeeper scoped_last_error_keeper;
943 E : DCHECK(entry_frame != NULL);
944 E : Instance()->OnProcessAttach(entry_frame->module_data);
945 E : }
946 :
947 E : void BasicBlockEntry::RegisterModule(const void* addr) {
948 E : DCHECK(addr != NULL);
949 :
950 : // Allocate a segment for the module information.
951 E : trace::client::TraceFileSegment module_info_segment;
952 E : CHECK(session_.AllocateBuffer(&module_info_segment));
953 :
954 : // Log the module. This is required in order to associate basic-block
955 : // frequency with a module and PDB file during post-processing.
956 E : HMODULE module = GetModuleForAddr(addr);
957 E : CHECK(module != NULL);
958 E : CHECK(agent::common::LogModule(module, &session_, &module_info_segment));
959 :
960 : // Commit the module information.
961 E : CHECK(session_.ReturnBuffer(&module_info_segment));
962 E : }
963 :
964 : void BasicBlockEntry::RegisterFastPathSlot(
965 E : IndexedFrequencyData* module_data, unsigned int slot) {
966 E : DCHECK_NE(slot, 0U);
967 E : DCHECK_LE(slot, kNumSlots);
968 E : DCHECK(module_data != NULL);
969 :
970 : // The slot must not have been registered.
971 E : CHECK_EQ((1 << slot) & registered_slots_, 0U);
972 E : registered_slots_ |= (1 << slot);
973 E : }
974 :
975 : void BasicBlockEntry::UnregisterFastPathSlot(
976 : IndexedFrequencyData* module_data, unsigned int slot) {
977 : DCHECK_NE(slot, 0U);
978 : DCHECK_LE(slot, kNumSlots);
979 : DCHECK(module_data != NULL);
980 :
981 : // The slot must be registered.
982 : CHECK_NE((1 << slot) & registered_slots_, 0U);
983 : registered_slots_ &= ~(1 << slot);
984 : }
985 :
986 E : void BasicBlockEntry::OnProcessAttach(IndexedFrequencyData* module_data) {
987 E : DCHECK(module_data != NULL);
988 :
989 : // Get a pointer to the extended indexed frequency data.
990 : BasicBlockIndexedFrequencyData* basicblock_data =
991 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
992 :
993 : // Exit if the magic number does not match.
994 E : CHECK_EQ(::common::kBasicBlockEntryAgentId, module_data->agent_id);
995 :
996 : // Exit if the version does not match.
997 : CHECK(DatatypeVersionIsValid(module_data->data_type,
998 : module_data->agent_id,
999 : module_data->version,
1000 : module_data->frequency_size,
1001 E : module_data->num_columns));
1002 :
1003 : // We allow for this hook to be called multiple times. We expect the first
1004 : // time to occur under the loader lock, so we don't need to worry about
1005 : // concurrency for this check.
1006 E : if (module_data->initialization_attempted)
1007 i : return;
1008 :
1009 : // Flag the module as initialized.
1010 E : module_data->initialization_attempted = 1U;
1011 :
1012 : // We expect this to be executed exactly once for each module.
1013 E : CHECK_EQ(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
1014 E : basicblock_data->tls_index = ::TlsAlloc();
1015 E : CHECK_NE(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
1016 :
1017 : // If there is a FS slot configured, register it.
1018 E : if (basicblock_data->fs_slot != 0)
1019 E : RegisterFastPathSlot(module_data, basicblock_data->fs_slot);
1020 :
1021 : // Register this module with the call_trace if the session is not disabled.
1022 : // Note that we expect module_data to be statically defined within the
1023 : // module of interest, so we can use its address to lookup the module.
1024 E : if (session_.IsDisabled()) {
1025 E : LOG(WARNING) << "Unable to initialize client as we are not tracing.";
1026 E : return;
1027 : }
1028 :
1029 E : if (!InitializeFrequencyData(module_data)) {
1030 i : LOG(ERROR) << "Failed to initialize frequency data.";
1031 i : return;
1032 : }
1033 :
1034 E : RegisterModule(module_data);
1035 :
1036 E : LOG(INFO) << "BBEntry client initialized.";
1037 E : }
1038 :
1039 E : void BasicBlockEntry::OnThreadDetach(IndexedFrequencyData* module_data) {
1040 E : DCHECK(module_data != NULL);
1041 E : DCHECK_EQ(1U, module_data->initialization_attempted);
1042 :
1043 : // Get a pointer to the extended indexed frequency data.
1044 : BasicBlockIndexedFrequencyData* basicblock_data =
1045 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
1046 :
1047 E : DCHECK_NE(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
1048 :
1049 E : ThreadState* state = GetThreadState(module_data);
1050 E : if (state == NULL)
1051 E : return;
1052 :
1053 E : state->Flush();
1054 E : thread_state_manager_.MarkForDeath(state);
1055 E : }
1056 :
1057 : } // namespace basic_block_entry
1058 : } // namespace agent
|