1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the basic-block entry counting agent library.
16 : //
17 : // The operation of this module is in two parts: instrumentation and agent.
18 : // Both parts work together to gather metrics on the execution of a module.
19 : //
20 : // * Instrumentation
21 : // The instrumenter is responsible for injecting probes within the
22 : // instrumented module to call entry-points in the agent. There are two
23 : // kinds of supported instrumentation: basic block entry count and branch
24 : // profiling.
25 : //
// Instrumentation for basic block entry count:
//   BB1: [code]       --->   BB1: push bb_id
//        call func                push module_data
//        jz BB2                   call [increment_hook]
//                                 [code]
//                                 call func
//                                 jz BB2
33 : //
// Instrumentation for branch profiling:
//   BB1: [code]       --->   BB1: push bb_id
//        call func                push module_data
//        jz BB2                   call [entry_hook]
//                                 [code]
//                                 call func
//                                 push bb_id
//                                 push module_data
//                                 call [leave_hook]
//                                 jz BB2
44 : //
45 : // Using the last block id produced by an entry_hook to determine the
46 : // previous executed basic block won't work. As an example, the call to
47 : // 'func' will move the control flow to another function and modify the last
// executed basic block. The leave hook must be called at the end of the basic
49 : // block, before following control flow to any other basic blocks.
50 : //
51 : // The calling convention is callee clean-up. The callee is responsible for
52 : // cleaning up any values on the stack. This calling convention is chosen
53 : // to keep the application code size as low as possible.
54 : //
55 : // * Agent
56 : // The agent is responsible for allocating a trace segment and collecting
// metrics. The trace segment will be dumped to a file for post-processing.
58 : //
59 : // There are two mechanisms to collect metrics:
60 : // - Basic mode: In the basic mode, the hook acquires a lock and updates a
61 : // process-wide segment shared by all threads. In this mode, no events can
62 : // be lost.
63 : // - Buffered mode: A per-thread buffer is used to collect execution
64 : // information. A batch commit is done when the buffer is full. In this
65 : // mode, under a non-standard execution (crash, force exit, ...) pending
66 : // events may be lost.
67 : //
68 : // The agent keeps a ThreadState for each running thread. The thread state
69 : // is accessible through a TLS mechanism and contains information needed by
70 : // the hook (pointer to trace segment, buffer, lock, ...).
71 : //
72 : // There are two mechanisms to keep a reference to the thread state:
73 : // - TLS: The default mechanism uses the standard windows TLS API to keep
74 : // a per-thread reference to the thread state. The TLS index is allocated
75 : // and kept inside the module data information in the instrumented image.
76 : // - FS-Slot: This mechanism uses application specific slot available through
77 : // the FS segment (fs:[0x700] Reserved for user application).
78 : // See: http://en.wikipedia.org/wiki/Win32_Thread_Information_Block.
79 : // There is no API to check whether another module is using this slot, thus
80 : // this mechanism must be used in a controlled environment.
81 :
82 : #include "syzygy/agent/basic_block_entry/basic_block_entry.h"
83 :
84 : #include "base/at_exit.h"
85 : #include "base/command_line.h"
86 : #include "base/environment.h"
87 : #include "base/lazy_instance.h"
88 : #include "base/stringprintf.h"
89 : #include "base/utf_string_conversions.h"
90 : #include "base/memory/scoped_ptr.h"
91 : #include "sawbuck/common/com_utils.h"
92 : #include "syzygy/agent/common/process_utils.h"
93 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
94 : #include "syzygy/common/indexed_frequency_data.h"
95 : #include "syzygy/common/logging.h"
96 : #include "syzygy/trace/protocol/call_trace_defs.h"
97 :
// Compiler intrinsics used to read and write a 32-bit value at a given offset
// within the FS segment. This file uses them to access the per-thread slots
// starting at kUserApplicationSlot. The pragma asks the compiler to treat
// both functions as intrinsics.
unsigned long __readfsdword(unsigned long);
void __writefsdword(unsigned long, unsigned long);
#pragma intrinsic(__readfsdword, __writefsdword)
101 :
// Save caller-save registers (eax, ecx, edx) and flags (eflags).
// eax is pushed first so that it can then be used as scratch: lahf copies the
// low byte of the flags into ah and seto records the overflow flag in al, and
// that packed copy is pushed as the second dword. Four dwords (0x10 bytes)
// are pushed in total; the stubs below rely on that size when they address
// the caller's data relative to esp. Must be paired with
// BBPROBE_RESTORE_REGISTERS, which pops in the reverse order.
#define BBPROBE_SAVE_REGISTERS \
  __asm push eax \
  __asm lahf \
  __asm seto al \
  __asm push eax \
  __asm push ecx \
  __asm push edx
110 :
// Restore caller-save registers (eax, ecx, edx) and flags (eflags).
// Undoes BBPROBE_SAVE_REGISTERS: after edx and ecx are popped, the packed
// flags copy is popped into eax. al holds the 0/1 produced by seto, so adding
// 0x7f sets the overflow flag again exactly when it was set at save time;
// sahf then reloads the remaining flags from ah. The final pop restores the
// original eax.
#define BBPROBE_RESTORE_REGISTERS \
  __asm pop edx \
  __asm pop ecx \
  __asm pop eax \
  __asm add al, 0x7f \
  __asm sahf \
  __asm pop eax
119 :
// Defines a naked, C-linkage stub |function_name| that forwards to
// BasicBlockEntry::|handler|. The stub preserves eax, ecx, edx and the flags
// around the call, passes the handler a single argument (a pointer to the
// return address slot, above which the values pushed by the instrumentation
// live), and returns with "ret |stack_size|" so that |stack_size| bytes of
// pushed values are removed from the stack.
// NOTE(review): the stack comments assume the handler pops its own argument;
// the handlers visible in this file are declared WINAPI - confirm for the
// others.
#define BBPROBE_REDIRECT_CALL(function_name, handler, stack_size) \
  extern "C" void __declspec(naked) function_name() { \
    /* Stash volatile registers. */ \
    BBPROBE_SAVE_REGISTERS \
    \
    /* Stack: ... basic_block_id, module_data, ret_addr, [4x register] */ \
    \
    /* Push the address of the ret_addr slot (esp as it was on entry, i.e. */ \
    /* 0x10 bytes above the four saved dwords) as the entry-hook data. */ \
    /* This gives the entry-hook a pointer to ret_addr, module_data and */ \
    /* basic block id. */ \
    __asm lea eax, DWORD PTR[esp + 0x10] \
    __asm push eax \
    \
    /* Stack: ..., basic_block_id, module_data, ret_addr, [4x register], */ \
    /* &ret_addr. */ \
    __asm call agent::basic_block_entry::BasicBlockEntry::handler \
    /* Stack: ... basic_block_id, module_data, ret_addr, [4x register]. */ \
    \
    /* Restore volatile registers. */ \
    BBPROBE_RESTORE_REGISTERS \
    __asm ret stack_size \
  }
142 :
// Defines, for a given FS |slot| number, a pair of functions:
//  - a static __fastcall trampoline safe<function_name>_s<slot> that calls
//    BasicBlockEntry::|handler|<|slot|> with a single argument of |type|;
//  - a naked, C-linkage stub <function_name>_s<slot> that preserves eax, ecx,
//    edx and the flags, loads the single value pushed by the instrumentation
//    into ecx (the register in which __fastcall takes its first argument)
//    and calls the trampoline.
// The value is read from [esp + 0x14]: 0x10 bytes of saved registers plus the
// 4-byte return address sit above it on the stack.
#define BBPROBE_REDIRECT_CALL_SLOT(function_name, handler, type, slot) \
  static void __fastcall safe ## function_name ## _s ## slot(type index) { \
    agent::basic_block_entry::BasicBlockEntry::handler<slot>(index); \
  } \
  extern "C" void __declspec(naked) function_name ## _s ## slot() { \
    /* Stash volatile registers. */ \
    BBPROBE_SAVE_REGISTERS \
    /* Call handler */ \
    __asm mov ecx, DWORD PTR[esp + 0x14] \
    __asm call safe ## function_name ## _s ## slot \
    /* Restore volatile registers. */ \
    BBPROBE_RESTORE_REGISTERS \
    /* Return and remove index from stack. */ \
    __asm ret 4 \
  }
158 :
// Stubs for the hooks that receive both a basic block id and the module data.
// This is expected to be called via instrumentation that looks like:
//   push basic_block_id
//   push module_data
//   call [function_name]
// Each stub returns with "ret 8", removing both pushed values.
BBPROBE_REDIRECT_CALL(_branch_enter, BranchEnterHook, 8)
BBPROBE_REDIRECT_CALL(_branch_enter_buffered, BranchEnterBufferedHook, 8)
BBPROBE_REDIRECT_CALL(_branch_exit, BranchExitHook, 8)
BBPROBE_REDIRECT_CALL(_increment_indexed_freq_data,
                      IncrementIndexedFreqDataHook,
                      8)
169 :
// Function-entry stubs, one per FS slot (1 through 4); they generate
// _function_enter_s1 ... _function_enter_s4.
// This is expected to be called via instrumentation that looks like:
//   push module_data
//   call [function_name]
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           1)
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           2)
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           3)
BBPROBE_REDIRECT_CALL_SLOT(_function_enter,
                           FunctionEnterHookSlot,
                           ::common::IndexedFrequencyData*,
                           4)
189 :
// Slot-based variants of the branch hooks, one per FS slot (1 through 4).
// Only the basic block id is pushed; the stubs do not receive module_data.
// This is expected to be called via instrumentation that looks like:
//   push basic_block_id
//   call [function_name]
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 1)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 2)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 3)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter, BranchEnterHookSlot, DWORD, 4)

BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 1)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 2)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 3)
BBPROBE_REDIRECT_CALL_SLOT(_branch_enter_buffered,
                           BranchEnterBufferedHookSlot, DWORD, 4)

BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 1)
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 2)
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 3)
BBPROBE_REDIRECT_CALL_SLOT(_branch_exit, BranchExitHookSlot, DWORD, 4)
211 :
// Entry-point stubs for instrumented DLLs and executables.
// This is expected to be called via a thunk that looks like:
//   push module_data
//   push function
//   jmp [function_name]
// Because the stub is reached with a jmp, the value on top of the stack on
// entry is |function| rather than a return address: the stub's final "ret 4"
// therefore transfers control to |function| and discards module_data.
BBPROBE_REDIRECT_CALL(_indirect_penter_dllmain, DllMainEntryHook, 4)
BBPROBE_REDIRECT_CALL(_indirect_penter_exemain, ExeMainEntryHook, 4)
218 :
219 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
220 : // Our AtExit manager required by base.
221 : static base::AtExitManager* at_exit = NULL;
222 :
223 E : switch (reason) {
224 : case DLL_PROCESS_ATTACH:
225 E : DCHECK(at_exit == NULL);
226 E : at_exit = new base::AtExitManager();
227 :
228 E : CommandLine::Init(0, NULL);
229 E : common::InitLoggingForDll(L"basic_block_entry");
230 E : LOG(INFO) << "Initialized basic-block entry counting agent library.";
231 E : break;
232 :
233 : case DLL_THREAD_ATTACH:
234 E : break;
235 :
236 : case DLL_THREAD_DETACH:
237 E : break;
238 :
239 : case DLL_PROCESS_DETACH:
240 E : DCHECK(at_exit != NULL);
241 E : delete at_exit;
242 E : at_exit = NULL;
243 E : break;
244 :
245 : default:
246 i : NOTREACHED();
247 : break;
248 : }
249 :
250 E : return TRUE;
251 E : }
252 :
253 : namespace agent {
254 : namespace basic_block_entry {
255 :
256 : namespace {
257 :
258 : using ::common::IndexedFrequencyData;
259 : using agent::common::ScopedLastErrorKeeper;
260 : using trace::client::TraceFileSegment;
261 :
// Offset within the FS segment of the first slot used to hold a ThreadState
// pointer; slot N (1-based) lives at kUserApplicationSlot + 4 * (N - 1).
const uint32 kUserApplicationSlot = 0x700;
// Number of FS slots. NOTE(review): presumably matches the slots 1..4 for
// which the BBPROBE_REDIRECT_CALL_SLOT stubs are instantiated - confirm.
const uint32 kNumSlots = 4U;
// Sentinel meaning "no previous basic block"; see last_basic_block_id_.
const uint32 kInvalidBasicBlockId = ~0U;
265 :
// The indexed_frequency_data for the bbentry instrumentation mode has 1 column.
// The frequency buffer is reinterpreted as an array of these records, indexed
// by basic block id (see ThreadState::GetBBEntryFrequency).
struct BBEntryFrequency {
  // Saturating count of how many times the basic block was entered.
  uint32 frequency;
};
270 :
// The indexed_frequency_data for the branch instrumentation mode has 3 columns.
// The frequency buffer is reinterpreted as an array of these records, indexed
// by basic block id (see ThreadState::GetBranchFrequency). All counters
// saturate rather than wrap.
struct BranchFrequency {
  // Number of times the basic block was entered.
  uint32 frequency;
  // Number of times the block was left for a block other than the one with
  // the next id.
  uint32 branch_taken;
  // Number of times the simulated 2-bit predictor guessed wrong.
  uint32 mispredicted;
};
277 :
// An entry in the basic block id buffer. Each entry captures the two
// arguments of a deferred ThreadState::Enter() call.
struct BranchBufferEntry {
  // The basic block being entered.
  uint32 basic_block_id;
  // The basic block recorded as last executed when the entry was queued, or
  // kInvalidBasicBlockId if there was none.
  uint32 last_basic_block_id;
};
283 :
// All tracing runs through this object. It is held in a base::LazyInstance
// and handed out by BasicBlockEntry::Instance().
base::LazyInstance<BasicBlockEntry> static_bbentry_instance =
    LAZY_INSTANCE_INITIALIZER;
287 :
// Returns @p value + 1, except that the maximum 32-bit value is returned
// unchanged so the counter saturates instead of wrapping to zero.
inline uint32 IncrementAndSaturate(uint32 value) {
  return (value == ~0U) ? value : value + 1;
}
294 :
295 : // Get the address of the module containing @p addr. We do this by querying
296 : // for the allocation that contains @p addr. This must lie within the
297 : // instrumented module, and be part of the single allocation in which the
298 : // image of the module lies. The base of the module will be the base address
299 : // of the allocation.
300 : // TODO(rogerm): Move to agent::common.
301 E : HMODULE GetModuleForAddr(const void* addr) {
302 E : MEMORY_BASIC_INFORMATION mem_info = {};
303 :
304 : // Lookup up the allocation in which addr is located.
305 E : if (::VirtualQuery(addr, &mem_info, sizeof(mem_info)) == 0) {
306 i : DWORD error = ::GetLastError();
307 i : LOG(ERROR) << "VirtualQuery failed: " << com::LogWe(error) << ".";
308 i : return NULL;
309 : }
310 :
311 : // Check that the allocation base has a valid PE header magic number.
312 E : base::win::PEImage image(reinterpret_cast<HMODULE>(mem_info.AllocationBase));
313 E : if (!image.VerifyMagic()) {
314 i : LOG(ERROR) << "Invalid module found for "
315 : << base::StringPrintf("0x%08X", addr) << ".";
316 i : return NULL;
317 : }
318 :
319 : // Then it's a module.
320 E : return image.module();
321 E : }
322 :
323 : // Returns true if @p version is the expected version for @p datatype_id.
324 : bool DatatypeVersionIsValid(uint32 data_type,
325 : uint32 agent_id,
326 : uint32 version,
327 : uint32 frequency_size,
328 E : uint32 num_columns) {
329 : // We can only handle this if it looks right.
330 E : const size_t kIntSize = sizeof(int);
331 E : if (data_type == IndexedFrequencyData::BRANCH) {
332 : if (agent_id != ::common::kBasicBlockEntryAgentId ||
333 : version != ::common::kBranchFrequencyDataVersion ||
334 : frequency_size != kIntSize ||
335 E : num_columns != 3U) {
336 i : LOG(ERROR) << "Unexpected values in the branch data structures.";
337 i : return false;
338 E : }
339 E : } else if (data_type == IndexedFrequencyData::BASIC_BLOCK_ENTRY) {
340 : if (agent_id != ::common::kBasicBlockEntryAgentId ||
341 : version != ::common::kBasicBlockFrequencyDataVersion ||
342 : frequency_size != kIntSize ||
343 E : num_columns != 1U) {
344 i : LOG(ERROR) << "Unexpected values in the basic block data structures.";
345 i : return false;
346 : }
347 E : } else {
348 i : LOG(ERROR) << "Unexpected entry kind.";
349 i : return false;
350 : }
351 :
352 E : return true;
353 E : }
354 :
355 : } // namespace
356 :
// The IncrementIndexedFreqDataHook parameters.
// This is a view of the caller's stack as handed over by the
// BBPROBE_REDIRECT_CALL stubs: the stub passes the address of the return
// address slot, and the values pushed by the instrumentation (module_data
// last, basic block id first) sit right above it. The field order therefore
// must not change.
struct BasicBlockEntry::IncrementIndexedFreqDataFrame {
  // Return address pushed by the instrumentation's call instruction.
  const void* ret_addr;
  // Module data pushed by the instrumentation.
  IndexedFrequencyData* module_data;
  // Basic block id pushed by the instrumentation.
  uint32 index;
};
COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::IncrementIndexedFreqDataFrame,
                              12);
365 :
// The DllMainEntryHook parameters.
// This is a view of the stack as seen by the _indirect_penter_dllmain stub,
// which is reached through a thunk that pushes module_data, then function,
// and jumps to the stub. The field order mirrors that stack layout and must
// not change.
struct BasicBlockEntry::DllMainEntryFrame {
  // Address pushed last by the thunk.
  FuncAddr function;
  // Module data pushed by the thunk.
  IndexedFrequencyData* module_data;
  // Return address of whoever called the thunk.
  const void* ret_addr;
  // NOTE(review): the next three fields are presumably the arguments of the
  // instrumented DllMain (instance, reason, reserved) - confirm.
  HMODULE module;
  DWORD reason;
  DWORD reserved;
};
COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::DllMainEntryFrame, 24);
376 :
// The ExeMainEntryHook parameters.
// This is a view of the stack as seen by the _indirect_penter_exemain stub,
// which is reached through a thunk that pushes module_data, then function,
// and jumps to the stub. The field order mirrors that stack layout and must
// not change.
struct BasicBlockEntry::ExeMainEntryFrame {
  // Address pushed last by the thunk.
  FuncAddr function;
  // Module data pushed by the thunk.
  IndexedFrequencyData* module_data;
  // Return address of whoever called the thunk.
  const void* ret_addr;
};
COMPILE_ASSERT_IS_POD_OF_SIZE(BasicBlockEntry::ExeMainEntryFrame, 12);
384 :
// The per-thread-per-instrumented-module state managed by this agent.
// It holds what the hooks need on each call: the frequency table and its
// lock, the buffer of queued basic block ids, the simulated branch predictor
// state and the id of the last basic block executed.
class BasicBlockEntry::ThreadState : public agent::common::ThreadStateBase {
 public:
  // Initialize a ThreadState instance.
  // @param module_data Module information injected in the instrumented
  //     application.
  // @param lock Lock associated with the @p frequency_data.
  // @param frequency_data Buffer to commit counters update.
  ThreadState(IndexedFrequencyData* module_data,
              base::Lock* lock,
              void* frequency_data);

  // Destroy a ThreadState instance. Pending buffered entries are flushed and
  // the FS slot used by the module (if any) is cleared.
  ~ThreadState();

  // Allocate space to buffer basic block ids.
  void AllocateBasicBlockIdBuffer();

  // Allocate temporary space to simulate a branch predictor.
  void AllocatePredictorCache();

  // Saturation increment the frequency record for @p basic_block_id. Note
  // that in Release mode, no range checking is performed on the id.
  // @param basic_block_id the basic block index.
  void Increment(uint32 basic_block_id);

  // Update state and frequency when a jump enters the basic block
  // @p basic_block_id coming from the basic block @p last_basic_block_id.
  // @param basic_block_id the basic block index.
  // @param last_basic_block_id the originating basic block index from which we
  //     enter @p basic_block_id, or kInvalidBasicBlockId if there is none.
  void Enter(uint32 basic_block_id, uint32 last_basic_block_id);

  // Update state when a jump leaves the basic block @p basic_block_id: the
  // block is recorded as the last one executed.
  // @param basic_block_id the basic block index.
  void Leave(uint32 basic_block_id);

  // Push a basic block id in the basic block ids buffer, to be processed later.
  // The current last basic block id is stored alongside it.
  // @param basic_block_id the basic block index.
  // @returns true when the buffer is full and there is no room for another
  //     entry, false otherwise.
  bool Push(uint32 basic_block_id);

  // Flush pending values in the basic block ids buffer: Enter() is applied to
  // each queued entry, in order, and the buffer is emptied.
  void Flush();

  // Return the id of the most recent basic block executed.
  uint32 last_basic_block_id() { return last_basic_block_id_; }

  // Reset the most recent basic block executed to kInvalidBasicBlockId.
  void reset_last_basic_block_id();

  // Return the lock associated with 'frequency_data_' for atomic update.
  base::Lock* trace_lock() { return trace_lock_; }

  // For a given basic block id, returns the corresponding BBEntryFrequency.
  // @param basic_block_id the basic block index.
  // @returns the bbentry frequency entry for a given basic block id.
  BBEntryFrequency& GetBBEntryFrequency(uint32 basic_block_id);

  // For a given basic block id, returns the corresponding BranchFrequency.
  // @param basic_block_id the basic block index.
  // @returns the branch frequency entry for a given basic block id.
  BranchFrequency& GetBranchFrequency(uint32 basic_block_id);

  // Retrieve the indexed_frequency_data specific fields for this agent.
  // @returns module_data_ viewed as the extended, agent-specific structure.
  const BasicBlockIndexedFrequencyData* GetBasicBlockData() const {
    return
        reinterpret_cast<const BasicBlockIndexedFrequencyData*>(module_data_);
  }

 protected:
  // As a shortcut, this points to the beginning of the array of basic-block
  // entry frequency values. With tracing enabled, this is equivalent to:
  //     reinterpret_cast<uint32*>(this->trace_data->frequency_data)
  // If tracing is not enabled, this will be set to point to a static
  // allocation of IndexedFrequencyData::frequency_data.
  uint32* frequency_data_;  // Under trace_lock_.

  // Module information this thread state is gathering information on.
  const IndexedFrequencyData* module_data_;

  // Lock corresponding to 'frequency_data_'.
  base::Lock* trace_lock_;

  // Buffer used to queue basic block ids for later processing in batches.
  std::vector<BranchBufferEntry> basic_block_id_buffer_;

  // Current offset of the next available entry in the basic block id buffer.
  uint32 basic_block_id_buffer_offset_;

  // The branch predictor state (2-bit saturating counter), indexed by basic
  // block id modulo the cache size.
  std::vector<uint8> predictor_data_;

  // The last basic block id executed, or kInvalidBasicBlockId.
  uint32 last_basic_block_id_;

 private:
  DISALLOW_COPY_AND_ASSIGN(ThreadState);
};
486 :
// Builds a thread state that counts into @p frequency_data on behalf of
// @p module_data, guarded by @p lock. The id buffer and the predictor cache
// start out empty, and there is no last basic block yet.
BasicBlockEntry::ThreadState::ThreadState(IndexedFrequencyData* module_data,
                                          base::Lock* lock,
                                          void* frequency_data)
    : frequency_data_(static_cast<uint32*>(frequency_data)),
      module_data_(module_data),
      trace_lock_(lock),
      basic_block_id_buffer_offset_(0),
      last_basic_block_id_(kInvalidBasicBlockId) {
}
496 :
497 E : BasicBlockEntry::ThreadState::~ThreadState() {
498 E : if (!basic_block_id_buffer_.empty())
499 E : Flush();
500 :
501 E : uint32 slot = GetBasicBlockData()->fs_slot;
502 E : if (slot != 0) {
503 E : uint32 address = kUserApplicationSlot + 4 * (slot - 1);
504 E : __writefsdword(address, 0);
505 : }
506 E : }
507 :
508 E : void BasicBlockEntry::ThreadState::AllocateBasicBlockIdBuffer() {
509 E : DCHECK(basic_block_id_buffer_.empty());
510 E : basic_block_id_buffer_.resize(kBufferSize * sizeof(BranchBufferEntry));
511 E : }
512 :
// Allocates the table of simulated branch predictor states, one byte per
// cache entry. Enter() skips the predictor simulation while this table is
// empty.
void BasicBlockEntry::ThreadState::AllocatePredictorCache() {
  DCHECK(predictor_data_.empty());
  predictor_data_.resize(kPredictorCacheSize);
}
517 :
// Forgets the last executed basic block, so that the next Enter() or Push()
// sees no originating block.
void BasicBlockEntry::ThreadState::reset_last_basic_block_id() {
  last_basic_block_id_ = kInvalidBasicBlockId;
}
521 :
522 : BBEntryFrequency& BasicBlockEntry::ThreadState::GetBBEntryFrequency(
523 E : uint32 basic_block_id) {
524 E : DCHECK(frequency_data_ != NULL);
525 : BBEntryFrequency* frequencies =
526 E : reinterpret_cast<BBEntryFrequency*>(frequency_data_);
527 E : BBEntryFrequency& entry = frequencies[basic_block_id];
528 E : return entry;
529 E : }
530 :
531 : BranchFrequency& BasicBlockEntry::ThreadState::GetBranchFrequency(
532 E : uint32 basic_block_id) {
533 E : DCHECK(frequency_data_ != NULL);
534 : BranchFrequency* frequencies =
535 E : reinterpret_cast<BranchFrequency*>(frequency_data_);
536 E : BranchFrequency& entry = frequencies[basic_block_id];
537 E : return entry;
538 E : }
539 :
540 E : inline void BasicBlockEntry::ThreadState::Increment(uint32 basic_block_id) {
541 E : DCHECK(frequency_data_ != NULL);
542 E : DCHECK(module_data_ != NULL);
543 E : DCHECK_LT(basic_block_id, module_data_->num_entries);
544 :
545 : // Retrieve information for the basic block.
546 E : BBEntryFrequency& entry = GetBBEntryFrequency(basic_block_id);
547 E : entry.frequency = IncrementAndSaturate(entry.frequency);
548 E : }
549 :
550 : void BasicBlockEntry::ThreadState::Enter(
551 E : uint32 basic_block_id, uint32 last_basic_block_id) {
552 E : DCHECK(frequency_data_ != NULL);
553 E : DCHECK(module_data_ != NULL);
554 E : DCHECK_LT(basic_block_id, module_data_->num_entries);
555 :
556 : // Retrieve information for the current basic block.
557 E : BranchFrequency& current = GetBranchFrequency(basic_block_id);
558 :
559 : // Count the execution of this basic block.
560 E : if (current.frequency != kInvalidBasicBlockId)
561 E : current.frequency = IncrementAndSaturate(current.frequency);
562 :
563 : // Check if entering from a jump or something else (call).
564 E : if (last_basic_block_id == kInvalidBasicBlockId)
565 E : return;
566 :
567 : // Retrieve information for the previous basic block.
568 E : BranchFrequency& previous = GetBranchFrequency(last_basic_block_id);
569 :
570 : // If last jump was taken, count the branch taken in the previous basic block.
571 E : bool taken = (basic_block_id != last_basic_block_id + 1);
572 E : if (taken) {
573 E : if (previous.branch_taken != kInvalidBasicBlockId)
574 E : previous.branch_taken = IncrementAndSaturate(previous.branch_taken);
575 : }
576 :
577 : // Simulate the branch predictor.
578 : // see: http://en.wikipedia.org/wiki/Branch_predictor
579 : // states:
580 : // 0: Strongly not taken
581 : // 1: Weakly not taken
582 : // 2: Weakly taken
583 : // 3: Strongly taken
584 E : if (predictor_data_.empty())
585 i : return;
586 E : DCHECK(predictor_data_.size() == kPredictorCacheSize);
587 E : if (last_basic_block_id != kInvalidBasicBlockId) {
588 E : size_t offset = last_basic_block_id % kPredictorCacheSize;
589 E : uint8& state = predictor_data_[offset];
590 E : if (taken) {
591 E : if (state < 2)
592 E : previous.mispredicted = IncrementAndSaturate(previous.mispredicted);
593 E : if (state < 3)
594 E : ++state;
595 E : } else {
596 E : if (state > 1)
597 E : previous.mispredicted = IncrementAndSaturate(previous.mispredicted);
598 E : if (state != 0)
599 E : --state;
600 : }
601 : }
602 E : }
603 :
// Records @p basic_block_id as the last basic block executed, so that the
// next block entered can tell where control came from. The id is range
// checked in debug builds only.
inline void BasicBlockEntry::ThreadState::Leave(uint32 basic_block_id) {
  DCHECK(module_data_ != NULL);
  DCHECK_LT(basic_block_id, module_data_->num_entries);

  last_basic_block_id_ = basic_block_id;
}
610 :
611 E : bool BasicBlockEntry::ThreadState::Push(uint32 basic_block_id) {
612 E : DCHECK(module_data_ != NULL);
613 E : DCHECK(basic_block_id < module_data_->num_entries);
614 :
615 E : uint32 last_offset = basic_block_id_buffer_offset_;
616 E : DCHECK_LT(last_offset, basic_block_id_buffer_.size());
617 :
618 E : BranchBufferEntry* entry = &basic_block_id_buffer_[last_offset];
619 E : entry->basic_block_id = basic_block_id;
620 E : entry->last_basic_block_id = last_basic_block_id_;
621 :
622 E : ++basic_block_id_buffer_offset_;
623 :
624 E : return basic_block_id_buffer_offset_ == kBufferSize;
625 E : }
626 :
627 E : void BasicBlockEntry::ThreadState::Flush() {
628 E : uint32 last_offset = basic_block_id_buffer_offset_;
629 :
630 E : for (size_t offset = 0; offset < last_offset; ++offset) {
631 E : BranchBufferEntry* entry = &basic_block_id_buffer_[offset];
632 E : Enter(entry->basic_block_id, entry->last_basic_block_id);
633 E : }
634 :
635 : // Reset buffer.
636 E : basic_block_id_buffer_offset_ = 0;
637 E : }
638 :
// Returns the process-wide BasicBlockEntry held by static_bbentry_instance.
BasicBlockEntry* BasicBlockEntry::Instance() {
  return static_bbentry_instance.Pointer();
}
642 :
// Value-initializes the registered slots and sets up the RPC session (and
// its initial segment) with the call-trace service. The outcome of the
// session initialization is not checked here; CreateThreadState() consults
// session_.IsDisabled() later.
BasicBlockEntry::BasicBlockEntry() : registered_slots_() {
  // Create a session.
  trace::client::InitializeRpcSession(&session_, &segment_);
}
647 :
// Nothing to release explicitly; members clean up after themselves.
BasicBlockEntry::~BasicBlockEntry() {
}
650 :
651 E : bool BasicBlockEntry::InitializeFrequencyData(IndexedFrequencyData* data) {
652 E : DCHECK(data != NULL);
653 :
654 : // Nothing to allocate? We're done!
655 E : if (data->num_entries == 0) {
656 i : LOG(WARNING) << "Module contains no instrumented basic blocks, not "
657 : << "allocating data segment.";
658 i : return true;
659 : }
660 :
661 : // Determine the size of the basic block frequency table.
662 E : DCHECK_LT(0U, data->frequency_size);
663 E : DCHECK_LT(0U, data->num_columns);
664 : size_t data_size = data->num_entries * data->frequency_size *
665 E : data->num_columns;
666 :
667 : // Determine the size of the basic block frequency record.
668 E : size_t record_size = sizeof(TraceIndexedFrequencyData) + data_size - 1;
669 :
670 : // Determine the size of the buffer we need. We need room for the basic block
671 : // frequency struct plus a single RecordPrefix header.
672 E : size_t segment_size = sizeof(RecordPrefix) + record_size;
673 :
674 : // Allocate the actual segment for the frequency data.
675 E : if (!session_.AllocateBuffer(segment_size, &segment_)) {
676 i : LOG(ERROR) << "Failed to allocate frequency data segment.";
677 i : return false;
678 : }
679 :
680 : // Ensure it's big enough to allocate the basic-block frequency data we want.
681 : // This automatically accounts for the RecordPrefix overhead.
682 E : if (!segment_.CanAllocate(record_size)) {
683 i : LOG(ERROR) << "Returned frequency data segment smaller than expected.";
684 i : return false;
685 : }
686 :
687 : // Allocate the basic-block frequency data. We will leave this allocated and
688 : // let it get flushed during tear-down of the call-trace client.
689 : TraceIndexedFrequencyData* trace_data =
690 : reinterpret_cast<TraceIndexedFrequencyData*>(
691 : segment_.AllocateTraceRecordImpl(TRACE_INDEXED_FREQUENCY,
692 E : record_size));
693 E : DCHECK(trace_data != NULL);
694 :
695 : // Initialize the basic block frequency data struct.
696 E : HMODULE module = GetModuleForAddr(data);
697 E : CHECK(module != NULL);
698 E : const base::win::PEImage image(module);
699 E : const IMAGE_NT_HEADERS* nt_headers = image.GetNTHeaders();
700 E : trace_data->data_type = data->data_type;
701 E : trace_data->module_base_addr = reinterpret_cast<ModuleAddr>(image.module());
702 E : trace_data->module_base_size = nt_headers->OptionalHeader.SizeOfImage;
703 E : trace_data->module_checksum = nt_headers->OptionalHeader.CheckSum;
704 E : trace_data->module_time_date_stamp = nt_headers->FileHeader.TimeDateStamp;
705 E : trace_data->frequency_size = data->frequency_size;
706 E : trace_data->num_entries = data->num_entries;
707 E : trace_data->num_columns = data->num_columns;
708 :
709 : // Hook up the newly allocated buffer to the call-trace instrumentation.
710 : data->frequency_data =
711 E : reinterpret_cast<uint32*>(&trace_data->frequency_data[0]);
712 :
713 E : return true;
714 E : }
715 :
716 : BasicBlockEntry::ThreadState* BasicBlockEntry::CreateThreadState(
717 E : IndexedFrequencyData* module_data) {
718 E : DCHECK(module_data != NULL);
719 E : CHECK_NE(IndexedFrequencyData::INVALID_DATA_TYPE, module_data->data_type);
720 :
721 : // Get a pointer to the extended indexed frequency data.
722 : BasicBlockIndexedFrequencyData* basicblock_data =
723 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
724 :
725 : // Create the thread-local state for this thread. By default, just point the
726 : // counter array to the statically allocated fall-back area.
727 : ThreadState* state =
728 E : new ThreadState(module_data, &lock_, module_data->frequency_data);
729 E : CHECK(state != NULL);
730 :
731 : // Register the thread state with the thread state manager.
732 E : thread_state_manager_.Register(state);
733 :
734 : // Store the thread state in the TLS slot.
735 E : DCHECK_NE(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
736 E : ::TlsSetValue(basicblock_data->tls_index, state);
737 :
738 : // If we're not actually tracing, then we're done.
739 E : if (session_.IsDisabled())
740 E : return state;
741 :
742 E : uint32 slot = basicblock_data->fs_slot;
743 E : if (slot != 0) {
744 E : uint32 address = kUserApplicationSlot + 4 * (slot - 1);
745 : // Sanity check: The slot must be available (not used by an other tool).
746 E : DWORD content = __readfsdword(address);
747 E : CHECK_EQ(content, 0U);
748 : // Put the current state to the TLS slot.
749 E : __writefsdword(address, reinterpret_cast<unsigned long>(state));
750 : }
751 :
752 : // Nothing to allocate? We're done!
753 E : if (module_data->num_entries == 0) {
754 i : LOG(WARNING) << "Module contains no instrumented basic blocks.";
755 i : return state;
756 : }
757 :
758 : // Allocate space used by branch instrumentation.
759 E : if (module_data->data_type == ::common::IndexedFrequencyData::BRANCH)
760 E : state->AllocatePredictorCache();
761 :
762 : // Allocate buffer to which basic block id are pushed before being committed.
763 E : state->AllocateBasicBlockIdBuffer();
764 :
765 E : return state;
766 E : }
767 :
768 : inline BasicBlockEntry::ThreadState* BasicBlockEntry::GetThreadState(
769 E : IndexedFrequencyData* module_data) {
770 E : DCHECK(module_data != NULL);
771 E : ScopedLastErrorKeeper scoped_last_error_keeper;
772 :
773 : // Get a pointer to the extended indexed frequency data.
774 : BasicBlockIndexedFrequencyData* basicblock_data =
775 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
776 :
777 E : DWORD tls_index = basicblock_data->tls_index;
778 E : DCHECK_NE(TLS_OUT_OF_INDEXES, tls_index);
779 E : ThreadState* state = static_cast<ThreadState*>(::TlsGetValue(tls_index));
780 E : return state;
781 E : }
782 :
783 : template<int S>
784 E : inline BasicBlockEntry::ThreadState* BasicBlockEntry::GetThreadStateSlot() {
785 E : uint32 address = kUserApplicationSlot + 4 * (S - 1);
786 E : DWORD content = __readfsdword(address);
787 E : return reinterpret_cast<BasicBlockEntry::ThreadState*>(content);
788 E : }
789 :
// Hook invoked for basic-block entry counting. |entry_frame| carries the
// module's frequency data and the index of the entry being hit.
// Looks up the calling thread's ThreadState, creating it on first use, and
// then calls ThreadState::Increment(index) while holding the state's trace
// lock.
790 : void WINAPI BasicBlockEntry::IncrementIndexedFreqDataHook(
791 E : IncrementIndexedFreqDataFrame* entry_frame) {
792 E : DCHECK(entry_frame != NULL);
793 E : DCHECK(entry_frame->module_data != NULL);
// The index must be within the module's declared number of entries.
794 : DCHECK_GT(entry_frame->module_data->num_entries,
795 E : entry_frame->index);
796 :
797 E : ThreadState* state = GetThreadState(entry_frame->module_data);
798 E : if (state == NULL) {
// First hit on this thread: create the per-thread state lazily.
799 E : ScopedLastErrorKeeper scoped_last_error_keeper;
800 E : state = Instance()->CreateThreadState(entry_frame->module_data);
801 E : }
802 :
803 E : base::AutoLock scoped_lock(*state->trace_lock());
804 E : state->Increment(entry_frame->index);
805 E : }
806 :
// Hook invoked on entry to a basic block under branch instrumentation.
// Looks up the calling thread's ThreadState (creating it on first use) and,
// while holding the state's trace lock, calls
// ThreadState::Enter(index, last_basic_block_id) followed by
// reset_last_basic_block_id().
807 : void WINAPI BasicBlockEntry::BranchEnterHook(
808 E : IncrementIndexedFreqDataFrame* entry_frame) {
809 E : DCHECK(entry_frame != NULL);
810 E : DCHECK(entry_frame->module_data != NULL);
// The index must be within the module's declared number of entries.
811 : DCHECK_GT(entry_frame->module_data->num_entries,
812 E : entry_frame->index);
813 E : ThreadState* state = GetThreadState(entry_frame->module_data);
814 E : if (state == NULL) {
// First hit on this thread: create the per-thread state lazily.
815 E : ScopedLastErrorKeeper scoped_last_error_keeper;
816 E : state = Instance()->CreateThreadState(entry_frame->module_data);
817 E : }
818 :
819 E : base::AutoLock scoped_lock(*state->trace_lock());
// The previously recorded block id is passed to Enter() and then cleared,
// so it is consumed by at most one entry.
820 E : uint32 last_basic_block_id = state->last_basic_block_id();
821 E : state->Enter(entry_frame->index, last_basic_block_id);
822 E : state->reset_last_basic_block_id();
823 E : }
824 :
// Buffered variant of the branch-entry hook. Instead of processing the entry
// immediately, the block index is handed to ThreadState::Push(); only when
// Push() returns true is the trace lock taken and ThreadState::Flush()
// called. The last basic block id is reset on every call.
825 : void WINAPI BasicBlockEntry::BranchEnterBufferedHook(
826 E : IncrementIndexedFreqDataFrame* entry_frame) {
827 E : DCHECK(entry_frame != NULL);
828 E : DCHECK(entry_frame->module_data != NULL);
// The index must be within the module's declared number of entries.
829 : DCHECK_GT(entry_frame->module_data->num_entries,
830 E : entry_frame->index);
831 E : ThreadState* state = GetThreadState(entry_frame->module_data);
832 E : if (state == NULL) {
// First hit on this thread: create the per-thread state lazily.
833 E : ScopedLastErrorKeeper scoped_last_error_keeper;
834 E : state = Instance()->CreateThreadState(entry_frame->module_data);
835 E : }
836 :
// Push() runs without the trace lock; the lock is only held for Flush().
// NOTE(review): Push() presumably returns true once the id buffer is full -
// confirm against ThreadState::Push.
837 E : if (state->Push(entry_frame->index)) {
838 E : base::AutoLock scoped_lock(*state->trace_lock());
839 E : state->Flush();
840 E : }
841 E : state->reset_last_basic_block_id();
842 E : }
843 :
// Function-entry hook for modules using fast-path slot S. Ensures that a
// ThreadState exists for the calling thread: if slot S already holds a
// non-NULL pointer nothing is done; otherwise the state is looked up through
// the module's TLS index and created if still missing.
// NOTE(review): this function never writes slot S itself and discards the
// pointer it obtains; presumably CreateThreadState() is what populates the
// slot - confirm.
844 : template<int S>
845 : void __fastcall BasicBlockEntry::FunctionEnterHookSlot(
846 E : IndexedFrequencyData* module_data) {
847 E : DCHECK(module_data != NULL);
848 :
849 : // Check if ThreadState is already created.
850 E : ThreadState* state = GetThreadStateSlot<S>();
851 E : if (state != NULL)
852 E : return;
853 :
854 : // Get or create the ThreadState.
855 E : state = GetThreadState(module_data);
856 E : if (state == NULL) {
857 E : ScopedLastErrorKeeper scoped_last_error_keeper;
858 E : state = Instance()->CreateThreadState(module_data);
859 E : }
860 E : }
861 :
// Slot-based branch-entry hook: takes the basic block |index| directly and
// fetches the ThreadState from fast-path slot S. Unlike BranchEnterHook, it
// never creates the state; if the slot is empty the event is dropped.
// Otherwise, under the state's trace lock, it calls
// Enter(index, last_basic_block_id) and then reset_last_basic_block_id().
862 : template<int S>
863 E : void __fastcall BasicBlockEntry::BranchEnterHookSlot(uint32 index) {
864 E : ThreadState* state = GetThreadStateSlot<S>();
865 E : if (state == NULL)
866 i : return;
867 :
868 E : base::AutoLock scoped_lock(*state->trace_lock());
869 E : uint32 last_basic_block_id = state->last_basic_block_id();
870 E : state->Enter(index, last_basic_block_id);
871 E : state->reset_last_basic_block_id();
872 E : }
873 :
// Slot-based, buffered branch-entry hook: fetches the ThreadState from
// fast-path slot S and returns immediately if the slot is empty. Otherwise
// |index| is handed to ThreadState::Push(); when Push() returns true the
// trace lock is taken and Flush() is called. The last basic block id is then
// reset.
874 : template<int S>
875 E : void __fastcall BasicBlockEntry::BranchEnterBufferedHookSlot(uint32 index) {
876 E : ThreadState* state = GetThreadStateSlot<S>();
877 E : if (state == NULL)
878 i : return;
879 :
// Push() runs without the trace lock; the lock is only held for Flush().
880 E : if (state->Push(index)) {
881 E : base::AutoLock scoped_lock(*state->trace_lock());
882 E : state->Flush();
883 E : }
884 E : state->reset_last_basic_block_id();
885 E : }
886 :
// Slot-based branch-exit hook: fetches the ThreadState from fast-path slot S
// and calls ThreadState::Leave(index) on it. Does nothing if the slot is
// empty. No lock is taken here.
887 : template<int S>
888 E : void __fastcall BasicBlockEntry::BranchExitHookSlot(uint32 index) {
889 E : ThreadState* state = GetThreadStateSlot<S>();
890 E : if (state == NULL)
891 i : return;
892 :
893 E : state->Leave(index);
894 E : }
895 :
// Hook invoked when leaving a basic block under branch instrumentation.
// Calls ThreadState::Leave(index) on the calling thread's state. Unlike the
// entry hooks it never creates the state: if the thread has none, the call
// is a no-op. No lock is taken here.
896 : inline void WINAPI BasicBlockEntry::BranchExitHook(
897 E : IncrementIndexedFreqDataFrame* entry_frame) {
898 E : DCHECK(entry_frame != NULL);
899 E : DCHECK(entry_frame->module_data != NULL);
// The index must be within the module's declared number of entries.
900 : DCHECK_GT(entry_frame->module_data->num_entries,
901 E : entry_frame->index);
902 :
903 E : ThreadState* state = GetThreadState(entry_frame->module_data);
904 E : if (state == NULL)
905 i : return;
906 :
907 E : state->Leave(entry_frame->index);
908 E : }
909 :
// Hook for the DllMain entry point of an instrumented DLL. Dispatches on
// |entry_frame->reason|:
//   DLL_PROCESS_ATTACH                     -> OnProcessAttach(module_data)
//   DLL_THREAD_ATTACH                      -> ignored (see below)
//   DLL_PROCESS_DETACH / DLL_THREAD_DETACH -> OnThreadDetach(module_data)
// Any other reason hits NOTREACHED().
910 E : void WINAPI BasicBlockEntry::DllMainEntryHook(DllMainEntryFrame* entry_frame) {
911 E : ScopedLastErrorKeeper scoped_last_error_keeper;
912 E : DCHECK(entry_frame != NULL);
913 E : switch (entry_frame->reason) {
914 : case DLL_PROCESS_ATTACH:
915 E : Instance()->OnProcessAttach(entry_frame->module_data);
916 E : break;
917 :
918 : case DLL_THREAD_ATTACH:
919 : // We don't handle this event because the thread may never actually
920 : // call into an instrumented module, so we don't want to allocate
921 : // resources needlessly. Further, we won't get this event for threads
922 : // that were created before the agent was loaded. On first use of
923 : // an instrumented basic-block in a given thread, any thread specific
924 : // resources will be allocated.
925 E : break;
926 :
// Process detach is handled exactly like a detach of the calling thread.
927 : case DLL_PROCESS_DETACH:
928 : case DLL_THREAD_DETACH:
929 E : Instance()->OnThreadDetach(entry_frame->module_data);
930 E : break;
931 :
932 : default:
933 i : NOTREACHED();
934 : }
935 E : }
936 :
// Hook for the entry point of an instrumented executable. It has no reason
// code to dispatch on, so it always forwards to
// OnProcessAttach(entry_frame->module_data).
937 E : void WINAPI BasicBlockEntry::ExeMainEntryHook(ExeMainEntryFrame* entry_frame) {
938 E : ScopedLastErrorKeeper scoped_last_error_keeper;
939 E : DCHECK(entry_frame != NULL);
940 E : Instance()->OnProcessAttach(entry_frame->module_data);
941 E : }
942 :
// Logs the module that contains |addr| to the trace session. A buffer
// segment is allocated from |session_|, the module is resolved with
// GetModuleForAddr() and logged into that segment, and the segment is then
// returned to the session. Every step is wrapped in CHECK, so no failure is
// reported back to the caller.
943 E : void BasicBlockEntry::RegisterModule(const void* addr) {
944 E : DCHECK(addr != NULL);
945 :
946 : // Allocate a segment for the module information.
947 E : trace::client::TraceFileSegment module_info_segment;
948 E : CHECK(session_.AllocateBuffer(&module_info_segment));
949 :
950 : // Log the module. This is required in order to associate basic-block
951 : // frequency with a module and PDB file during post-processing.
952 E : HMODULE module = GetModuleForAddr(addr);
953 E : CHECK(module != NULL);
954 E : CHECK(agent::common::LogModule(module, &session_, &module_info_segment));
955 :
956 : // Commit the module information.
957 E : CHECK(session_.ReturnBuffer(&module_info_segment));
958 E : }
959 :
// Marks fast-path slot |slot| as in use by setting bit (1 << slot) in
// |registered_slots_|. |slot| is 1-based and must not exceed kNumSlots
// (both enforced with DCHECKs only). Registering a slot that is already
// marked fails the CHECK. |module_data| is only null-checked; it is not
// otherwise used here.
960 : void BasicBlockEntry::RegisterFastPathSlot(
961 E : IndexedFrequencyData* module_data, unsigned int slot) {
962 E : DCHECK_NE(slot, 0U);
963 E : DCHECK_LE(slot, kNumSlots);
964 E : DCHECK(module_data != NULL);
965 :
966 : // The slot must not have been registered.
967 E : CHECK_EQ((1 << slot) & registered_slots_, 0U);
968 E : registered_slots_ |= (1 << slot);
969 E : }
970 :
// Inverse of RegisterFastPathSlot: clears bit (1 << slot) in
// |registered_slots_|. |slot| is 1-based and must not exceed kNumSlots
// (both enforced with DCHECKs only). Unregistering a slot that is not
// currently marked fails the CHECK. |module_data| is only null-checked; it
// is not otherwise used here.
971 : void BasicBlockEntry::UnregisterFastPathSlot(
972 : IndexedFrequencyData* module_data, unsigned int slot) {
973 : DCHECK_NE(slot, 0U);
974 : DCHECK_LE(slot, kNumSlots);
975 : DCHECK(module_data != NULL);
976 :
977 : // The slot must be registered.
978 : CHECK_NE((1 << slot) & registered_slots_, 0U);
979 : registered_slots_ &= ~(1 << slot);
980 : }
981 :
// One-time, per-module initialization, reached from DllMainEntryHook
// (DLL_PROCESS_ATTACH) and ExeMainEntryHook.
//
// In order, this:
//   1. CHECKs the agent id and the data type/version/layout of
//      |module_data|. These checks run on every call, including repeats.
//   2. Returns immediately if initialization was already attempted.
//   3. Allocates the module's TLS index and registers its FS slot, if any.
//   4. If the trace session is enabled, initializes the frequency data and
//      logs the module to the session.
// Steps 2 and 4 can return early, so the TLS index can be allocated without
// the frequency data ever being initialized.
982 E : void BasicBlockEntry::OnProcessAttach(IndexedFrequencyData* module_data) {
983 E : DCHECK(module_data != NULL);
984 :
985 : // Get a pointer to the extended indexed frequency data.
986 : BasicBlockIndexedFrequencyData* basicblock_data =
987 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
988 :
989 : // The agent id (magic number) must match; a mismatch fails the CHECK.
990 E : CHECK_EQ(::common::kBasicBlockEntryAgentId, module_data->agent_id);
991 :
992 : // The data type, version and layout must be valid; else the CHECK fails.
993 : CHECK(DatatypeVersionIsValid(module_data->data_type,
994 : module_data->agent_id,
995 : module_data->version,
996 : module_data->frequency_size,
997 E : module_data->num_columns));
998 :
999 : // We allow for this hook to be called multiple times. We expect the first
1000 : // time to occur under the loader lock, so we don't need to worry about
1001 : // concurrency for this check.
1002 E : if (module_data->initialization_attempted)
1003 i : return;
1004 :
1005 : // Flag that initialization was attempted, even if a later step bails out.
1006 E : module_data->initialization_attempted = 1U;
1007 :
1008 : // We expect this to be executed exactly once for each module.
1009 E : CHECK_EQ(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
1010 E : basicblock_data->tls_index = ::TlsAlloc();
1011 E : CHECK_NE(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
1012 :
1013 : // If there is a FS slot configured, register it.
1014 E : if (basicblock_data->fs_slot != 0)
1015 E : RegisterFastPathSlot(module_data, basicblock_data->fs_slot);
1016 :
1017 : // Register this module with the call_trace if the session is not disabled.
1018 : // Note that we expect module_data to be statically defined within the
1019 : // module of interest, so we can use its address to lookup the module.
1020 E : if (session_.IsDisabled()) {
1021 E : LOG(WARNING) << "Unable to initialize client as we are not tracing.";
1022 E : return;
1023 : }
1024 :
1025 E : if (!InitializeFrequencyData(module_data)) {
1026 i : LOG(ERROR) << "Failed to initialize frequency data.";
1027 i : return;
1028 : }
1029 :
1030 E : RegisterModule(module_data);
1031 :
1032 E : LOG(INFO) << "BBEntry client initialized.";
1033 E : }
1034 :
// Per-thread teardown, reached from DllMainEntryHook for both
// DLL_THREAD_DETACH and DLL_PROCESS_DETACH. If the calling thread has a
// ThreadState for this module, it is flushed and handed to
// |thread_state_manager_| via MarkForDeath(). Threads that never created a
// state are ignored.
// NOTE(review): Flush() is called here without holding the state's trace
// lock, whereas the buffered entry hooks take the lock around Flush() -
// confirm this is intentional.
1035 E : void BasicBlockEntry::OnThreadDetach(IndexedFrequencyData* module_data) {
1036 E : DCHECK(module_data != NULL);
1037 E : DCHECK_EQ(1U, module_data->initialization_attempted);
1038 :
1039 : // Get a pointer to the extended indexed frequency data.
1040 : BasicBlockIndexedFrequencyData* basicblock_data =
1041 E : reinterpret_cast<BasicBlockIndexedFrequencyData*>(module_data);
1042 :
1043 E : DCHECK_NE(TLS_OUT_OF_INDEXES, basicblock_data->tls_index);
1044 :
1045 E : ThreadState* state = GetThreadState(module_data);
1046 E : if (state == NULL)
1047 E : return;
1048 :
1049 E : state->Flush();
1050 E : thread_state_manager_.MarkForDeath(state);
1051 E : }
1052 :
1053 : } // namespace basic_block_entry
1054 : } // namespace agent
|