1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the profiler DLL.
16 : #include "syzygy/agent/profiler/profiler.h"
17 :
18 : #include <windows.h>
19 : #include <algorithm>
20 :
21 : #include "base/at_exit.h"
22 : #include "base/bind.h"
23 : #include "base/command_line.h"
24 : #include "base/environment.h"
25 : #include "base/logging.h"
26 : #include "base/memory/scoped_ptr.h"
27 : #include "base/string_util.h"
28 : #include "base/utf_string_conversions.h"
29 : #include "base/win/pe_image.h"
30 : #include "base/win/scoped_handle.h"
31 : #include "syzygy/agent/common/dlist.h"
32 : #include "syzygy/agent/common/process_utils.h"
33 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
34 : #include "syzygy/agent/profiler/return_thunk_factory.h"
35 : #include "syzygy/common/logging.h"
36 : #include "syzygy/trace/client/client_utils.h"
37 : #include "syzygy/trace/protocol/call_trace_defs.h"
38 :
39 : namespace {
40 :
41 : using agent::common::ScopedLastErrorKeeper;
42 :
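// An invocation is keyed on the pair (caller's return address, callee
// address); statistics are aggregated per unique pair.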
43 : typedef std::pair<RetAddr, FuncAddr> InvocationKey;
44 :
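// Hash functor for InvocationKey in the style of MSVC's hash_compare: it
// supplies the bucket traits, a hash (the XOR of the two addresses) and an
// ordering predicate.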
45 : class HashInvocationKey {
46 : public:
47 : static const size_t bucket_size = 4;
48 : static const size_t min_buckets = 8;
49 :
50 E : size_t operator()(const InvocationKey& key) const {
51 : return reinterpret_cast<size_t>(key.first) ^
52 E : reinterpret_cast<size_t>(key.second);
53 E : }
54 :
55 E : bool operator()(const InvocationKey& a, const InvocationKey& b) const {
56 E : return a < b;
57 E : }
58 : };
59 :
60 : using agent::profiler::SymbolMap;
61 :
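// The data we cache per invocation. The move counts allow RecordInvocation
// to detect when a dynamic symbol has moved since this entry was cached, in
// which case the entry is discarded and rebuilt.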
62 : struct InvocationValue {
63 : // This invocation entry's caller's dynamic symbol, if any.
64 : scoped_refptr<SymbolMap::Symbol> caller_symbol;
65 : // The last observed move count for caller_symbol.
66 : int32 caller_move_count;
67 :
68 : // This invocation entry's callee's dynamic symbol, if any.
69 : scoped_refptr<SymbolMap::Symbol> function_symbol;
70 : // The last observed move count for function_symbol.
71 : int32 function_move_count;
72 :
73 : // Points to the trace buffer entry for the respective function.
74 : InvocationInfo* info;
75 : };
76 :
77 : typedef base::hash_map<
78 : InvocationKey, InvocationValue, HashInvocationKey> InvocationMap;
79 :
80 :
81 : // The information on how to set the thread name comes from
82 : // an MSDN article: http://msdn2.microsoft.com/en-us/library/xcb2z8hs.aspx
83 : const DWORD kVCThreadNameException = 0x406D1388;
84 :
85 : typedef struct tagTHREADNAME_INFO {
86 : DWORD dwType; // Must be 0x1000.
87 : LPCSTR szName; // Pointer to name (in user addr space).
88 : DWORD dwThreadID; // Thread ID (-1=caller thread).
89 : DWORD dwFlags; // Reserved for future use, must be zero.
90 : } THREADNAME_INFO;
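// A program names a thread by raising this exception with a THREADNAME_INFO
// record in the exception arguments; ExceptionHandler below recovers the name
// from there.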
91 :
92 : } // namespace
93 :
94 : // See client.cc for a description of the unconventional
95 : // calling conventions for this function.
96 i : extern "C" void __declspec(naked) _indirect_penter() {
97 : __asm {
98 : // Stash volatile registers.
99 i : push eax
100 i : push edx
101 :
102 : // Get the current cycle time ASAP.
103 i : rdtsc
104 :
105 : // Save the value of eax for later; we need the register to stash the flags.
106 i : push ecx
107 i : mov ecx, eax
108 :
109 : // Save the low byte of the flags into AH.
110 i : lahf
111 : // Save the overflow flag into AL.
112 i : seto al
113 :
114 : // Stash the flags to stack.
115 i : push eax
116 :
117 : // Push the cycle time arg.
118 i : push edx
119 i : push ecx
120 :
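// Stack layout at this point: we have pushed 0x18 bytes of state (eax, edx,
// ecx, the saved flags and the two cycle-count dwords). [esp + 0x18] is thus
// the function address pushed by our caller, and [esp + 0x1C] is the slot
// holding the return address into the instrumented function's caller; after
// one more push below it sits at [esp + 0x20], and its address is passed as
// the EntryFrame argument.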
121 : // Retrieve the original function address, pushed by our caller.
122 i : mov eax, DWORD PTR[esp + 0x18]
123 i : push eax
124 :
125 : // Calculate the position of the return address on stack, and
126 : // push it. This becomes the EntryFrame argument.
127 i : lea eax, DWORD PTR[esp + 0x20]
128 i : push eax
129 i : call agent::profiler::Profiler::FunctionEntryHook
130 :
131 : // Restore volatile registers.
132 i : pop eax
133 : // AL is set to 1 if the overflow flag was set before the call to
134 : // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
135 : // flag.
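// (0x01 + 0x7f = 0x80 overflows a signed byte and sets OF, while
// 0x00 + 0x7f = 0x7f leaves it clear.)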
136 i : add al, 0x7f
137 : // Restore the low byte of the flags.
138 i : sahf
139 :
140 i : pop ecx
141 i : pop edx
142 i : pop eax
143 :
144 : // Return to the address pushed by our caller.
145 i : ret
146 : }
147 : }
148 :
149 i : extern "C" void __declspec(naked) _indirect_penter_dllmain() {
150 : __asm {
151 : // Stash volatile registers.
152 i : push eax
153 i : push edx
154 :
155 : // Get the current cycle time ASAP.
156 i : rdtsc
157 :
158 : // Save the value of eax for later; we need the register to stash the flags.
159 i : push ecx
160 i : mov ecx, eax
161 :
162 : // Save the low byte of the flags into AH.
163 i : lahf
164 : // Save the overflow flag into AL.
165 i : seto al
166 :
167 : // Stash the flags to stack.
168 i : push eax
169 :
170 : // Push the cycle time arg.
171 i : push edx
172 i : push ecx
173 :
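// As in _indirect_penter above, 0x18 bytes of saved state now sit below the
// function address pushed by our caller, and the caller's return-address slot
// ends up at [esp + 0x20] once the function address is re-pushed below; that
// slot's address is the EntryFrame passed to DllMainEntryHook.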
174 : // Retrieve the original function address, pushed by our caller.
175 i : mov eax, DWORD PTR[esp + 0x18]
176 i : push eax
177 :
178 : // Calculate the position of the return address on stack, and
179 : // push it. This becomes the EntryFrame argument.
180 i : lea eax, DWORD PTR[esp + 0x20]
181 i : push eax
182 i : call agent::profiler::Profiler::DllMainEntryHook
183 :
184 : // Restore volatile registers.
185 i : pop eax
186 : // AL is set to 1 if the overflow flag was set before the call to
187 : // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
188 : // flag.
189 i : add al, 0x7f
190 : // Restore the low byte of the flags.
191 i : sahf
192 :
193 i : pop ecx
194 i : pop edx
195 i : pop eax
196 :
197 : // Return to the address pushed by our caller.
198 i : ret
199 : }
200 : }
201 :
202 : // On entry, pc_location should point to a location on our own stack.
203 : extern "C" uintptr_t __cdecl ResolveReturnAddressLocation(
204 E : uintptr_t pc_location) {
205 : using agent::profiler::Profiler;
206 : return reinterpret_cast<uintptr_t>(
207 : Profiler::instance().ResolveReturnAddressLocation(
208 E : reinterpret_cast<RetAddr*>(pc_location)));
209 E : }
210 :
211 : // This function needs to match the declaration of FunctionEntryHook in the V8
212 : // API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
213 : extern "C" __declspec(naked) void __cdecl OnDynamicFunctionEntry(
214 i : uintptr_t function, uintptr_t return_addr_location) {
215 : __asm {
216 : // Grab the current time ASAP.
217 i : rdtsc
218 :
219 : // Push the cycle time arg.
220 i : push edx
221 i : push eax
222 :
223 : // Duplicate the function and return_addr_location arguments.
224 i : push DWORD PTR[esp + 0x10]
225 i : push DWORD PTR[esp + 0x10]
226 :
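// Both pushes above read [esp + 0x10]: the first re-pushes
// return_addr_location and, because the push itself grows the stack, the
// second picks up function. Together with the cycle count in edx:eax this
// forms the (function, return_addr_location, cycles) argument list.
// OnV8FunctionEntry is WINAPI (stdcall) and pops its own arguments, which is
// why a bare ret suffices below; our cdecl caller owns the original
// arguments.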
227 i : call agent::profiler::Profiler::OnV8FunctionEntry
228 :
229 i : ret
230 : }
231 : }
232 :
233 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
234 : using agent::profiler::Profiler;
235 :
236 : // Our AtExit manager required by base.
237 : static base::AtExitManager* at_exit = NULL;
238 :
239 E : switch (reason) {
240 : case DLL_PROCESS_ATTACH:
241 E : DCHECK(at_exit == NULL);
242 E : at_exit = new base::AtExitManager();
243 :
244 E : CommandLine::Init(0, NULL);
245 E : common::InitLoggingForDll(L"profiler");
246 E : break;
247 :
248 : case DLL_THREAD_DETACH:
249 E : Profiler::instance().OnThreadDetach();
250 E : break;
251 :
252 : case DLL_PROCESS_DETACH:
253 E : DCHECK(at_exit != NULL);
254 E : delete at_exit;
255 E : at_exit = NULL;
256 : break;
257 :
258 : default:
259 : break;
260 : }
261 :
262 E : return TRUE;
263 E : }
264 :
265 : void __cdecl AddDynamicSymbol(const void* address, size_t length,
266 E : const char* name, size_t name_len) {
267 : using agent::profiler::Profiler;
268 :
269 E : Profiler::instance().AddSymbol(address, length, name, name_len);
270 E : }
271 :
272 : void __cdecl MoveDynamicSymbol(const void* old_address,
273 E : const void* new_address) {
274 : using agent::profiler::Profiler;
275 :
276 E : Profiler::instance().MoveSymbol(old_address, new_address);
277 E : }
278 :
279 : namespace agent {
280 : namespace profiler {
281 :
282 : // All tracing runs through this object.
283 E : agent::profiler::Profiler Profiler::instance_;
284 :
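// Per-thread profiler state. Deriving from ReturnThunkFactoryImpl<ThreadState>
// lets the thunk factory call back into OnFunctionExit when a return thunk
// fires, and into OnPageAdded/OnPageRemoved as thunk pages come and go.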
285 : class Profiler::ThreadState
286 : : public ReturnThunkFactoryImpl<Profiler::ThreadState>,
287 : public agent::common::ThreadStateBase {
288 : public:
289 : explicit ThreadState(Profiler* profiler);
290 : ~ThreadState();
291 :
292 : // Logs @p module and all other modules in the process, then flushes
293 : // the current trace buffer.
294 : void LogAllModules(HMODULE module);
295 :
296 : // Logs @p module.
297 : void LogModule(HMODULE module);
298 :
299 : // Logs @p thread_name as the current thread's name.
300 : void LogThreadName(const base::StringPiece& thread_name);
301 :
302 : // Logs @p symbol into the trace.
303 : void LogSymbol(SymbolMap::Symbol* symbol);
304 :
305 : // Processes a single function entry.
306 : void OnFunctionEntry(EntryFrame* entry_frame,
307 : FuncAddr function,
308 : uint64 cycles);
309 :
310 : // Processes a single V8 function entry.
311 : void OnV8FunctionEntry(FuncAddr function,
312 : RetAddr* return_address_location,
313 : uint64 cycles);
314 :
315 : // @name Callback notification implementation.
316 : // @{
317 : virtual void OnPageAdded(const void* page) OVERRIDE;
318 : virtual void OnPageRemoved(const void* page) OVERRIDE;
319 : // @}
320 :
321 : // Function exit hook.
322 : void OnFunctionExit(const ThunkData* data, uint64 cycles_exit);
323 :
324 E : trace::client::TraceFileSegment* segment() { return &segment_; }
325 :
326 : private:
327 : friend class Profiler;
328 :
329 : void RecordInvocation(RetAddr caller,
330 : FuncAddr function,
331 : uint64 cycles);
332 :
333 : void UpdateOverhead(uint64 entry_cycles);
334 : InvocationInfo* AllocateInvocationInfo();
335 : void ClearCache();
336 : bool FlushSegment();
337 :
338 : // The profiler we're attached to.
339 : Profiler* profiler_;
340 :
341 : // We keep a running tally of the rough amount of wall clock cycles spent
342 : // inside the profiler. We then subtract the profiler's overhead from the
343 : // wall clock cycle timer on each measurement. This results in a timer that
344 : // measures time exclusive of profiling overhead.
345 : uint64 cycles_overhead_;
346 :
347 : // The invocations we've recorded in our buffer.
348 : InvocationMap invocations_;
349 :
350 : // The trace file segment we're recording to.
351 : trace::client::TraceFileSegment segment_;
352 :
353 : // The current batch record we're writing to, if any.
354 : TraceBatchInvocationInfo* batch_;
355 :
356 : // The set of modules we've logged.
357 : ModuleSet logged_modules_;
358 : };
359 :
360 : Profiler::ThreadState::ThreadState(Profiler* profiler)
361 : : profiler_(profiler),
362 : cycles_overhead_(0LL),
363 E : batch_(NULL) {
364 E : Initialize();
365 E : }
366 :
367 E : Profiler::ThreadState::~ThreadState() {
368 E : ClearCache();
369 :
370 : // If we have an outstanding buffer, let's deallocate it now.
371 E : if (segment_.write_ptr != NULL)
372 E : profiler_->session_.ReturnBuffer(&segment_);
373 :
374 E : Uninitialize();
375 E : }
376 :
377 E : void Profiler::ThreadState::LogAllModules(HMODULE module) {
378 : // Bail early if we're disabled.
379 E : if (profiler_->session_.IsDisabled())
380 E : return;
381 :
382 E : agent::common::ModuleVector modules;
383 E : agent::common::GetProcessModules(&modules);
384 :
385 : // Our module should be in the process modules.
386 E : DCHECK(std::find(modules.begin(), modules.end(), module) != modules.end());
387 :
388 E : for (size_t i = 0; i < modules.size(); ++i) {
389 E : DCHECK(modules[i] != NULL);
390 E : LogModule(modules[i]);
391 E : }
392 :
393 : // We need to flush module events right away, so that the module is
394 : // defined in the trace file before events using that module start to
395 : // occur (in another thread).
396 E : FlushSegment();
397 E : }
398 :
399 E : void Profiler::ThreadState::LogModule(HMODULE module) {
400 : // This may flush our buffer, so let's clear our cache.
401 E : ClearCache();
402 E : agent::common::LogModule(module, &profiler_->session_, &segment_);
403 E : }
404 :
405 : void Profiler::ThreadState::LogThreadName(
406 E : const base::StringPiece& thread_name) {
407 E : if (thread_name.empty())
408 i : return;
409 :
410 : // Make sure the event we're about to write will fit.
411 E : if (!segment_.CanAllocate(thread_name.size() + 1) && !FlushSegment()) {
412 : // Failed to allocate a new segment.
413 i : return;
414 : }
415 :
416 E : DCHECK(segment_.CanAllocate(thread_name.size() + 1));
417 E : batch_ = NULL;
418 :
419 : // Allocate a record in the log.
420 : TraceThreadNameInfo* thread_name_event =
421 : reinterpret_cast<TraceThreadNameInfo*>(
422 : segment_.AllocateTraceRecordImpl(
423 E : TRACE_THREAD_NAME, thread_name.size() + 1));
424 E : DCHECK(thread_name_event != NULL);
425 : base::strlcpy(thread_name_event->thread_name,
426 E : thread_name.data(), thread_name.size() + 1);
427 E : }
428 :
429 E : void Profiler::ThreadState::LogSymbol(SymbolMap::Symbol* symbol) {
430 E : DCHECK(symbol != NULL);
431 E : DCHECK_NE(0, symbol->id());
432 :
433 : size_t symbol_size =
434 E : FIELD_OFFSET(TraceDynamicSymbol, symbol_name) + symbol->name().size() + 1;
435 :
436 E : if (!segment_.CanAllocate(symbol_size) && !FlushSegment()) {
437 : // Failed to allocate the symbol record.
438 i : return;
439 : }
440 :
441 E : DCHECK(segment_.CanAllocate(symbol_size));
442 E : batch_ = NULL;
443 :
444 : // Allocate a record in the log.
445 : TraceDynamicSymbol* dynamic_symbol_event =
446 : reinterpret_cast<TraceDynamicSymbol*>(
447 : segment_.AllocateTraceRecordImpl(
448 E : TRACE_DYNAMIC_SYMBOL, symbol_size));
449 E : DCHECK(dynamic_symbol_event != NULL);
450 E : dynamic_symbol_event->symbol_id = symbol->id();
451 : base::strlcpy(dynamic_symbol_event->symbol_name,
452 E : symbol->name().data(), symbol->name().size() + 1);
453 E : }
454 :
455 : void Profiler::ThreadState::OnFunctionEntry(EntryFrame* entry_frame,
456 : FuncAddr function,
457 E : uint64 cycles) {
458 E : if (profiler_->session_.IsDisabled())
459 E : return;
460 :
461 : // Record the details of the entry.
462 : // Note that on tail-recursion and tail-call elimination, the caller recorded
463 : // here will be a thunk. We cater for this case on exit as best we can.
464 E : ThunkData* data = MakeThunk(entry_frame->retaddr);
465 E : DCHECK(data != NULL);
466 E : data->caller = entry_frame->retaddr;
467 E : data->function = function;
468 E : data->cycles_entry = cycles - cycles_overhead_;
469 :
470 E : entry_frame->retaddr = data->thunk;
471 :
472 E : UpdateOverhead(cycles);
473 E : }
474 :
475 : void Profiler::ThreadState::OnV8FunctionEntry(FuncAddr function,
476 : RetAddr* return_address_location,
477 E : uint64 cycles) {
478 E : if (profiler_->session_.IsDisabled())
479 i : return;
480 :
481 : // Record the details of the entry.
482 :
483 : // TODO(siggi): Note that we want to do different exit processing here,
484 : // as we know "function" is a dynamic symbol - it might be better to
485 : // record the fact here than to force a lookup on RecordInvocation.
486 :
487 : // Note that on tail-recursion and tail-call elimination, the caller recorded
488 : // here will be a thunk. We cater for this case on exit as best we can.
489 E : ThunkData* data = MakeThunk(*return_address_location);
490 E : DCHECK(data != NULL);
491 E : data->caller = *return_address_location;
492 E : data->function = function;
493 E : data->cycles_entry = cycles - cycles_overhead_;
494 :
495 E : *return_address_location = data->thunk;
496 :
497 E : UpdateOverhead(cycles);
498 E : }
499 :
500 : void Profiler::ThreadState::OnFunctionExit(const ThunkData* data,
501 E : uint64 cycles_exit) {
502 : // Calculate the number of cycles in the invocation, exclusive of our overhead.
503 E : uint64 cycles_executed = cycles_exit - cycles_overhead_ - data->cycles_entry;
504 :
505 : // See if the return address resolves to a thunk, which indicates
506 : // tail recursion or tail call elimination. In that case we record the
507 : // calling function as caller, which isn't totally accurate as that'll
508 : // attribute the cost to the first line of the calling function. In the
509 : // absence of more information, it's the best we can do, however.
510 : // TODO(siggi): Move this into RecordInvocation, as we can elide the lookup
511 : // on a cache hit.
512 E : Thunk* ret_thunk = CastToThunk(data->caller);
513 E : if (ret_thunk == NULL) {
514 E : RecordInvocation(data->caller, data->function, cycles_executed);
515 E : } else {
516 E : ThunkData* ret_data = DataFromThunk(ret_thunk);
517 E : RecordInvocation(ret_data->function, data->function, cycles_executed);
518 : }
519 :
520 E : UpdateOverhead(cycles_exit);
521 E : }
522 :
523 E : void Profiler::ThreadState::OnPageAdded(const void* page) {
524 E : profiler_->OnPageAdded(page);
525 E : }
526 :
527 E : void Profiler::ThreadState::OnPageRemoved(const void* page) {
528 E : profiler_->OnPageRemoved(page);
529 E : }
530 :
531 : void Profiler::ThreadState::RecordInvocation(RetAddr caller,
532 : FuncAddr function,
533 E : uint64 duration_cycles) {
534 : // See whether we've already recorded an entry for this function.
535 E : InvocationKey key(caller, function);
536 E : InvocationMap::iterator it = invocations_.find(key);
537 E : if (it != invocations_.end()) {
538 : // Yup, we already have an entry, validate it.
539 E : InvocationValue& value = it->second;
540 :
541 : if ((value.caller_symbol == NULL ||
542 : value.caller_symbol->move_count() == value.caller_move_count) &&
543 : (value.function_symbol == NULL ||
544 E : value.function_symbol->move_count() == value.function_move_count)) {
545 : // The entry is still good, tally the new data.
546 E : ++(value.info->num_calls);
547 E : value.info->cycles_sum += duration_cycles;
548 E : if (duration_cycles < value.info->cycles_min) {
549 E : value.info->cycles_min = duration_cycles;
550 i : } else if (duration_cycles > value.info->cycles_max) {
551 i : value.info->cycles_max = duration_cycles;
552 : }
553 :
554 : // Early out on success.
555 E : return;
556 i : } else {
557 : // The entry is not valid any more, discard it.
558 E : DCHECK(value.caller_symbol != NULL || value.function_symbol != NULL);
559 :
560 E : invocations_.erase(it);
561 : }
562 : }
563 E : DCHECK(invocations_.find(key) == invocations_.end());
564 :
565 : // We don't have an entry, allocate a new one for this invocation.
566 : // The code below may touch last error.
567 E : ScopedLastErrorKeeper keep_last_error;
568 :
569 : scoped_refptr<SymbolMap::Symbol> caller_symbol =
570 E : profiler_->symbol_map_.FindSymbol(caller);
571 :
572 : // TODO(siggi): This can perhaps be optimized by keeping track of which
573 : // entry hook was invoked. This will however require setting an extra
574 : // bool on every entry, so will require measurement to see whether it's
575 : // a win.
576 : scoped_refptr<SymbolMap::Symbol> function_symbol =
577 E : profiler_->symbol_map_.FindSymbol(function);
578 :
579 : // Trace the symbols if this is the first time either one is observed.
580 E : if (caller_symbol != NULL && caller_symbol->EnsureHasId()) {
581 : // TODO(siggi): It might lead to worst-case behavior to log symbols into
582 : // the same trace buffer as we store invocations, as we're likely to
583 : // alternate symbols and single-entry invocation batches. Fixme.
584 E : LogSymbol(caller_symbol);
585 : }
586 E : if (function_symbol != NULL && function_symbol->EnsureHasId()) {
587 : // TODO(siggi): See above.
588 E : LogSymbol(function_symbol);
589 : }
590 :
591 E : InvocationInfo* info = AllocateInvocationInfo();
592 E : if (info != NULL) {
593 E : InvocationValue& value = invocations_[key];
594 E : value.info = info;
595 E : value.caller_symbol = caller_symbol;
596 E : if (caller_symbol != NULL)
597 E : value.caller_move_count = caller_symbol->move_count();
598 E : else
599 E : value.caller_move_count = 0;
600 :
601 E : value.function_symbol = function_symbol;
602 E : if (function_symbol != NULL)
603 E : value.function_move_count = function_symbol->move_count();
604 E : else
605 E : value.function_move_count = 0;
606 :
607 E : if (function_symbol == NULL) {
608 : // We're not in a dynamic function, record the (conventional) function.
609 E : info->function = function;
610 E : info->flags = 0;
611 E : } else {
612 : // We're in a dynamic function symbol, record the details.
613 E : DCHECK_NE(function_symbol->id(), 0);
614 :
615 E : info->function_symbol_id = function_symbol->id();
616 E : info->flags = kFunctionIsSymbol;
617 : }
618 :
619 E : if (caller_symbol == NULL) {
620 : // The caller is not a dynamic symbol, record the (conventional) caller.
621 E : info->caller = caller;
622 E : info->caller_offset = 0;
623 E : } else {
624 : // The caller is a dynamic symbol, record the details.
625 E : DCHECK_NE(caller_symbol->id(), 0);
626 :
627 E : info->caller_symbol_id = caller_symbol->id();
628 E : info->flags |= kCallerIsSymbol;
629 : info->caller_offset = reinterpret_cast<const uint8*>(caller) -
630 E : reinterpret_cast<const uint8*>(caller_symbol->address());
631 : }
632 :
633 E : info->num_calls = 1;
634 E : info->cycles_min = info->cycles_max = info->cycles_sum = duration_cycles;
635 : }
636 E : }
637 :
638 E : void Profiler::ThreadState::UpdateOverhead(uint64 entry_cycles) {
639 : // TODO(siggi): Measure the fixed overhead on setup,
640 : // then add it on every update.
641 E : cycles_overhead_ += (__rdtsc() - entry_cycles);
642 E : }
643 :
644 E : InvocationInfo* Profiler::ThreadState::AllocateInvocationInfo() {
645 : // This is kind of self-evident for the moment, as an invocation info batch
646 : // contains at least one invocation info as currently declared.
647 : // If this fails, please reconsider your implementation, or else revisit
648 : // the allocation code below.
649 : COMPILE_ASSERT(sizeof(TraceBatchInvocationInfo) >= sizeof(InvocationInfo),
650 : invocation_info_batch_must_be_larger_than_invocation_info);
651 :
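// We grow an open batch record in place by bumping the record prefix's size
// and the segment's write pointer by one InvocationInfo. A fresh batch record
// is started whenever the segment has been flushed or the cache cleared
// (batch_ == NULL).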
652 : // Do we have a record that we can grow?
653 E : if (batch_ != NULL && segment_.CanAllocateRaw(sizeof(InvocationInfo))) {
654 : InvocationInfo* invocation_info =
655 E : reinterpret_cast<InvocationInfo*>(segment_.write_ptr);
656 E : RecordPrefix* prefix = trace::client::GetRecordPrefix(batch_);
657 E : prefix->size += sizeof(InvocationInfo);
658 :
659 : // Update the book-keeping.
660 E : segment_.write_ptr += sizeof(InvocationInfo);
661 E : segment_.header->segment_length += sizeof(InvocationInfo);
662 :
663 E : return invocation_info;
664 : }
665 :
666 : // Do we need to scarf a new buffer?
667 : if (!segment_.CanAllocate(sizeof(TraceBatchInvocationInfo)) &&
668 E : !FlushSegment()) {
669 : // We failed to allocate a new buffer.
670 i : return NULL;
671 : }
672 :
673 E : DCHECK(segment_.header != NULL);
674 :
675 E : batch_ = segment_.AllocateTraceRecord<TraceBatchInvocationInfo>();
676 E : return &batch_->invocations[0];
677 E : }
678 :
679 E : bool Profiler::ThreadState::FlushSegment() {
680 E : ClearCache();
681 E : return profiler_->session_.ExchangeBuffer(&segment_);
682 E : }
683 :
684 E : void Profiler::ThreadState::ClearCache() {
685 E : batch_ = NULL;
686 E : invocations_.clear();
687 E : }
688 :
689 E : void Profiler::OnThreadDetach() {
690 E : ThreadState* state = GetThreadState();
691 E : if (state != NULL)
692 E : thread_state_manager_.MarkForDeath(state);
693 E : }
694 :
695 E : RetAddr* Profiler::ResolveReturnAddressLocation(RetAddr* pc_location) {
696 E : base::AutoLock lock(lock_);
697 :
698 : // In case of tail-call and tail recursion elimination, we can get chained
699 : // thunks, so we loop around here until we resolve to a non-thunk.
700 E : while (true) {
701 : // See whether the return address is one of our thunks.
702 E : RetAddr ret_addr = *pc_location;
703 :
704 : // Compute the page this return address lives in.
705 : const void* page = reinterpret_cast<const void*>(
706 E : reinterpret_cast<uintptr_t>(ret_addr) & ~0xFFF);
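// The mask works because thunk pages are page-aligned (4K). pages_ is kept
// sorted by OnPageAdded/OnPageRemoved, which is what makes the binary search
// below valid.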
707 E : if (!std::binary_search(pages_.begin(), pages_.end(), page))
708 E : return pc_location;
709 :
710 : // It's one of our own, redirect to the thunk's stash.
711 : ThreadState::Thunk* thunk =
712 E : reinterpret_cast<ThreadState::Thunk*>(const_cast<void*>(ret_addr));
713 :
714 E : ThreadState::ThunkData* data = ThreadState::DataFromThunk(thunk);
715 :
716 : // Update the PC location and go around again, in case this
717 : // thunk links to another one.
718 E : pc_location = &data->caller;
719 E : }
720 E : }
721 :
722 : void Profiler::OnModuleEntry(EntryFrame* entry_frame,
723 : FuncAddr function,
724 E : uint64 cycles) {
725 : // The function invoked has a DllMain-like signature.
726 : // Get the module and reason from its invocation record.
727 E : HMODULE module = reinterpret_cast<HMODULE>(entry_frame->args[0]);
728 E : DWORD reason = entry_frame->args[1];
729 :
730 : // Only log module additions.
731 E : bool should_log_module = false;
732 E : switch (reason) {
733 : case DLL_PROCESS_ATTACH:
734 : case DLL_THREAD_ATTACH:
735 E : should_log_module = true;
736 E : break;
737 :
738 : case DLL_PROCESS_DETACH:
739 : case DLL_THREAD_DETACH:
740 E : break;
741 :
742 : default:
743 i : LOG(WARNING) << "Unrecognized module event: " << reason << ".";
744 : break;
745 : }
746 :
747 : // Make sure we only log each module once per process.
748 E : bool is_new_module = false;
749 E : if (should_log_module) {
750 E : base::AutoLock lock(lock_);
751 :
752 E : is_new_module = logged_modules_.insert(module).second;
753 E : }
754 :
755 E : ThreadState* data = GetOrAllocateThreadState();
756 E : DCHECK(data != NULL);
757 E : if (data == NULL)
758 i : return;
759 :
760 E : if (is_new_module) {
761 : // Delegate the logging to our per-thread data.
762 E : data->LogAllModules(module);
763 : }
764 :
765 : // Handle the function entry.
766 E : data->OnFunctionEntry(entry_frame, function, cycles);
767 E : }
768 :
769 E : void Profiler::OnPageAdded(const void* page) {
770 E : base::AutoLock lock(lock_);
771 :
772 : PageVector::iterator it =
773 E : std::lower_bound(pages_.begin(), pages_.end(), page);
774 E : DCHECK(it == pages_.end() || *it != page);
775 E : pages_.insert(it, page);
776 E : }
777 :
778 E : void Profiler::OnPageRemoved(const void* page) {
779 E : base::AutoLock lock(lock_);
780 :
781 : PageVector::iterator it =
782 E : std::lower_bound(pages_.begin(), pages_.end(), page);
783 : // The page must be in our list.
784 E : DCHECK(it != pages_.end());
785 E : DCHECK_EQ(page, *it);
786 E : pages_.erase(it);
787 E : }
788 :
789 E : void Profiler::OnThreadName(const base::StringPiece& thread_name) {
790 E : ThreadState* state = GetOrAllocateThreadState();
791 E : if (state != NULL)
792 E : state->LogThreadName(thread_name);
793 E : }
794 :
795 E : LONG CALLBACK Profiler::ExceptionHandler(EXCEPTION_POINTERS* ex_info) {
796 : // Log the thread name if this is the VC thread name exception.
797 : if (ex_info->ExceptionRecord->ExceptionCode == kVCThreadNameException &&
798 : ex_info->ExceptionRecord->NumberParameters ==
799 E : sizeof(THREADNAME_INFO)/sizeof(DWORD)) {
800 : const THREADNAME_INFO* info =
801 : reinterpret_cast<const THREADNAME_INFO*>(
802 E : &ex_info->ExceptionRecord->ExceptionInformation);
803 :
804 E : if (info->dwType == 0x1000) {
805 E : instance_.OnThreadName(info->szName);
806 E : } else {
807 i : LOG(WARNING) << "Unrecognized event type " << info->dwType;
808 : }
809 : }
810 :
811 E : return EXCEPTION_CONTINUE_SEARCH;
812 E : }
813 :
814 : void Profiler::OnDllEvent(agent::common::DllNotificationWatcher::EventType type,
815 : HMODULE module,
816 : size_t module_size,
817 : const base::StringPiece16& dll_path,
818 E : const base::StringPiece16& dll_base_name) {
819 E : if (type == agent::common::DllNotificationWatcher::kDllLoaded) {
820 : // Bail early if there's no session.
821 E : if (session_.IsDisabled())
822 i : return;
823 :
824 : // Trace the load event.
825 E : ThreadState* state = GetOrAllocateThreadState();
826 E : if (state != NULL)
827 E : state->LogModule(module);
828 : }
829 E : }
830 :
831 E : Profiler::Profiler() : handler_registration_(NULL) {
832 : // Create our RPC session and allocate our initial trace segment on creation,
833 : // aka at load time.
834 E : ThreadState* data = CreateFirstThreadStateAndSession();
835 E : CHECK(data != NULL) << "Failed to allocate thread local state.";
836 :
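// Register ourselves at the head of the vectored exception handler chain so
// we see thread-name exceptions early. We always return
// EXCEPTION_CONTINUE_SEARCH, so this doesn't interfere with other handlers.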
837 E : handler_registration_ = ::AddVectoredExceptionHandler(TRUE, ExceptionHandler);
838 :
839 E : dll_watcher_.Init(base::Bind(&Profiler::OnDllEvent, base::Unretained(this)));
840 E : }
841 :
842 E : Profiler::~Profiler() {
843 : // Since the DLL notification callback depends on thread and session state,
844 : // let's tear it down first. Note that this grabs the loader's lock,
845 : // so there's deadlock potential here, but no other thread will get a DLL
846 : // notification after this call returns.
847 E : dll_watcher_.Reset();
848 :
849 : // Typically, this will happen on the last thread in the process. We must
850 : // explicitly clean up this thread's state as it will otherwise leak.
851 E : FreeThreadState();
852 :
853 : // Unregister our VEH.
854 E : if (handler_registration_ != NULL) {
855 E : ::RemoveVectoredExceptionHandler(handler_registration_);
856 E : handler_registration_ = NULL;
857 : }
858 E : }
859 :
860 E : Profiler::ThreadState* Profiler::CreateFirstThreadStateAndSession() {
861 E : Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
862 :
863 : // Create the session (and allocate the first segment).
864 E : trace::client::InitializeRpcSession(&session_, data->segment());
865 :
866 E : return data;
867 E : }
868 :
869 E : Profiler::ThreadState* Profiler::GetOrAllocateThreadState() {
870 E : Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
871 E : if (!data->segment()->write_ptr && session_.IsTracing()) {
872 E : session_.AllocateBuffer(data->segment());
873 : }
874 E : return data;
875 E : }
876 :
877 E : Profiler::ThreadState* Profiler::GetOrAllocateThreadStateImpl() {
878 E : ThreadState *data = tls_.Get();
879 E : if (data != NULL)
880 E : return data;
881 :
882 E : data = new ThreadState(this);
883 E : if (data == NULL) {
884 i : LOG(ERROR) << "Unable to allocate per-thread data";
885 i : return NULL;
886 : }
887 :
888 E : thread_state_manager_.Register(data);
889 E : tls_.Set(data);
890 :
891 E : return data;
892 E : }
893 :
894 E : Profiler::ThreadState* Profiler::GetThreadState() const {
895 E : return tls_.Get();
896 E : }
897 :
898 E : void Profiler::FreeThreadState() {
899 E : ThreadState *data = GetThreadState();
900 E : if (data != NULL) {
901 E : tls_.Set(NULL);
902 E : thread_state_manager_.Unregister(data);
903 E : delete data;
904 : }
905 E : }
906 :
907 : void WINAPI Profiler::DllMainEntryHook(EntryFrame* entry_frame,
908 : FuncAddr function,
909 E : uint64 cycles) {
910 E : ScopedLastErrorKeeper keep_last_error;
911 :
912 E : instance_.OnModuleEntry(entry_frame, function, cycles);
913 E : }
914 :
915 : void WINAPI Profiler::FunctionEntryHook(EntryFrame* entry_frame,
916 : FuncAddr function,
917 E : uint64 cycles) {
918 E : ScopedLastErrorKeeper keep_last_error;
919 :
920 E : ThreadState* data = instance_.GetOrAllocateThreadState();
921 E : DCHECK(data != NULL);
922 E : if (data != NULL)
923 E : data->OnFunctionEntry(entry_frame, function, cycles);
924 E : }
925 :
926 : void WINAPI Profiler::OnV8FunctionEntry(FuncAddr function,
927 : RetAddr* return_addr_location,
928 E : uint64 cycles) {
929 E : ScopedLastErrorKeeper keep_last_error;
930 :
931 E : ThreadState* data = instance_.GetOrAllocateThreadState();
932 E : if (data != NULL)
933 E : data->OnV8FunctionEntry(function, return_addr_location, cycles);
934 E : }
935 :
936 : void Profiler::AddSymbol(const void* address, size_t length,
937 E : const char* name, size_t name_len) {
938 E : symbol_map_.AddSymbol(address, length, base::StringPiece(name, name_len));
939 E : }
940 :
941 E : void Profiler::MoveSymbol(const void* old_address, const void* new_address) {
942 E : symbol_map_.MoveSymbol(old_address, new_address);
943 E : }
944 :
945 : } // namespace profiler
946 : } // namespace agent
|