1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the profiler DLL.
16 : #include "syzygy/agent/profiler/profiler.h"
17 :
18 : #include <windows.h>
19 : #include <algorithm>
20 :
21 : #include "base/at_exit.h"
22 : #include "base/bind.h"
23 : #include "base/command_line.h"
24 : #include "base/environment.h"
25 : #include "base/logging.h"
26 : #include "base/memory/scoped_ptr.h"
27 : #include "base/strings/string_util.h"
28 : #include "base/strings/utf_string_conversions.h"
29 : #include "base/win/pe_image.h"
30 : #include "base/win/scoped_handle.h"
31 : #include "syzygy/agent/common/agent.h"
32 : #include "syzygy/agent/common/dlist.h"
33 : #include "syzygy/agent/common/process_utils.h"
34 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
35 : #include "syzygy/agent/profiler/return_thunk_factory.h"
36 : #include "syzygy/common/logging.h"
37 : #include "syzygy/common/process_utils.h"
38 : #include "syzygy/trace/client/client_utils.h"
39 : #include "syzygy/trace/protocol/call_trace_defs.h"
40 :
namespace {

using agent::common::ScopedLastErrorKeeper;

// Identifies an invocation site by (caller return address, callee address).
typedef std::pair<RetAddr, FuncAddr> InvocationKey;

// Hash/compare traits for InvocationKey in the MSVC hash_compare style:
// operator()(key) is the hash function, operator()(a, b) is a less-than
// comparison, and bucket_size/min_buckets tune the hash table.
class HashInvocationKey {
 public:
  static const size_t bucket_size = 4;
  static const size_t min_buckets = 8;

  size_t operator()(const InvocationKey& key) const {
    // XOR of the two code addresses is a cheap, adequate hash here.
    return reinterpret_cast<size_t>(key.first) ^
        reinterpret_cast<size_t>(key.second);
  }

  bool operator()(const InvocationKey& a, const InvocationKey& b) const {
    return a < b;
  }
};

using agent::profiler::SymbolMap;

// Cached per-invocation-site record. The symbol references and their move
// counts let RecordInvocation detect when a dynamic symbol has moved since
// the cached InvocationInfo was written, invalidating the cache entry.
struct InvocationValue {
  // This invocation entry's caller's dynamic symbol, if any.
  scoped_refptr<SymbolMap::Symbol> caller_symbol;
  // The last observed move count for caller_symbol.
  int32 caller_move_count;

  // This invocation entry's callee's dynamic symbol, if any.
  scoped_refptr<SymbolMap::Symbol> function_symbol;
  // The last observed move count for function_symbol.
  int32 function_move_count;

  // Points to the trace buffer entry for the respective function.
  InvocationInfo* info;
};

typedef base::hash_map<
    InvocationKey, InvocationValue, HashInvocationKey> InvocationMap;


// The information on how to set the thread name comes from
// a MSDN article: http://msdn2.microsoft.com/en-us/library/xcb2z8hs.aspx
const DWORD kVCThreadNameException = 0x406D1388;

typedef struct tagTHREADNAME_INFO {
  DWORD dwType;  // Must be 0x1000.
  LPCSTR szName;  // Pointer to name (in user addr space).
  DWORD dwThreadID;  // Thread ID (-1=caller thread).
  DWORD dwFlags;  // Reserved for future use, must be zero.
} THREADNAME_INFO;

}  // namespace
95 :
// See client.cc for a description of the unconventional
// calling conventions for this function.
//
// Entry thunk for instrumented functions. Captures the cycle counter as
// early as possible, preserves the volatile registers (EAX, ECX, EDX) and
// the arithmetic flags, then invokes Profiler::FunctionEntryHook with
// (EntryFrame*, original function address, 64-bit cycle count). The hook
// is WINAPI (stdcall), so it pops its own arguments.
extern "C" void __declspec(naked) _indirect_penter() {
  __asm {
    // Stash volatile registers.
    push eax
    push edx

    // Get the current cycle time ASAP.
    rdtsc

    // Save the value of eax for later, we need the register to stash the flags.
    push ecx
    mov ecx, eax

    // Save the low byte of the flags into AH.
    lahf
    // Save the overflow flag into AL.
    seto al

    // Stash the flags to stack.
    push eax

    // Push the cycle time arg.
    push edx
    push ecx

    // Retrieve the original function address, pushed by our caller.
    // 0x18 == 6 DWORD pushes above, so [esp + 0x18] is the top of the
    // stack as it was on entry.
    mov eax, DWORD PTR[esp + 0x18]
    push eax

    // Calculate the position of the return address on stack, and
    // push it. This becomes the EntryFrame argument.
    lea eax, DWORD PTR[esp + 0x20]
    push eax
    call agent::profiler::Profiler::FunctionEntryHook

    // Restore volatile registers.
    pop eax
    // AL is set to 1 if the overflow flag was set before the call to
    // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
    // flag.
    add al, 0x7f
    // Restore the low byte of the flags.
    sahf

    pop ecx
    pop edx
    pop eax

    // Return to the address pushed by our caller.
    ret
  }
}
150 :
// Entry thunk for DllMain-like functions. Identical in structure to
// _indirect_penter above, but routes to Profiler::DllMainEntryHook so the
// profiler can observe module attach/detach notifications.
extern "C" void __declspec(naked) _indirect_penter_dllmain() {
  __asm {
    // Stash volatile registers.
    push eax
    push edx

    // Get the current cycle time ASAP.
    rdtsc

    // Save the value of eax for later, we need the register to stash the flags.
    push ecx
    mov ecx, eax

    // Save the low byte of the flags into AH.
    lahf
    // Save the overflow flag into AL.
    seto al

    // Stash the flags to stack.
    push eax

    // Push the cycle time arg.
    push edx
    push ecx

    // Retrieve the original function address, pushed by our caller.
    mov eax, DWORD PTR[esp + 0x18]
    push eax

    // Calculate the position of the return address on stack, and
    // push it. This becomes the EntryFrame argument.
    lea eax, DWORD PTR[esp + 0x20]
    push eax
    call agent::profiler::Profiler::DllMainEntryHook

    // Restore volatile registers.
    pop eax
    // AL is set to 1 if the overflow flag was set before the call to
    // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
    // flag.
    add al, 0x7f
    // Restore the low byte of the flags.
    sahf

    pop ecx
    pop edx
    pop eax

    // Return to the address pushed by our caller.
    ret
  }
}
203 :
204 : // On entry, pc_location should point to a location on our own stack.
205 : extern "C" uintptr_t __cdecl ResolveReturnAddressLocation(
206 E : uintptr_t pc_location) {
207 : using agent::profiler::Profiler;
208 : return reinterpret_cast<uintptr_t>(
209 : Profiler::instance().ResolveReturnAddressLocation(
210 E : reinterpret_cast<RetAddr*>(pc_location)));
211 E : }
212 :
// This function needs to match the declaration of FunctionEntryHook in the V8
// API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
//
// Captures the cycle counter, then duplicates the caller's (function,
// return_addr_location) arguments below the cycle-time argument before
// chaining to Profiler::OnV8FunctionEntry. That hook is WINAPI (stdcall)
// and pops the duplicated arguments itself; the originals are cleaned up
// by our __cdecl caller as usual.
extern "C" __declspec(naked) void __cdecl OnDynamicFunctionEntry(
    uintptr_t function, uintptr_t return_addr_location) {
  __asm {
    // Grab the current time ASAP.
    rdtsc

    // Push the cycle time arg.
    push edx
    push eax

    // Duplicate the function and return_addr_location arguments.
    // Both reads use [esp + 0x10] because esp moves down 4 bytes
    // after the first push.
    push DWORD PTR[esp + 0x10]
    push DWORD PTR[esp + 0x10]

    call agent::profiler::Profiler::OnV8FunctionEntry

    ret
  }
}
234 :
235 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
236 : using agent::profiler::Profiler;
237 :
238 : // Our AtExit manager required by base.
239 : static base::AtExitManager* at_exit = NULL;
240 :
241 E : agent::common::InitializeCrt();
242 :
243 E : switch (reason) {
244 : case DLL_PROCESS_ATTACH:
245 E : DCHECK(at_exit == NULL);
246 E : at_exit = new base::AtExitManager();
247 :
248 E : base::CommandLine::Init(0, NULL);
249 E : common::InitLoggingForDll(L"profiler");
250 E : break;
251 :
252 : case DLL_THREAD_DETACH:
253 E : Profiler::instance().OnThreadDetach();
254 E : break;
255 :
256 : case DLL_PROCESS_DETACH:
257 E : base::CommandLine::Reset();
258 E : DCHECK(at_exit != NULL);
259 E : delete at_exit;
260 E : at_exit = NULL;
261 : break;
262 :
263 : default:
264 : break;
265 : }
266 :
267 E : return TRUE;
268 E : }
269 :
270 : void __cdecl AddDynamicSymbol(const void* address, size_t length,
271 E : const char* name, size_t name_len) {
272 : using agent::profiler::Profiler;
273 :
274 E : Profiler::instance().AddSymbol(address, length, name, name_len);
275 E : }
276 :
277 : void __cdecl MoveDynamicSymbol(const void* old_address,
278 E : const void* new_address) {
279 : using agent::profiler::Profiler;
280 :
281 E : Profiler::instance().MoveSymbol(old_address, new_address);
282 E : }
283 :
namespace agent {
namespace profiler {

// All tracing runs through this object.
// Process-wide singleton; constructed at DLL load, destroyed at unload.
agent::profiler::Profiler Profiler::instance_;
289 :
// Per-thread profiler state: owns the thread's trace file segment, its
// invocation cache and (via ReturnThunkFactoryImpl) the return thunks that
// route function exits back to OnFunctionExit. Not thread-safe; each
// instance is only touched by its owning thread.
class Profiler::ThreadState
    : public ReturnThunkFactoryImpl<Profiler::ThreadState>,
      public agent::common::ThreadStateBase {
 public:
  explicit ThreadState(Profiler* profiler);
  ~ThreadState();

  // Logs @p module and all other modules in the process, then flushes
  // the current trace buffer.
  void LogAllModules(HMODULE module);

  // Logs @p module.
  void LogModule(HMODULE module);

  // Logs @p thread_name as the current thread's name.
  void LogThreadName(const base::StringPiece& thread_name);

  // Logs @p symbol into the trace.
  void LogSymbol(SymbolMap::Symbol* symbol);

  // Processes a single function entry.
  void OnFunctionEntry(EntryFrame* entry_frame,
                       FuncAddr function,
                       uint64 cycles);

  // Processes a single V8 function entry.
  void OnV8FunctionEntry(FuncAddr function,
                         RetAddr* return_address_location,
                         uint64 cycles);

  // @name Callback notification implementation.
  // @{
  virtual void OnPageAdded(const void* page) override;
  virtual void OnPageRemoved(const void* page) override;
  // @}

  // Function exit hook.
  void OnFunctionExit(const ThunkData* data, uint64 cycles_exit);

  trace::client::TraceFileSegment* segment() { return &segment_; }

 private:
  friend class Profiler;

  // Tallies one completed invocation into the current batch record,
  // reusing the cached entry for (caller, function) when still valid.
  void RecordInvocation(RetAddr caller,
                        FuncAddr function,
                        uint64 cycles);

  void UpdateOverhead(uint64 entry_cycles);
  InvocationInfo* AllocateInvocationInfo();
  void ClearCache();
  bool FlushSegment();

  // The profiler we're attached to.
  Profiler* profiler_;

  // We keep a running tally of the rough amount of wall clock cycles spent
  // inside the profiler. We then subtract the profiler's overhead from the
  // wall clock cycle timer on each measurement. This results in a timer that
  // measures time exclusive of profiling overhead.
  uint64 cycles_overhead_;

  // The invocations we've recorded in our buffer.
  InvocationMap invocations_;

  // The trace file segment we're recording to.
  trace::client::TraceFileSegment segment_;

  // The current batch record we're writing to, if any.
  TraceBatchInvocationInfo* batch_;

  // The set of modules we've logged.
  ModuleSet logged_modules_;
};
364 :
// Constructs the per-thread state and initializes the return-thunk
// factory (Initialize comes from ReturnThunkFactoryImpl).
Profiler::ThreadState::ThreadState(Profiler* profiler)
    : profiler_(profiler),
      cycles_overhead_(0LL),
      batch_(NULL) {
  Initialize();
}
371 :
// Tear-down order matters: invalidate cached pointers into the segment
// first, return the segment's buffer to the session, then dismantle the
// thunk factory.
Profiler::ThreadState::~ThreadState() {
  ClearCache();

  // If we have an outstanding buffer, let's deallocate it now.
  if (segment_.write_ptr != NULL)
    profiler_->session_.ReturnBuffer(&segment_);

  Uninitialize();
}
381 :
382 E : void Profiler::ThreadState::LogAllModules(HMODULE module) {
383 : // Bail early if we're disabled.
384 E : if (profiler_->session_.IsDisabled())
385 E : return;
386 :
387 E : ::common::ModuleVector modules;
388 E : ::common::GetCurrentProcessModules(&modules);
389 :
390 : // Our module should be in the process modules.
391 E : DCHECK(std::find(modules.begin(), modules.end(), module) != modules.end());
392 :
393 E : for (size_t i = 0; i < modules.size(); ++i) {
394 E : DCHECK(modules[i] != NULL);
395 E : LogModule(modules[i]);
396 E : }
397 :
398 : // We need to flush module events right away, so that the module is
399 : // defined in the trace file before events using that module start to
400 : // occur (in another thread).
401 E : FlushSegment();
402 E : }
403 :
// Writes a module record for @p module into the trace segment.
void Profiler::ThreadState::LogModule(HMODULE module) {
  // This may flush our buffer, so let's clear our cache first: any cached
  // batch_/invocations_ pointers into the old buffer would dangle.
  ClearCache();
  agent::common::LogModule(module, &profiler_->session_, &segment_);
}
409 :
410 : void Profiler::ThreadState::LogThreadName(
411 E : const base::StringPiece& thread_name) {
412 E : if (thread_name.empty())
413 i : return;
414 :
415 : // Make sure the event we're about to write will fit.
416 E : if (!segment_.CanAllocate(thread_name.size() + 1) || !FlushSegment()) {
417 : // Failed to allocate a new segment.
418 i : return;
419 : }
420 :
421 E : DCHECK(segment_.CanAllocate(thread_name.size() + 1));
422 E : batch_ = NULL;
423 :
424 : // Allocate a record in the log.
425 : TraceThreadNameInfo* thread_name_event =
426 : reinterpret_cast<TraceThreadNameInfo*>(
427 : segment_.AllocateTraceRecordImpl(
428 E : TRACE_THREAD_NAME, thread_name.size() + 1));
429 E : DCHECK(thread_name_event != NULL);
430 : base::strlcpy(thread_name_event->thread_name,
431 E : thread_name.data(), thread_name.size() + 1);
432 E : }
433 :
434 E : void Profiler::ThreadState::LogSymbol(SymbolMap::Symbol* symbol) {
435 E : DCHECK(symbol != NULL);
436 E : DCHECK_NE(0, symbol->id());
437 :
438 : size_t symbol_size =
439 E : FIELD_OFFSET(TraceDynamicSymbol, symbol_name) + symbol->name().size() + 1;
440 :
441 E : if (!segment_.CanAllocate(symbol_size) || !FlushSegment()) {
442 : // Failed to allocate the symbol record.
443 i : return;
444 : }
445 :
446 E : DCHECK(segment_.CanAllocate(symbol_size));
447 E : batch_ = NULL;
448 :
449 : // Allocate a record in the log.
450 : TraceDynamicSymbol* dynamic_symbol_event =
451 : reinterpret_cast<TraceDynamicSymbol*>(
452 : segment_.AllocateTraceRecordImpl(
453 E : TRACE_DYNAMIC_SYMBOL, symbol_size));
454 E : DCHECK(dynamic_symbol_event != NULL);
455 E : dynamic_symbol_event->symbol_id = symbol->id();
456 : base::strlcpy(dynamic_symbol_event->symbol_name,
457 E : symbol->name().data(), symbol->name().size() + 1);
458 E : }
459 :
460 : void Profiler::ThreadState::OnFunctionEntry(EntryFrame* entry_frame,
461 : FuncAddr function,
462 E : uint64 cycles) {
463 E : if (profiler_->session_.IsDisabled())
464 E : return;
465 :
466 : // Record the details of the entry.
467 : // Note that on tail-recursion and tail-call elimination, the caller recorded
468 : // here will be a thunk. We cater for this case on exit as best we can.
469 E : ThunkData* data = MakeThunk(entry_frame->retaddr);
470 E : DCHECK(data != NULL);
471 E : data->caller = entry_frame->retaddr;
472 E : data->function = function;
473 E : data->cycles_entry = cycles - cycles_overhead_;
474 :
475 E : entry_frame->retaddr = data->thunk;
476 :
477 E : UpdateOverhead(cycles);
478 E : }
479 :
480 : void Profiler::ThreadState::OnV8FunctionEntry(FuncAddr function,
481 : RetAddr* return_address_location,
482 E : uint64 cycles) {
483 E : if (profiler_->session_.IsDisabled())
484 i : return;
485 :
486 : // Record the details of the entry.
487 :
488 : // TODO(siggi): Note that we want to do different exit processing here,
489 : // as we know "function" is a dynamic symbol - it might be better to
490 : // record the fact here than to force a lookup on RecordInvocation.
491 :
492 : // Note that on tail-recursion and tail-call elimination, the caller recorded
493 : // here will be a thunk. We cater for this case on exit as best we can.
494 E : ThunkData* data = MakeThunk(*return_address_location);
495 E : DCHECK(data != NULL);
496 E : data->caller = *return_address_location;
497 E : data->function = function;
498 E : data->cycles_entry = cycles - cycles_overhead_;
499 :
500 E : *return_address_location = data->thunk;
501 :
502 E : UpdateOverhead(cycles);
503 E : }
504 :
505 : void Profiler::ThreadState::OnFunctionExit(const ThunkData* data,
506 E : uint64 cycles_exit) {
507 : // Calculate the number of cycles in the invocation, exclusive our overhead.
508 E : uint64 cycles_executed = cycles_exit - cycles_overhead_ - data->cycles_entry;
509 :
510 : // See if the return address resolves to a thunk, which indicates
511 : // tail recursion or tail call elimination. In that case we record the
512 : // calling function as caller, which isn't totally accurate as that'll
513 : // attribute the cost to the first line of the calling function. In the
514 : // absence of more information, it's the best we can do, however.
515 : // TODO(siggi): Move this into RecordInvocation, as we can elide the lookup
516 : // on a cache hit.
517 E : Thunk* ret_thunk = CastToThunk(data->caller);
518 E : if (ret_thunk == NULL) {
519 E : RecordInvocation(data->caller, data->function, cycles_executed);
520 E : } else {
521 E : ThunkData* ret_data = DataFromThunk(ret_thunk);
522 E : RecordInvocation(ret_data->function, data->function, cycles_executed);
523 : }
524 :
525 E : UpdateOverhead(cycles_exit);
526 E : }
527 :
// Thunk-factory callback: forwards new thunk-page notifications to the
// profiler, which maintains the global sorted page list used by
// ResolveReturnAddressLocation.
void Profiler::ThreadState::OnPageAdded(const void* page) {
  profiler_->OnPageAdded(page);
}
531 :
// Thunk-factory callback: forwards thunk-page removal notifications to the
// profiler's global page list.
void Profiler::ThreadState::OnPageRemoved(const void* page) {
  profiler_->OnPageRemoved(page);
}
535 :
// Tallies one completed (caller, function, duration) invocation. Fast path:
// a cached InvocationValue whose symbols haven't moved is updated in place.
// Slow path: the stale entry (if any) is discarded, symbols are (re)resolved
// and logged on first sight, and a fresh InvocationInfo is allocated.
void Profiler::ThreadState::RecordInvocation(RetAddr caller,
                                             FuncAddr function,
                                             uint64 duration_cycles) {
  // See whether we've already recorded an entry for this function.
  InvocationKey key(caller, function);
  InvocationMap::iterator it = invocations_.find(key);
  if (it != invocations_.end()) {
    // Yup, we already have an entry, validate it.
    InvocationValue& value = it->second;

    // The entry is valid only if neither endpoint's dynamic symbol has
    // moved since the entry was cached (NULL symbol == static code, which
    // never moves).
    if ((value.caller_symbol == NULL ||
         value.caller_symbol->move_count() == value.caller_move_count) &&
        (value.function_symbol == NULL ||
         value.function_symbol->move_count() == value.function_move_count)) {
      // The entry is still good, tally the new data.
      ++(value.info->num_calls);
      value.info->cycles_sum += duration_cycles;
      if (duration_cycles < value.info->cycles_min) {
        value.info->cycles_min = duration_cycles;
      } else if (duration_cycles > value.info->cycles_max) {
        value.info->cycles_max = duration_cycles;
      }

      // Early out on success.
      return;
    } else {
      // The entry is not valid any more, discard it.
      DCHECK(value.caller_symbol != NULL || value.function_symbol != NULL);

      invocations_.erase(it);
    }
  }
  DCHECK(invocations_.find(key) == invocations_.end());

  // We don't have an entry, allocate a new one for this invocation.
  // The code below may touch last error.
  ScopedLastErrorKeeper keep_last_error;

  scoped_refptr<SymbolMap::Symbol> caller_symbol =
      profiler_->symbol_map_.FindSymbol(caller);

  // TODO(siggi): This can perhaps be optimized by keeping track of which
  //     entry hook was invoked. This will however require setting an extra
  //     bool on every entry, so will require measurement to see whether it's
  //     a win.
  scoped_refptr<SymbolMap::Symbol> function_symbol =
      profiler_->symbol_map_.FindSymbol(function);

  // Trace the symbols if this is the first time either one is observed.
  if (caller_symbol != NULL && caller_symbol->EnsureHasId()) {
    // TODO(siggi): It might lead to worst-case behavior to log symbols into
    //     the same trace buffer as we store invocations, as we're likely to
    //     alternate symbols and single-entry invocation batches. Fixme.
    LogSymbol(caller_symbol.get());
  }
  if (function_symbol != NULL && function_symbol->EnsureHasId()) {
    // TODO(siggi): See above.
    LogSymbol(function_symbol.get());
  }

  InvocationInfo* info = AllocateInvocationInfo();
  if (info != NULL) {
    InvocationValue& value = invocations_[key];
    value.info = info;
    value.caller_symbol = caller_symbol;
    if (caller_symbol != NULL)
      value.caller_move_count = caller_symbol->move_count();
    else
      value.caller_move_count = 0;

    value.function_symbol = function_symbol;
    if (function_symbol != NULL)
      value.function_move_count = function_symbol->move_count();
    else
      value.function_move_count = 0;

    if (function_symbol == NULL) {
      // We're not in a dynamic function, record the (conventional) function.
      info->function = function;
      info->flags = 0;
    } else {
      // We're in a dynamic function symbol, record the details.
      DCHECK_NE(function_symbol->id(), 0);

      info->function_symbol_id = function_symbol->id();
      info->flags = kFunctionIsSymbol;
    }

    if (caller_symbol == NULL) {
      // We're not in a dynamic caller_symbol, record the (conventional) caller.
      info->caller = caller;
      info->caller_offset = 0;
    } else {
      // We're in a dynamic caller_symbol, record the details.
      DCHECK_NE(caller_symbol->id(), 0);

      info->caller_symbol_id = caller_symbol->id();
      info->flags |= kCallerIsSymbol;
      // Record the offset of the call site within the caller symbol.
      info->caller_offset = reinterpret_cast<const uint8*>(caller) -
          reinterpret_cast<const uint8*>(caller_symbol->address());
    }

    info->num_calls = 1;
    info->cycles_min = info->cycles_max = info->cycles_sum = duration_cycles;
  }
}
642 :
// Accumulates the cycles spent inside the profiler since @p entry_cycles
// was sampled; this running total is subtracted from subsequent
// measurements so timings exclude profiling overhead.
void Profiler::ThreadState::UpdateOverhead(uint64 entry_cycles) {
  // TODO(siggi): Measure the fixed overhead on setup,
  //     then add it on every update.
  cycles_overhead_ += (__rdtsc() - entry_cycles);
}
648 :
// Returns a fresh InvocationInfo slot in the trace segment, growing the
// current batch record in place when possible, otherwise starting a new
// batch (exchanging buffers first if needed). Returns NULL if a buffer
// exchange fails.
InvocationInfo* Profiler::ThreadState::AllocateInvocationInfo() {
  // This is kind of self-evident for the moment, as an invocation info batch
  // contains at least one invocation info as currently declared.
  // If this fails, please reconsider your implementation, or else revisit
  // the allocation code below.
  static_assert(sizeof(TraceBatchInvocationInfo) >= sizeof(InvocationInfo),
                "Invocation info batch must be larger than invocation info.");

  // Do we have a record that we can grow?
  if (batch_ != NULL && segment_.CanAllocateRaw(sizeof(InvocationInfo))) {
    // Append in place at the segment's write pointer and grow the batch
    // record's size to cover the new entry.
    InvocationInfo* invocation_info =
        reinterpret_cast<InvocationInfo*>(segment_.write_ptr);
    RecordPrefix* prefix = trace::client::GetRecordPrefix(batch_);
    prefix->size += sizeof(InvocationInfo);

    // Update the book-keeping.
    segment_.write_ptr += sizeof(InvocationInfo);
    segment_.header->segment_length += sizeof(InvocationInfo);

    return invocation_info;
  }

  // Do we need to scarf a new buffer?
  if (!segment_.CanAllocate(sizeof(TraceBatchInvocationInfo)) &&
      !FlushSegment()) {
    // We failed to allocate a new buffer.
    return NULL;
  }

  DCHECK(segment_.header != NULL);

  batch_ = segment_.AllocateTraceRecord<TraceBatchInvocationInfo>();
  return &batch_->invocations[0];
}
683 :
// Exchanges the current trace segment for a fresh one. Clears the cache
// first, since all cached pointers into the old segment become invalid.
// Returns false if the exchange fails.
bool Profiler::ThreadState::FlushSegment() {
  ClearCache();
  return profiler_->session_.ExchangeBuffer(&segment_);
}
688 :
// Drops all cached pointers into the current trace segment (the open batch
// record and the invocation cache entries that point into it).
void Profiler::ThreadState::ClearCache() {
  batch_ = NULL;
  invocations_.clear();
}
693 :
694 E : void Profiler::OnThreadDetach() {
695 E : ThreadState* state = GetThreadState();
696 E : if (state != NULL)
697 E : thread_state_manager_.MarkForDeath(state);
698 E : }
699 :
// Given a pointer to a return address slot, follows any of our return
// thunks (possibly chained via tail calls) to the location holding the
// real return address. Returns the input location unchanged if it doesn't
// point at one of our thunks.
RetAddr* Profiler::ResolveReturnAddressLocation(RetAddr* pc_location) {
  base::AutoLock lock(lock_);

  // In case of tail-call and tail recursion elimination, we can get chained
  // thunks, so we loop around here until we resolve to a non-thunk.
  while (true) {
    // See whether the return address is one of our thunks.
    RetAddr ret_addr = *pc_location;

    // Compute the page this return address lives in.
    // Thunks are allocated in pages tracked by pages_, so membership of the
    // 4K-aligned page is the discriminator.
    const void* page = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(ret_addr) & ~0xFFF);
    if (!std::binary_search(pages_.begin(), pages_.end(), page))
      return pc_location;

    // It's one of our own, redirect to the thunk's stash.
    ThreadState::Thunk* thunk =
        reinterpret_cast<ThreadState::Thunk*>(const_cast<void*>(ret_addr));

    ThreadState::ThunkData* data = ThreadState::DataFromThunk(thunk);

    // Update the PC location and go around again, in case this
    // thunk links to another one.
    pc_location = &data->caller;
  }
}
726 :
727 : void Profiler::OnModuleEntry(EntryFrame* entry_frame,
728 : FuncAddr function,
729 E : uint64 cycles) {
730 : // The function invoked has a DllMain-like signature.
731 : // Get the module and reason from its invocation record.
732 E : HMODULE module = reinterpret_cast<HMODULE>(entry_frame->args[0]);
733 E : DWORD reason = entry_frame->args[1];
734 :
735 : // Only log module additions.
736 E : bool should_log_module = false;
737 E : switch (reason) {
738 : case DLL_PROCESS_ATTACH:
739 : case DLL_THREAD_ATTACH:
740 E : should_log_module = true;
741 E : break;
742 :
743 : case DLL_PROCESS_DETACH:
744 : case DLL_THREAD_DETACH:
745 E : break;
746 :
747 : default:
748 i : LOG(WARNING) << "Unrecognized module event: " << reason << ".";
749 : break;
750 : }
751 :
752 : // Make sure we only log each module once per process.
753 E : bool is_new_module = false;
754 E : if (should_log_module) {
755 E : base::AutoLock lock(lock_);
756 :
757 E : is_new_module = logged_modules_.insert(module).second;
758 E : }
759 :
760 E : ThreadState* data = GetOrAllocateThreadState();
761 E : DCHECK(data != NULL);
762 E : if (data == NULL)
763 i : return;
764 :
765 E : if (is_new_module) {
766 : // Delegate the logging to our per-thread data.
767 E : data->LogAllModules(module);
768 : }
769 :
770 : // Handle the function entry.
771 E : data->OnFunctionEntry(entry_frame, function, cycles);
772 E : }
773 :
774 E : void Profiler::OnPageAdded(const void* page) {
775 E : base::AutoLock lock(lock_);
776 :
777 : PageVector::iterator it =
778 E : std::lower_bound(pages_.begin(), pages_.end(), page);
779 E : DCHECK(it == pages_.end() || *it != page);
780 E : pages_.insert(it, page);
781 E : }
782 :
783 E : void Profiler::OnPageRemoved(const void* page) {
784 E : base::AutoLock lock(lock_);
785 :
786 : PageVector::iterator it =
787 E : std::lower_bound(pages_.begin(), pages_.end(), page);
788 : // The page must be in our list.
789 E : DCHECK(it != pages_.end());
790 E : DCHECK_EQ(page, *it);
791 E : pages_.erase(it);
792 E : }
793 :
794 E : void Profiler::OnThreadName(const base::StringPiece& thread_name) {
795 E : ThreadState* state = GetOrAllocateThreadState();
796 E : if (state != NULL)
797 E : state->LogThreadName(thread_name);
798 E : }
799 :
800 E : LONG CALLBACK Profiler::ExceptionHandler(EXCEPTION_POINTERS* ex_info) {
801 : // Log the thread if this is the VC thread name exception.
802 : if (ex_info->ExceptionRecord->ExceptionCode == kVCThreadNameException &&
803 : ex_info->ExceptionRecord->NumberParameters ==
804 E : sizeof(THREADNAME_INFO)/sizeof(DWORD)) {
805 : const THREADNAME_INFO* info =
806 : reinterpret_cast<const THREADNAME_INFO*>(
807 E : &ex_info->ExceptionRecord->ExceptionInformation);
808 :
809 E : if (info->dwType == 0x1000) {
810 E : instance_.OnThreadName(info->szName);
811 E : } else {
812 i : LOG(WARNING) << "Unrecognized event type " << info->dwType;
813 : }
814 : }
815 :
816 E : return EXCEPTION_CONTINUE_SEARCH;
817 E : }
818 :
819 : void Profiler::OnDllEvent(agent::common::DllNotificationWatcher::EventType type,
820 : HMODULE module,
821 : size_t module_size,
822 : const base::StringPiece16& dll_path,
823 E : const base::StringPiece16& dll_base_name) {
824 E : if (type == agent::common::DllNotificationWatcher::kDllLoaded) {
825 : // Bail early if there's no session.
826 E : if (session_.IsDisabled())
827 i : return;
828 :
829 : // Trace the load event.
830 E : ThreadState* state = GetOrAllocateThreadState();
831 E : if (state != NULL)
832 E : state->LogModule(module);
833 : }
834 E : }
835 :
// Construction order matters: the RPC session and first thread state come
// up before the exception handler and DLL watcher are registered, so those
// callbacks always find usable state.
Profiler::Profiler() : handler_registration_(NULL) {
  // Create our RPC session and allocate our initial trace segment on creation,
  // aka at load time.
  ThreadState* data = CreateFirstThreadStateAndSession();
  CHECK(data != NULL) << "Failed to allocate thread local state.";

  // Register first (TRUE) so we see thread-name exceptions before other
  // handlers.
  handler_registration_ = ::AddVectoredExceptionHandler(TRUE, ExceptionHandler);

  dll_watcher_.Init(base::Bind(&Profiler::OnDllEvent, base::Unretained(this)));
}
846 :
// Tear-down mirrors construction in reverse: DLL watcher first, then this
// thread's state, then the vectored exception handler.
Profiler::~Profiler() {
  // Since the DLL notification callback depends on thread and session state,
  // let's tear it down first. Note that this grabs the loader's lock,
  // so there's deadlock potential here, but no other thread will get a DLL
  // notification after this call returns.
  dll_watcher_.Reset();

  // Typically, this will happen on the last thread in the process. We must
  // explicitly clean up this thread's state as it will otherwise leak.
  FreeThreadState();

  // Unregister our VEH.
  if (handler_registration_ != NULL) {
    ::RemoveVectoredExceptionHandler(handler_registration_);
    handler_registration_ = NULL;
  }
}
864 :
// Allocates the calling thread's state, then brings up the RPC session
// using that state's segment as the first trace buffer.
Profiler::ThreadState* Profiler::CreateFirstThreadStateAndSession() {
  Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();

  // Create the session (and allocate the first segment).
  trace::client::InitializeRpcSession(&session_, data->segment());

  return data;
}
873 :
874 E : Profiler::ThreadState* Profiler::GetOrAllocateThreadState() {
875 E : Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
876 E : if (!data->segment()->write_ptr && session_.IsTracing()) {
877 E : session_.AllocateBuffer(data->segment());
878 : }
879 E : return data;
880 E : }
881 :
882 E : Profiler::ThreadState* Profiler::GetOrAllocateThreadStateImpl() {
883 E : ThreadState *data = tls_.Get();
884 E : if (data != NULL)
885 E : return data;
886 :
887 E : data = new ThreadState(this);
888 E : if (data == NULL) {
889 i : LOG(ERROR) << "Unable to allocate per-thread data";
890 i : return NULL;
891 : }
892 :
893 E : thread_state_manager_.Register(data);
894 E : tls_.Set(data);
895 :
896 E : return data;
897 E : }
898 :
// Returns the calling thread's state, or NULL if none has been allocated.
Profiler::ThreadState* Profiler::GetThreadState() const {
  return tls_.Get();
}
902 :
903 E : void Profiler::FreeThreadState() {
904 E : ThreadState *data = GetThreadState();
905 E : if (data != NULL) {
906 E : tls_.Set(NULL);
907 E : thread_state_manager_.Unregister(data);
908 E : delete data;
909 : }
910 E : }
911 :
// Hook target of _indirect_penter_dllmain: handles DllMain-like entries.
void WINAPI Profiler::DllMainEntryHook(EntryFrame* entry_frame,
                                       FuncAddr function,
                                       uint64 cycles) {
  // The code below may touch last error; preserve it for the hooked code.
  ScopedLastErrorKeeper keep_last_error;

  instance_.OnModuleEntry(entry_frame, function, cycles);
}
919 :
920 : void WINAPI Profiler::FunctionEntryHook(EntryFrame* entry_frame,
921 : FuncAddr function,
922 E : uint64 cycles) {
923 E : ScopedLastErrorKeeper keep_last_error;
924 :
925 E : ThreadState* data = instance_.GetOrAllocateThreadState();
926 E : DCHECK(data != NULL);
927 E : if (data != NULL)
928 E : data->OnFunctionEntry(entry_frame, function, cycles);
929 E : }
930 :
931 : void WINAPI Profiler::OnV8FunctionEntry(FuncAddr function,
932 : RetAddr* return_addr_location,
933 E : uint64 cycles) {
934 E : ScopedLastErrorKeeper keep_last_error;
935 :
936 E : ThreadState* data = instance_.GetOrAllocateThreadState();
937 E : if (data != NULL)
938 E : data->OnV8FunctionEntry(function, return_addr_location, cycles);
939 E : }
940 :
// Registers a dynamic symbol spanning [address, address + length) with the
// shared symbol map under the given (name, name_len) name.
void Profiler::AddSymbol(const void* address, size_t length,
                         const char* name, size_t name_len) {
  symbol_map_.AddSymbol(address, length, base::StringPiece(name, name_len));
}
945 :
// Records that a dynamic symbol moved from old_address to new_address,
// bumping its move count so cached invocation records get invalidated.
void Profiler::MoveSymbol(const void* old_address, const void* new_address) {
  symbol_map_.MoveSymbol(old_address, new_address);
}
949 :
950 : } // namespace profiler
951 : } // namespace agent
|