1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the profiler DLL.
16 : #include "syzygy/agent/profiler/profiler.h"
17 :
18 : #include <windows.h>
19 : #include <algorithm>
20 :
21 : #include "base/at_exit.h"
22 : #include "base/bind.h"
23 : #include "base/command_line.h"
24 : #include "base/environment.h"
25 : #include "base/logging.h"
26 : #include "base/memory/scoped_ptr.h"
27 : #include "base/strings/string_util.h"
28 : #include "base/strings/utf_string_conversions.h"
29 : #include "base/win/pe_image.h"
30 : #include "base/win/scoped_handle.h"
31 : #include "syzygy/agent/common/agent.h"
32 : #include "syzygy/agent/common/dlist.h"
33 : #include "syzygy/agent/common/process_utils.h"
34 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
35 : #include "syzygy/agent/profiler/return_thunk_factory.h"
36 : #include "syzygy/common/logging.h"
37 : #include "syzygy/trace/client/client_utils.h"
38 : #include "syzygy/trace/protocol/call_trace_defs.h"
39 :
40 : namespace {
41 :
42 : using agent::common::ScopedLastErrorKeeper;
43 :
44 : typedef std::pair<RetAddr, FuncAddr> InvocationKey;
45 :
46 : class HashInvocationKey {
47 : public:
48 : static const size_t bucket_size = 4;
49 : static const size_t min_buckets = 8;
50 :
51 E : size_t operator()(const InvocationKey& key) const {
52 : return reinterpret_cast<size_t>(key.first) ^
53 E : reinterpret_cast<size_t>(key.second);
54 E : }
55 :
56 E : bool operator()(const InvocationKey& a, const InvocationKey& b) const {
57 E : return a < b;
58 E : }
59 : };
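// Note: the bucket_size/min_buckets constants plus the hash and less-than
// operator() overloads above match the hash_compare-style traits that
// base::hash_map expects when backed by MSVC's stdext::hash_map (an
// assumption about the toolchain this code targets).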
60 :
61 : using agent::profiler::SymbolMap;
62 :
63 : struct InvocationValue {
64 : // This invocation entry's caller's dynamic symbol, if any.
65 : scoped_refptr<SymbolMap::Symbol> caller_symbol;
66 : // The last observed move count for caller_symbol.
67 : int32 caller_move_count;
68 :
69 : // This invocation entry's callee's dynamic symbol, if any.
70 : scoped_refptr<SymbolMap::Symbol> function_symbol;
71 : // The last observed move count for function_symbol.
72 : int32 function_move_count;
73 :
74 : // Points to the trace buffer entry for the respective function.
75 : InvocationInfo* info;
76 : };
77 :
78 : typedef base::hash_map<
79 : InvocationKey, InvocationValue, HashInvocationKey> InvocationMap;
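// Each thread caches one InvocationInfo pointer per (caller return address,
// function) pair observed in its current trace buffer, so repeated calls from
// the same call site only update counters in place; see RecordInvocation
// below.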
80 :
81 :
82 : // The information on how to set the thread name comes from
83 : // an MSDN article: http://msdn2.microsoft.com/en-us/library/xcb2z8hs.aspx
84 : const DWORD kVCThreadNameException = 0x406D1388;
85 :
86 : typedef struct tagTHREADNAME_INFO {
87 : DWORD dwType; // Must be 0x1000.
88 : LPCSTR szName; // Pointer to name (in user addr space).
89 : DWORD dwThreadID; // Thread ID (-1=caller thread).
90 : DWORD dwFlags; // Reserved for future use, must be zero.
91 : } THREADNAME_INFO;
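// For reference, the MSDN idiom this corresponds to has the debuggee raise
// the exception roughly like so (hypothetical sketch, not called here):
//   THREADNAME_INFO info = { 0x1000, "my thread", static_cast<DWORD>(-1), 0 };
//   ::RaiseException(kVCThreadNameException, 0,
//                    sizeof(info) / sizeof(ULONG_PTR),
//                    reinterpret_cast<ULONG_PTR*>(&info));
// Profiler::ExceptionHandler below intercepts that exception to pick up
// thread names for the trace.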
92 :
93 : } // namespace
94 :
95 : // See client.cc for a description of the unconventional
96 : // calling conventions for this function.
97 i : extern "C" void __declspec(naked) _indirect_penter() {
98 : __asm {
99 : // Stash volatile registers.
100 i : push eax
101 i : push edx
102 :
103 : // Get the current cycle time ASAP.
104 i : rdtsc
105 :
106 : // Save the value of eax for later; we need the register to stash the flags.
107 i : push ecx
108 i : mov ecx, eax
109 :
110 : // Save the low byte of the flags into AH.
111 i : lahf
112 : // Save the overflow flag into AL.
113 i : seto al
114 :
115 : // Stash the flags to stack.
116 i : push eax
117 :
118 : // Push the cycle time arg.
119 i : push edx
120 i : push ecx
121 :
122 : // Retrieve the original function address, pushed by our caller.
123 i : mov eax, DWORD PTR[esp + 0x18]
124 i : push eax
125 :
126 : // Calculate the position of the return address on stack, and
127 : // push it. This becomes the EntryFrame argument.
128 i : lea eax, DWORD PTR[esp + 0x20]
129 i : push eax
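// At this point the stack holds, from ESP up: the EntryFrame pointer, the
// original function address, the 64-bit cycle count (low then high), the
// stashed flags, the saved ECX, EDX and EAX, the function address pushed by
// our caller (consumed by the final ret below), and finally the caller's
// return address, which is the slot EntryFrame points at.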
130 i : call agent::profiler::Profiler::FunctionEntryHook
131 :
132 : // Restore volatile registers.
133 i : pop eax
134 : // AL is set to 1 if the overflow flag was set before the call to
135 : // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
136 : // flag.
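// (0x01 + 0x7f = 0x80 overflows a signed byte and so sets OF again, while
// 0x00 + 0x7f = 0x7f leaves OF clear; SAHF below then restores the remaining
// flags from AH.)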
137 i : add al, 0x7f
138 : // Restore the low byte of the flags.
139 i : sahf
140 :
141 i : pop ecx
142 i : pop edx
143 i : pop eax
144 :
145 : // Return to the address pushed by our caller.
146 i : ret
147 : }
148 : }
149 :
150 i : extern "C" void __declspec(naked) _indirect_penter_dllmain() {
151 : __asm {
152 : // Stash volatile registers.
153 i : push eax
154 i : push edx
155 :
156 : // Get the current cycle time ASAP.
157 i : rdtsc
158 :
159 : // Save the value of eax for later; we need the register to stash the flags.
160 i : push ecx
161 i : mov ecx, eax
162 :
163 : // Save the low byte of the flags into AH.
164 i : lahf
165 : // Save the overflow flag into AL.
166 i : seto al
167 :
168 : // Stash the flags to stack.
169 i : push eax
170 :
171 : // Push the cycle time arg.
172 i : push edx
173 i : push ecx
174 :
175 : // Retrieve the original function address, pushed by our caller.
176 i : mov eax, DWORD PTR[esp + 0x18]
177 i : push eax
178 :
179 : // Calculate the position of the return address on stack, and
180 : // push it. This becomes the EntryFrame argument.
181 i : lea eax, DWORD PTR[esp + 0x20]
182 i : push eax
183 i : call agent::profiler::Profiler::DllMainEntryHook
184 :
185 : // Restore volatile registers.
186 i : pop eax
187 : // AL is set to 1 if the overflow flag was set before the call to
188 : // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
189 : // flag.
190 i : add al, 0x7f
191 : // Restore the low byte of the flags.
192 i : sahf
193 :
194 i : pop ecx
195 i : pop edx
196 i : pop eax
197 :
198 : // Return to the address pushed by our caller.
199 i : ret
200 : }
201 : }
202 :
203 : // On entry, pc_location should point to a location on our own stack.
204 : extern "C" uintptr_t __cdecl ResolveReturnAddressLocation(
205 E : uintptr_t pc_location) {
206 : using agent::profiler::Profiler;
207 : return reinterpret_cast<uintptr_t>(
208 : Profiler::instance().ResolveReturnAddressLocation(
209 E : reinterpret_cast<RetAddr*>(pc_location)));
210 E : }
211 :
212 : // This function needs to match the declaration of FunctionEntryHook in the V8
213 : // API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
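// At the time of writing that declaration reads, roughly:
//   typedef void (*FunctionEntryHook)(uintptr_t function,
//                                     uintptr_t return_addr_location);
// where return_addr_location is the stack address holding the return address
// of the function just entered.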
214 : extern "C" __declspec(naked) void __cdecl OnDynamicFunctionEntry(
215 i : uintptr_t function, uintptr_t return_addr_location) {
216 : __asm {
217 : // Grab the current time ASAP.
218 i : rdtsc
219 :
220 : // Push the cycle time arg.
221 i : push edx
222 i : push eax
223 :
224 : // Duplicate the function and return_addr_location arguments.
225 i : push DWORD PTR[esp + 0x10]
226 i : push DWORD PTR[esp + 0x10]
227 :
228 i : call agent::profiler::Profiler::OnV8FunctionEntry
229 :
230 i : ret
231 : }
232 : }
233 :
234 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
235 : using agent::profiler::Profiler;
236 :
237 : // Our AtExit manager required by base.
238 : static base::AtExitManager* at_exit = NULL;
239 :
240 E : agent::common::InitializeCrt();
241 :
242 E : switch (reason) {
243 : case DLL_PROCESS_ATTACH:
244 E : DCHECK(at_exit == NULL);
245 E : at_exit = new base::AtExitManager();
246 :
247 E : CommandLine::Init(0, NULL);
248 E : common::InitLoggingForDll(L"profiler");
249 E : break;
250 :
251 : case DLL_THREAD_DETACH:
252 E : Profiler::instance().OnThreadDetach();
253 E : break;
254 :
255 : case DLL_PROCESS_DETACH:
256 E : CommandLine::Reset();
257 E : DCHECK(at_exit != NULL);
258 E : delete at_exit;
259 E : at_exit = NULL;
260 : break;
261 :
262 : default:
263 : break;
264 : }
265 :
266 E : return TRUE;
267 E : }
268 :
269 : void __cdecl AddDynamicSymbol(const void* address, size_t length,
270 E : const char* name, size_t name_len) {
271 : using agent::profiler::Profiler;
272 :
273 E : Profiler::instance().AddSymbol(address, length, name, name_len);
274 E : }
275 :
276 : void __cdecl MoveDynamicSymbol(const void* old_address,
277 E : const void* new_address) {
278 : using agent::profiler::Profiler;
279 :
280 E : Profiler::instance().MoveSymbol(old_address, new_address);
281 E : }
282 :
283 : namespace agent {
284 : namespace profiler {
285 :
286 : // All tracing runs through this object.
287 E : agent::profiler::Profiler Profiler::instance_;
288 :
289 : class Profiler::ThreadState
290 : : public ReturnThunkFactoryImpl<Profiler::ThreadState>,
291 : public agent::common::ThreadStateBase {
292 : public:
293 : explicit ThreadState(Profiler* profiler);
294 : ~ThreadState();
295 :
296 : // Logs @p module and all other modules in the process, then flushes
297 : // the current trace buffer.
298 : void LogAllModules(HMODULE module);
299 :
300 : // Logs @p module.
301 : void LogModule(HMODULE module);
302 :
303 : // Logs @p thread_name as the current thread's name.
304 : void LogThreadName(const base::StringPiece& thread_name);
305 :
306 : // Logs @p symbol into the trace.
307 : void LogSymbol(SymbolMap::Symbol* symbol);
308 :
309 : // Processes a single function entry.
310 : void OnFunctionEntry(EntryFrame* entry_frame,
311 : FuncAddr function,
312 : uint64 cycles);
313 :
314 : // Processes a single V8 function entry.
315 : void OnV8FunctionEntry(FuncAddr function,
316 : RetAddr* return_address_location,
317 : uint64 cycles);
318 :
319 : // @name Callback notification implementation.
320 : // @{
321 : virtual void OnPageAdded(const void* page) OVERRIDE;
322 : virtual void OnPageRemoved(const void* page) OVERRIDE;
323 : // @}
324 :
325 : // Function exit hook.
326 : void OnFunctionExit(const ThunkData* data, uint64 cycles_exit);
327 :
328 E : trace::client::TraceFileSegment* segment() { return &segment_; }
329 :
330 : private:
331 : friend class Profiler;
332 :
333 : void RecordInvocation(RetAddr caller,
334 : FuncAddr function,
335 : uint64 cycles);
336 :
337 : void UpdateOverhead(uint64 entry_cycles);
338 : InvocationInfo* AllocateInvocationInfo();
339 : void ClearCache();
340 : bool FlushSegment();
341 :
342 : // The profiler we're attached to.
343 : Profiler* profiler_;
344 :
345 : // We keep a running tally of the rough amount of wall clock cycles spent
346 : // inside the profiler. We then subtract the profiler's overhead from the
347 : // wall clock cycle timer on each measurement. This results in a timer that
348 : // measures time exclusive of profiling overhead.
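// Concretely, an invocation is charged
//   (exit TSC - entry TSC) - (overhead accrued between entry and exit),
// since cycles_entry is stored net of the overhead tally at entry and the
// exit hook subtracts the tally again at exit.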
349 : uint64 cycles_overhead_;
350 :
351 : // The invocations we've recorded in our buffer.
352 : InvocationMap invocations_;
353 :
354 : // The trace file segment we're recording to.
355 : trace::client::TraceFileSegment segment_;
356 :
357 : // The current batch record we're writing to, if any.
358 : TraceBatchInvocationInfo* batch_;
359 :
360 : // The set of modules we've logged.
361 : ModuleSet logged_modules_;
362 : };
363 :
364 : Profiler::ThreadState::ThreadState(Profiler* profiler)
365 : : profiler_(profiler),
366 : cycles_overhead_(0LL),
367 E : batch_(NULL) {
368 E : Initialize();
369 E : }
370 :
371 E : Profiler::ThreadState::~ThreadState() {
372 E : ClearCache();
373 :
374 : // If we have an outstanding buffer, let's deallocate it now.
375 E : if (segment_.write_ptr != NULL)
376 E : profiler_->session_.ReturnBuffer(&segment_);
377 :
378 E : Uninitialize();
379 E : }
380 :
381 E : void Profiler::ThreadState::LogAllModules(HMODULE module) {
382 : // Bail early if we're disabled.
383 E : if (profiler_->session_.IsDisabled())
384 E : return;
385 :
386 E : agent::common::ModuleVector modules;
387 E : agent::common::GetProcessModules(&modules);
388 :
389 : // Our module should be in the process modules.
390 E : DCHECK(std::find(modules.begin(), modules.end(), module) != modules.end());
391 :
392 E : for (size_t i = 0; i < modules.size(); ++i) {
393 E : DCHECK(modules[i] != NULL);
394 E : LogModule(modules[i]);
395 E : }
396 :
397 : // We need to flush module events right away, so that the module is
398 : // defined in the trace file before events using that module start to
399 : // occur (in another thread).
400 E : FlushSegment();
401 E : }
402 :
403 E : void Profiler::ThreadState::LogModule(HMODULE module) {
404 : // This may flush our buffer, so let's clear our cache.
405 E : ClearCache();
406 E : agent::common::LogModule(module, &profiler_->session_, &segment_);
407 E : }
408 :
409 : void Profiler::ThreadState::LogThreadName(
410 E : const base::StringPiece& thread_name) {
411 E : if (thread_name.empty())
412 i : return;
413 :
414 : // Make sure the event we're about to write will fit.
415 E : if (!segment_.CanAllocate(thread_name.size() + 1) && !FlushSegment()) {
416 : // Failed to allocate a new segment.
417 i : return;
418 : }
419 :
420 E : DCHECK(segment_.CanAllocate(thread_name.size() + 1));
421 E : batch_ = NULL;
422 :
423 : // Allocate a record in the log.
424 : TraceThreadNameInfo* thread_name_event =
425 : reinterpret_cast<TraceThreadNameInfo*>(
426 : segment_.AllocateTraceRecordImpl(
427 E : TRACE_THREAD_NAME, thread_name.size() + 1));
428 E : DCHECK(thread_name_event != NULL);
429 : base::strlcpy(thread_name_event->thread_name,
430 E : thread_name.data(), thread_name.size() + 1);
431 E : }
432 :
433 E : void Profiler::ThreadState::LogSymbol(SymbolMap::Symbol* symbol) {
434 E : DCHECK(symbol != NULL);
435 E : DCHECK_NE(0, symbol->id());
436 :
437 : size_t symbol_size =
438 E : FIELD_OFFSET(TraceDynamicSymbol, symbol_name) + symbol->name().size() + 1;
439 :
440 E : if (!segment_.CanAllocate(symbol_size) && !FlushSegment()) {
441 : // Failed to allocate the symbol record.
442 i : return;
443 : }
444 :
445 E : DCHECK(segment_.CanAllocate(symbol_size));
446 E : batch_ = NULL;
447 :
448 : // Allocate a record in the log.
449 : TraceDynamicSymbol* dynamic_symbol_event =
450 : reinterpret_cast<TraceDynamicSymbol*>(
451 : segment_.AllocateTraceRecordImpl(
452 E : TRACE_DYNAMIC_SYMBOL, symbol_size));
453 E : DCHECK(dynamic_symbol_event != NULL);
454 E : dynamic_symbol_event->symbol_id = symbol->id();
455 : base::strlcpy(dynamic_symbol_event->symbol_name,
456 E : symbol->name().data(), symbol->name().size() + 1);
457 E : }
458 :
459 : void Profiler::ThreadState::OnFunctionEntry(EntryFrame* entry_frame,
460 : FuncAddr function,
461 E : uint64 cycles) {
462 E : if (profiler_->session_.IsDisabled())
463 E : return;
464 :
465 : // Record the details of the entry.
466 : // Note that on tail-recursion and tail-call elimination, the caller recorded
467 : // here will be a thunk. We cater for this case on exit as best we can.
468 E : ThunkData* data = MakeThunk(entry_frame->retaddr);
469 E : DCHECK(data != NULL);
470 E : data->caller = entry_frame->retaddr;
471 E : data->function = function;
472 E : data->cycles_entry = cycles - cycles_overhead_;
473 :
474 E : entry_frame->retaddr = data->thunk;
475 :
476 E : UpdateOverhead(cycles);
477 E : }
478 :
479 : void Profiler::ThreadState::OnV8FunctionEntry(FuncAddr function,
480 : RetAddr* return_address_location,
481 E : uint64 cycles) {
482 E : if (profiler_->session_.IsDisabled())
483 i : return;
484 :
485 : // Record the details of the entry.
486 :
487 : // TODO(siggi): Note that we want to do different exit processing here,
488 : // as we know "function" is a dynamic symbol - it might be better to
489 : // record the fact here than to force a lookup on RecordInvocation.
490 :
491 : // Note that on tail-recursion and tail-call elimination, the caller recorded
492 : // here will be a thunk. We cater for this case on exit as best we can.
493 E : ThunkData* data = MakeThunk(*return_address_location);
494 E : DCHECK(data != NULL);
495 E : data->caller = *return_address_location;
496 E : data->function = function;
497 E : data->cycles_entry = cycles - cycles_overhead_;
498 :
499 E : *return_address_location = data->thunk;
500 :
501 E : UpdateOverhead(cycles);
502 E : }
503 :
504 : void Profiler::ThreadState::OnFunctionExit(const ThunkData* data,
505 E : uint64 cycles_exit) {
506 : // Calculate the number of cycles in the invocation, exclusive our overhead.
507 E : uint64 cycles_executed = cycles_exit - cycles_overhead_ - data->cycles_entry;
508 :
509 : // See if the return address resolves to a thunk, which indicates
510 : // tail recursion or tail call elimination. In that case we record the
511 : // calling function as caller, which isn't totally accurate as that'll
512 : // attribute the cost to the first line of the calling function. In the
513 : // absence of more information, it's the best we can do, however.
514 : // TODO(siggi): Move this into RecordInvocation, as we can elide the lookup
515 : // on a cache hit.
516 E : Thunk* ret_thunk = CastToThunk(data->caller);
517 E : if (ret_thunk == NULL) {
518 E : RecordInvocation(data->caller, data->function, cycles_executed);
519 E : } else {
520 E : ThunkData* ret_data = DataFromThunk(ret_thunk);
521 E : RecordInvocation(ret_data->function, data->function, cycles_executed);
522 : }
523 :
524 E : UpdateOverhead(cycles_exit);
525 E : }
526 :
527 E : void Profiler::ThreadState::OnPageAdded(const void* page) {
528 E : profiler_->OnPageAdded(page);
529 E : }
530 :
531 E : void Profiler::ThreadState::OnPageRemoved(const void* page) {
532 E : profiler_->OnPageRemoved(page);
533 E : }
534 :
535 : void Profiler::ThreadState::RecordInvocation(RetAddr caller,
536 : FuncAddr function,
537 E : uint64 duration_cycles) {
538 : // See whether we've already recorded an entry for this function.
539 E : InvocationKey key(caller, function);
540 E : InvocationMap::iterator it = invocations_.find(key);
541 E : if (it != invocations_.end()) {
542 : // Yup, we already have an entry, validate it.
543 E : InvocationValue& value = it->second;
544 :
545 : if ((value.caller_symbol == NULL ||
546 : value.caller_symbol->move_count() == value.caller_move_count) &&
547 : (value.function_symbol == NULL ||
548 E : value.function_symbol->move_count() == value.function_move_count)) {
549 : // The entry is still good, tally the new data.
550 E : ++(value.info->num_calls);
551 E : value.info->cycles_sum += duration_cycles;
552 E : if (duration_cycles < value.info->cycles_min) {
553 E : value.info->cycles_min = duration_cycles;
554 i : } else if (duration_cycles > value.info->cycles_max) {
555 i : value.info->cycles_max = duration_cycles;
556 : }
557 :
558 : // Early out on success.
559 E : return;
560 i : } else {
561 : // The entry is not valid any more, discard it.
562 E : DCHECK(value.caller_symbol != NULL || value.function_symbol != NULL);
563 :
564 E : invocations_.erase(it);
565 : }
566 : }
567 E : DCHECK(invocations_.find(key) == invocations_.end());
568 :
569 : // We don't have an entry, allocate a new one for this invocation.
570 : // The code below may touch last error.
571 E : ScopedLastErrorKeeper keep_last_error;
572 :
573 : scoped_refptr<SymbolMap::Symbol> caller_symbol =
574 E : profiler_->symbol_map_.FindSymbol(caller);
575 :
576 : // TODO(siggi): This can perhaps be optimized by keeping track of which
577 : // entry hook was invoked. This will however require setting an extra
578 : // bool on every entry, so will require measurement to see whether it's
579 : // a win.
580 : scoped_refptr<SymbolMap::Symbol> function_symbol =
581 E : profiler_->symbol_map_.FindSymbol(function);
582 :
583 : // Trace the symbols if this is the first time either one is observed.
584 E : if (caller_symbol != NULL && caller_symbol->EnsureHasId()) {
585 : // TODO(siggi): It might lead to worst-case behavior to log symbols into
586 : // the same trace buffer as we store invocations, as we're likely to
587 : // alternate symbols and single-entry invocation batches. Fixme.
588 E : LogSymbol(caller_symbol);
589 : }
590 E : if (function_symbol != NULL && function_symbol->EnsureHasId()) {
591 : // TODO(siggi): See above.
592 E : LogSymbol(function_symbol);
593 : }
594 :
595 E : InvocationInfo* info = AllocateInvocationInfo();
596 E : if (info != NULL) {
597 E : InvocationValue& value = invocations_[key];
598 E : value.info = info;
599 E : value.caller_symbol = caller_symbol;
600 E : if (caller_symbol != NULL)
601 E : value.caller_move_count = caller_symbol->move_count();
602 E : else
603 E : value.caller_move_count = 0;
604 :
605 E : value.function_symbol = function_symbol;
606 E : if (function_symbol != NULL)
607 E : value.function_move_count = function_symbol->move_count();
608 E : else
609 E : value.function_move_count = 0;
610 :
611 E : if (function_symbol == NULL) {
612 : // We're not in a dynamic function, record the (conventional) function.
613 E : info->function = function;
614 E : info->flags = 0;
615 E : } else {
616 : // We're in a dynamic function symbol, record the details.
617 E : DCHECK_NE(function_symbol->id(), 0);
618 :
619 E : info->function_symbol_id = function_symbol->id();
620 E : info->flags = kFunctionIsSymbol;
621 : }
622 :
623 E : if (caller_symbol == NULL) {
624 : // We're not in a dynamic caller_symbol, record the (conventional) caller.
625 E : info->caller = caller;
626 E : info->caller_offset = 0;
627 E : } else {
628 : // We're in a dynamic caller_symbol, record the details.
629 E : DCHECK_NE(caller_symbol->id(), 0);
630 :
631 E : info->caller_symbol_id = caller_symbol->id();
632 E : info->flags |= kCallerIsSymbol;
633 : info->caller_offset = reinterpret_cast<const uint8*>(caller) -
634 E : reinterpret_cast<const uint8*>(caller_symbol->address());
635 : }
636 :
637 E : info->num_calls = 1;
638 E : info->cycles_min = info->cycles_max = info->cycles_sum = duration_cycles;
639 : }
640 E : }
641 :
642 E : void Profiler::ThreadState::UpdateOverhead(uint64 entry_cycles) {
643 : // TODO(siggi): Measure the fixed overhead on setup,
644 : // then add it on every update.
645 E : cycles_overhead_ += (__rdtsc() - entry_cycles);
646 E : }
647 :
648 E : InvocationInfo* Profiler::ThreadState::AllocateInvocationInfo() {
649 : // This is kind of self-evident for the moment, as an invocation info batch
650 : // contains at least one invocation info as currently declared.
651 : // If this fails, please reconsider your implementation, or else revisit
652 : // the allocation code below.
653 : COMPILE_ASSERT(sizeof(TraceBatchInvocationInfo) >= sizeof(InvocationInfo),
654 : invocation_info_batch_must_be_larger_than_invocation_info);
655 :
656 : // Do we have a record that we can grow?
657 E : if (batch_ != NULL && segment_.CanAllocateRaw(sizeof(InvocationInfo))) {
658 : InvocationInfo* invocation_info =
659 E : reinterpret_cast<InvocationInfo*>(segment_.write_ptr);
660 E : RecordPrefix* prefix = trace::client::GetRecordPrefix(batch_);
661 E : prefix->size += sizeof(InvocationInfo);
662 :
663 : // Update the book-keeping.
664 E : segment_.write_ptr += sizeof(InvocationInfo);
665 E : segment_.header->segment_length += sizeof(InvocationInfo);
666 :
667 E : return invocation_info;
668 : }
669 :
670 : // Do we need to scarf a new buffer?
671 : if (!segment_.CanAllocate(sizeof(TraceBatchInvocationInfo)) &&
672 E : !FlushSegment()) {
673 : // We failed to allocate a new buffer.
674 i : return NULL;
675 : }
676 :
677 E : DCHECK(segment_.header != NULL);
678 :
679 E : batch_ = segment_.AllocateTraceRecord<TraceBatchInvocationInfo>();
680 E : return &batch_->invocations[0];
681 E : }
682 :
683 E : bool Profiler::ThreadState::FlushSegment() {
684 E : ClearCache();
685 E : return profiler_->session_.ExchangeBuffer(&segment_);
686 E : }
687 :
688 E : void Profiler::ThreadState::ClearCache() {
689 E : batch_ = NULL;
690 E : invocations_.clear();
691 E : }
692 :
693 E : void Profiler::OnThreadDetach() {
694 E : ThreadState* state = GetThreadState();
695 E : if (state != NULL)
696 E : thread_state_manager_.MarkForDeath(state);
697 E : }
698 :
699 E : RetAddr* Profiler::ResolveReturnAddressLocation(RetAddr* pc_location) {
700 E : base::AutoLock lock(lock_);
701 :
702 : // In case of tail-call and tail recursion elimination, we can get chained
703 : // thunks, so we loop around here until we resolve to a non-thunk.
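// (A chain forms under tail-call elimination: the callee inherits a return
// slot that already holds the caller's thunk, so the new thunk's stashed
// caller is itself a thunk.)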
704 E : while (true) {
705 : // See whether the return address is one of our thunks.
706 E : RetAddr ret_addr = *pc_location;
707 :
708 : // Compute the page this return address lives in.
709 : const void* page = reinterpret_cast<const void*>(
710 E : reinterpret_cast<uintptr_t>(ret_addr) & ~0xFFF);
711 E : if (!std::binary_search(pages_.begin(), pages_.end(), page))
712 E : return pc_location;
713 :
714 : // It's one of our own, redirect to the thunk's stash.
715 : ThreadState::Thunk* thunk =
716 E : reinterpret_cast<ThreadState::Thunk*>(const_cast<void*>(ret_addr));
717 :
718 E : ThreadState::ThunkData* data = ThreadState::DataFromThunk(thunk);
719 :
720 : // Update the PC location and go around again, in case this
721 : // thunk links to another one.
722 E : pc_location = &data->caller;
723 E : }
724 E : }
725 :
726 : void Profiler::OnModuleEntry(EntryFrame* entry_frame,
727 : FuncAddr function,
728 E : uint64 cycles) {
729 : // The function invoked has a DllMain-like signature.
730 : // Get the module and reason from its invocation record.
731 E : HMODULE module = reinterpret_cast<HMODULE>(entry_frame->args[0]);
732 E : DWORD reason = entry_frame->args[1];
733 :
734 : // Only log module additions.
735 E : bool should_log_module = false;
736 E : switch (reason) {
737 : case DLL_PROCESS_ATTACH:
738 : case DLL_THREAD_ATTACH:
739 E : should_log_module = true;
740 E : break;
741 :
742 : case DLL_PROCESS_DETACH:
743 : case DLL_THREAD_DETACH:
744 E : break;
745 :
746 : default:
747 i : LOG(WARNING) << "Unrecognized module event: " << reason << ".";
748 : break;
749 : }
750 :
751 : // Make sure we only log each module once per process.
752 E : bool is_new_module = false;
753 E : if (should_log_module) {
754 E : base::AutoLock lock(lock_);
755 :
756 E : is_new_module = logged_modules_.insert(module).second;
757 E : }
758 :
759 E : ThreadState* data = GetOrAllocateThreadState();
760 E : DCHECK(data != NULL);
761 E : if (data == NULL)
762 i : return;
763 :
764 E : if (is_new_module) {
765 : // Delegate the logging to our per-thread data.
766 E : data->LogAllModules(module);
767 : }
768 :
769 : // Handle the function entry.
770 E : data->OnFunctionEntry(entry_frame, function, cycles);
771 E : }
772 :
773 E : void Profiler::OnPageAdded(const void* page) {
774 E : base::AutoLock lock(lock_);
775 :
776 : PageVector::iterator it =
777 E : std::lower_bound(pages_.begin(), pages_.end(), page);
778 E : DCHECK(it == pages_.end() || *it != page);
779 E : pages_.insert(it, page);
780 E : }
781 :
782 E : void Profiler::OnPageRemoved(const void* page) {
783 E : base::AutoLock lock(lock_);
784 :
785 : PageVector::iterator it =
786 E : std::lower_bound(pages_.begin(), pages_.end(), page);
787 : // The page must be in our list.
788 E : DCHECK(it != pages_.end());
789 E : DCHECK_EQ(page, *it);
790 E : pages_.erase(it);
791 E : }
792 :
793 E : void Profiler::OnThreadName(const base::StringPiece& thread_name) {
794 E : ThreadState* state = GetOrAllocateThreadState();
795 E : if (state != NULL)
796 E : state->LogThreadName(thread_name);
797 E : }
798 :
799 E : LONG CALLBACK Profiler::ExceptionHandler(EXCEPTION_POINTERS* ex_info) {
800 : // Log the thread if this is the VC thread name exception.
801 : if (ex_info->ExceptionRecord->ExceptionCode == kVCThreadNameException &&
802 : ex_info->ExceptionRecord->NumberParameters ==
803 E : sizeof(THREADNAME_INFO)/sizeof(DWORD)) {
804 : const THREADNAME_INFO* info =
805 : reinterpret_cast<const THREADNAME_INFO*>(
806 E : &ex_info->ExceptionRecord->ExceptionInformation);
807 :
808 E : if (info->dwType == 0x1000) {
809 E : instance_.OnThreadName(info->szName);
810 E : } else {
811 i : LOG(WARNING) << "Unrecognized event type " << info->dwType;
812 : }
813 : }
814 :
815 E : return EXCEPTION_CONTINUE_SEARCH;
816 E : }
817 :
818 : void Profiler::OnDllEvent(agent::common::DllNotificationWatcher::EventType type,
819 : HMODULE module,
820 : size_t module_size,
821 : const base::StringPiece16& dll_path,
822 E : const base::StringPiece16& dll_base_name) {
823 E : if (type == agent::common::DllNotificationWatcher::kDllLoaded) {
824 : // Bail early if there's no session.
825 E : if (session_.IsDisabled())
826 i : return;
827 :
828 : // Trace the load event.
829 E : ThreadState* state = GetOrAllocateThreadState();
830 E : if (state != NULL)
831 E : state->LogModule(module);
832 : }
833 E : }
834 :
835 E : Profiler::Profiler() : handler_registration_(NULL) {
836 : // Create our RPC session and allocate our initial trace segment on creation,
837 : // aka at load time.
838 E : ThreadState* data = CreateFirstThreadStateAndSession();
839 E : CHECK(data != NULL) << "Failed to allocate thread local state.";
840 :
841 E : handler_registration_ = ::AddVectoredExceptionHandler(TRUE, ExceptionHandler);
842 :
843 E : dll_watcher_.Init(base::Bind(&Profiler::OnDllEvent, base::Unretained(this)));
844 E : }
845 :
846 E : Profiler::~Profiler() {
847 : // Since the DLL notification callback depends on thread and session state,
848 : // let's tear it down first. Note that this grabs the loader's lock,
849 : // so there's deadlock potential here, but no other thread will get a DLL
850 : // notification after this call returns.
851 E : dll_watcher_.Reset();
852 :
853 : // Typically, this will happen on the last thread in the process. We must
854 : // explicitly clean up this thread's state as it will otherwise leak.
855 E : FreeThreadState();
856 :
857 : // Unregister our VEH.
858 E : if (handler_registration_ != NULL) {
859 E : ::RemoveVectoredExceptionHandler(handler_registration_);
860 E : handler_registration_ = NULL;
861 : }
862 E : }
863 :
864 E : Profiler::ThreadState* Profiler::CreateFirstThreadStateAndSession() {
865 E : Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
866 :
867 : // Create the session (and allocate the first segment).
868 E : trace::client::InitializeRpcSession(&session_, data->segment());
869 :
870 E : return data;
871 E : }
872 :
873 E : Profiler::ThreadState* Profiler::GetOrAllocateThreadState() {
874 E : Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
875 E : if (!data->segment()->write_ptr && session_.IsTracing()) {
876 E : session_.AllocateBuffer(data->segment());
877 : }
878 E : return data;
879 E : }
880 :
881 E : Profiler::ThreadState* Profiler::GetOrAllocateThreadStateImpl() {
882 E : ThreadState *data = tls_.Get();
883 E : if (data != NULL)
884 E : return data;
885 :
886 E : data = new ThreadState(this);
887 E : if (data == NULL) {
888 i : LOG(ERROR) << "Unable to allocate per-thread data";
889 i : return NULL;
890 : }
891 :
892 E : thread_state_manager_.Register(data);
893 E : tls_.Set(data);
894 :
895 E : return data;
896 E : }
897 :
898 E : Profiler::ThreadState* Profiler::GetThreadState() const {
899 E : return tls_.Get();
900 E : }
901 :
902 E : void Profiler::FreeThreadState() {
903 E : ThreadState *data = GetThreadState();
904 E : if (data != NULL) {
905 E : tls_.Set(NULL);
906 E : thread_state_manager_.Unregister(data);
907 E : delete data;
908 : }
909 E : }
910 :
911 : void WINAPI Profiler::DllMainEntryHook(EntryFrame* entry_frame,
912 : FuncAddr function,
913 E : uint64 cycles) {
914 E : ScopedLastErrorKeeper keep_last_error;
915 :
916 E : instance_.OnModuleEntry(entry_frame, function, cycles);
917 E : }
918 :
919 : void WINAPI Profiler::FunctionEntryHook(EntryFrame* entry_frame,
920 : FuncAddr function,
921 E : uint64 cycles) {
922 E : ScopedLastErrorKeeper keep_last_error;
923 :
924 E : ThreadState* data = instance_.GetOrAllocateThreadState();
925 E : DCHECK(data != NULL);
926 E : if (data != NULL)
927 E : data->OnFunctionEntry(entry_frame, function, cycles);
928 E : }
929 :
930 : void WINAPI Profiler::OnV8FunctionEntry(FuncAddr function,
931 : RetAddr* return_addr_location,
932 E : uint64 cycles) {
933 E : ScopedLastErrorKeeper keep_last_error;
934 :
935 E : ThreadState* data = instance_.GetOrAllocateThreadState();
936 E : if (data != NULL)
937 E : data->OnV8FunctionEntry(function, return_addr_location, cycles);
938 E : }
939 :
940 : void Profiler::AddSymbol(const void* address, size_t length,
941 E : const char* name, size_t name_len) {
942 E : symbol_map_.AddSymbol(address, length, base::StringPiece(name, name_len));
943 E : }
944 :
945 E : void Profiler::MoveSymbol(const void* old_address, const void* new_address) {
946 E : symbol_map_.MoveSymbol(old_address, new_address);
947 E : }
948 :
949 : } // namespace profiler
950 : } // namespace agent