1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the profiler DLL.
16 : #include "syzygy/agent/profiler/profiler.h"
17 :
18 : #include <windows.h>
19 : #include <algorithm>
20 :
21 : #include "base/at_exit.h"
22 : #include "base/command_line.h"
23 : #include "base/environment.h"
24 : #include "base/lazy_instance.h"
25 : #include "base/logging.h"
26 : #include "base/string_util.h"
27 : #include "base/utf_string_conversions.h"
28 : #include "base/memory/scoped_ptr.h"
29 : #include "base/win/pe_image.h"
30 : #include "base/win/scoped_handle.h"
31 : #include "syzygy/agent/common/dlist.h"
32 : #include "syzygy/agent/common/process_utils.h"
33 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
34 : #include "syzygy/agent/profiler/return_thunk_factory.h"
35 : #include "syzygy/common/logging.h"
36 : #include "syzygy/trace/client/client_utils.h"
37 : #include "syzygy/trace/protocol/call_trace_defs.h"
38 :
namespace {

using agent::common::ScopedLastErrorKeeper;

// All tracing runs through this object.
base::LazyInstance<agent::profiler::Profiler> static_profiler_instance =
    LAZY_INSTANCE_INITIALIZER;

// An invocation is identified by the pair (caller return address, invoked
// function address).
typedef std::pair<RetAddr, FuncAddr> InvocationKey;

// Hash functor for InvocationKey in the MSVC hash_compare style: it supplies
// both the hash function and an ordering predicate, plus the bucket_size and
// min_buckets tuning constants that container implementation expects.
class HashInvocationKey {
 public:
  static const size_t bucket_size = 4;
  static const size_t min_buckets = 8;

  // Hashes a key by XOR-combining its two pointers.
  size_t operator()(const InvocationKey& key) const {
    return reinterpret_cast<size_t>(key.first) ^
        reinterpret_cast<size_t>(key.second);
  }

  // Strict weak ordering on keys, as the hash_compare protocol requires.
  // Note this is a less-than comparison, not an equality test.
  bool operator()(const InvocationKey& a, const InvocationKey& b) const {
    return a < b;
  }
};

using agent::profiler::SymbolMap;

// Cached per-invocation data, used to validate and update the trace buffer
// entry for a given (caller, function) pair.
struct InvocationValue {
  // This invocation entry's caller's dynamic symbol, if any.
  scoped_refptr<SymbolMap::Symbol> caller_symbol;
  // The last observed move count for caller_symbol.
  int32 caller_move_count;

  // This invocation entry's callee's dynamic symbol, if any.
  scoped_refptr<SymbolMap::Symbol> function_symbol;
  // The last observed move count for function_symbol.
  int32 function_move_count;

  // Points to the trace buffer entry for the respective function.
  InvocationInfo* info;
};

typedef base::hash_map<
    InvocationKey, InvocationValue, HashInvocationKey> InvocationMap;


// The information on how to set the thread name comes from
// a MSDN article: http://msdn2.microsoft.com/en-us/library/xcb2z8hs.aspx
const DWORD kVCThreadNameException = 0x406D1388;

typedef struct tagTHREADNAME_INFO {
  DWORD dwType;  // Must be 0x1000.
  LPCSTR szName;  // Pointer to name (in user addr space).
  DWORD dwThreadID;  // Thread ID (-1=caller thread).
  DWORD dwFlags;  // Reserved for future use, must be zero.
} THREADNAME_INFO;

}  // namespace
97 :
98 : // See client.cc for a description of the unconventional
99 : // calling conventions for this function.
// See client.cc for a description of the unconventional
// calling conventions for this function.
//
// On entry, [esp] holds the original function address pushed by our caller,
// and [esp + 4] is the return address into the instrumented function's
// caller. This routine preserves all volatile registers and the arithmetic
// flags around the call to FunctionEntryHook.
extern "C" void __declspec(naked) _indirect_penter() {
  __asm {
    // Stash volatile registers.
    push eax
    push edx

    // Get the current cycle time ASAP.
    rdtsc

    // Save the value of eax for later, we need the register to stash the flags.
    push ecx
    mov ecx, eax

    // Save the low byte of the flags into AH.
    lahf
    // Save the overflow flag into AL.
    seto al

    // Stash the flags to stack.
    push eax

    // Push the cycle time arg. The 64-bit TSC value is in edx:ecx at this
    // point (high half in edx, low half moved to ecx above).
    push edx
    push ecx

    // Retrieve the original function address, pushed by our caller.
    // Six dwords have been pushed above, hence the 0x18 offset.
    mov eax, DWORD PTR[esp + 0x18]
    push eax

    // Calculate the position of the return address on stack, and
    // push it. This becomes the EntryFrame argument.
    lea eax, DWORD PTR[esp + 0x20]
    push eax
    call agent::profiler::Profiler::FunctionEntryHook

    // Restore volatile registers. FunctionEntryHook is stdcall and has
    // popped its own arguments; the saved flags dword is now on top.
    pop eax
    // AL is set to 1 if the overflow flag was set before the call to
    // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
    // flag.
    add al, 0x7f
    // Restore the low byte of the flags.
    sahf

    pop ecx
    pop edx
    pop eax

    // Return to the address pushed by our caller.
    ret
  }
}
152 :
// Identical to _indirect_penter above, except that it dispatches to
// DllMainEntryHook, which additionally logs module load/unload events.
// See client.cc for a description of the unconventional calling conventions.
extern "C" void __declspec(naked) _indirect_penter_dllmain() {
  __asm {
    // Stash volatile registers.
    push eax
    push edx

    // Get the current cycle time ASAP.
    rdtsc

    // Save the value of eax for later, we need the register to stash the flags.
    push ecx
    mov ecx, eax

    // Save the low byte of the flags into AH.
    lahf
    // Save the overflow flag into AL.
    seto al

    // Stash the flags to stack.
    push eax

    // Push the cycle time arg. The 64-bit TSC value is in edx:ecx at this
    // point (high half in edx, low half moved to ecx above).
    push edx
    push ecx

    // Retrieve the original function address, pushed by our caller.
    // Six dwords have been pushed above, hence the 0x18 offset.
    mov eax, DWORD PTR[esp + 0x18]
    push eax

    // Calculate the position of the return address on stack, and
    // push it. This becomes the EntryFrame argument.
    lea eax, DWORD PTR[esp + 0x20]
    push eax
    call agent::profiler::Profiler::DllMainEntryHook

    // Restore volatile registers. DllMainEntryHook is stdcall and has
    // popped its own arguments; the saved flags dword is now on top.
    pop eax
    // AL is set to 1 if the overflow flag was set before the call to
    // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
    // flag.
    add al, 0x7f
    // Restore the low byte of the flags.
    sahf

    pop ecx
    pop edx
    pop eax

    // Return to the address pushed by our caller.
    ret
  }
}
205 :
206 : // On entry, pc_location should point to a location on our own stack.
207 : extern "C" uintptr_t __cdecl ResolveReturnAddressLocation(
208 E : uintptr_t pc_location) {
209 : using agent::profiler::Profiler;
210 E : Profiler* profiler = Profiler::Instance();
211 : return reinterpret_cast<uintptr_t>(
212 : profiler->ResolveReturnAddressLocation(
213 E : reinterpret_cast<RetAddr*>(pc_location)));
214 E : }
215 :
216 : // This function needs to match the declaration of FunctionEntryHook in the V8
217 : // API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
// This function needs to match the declaration of FunctionEntryHook in the V8
// API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
//
// As a cdecl function, on entry [esp + 4] is |function| and [esp + 8] is
// |return_addr_location|; the rdtsc result in edx:eax and copies of both
// arguments are forwarded to Profiler::OnV8FunctionEntry.
extern "C" __declspec(naked) void __cdecl OnDynamicFunctionEntry(
    uintptr_t function, uintptr_t return_addr_location) {
  __asm {
    // Grab the current time ASAP.
    rdtsc

    // Push the cycle time arg (64-bit TSC value, high half in edx).
    push edx
    push eax

    // Duplicate the function and return_addr_location arguments. After the
    // pushes above, each argument sits 0x10 bytes up from esp at the time
    // of its push.
    push DWORD PTR[esp + 0x10]
    push DWORD PTR[esp + 0x10]

    // OnV8FunctionEntry is stdcall and pops its own arguments.
    call agent::profiler::Profiler::OnV8FunctionEntry

    ret
  }
}
237 :
238 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
239 : using agent::profiler::Profiler;
240 :
241 : // Our AtExit manager required by base.
242 : static base::AtExitManager* at_exit = NULL;
243 :
244 E : switch (reason) {
245 : case DLL_PROCESS_ATTACH:
246 E : DCHECK(at_exit == NULL);
247 E : at_exit = new base::AtExitManager();
248 :
249 E : CommandLine::Init(0, NULL);
250 E : common::InitLoggingForDll(L"profiler");
251 E : break;
252 :
253 : case DLL_THREAD_DETACH:
254 E : Profiler::Instance()->OnThreadDetach();
255 E : break;
256 :
257 : case DLL_PROCESS_DETACH:
258 E : DCHECK(at_exit != NULL);
259 E : delete at_exit;
260 E : at_exit = NULL;
261 : break;
262 :
263 : default:
264 : break;
265 : }
266 :
267 E : return TRUE;
268 E : }
269 :
270 : void __cdecl AddDynamicSymbol(const void* address, size_t length,
271 E : const char* name, size_t name_len) {
272 : using agent::profiler::Profiler;
273 :
274 E : Profiler* profiler = Profiler::Instance();
275 E : profiler->AddSymbol(address, length, name, name_len);
276 E : }
277 :
278 : void __cdecl MoveDynamicSymbol(const void* old_address,
279 E : const void* new_address) {
280 : using agent::profiler::Profiler;
281 :
282 E : Profiler* profiler = Profiler::Instance();
283 E : profiler->MoveSymbol(old_address, new_address);
284 E : }
285 :
286 : namespace agent {
287 : namespace profiler {
288 :
// Per-thread profiler state. Owns this thread's trace file segment and the
// invocation cache, and doubles as the return-thunk factory for entry/exit
// interception on this thread.
class Profiler::ThreadState
    : public ReturnThunkFactoryImpl<Profiler::ThreadState>,
      public agent::common::ThreadStateBase {
 public:
  explicit ThreadState(Profiler* profiler);
  ~ThreadState();

  // Logs @p module and all other modules in the process, then flushes
  // the current trace buffer.
  void LogAllModules(HMODULE module);

  // Logs @p module.
  void LogModule(HMODULE module);

  // Logs @p thread_name as the current thread's name.
  void LogThreadName(const base::StringPiece& thread_name);

  // Logs @p symbol into the trace.
  void LogSymbol(SymbolMap::Symbol* symbol);

  // Processes a single function entry.
  void OnFunctionEntry(EntryFrame* entry_frame,
                       FuncAddr function,
                       uint64 cycles);

  // Processes a single V8 function entry.
  void OnV8FunctionEntry(FuncAddr function,
                         RetAddr* return_address_location,
                         uint64 cycles);

  // @name Callback notification implementation.
  // @{
  virtual void OnPageAdded(const void* page) OVERRIDE;
  virtual void OnPageRemoved(const void* page) OVERRIDE;
  // @}

  // Function exit hook.
  void OnFunctionExit(const ThunkData* data, uint64 cycles_exit);

  // Accessor for this thread's trace file segment.
  trace::client::TraceFileSegment* segment() { return &segment_; }

 private:
  friend class Profiler;

  // Tallies one completed invocation of @p function from @p caller that
  // took @p cycles to execute.
  void RecordInvocation(RetAddr caller,
                        FuncAddr function,
                        uint64 cycles);

  // Accumulates profiling overhead incurred since @p entry_cycles.
  void UpdateOverhead(uint64 entry_cycles);
  // Allocates a new invocation record in the trace buffer, growing the
  // current batch record or starting a new one as needed.
  InvocationInfo* AllocateInvocationInfo();
  // Drops all cached pointers into the current trace segment.
  void ClearCache();
  // Exchanges the current trace segment for a fresh one.
  bool FlushSegment();

  // The profiler we're attached to.
  Profiler* profiler_;

  // We keep a running tally of the rough amount of wall clock cycles spent
  // inside the profiler. We then subtract the profiler's overhead from the
  // wall clock cycle timer on each measurement. This results in a timer that
  // measures time exclusive of profiling overhead.
  uint64 cycles_overhead_;

  // The invocations we've recorded in our buffer.
  InvocationMap invocations_;

  // The trace file segment we're recording to.
  trace::client::TraceFileSegment segment_;

  // The current batch record we're writing to, if any.
  TraceBatchInvocationInfo* batch_;

  // The set of modules we've logged.
  ModuleSet logged_modules_;
};
363 :
// Binds this state to @p profiler and initializes the thunk factory.
Profiler::ThreadState::ThreadState(Profiler* profiler)
    : profiler_(profiler),
      cycles_overhead_(0LL),
      batch_(NULL) {
  Initialize();
}
370 :
// Flushes any pending cached state, returns the outstanding trace buffer to
// the session and tears down the thunk factory.
Profiler::ThreadState::~ThreadState() {
  ClearCache();

  // If we have an outstanding buffer, let's deallocate it now.
  if (segment_.write_ptr != NULL)
    profiler_->session_.ReturnBuffer(&segment_);

  Uninitialize();
}
380 :
381 E : void Profiler::ThreadState::LogAllModules(HMODULE module) {
382 : // Bail early if we're disabled.
383 E : if (profiler_->session_.IsDisabled())
384 E : return;
385 :
386 E : agent::common::ModuleVector modules;
387 E : agent::common::GetProcessModules(&modules);
388 :
389 : // Our module should be in the process modules.
390 E : DCHECK(std::find(modules.begin(), modules.end(), module) != modules.end());
391 :
392 E : for (size_t i = 0; i < modules.size(); ++i) {
393 E : DCHECK(modules[i] != NULL);
394 E : LogModule(modules[i]);
395 E : }
396 :
397 : // We need to flush module events right away, so that the module is
398 : // defined in the trace file before events using that module start to
399 : // occur (in another thread).
400 E : FlushSegment();
401 E : }
402 :
// Writes a module-load record for @p module to the trace.
void Profiler::ThreadState::LogModule(HMODULE module) {
  // This may flush our buffer, so let's clear our cache, as batch_ and the
  // invocation entries point directly into the buffer.
  ClearCache();
  agent::common::LogModule(module, &profiler_->session_, &segment_);
}
408 :
409 : void Profiler::ThreadState::LogThreadName(
410 E : const base::StringPiece& thread_name) {
411 E : if (thread_name.empty())
412 i : return;
413 :
414 : // Make sure the event we're about to write will fit.
415 E : if (!segment_.CanAllocate(thread_name.size() + 1) || !FlushSegment()) {
416 : // Failed to allocate a new segment.
417 i : return;
418 : }
419 :
420 E : DCHECK(segment_.CanAllocate(thread_name.size() + 1));
421 E : batch_ = NULL;
422 :
423 : // Allocate a record in the log.
424 : TraceThreadNameInfo* thread_name_event =
425 : reinterpret_cast<TraceThreadNameInfo*>(
426 : segment_.AllocateTraceRecordImpl(
427 E : TRACE_THREAD_NAME, thread_name.size() + 1));
428 E : DCHECK(thread_name_event != NULL);
429 : base::strlcpy(thread_name_event->thread_name,
430 E : thread_name.data(), thread_name.size() + 1);
431 E : }
432 :
433 E : void Profiler::ThreadState::LogSymbol(SymbolMap::Symbol* symbol) {
434 E : DCHECK(symbol != NULL);
435 E : DCHECK_NE(0, symbol->id());
436 :
437 : size_t symbol_size =
438 E : FIELD_OFFSET(TraceDynamicSymbol, symbol_name) + symbol->name().size() + 1;
439 :
440 E : if (!segment_.CanAllocate(symbol_size) || !FlushSegment()) {
441 : // Failed to allocate the symbol record.
442 i : return;
443 : }
444 :
445 E : DCHECK(segment_.CanAllocate(symbol_size));
446 E : batch_ = NULL;
447 :
448 : // Allocate a record in the log.
449 : TraceDynamicSymbol* dynamic_symbol_event =
450 : reinterpret_cast<TraceDynamicSymbol*>(
451 : segment_.AllocateTraceRecordImpl(
452 E : TRACE_DYNAMIC_SYMBOL, symbol_size));
453 E : DCHECK(dynamic_symbol_event != NULL);
454 E : dynamic_symbol_event->symbol_id = symbol->id();
455 : base::strlcpy(dynamic_symbol_event->symbol_name,
456 E : symbol->name().data(), symbol->name().size() + 1);
457 E : }
458 :
// Handles entry to @p function: interposes a return thunk in the caller's
// return-address slot so we also see the matching exit, and stamps the
// overhead-adjusted entry time into the thunk's data.
void Profiler::ThreadState::OnFunctionEntry(EntryFrame* entry_frame,
                                            FuncAddr function,
                                            uint64 cycles) {
  if (profiler_->session_.IsDisabled())
    return;

  // Record the details of the entry.
  // Note that on tail-recursion and tail-call elimination, the caller recorded
  // here will be a thunk. We cater for this case on exit as best we can.
  ThunkData* data = MakeThunk(entry_frame->retaddr);
  DCHECK(data != NULL);
  data->caller = entry_frame->retaddr;
  data->function = function;
  // Subtract accumulated profiler overhead so the entry timestamp reflects
  // profiled time only.
  data->cycles_entry = cycles - cycles_overhead_;

  // Redirect the return to our thunk so OnFunctionExit gets called.
  entry_frame->retaddr = data->thunk;

  UpdateOverhead(cycles);
}
478 :
// Handles entry to a dynamically-generated (V8) function. Same thunk
// interposition as OnFunctionEntry, but the return-address slot is supplied
// directly rather than via an EntryFrame.
void Profiler::ThreadState::OnV8FunctionEntry(FuncAddr function,
                                              RetAddr* return_address_location,
                                              uint64 cycles) {
  if (profiler_->session_.IsDisabled())
    return;

  // Record the details of the entry.

  // TODO(siggi): Note that we want to do different exit processing here,
  //    as we know "function" is a dynamic symbol - it might be better to
  //    record the fact here than to force a lookup on RecordInvocation.

  // Note that on tail-recursion and tail-call elimination, the caller recorded
  // here will be a thunk. We cater for this case on exit as best we can.
  ThunkData* data = MakeThunk(*return_address_location);
  DCHECK(data != NULL);
  data->caller = *return_address_location;
  data->function = function;
  // Subtract accumulated profiler overhead so the entry timestamp reflects
  // profiled time only.
  data->cycles_entry = cycles - cycles_overhead_;

  // Redirect the return to our thunk so OnFunctionExit gets called.
  *return_address_location = data->thunk;

  UpdateOverhead(cycles);
}
503 :
// Handles return from a function entered through one of our thunks; computes
// the invocation's duration (exclusive of profiler overhead) and records it.
void Profiler::ThreadState::OnFunctionExit(const ThunkData* data,
                                           uint64 cycles_exit) {
  // Calculate the number of cycles in the invocation, exclusive our overhead.
  uint64 cycles_executed = cycles_exit - cycles_overhead_ - data->cycles_entry;

  // See if the return address resolves to a thunk, which indicates
  // tail recursion or tail call elimination. In that case we record the
  // calling function as caller, which isn't totally accurate as that'll
  // attribute the cost to the first line of the calling function. In the
  // absence of more information, it's the best we can do, however.
  // TODO(siggi): Move this into RecordInvocation, as we can elide the lookup
  //    on a cache hit.
  Thunk* ret_thunk = CastToThunk(data->caller);
  if (ret_thunk == NULL) {
    RecordInvocation(data->caller, data->function, cycles_executed);
  } else {
    ThunkData* ret_data = DataFromThunk(ret_thunk);
    RecordInvocation(ret_data->function, data->function, cycles_executed);
  }

  UpdateOverhead(cycles_exit);
}
526 :
// Thunk factory callback: forwards new thunk-page notifications to the
// profiler's process-wide page registry.
void Profiler::ThreadState::OnPageAdded(const void* page) {
  profiler_->OnPageAdded(page);
}
530 :
// Thunk factory callback: forwards thunk-page removal notifications to the
// profiler's process-wide page registry.
void Profiler::ThreadState::OnPageRemoved(const void* page) {
  profiler_->OnPageRemoved(page);
}
534 :
// Tallies one completed invocation of @p function from @p caller taking
// @p duration_cycles. Maintains a per-thread cache of trace-buffer entries
// keyed on (caller, function); a cached entry is only reused if neither
// endpoint's dynamic symbol has moved since the entry was created.
void Profiler::ThreadState::RecordInvocation(RetAddr caller,
                                             FuncAddr function,
                                             uint64 duration_cycles) {
  // See whether we've already recorded an entry for this function.
  InvocationKey key(caller, function);
  InvocationMap::iterator it = invocations_.find(key);
  if (it != invocations_.end()) {
    // Yup, we already have an entry, validate it.
    InvocationValue& value = it->second;

    if ((value.caller_symbol == NULL ||
         value.caller_symbol->move_count() == value.caller_move_count) &&
        (value.function_symbol == NULL ||
         value.function_symbol->move_count() == value.function_move_count)) {
      // The entry is still good, tally the new data.
      ++(value.info->num_calls);
      value.info->cycles_sum += duration_cycles;
      if (duration_cycles < value.info->cycles_min) {
        value.info->cycles_min = duration_cycles;
      } else if (duration_cycles > value.info->cycles_max) {
        value.info->cycles_max = duration_cycles;
      }

      // Early out on success.
      return;
    } else {
      // The entry is not valid any more, discard it. Only entries involving
      // dynamic symbols can be invalidated by a move.
      DCHECK(value.caller_symbol != NULL || value.function_symbol != NULL);

      invocations_.erase(it);
    }
  }
  DCHECK(invocations_.find(key) == invocations_.end());

  // We don't have an entry, allocate a new one for this invocation.
  // The code below may touch last error.
  ScopedLastErrorKeeper keep_last_error;

  scoped_refptr<SymbolMap::Symbol> caller_symbol =
      profiler_->symbol_map_.FindSymbol(caller);

  // TODO(siggi): This can perhaps be optimized by keeping track of which
  //     entry hook was invoked. This will however require setting an extra
  //     bool on every entry, so will require measurement to see whether it's
  //     a win.
  scoped_refptr<SymbolMap::Symbol> function_symbol =
      profiler_->symbol_map_.FindSymbol(function);

  // Trace the symbols if this is the first time either one is observed.
  if (caller_symbol != NULL && caller_symbol->EnsureHasId()) {
    // TODO(siggi): It might lead to worst-case behavior to log symbols into
    //     the same trace buffer as we store invocations, as we're likely to
    //     alternate symbols and single-entry invocation batches. Fixme.
    LogSymbol(caller_symbol);
  }
  if (function_symbol != NULL && function_symbol->EnsureHasId()) {
    // TODO(siggi): See above.
    LogSymbol(function_symbol);
  }

  InvocationInfo* info = AllocateInvocationInfo();
  if (info != NULL) {
    InvocationValue& value = invocations_[key];
    value.info = info;
    // Remember the symbols' current move counts so the entry can be
    // invalidated if either symbol moves later.
    value.caller_symbol = caller_symbol;
    if (caller_symbol != NULL)
      value.caller_move_count = caller_symbol->move_count();
    else
      value.caller_move_count = 0;

    value.function_symbol = function_symbol;
    if (function_symbol != NULL)
      value.function_move_count = function_symbol->move_count();
    else
      value.function_move_count = 0;

    if (function_symbol == NULL) {
      // We're not in a dynamic function, record the (conventional) function.
      info->function = function;
      info->flags = 0;
    } else {
      // We're in a dynamic function symbol, record the details.
      DCHECK(function_symbol->id() != 0);

      info->function_symbol_id = function_symbol->id();
      info->flags = kFunctionIsSymbol;
    }

    if (caller_symbol == NULL) {
      // We're not in a dynamic caller_symbol, record the (conventional) caller.
      info->caller = caller;
      info->caller_offset = 0;
    } else {
      // We're in a dynamic caller_symbol, record the details.
      DCHECK(caller_symbol->id() != 0);

      info->caller_symbol_id = caller_symbol->id();
      info->flags |= kCallerIsSymbol;
      info->caller_offset = reinterpret_cast<const uint8*>(caller) -
          reinterpret_cast<const uint8*>(caller_symbol->address());
    }

    info->num_calls = 1;
    info->cycles_min = info->cycles_max = info->cycles_sum = duration_cycles;
  }
}
641 :
// Adds the cycles elapsed since @p entry_cycles (i.e. the time spent inside
// the profiler on this event) to the running overhead tally, which is
// subtracted from subsequent timing measurements.
void Profiler::ThreadState::UpdateOverhead(uint64 entry_cycles) {
  // TODO(siggi): Measure the fixed overhead on setup,
  //     then add it on every update.
  cycles_overhead_ += (__rdtsc() - entry_cycles);
}
647 :
// Returns a fresh InvocationInfo slot in the trace buffer, growing the
// current batch record in place when possible, otherwise starting a new
// batch (flushing the segment first if it's full). Returns NULL if a new
// buffer could not be obtained.
InvocationInfo* Profiler::ThreadState::AllocateInvocationInfo() {
  // This is kind of self-evident for the moment, as an invocation info batch
  // contains at least one invocation info as currently declared.
  // If this fails, please reconsider your implementation, or else revisit
  // the allocation code below.
  COMPILE_ASSERT(sizeof(TraceBatchInvocationInfo) >= sizeof(InvocationInfo),
                 invocation_info_batch_must_be_larger_than_invocation_info);

  // Do we have a record that we can grow?
  if (batch_ != NULL && segment_.CanAllocateRaw(sizeof(InvocationInfo))) {
    // Extend the open batch record by one InvocationInfo, updating the
    // record's prefix size and the segment's raw bookkeeping by hand.
    InvocationInfo* invocation_info =
        reinterpret_cast<InvocationInfo*>(segment_.write_ptr);
    RecordPrefix* prefix = trace::client::GetRecordPrefix(batch_);
    prefix->size += sizeof(InvocationInfo);

    // Update the book-keeping.
    segment_.write_ptr += sizeof(InvocationInfo);
    segment_.header->segment_length += sizeof(InvocationInfo);

    return invocation_info;
  }

  // Do we need to scarf a new buffer?
  if (!segment_.CanAllocate(sizeof(TraceBatchInvocationInfo)) &&
      !FlushSegment()) {
    // We failed to allocate a new buffer.
    return NULL;
  }

  DCHECK(segment_.header != NULL);

  // Start a new batch; its declared single entry is the slot we hand out.
  batch_ = segment_.AllocateTraceRecord<TraceBatchInvocationInfo>();
  return &batch_->invocations[0];
}
682 :
// Returns the current trace segment to the session in exchange for a fresh
// one. Clears the cache first, as all cached pointers into the old segment
// become invalid. Returns true on success.
bool Profiler::ThreadState::FlushSegment() {
  ClearCache();
  return profiler_->session_.ExchangeBuffer(&segment_);
}
687 :
// Drops all cached pointers into the current trace segment. Must precede any
// operation that can exchange the segment, as batch_ and the InvocationInfo
// pointers held in invocations_ point directly into the segment's buffer.
void Profiler::ThreadState::ClearCache() {
  batch_ = NULL;
  invocations_.clear();
}
692 :
693 E : void Profiler::OnThreadDetach() {
694 E : ThreadState* state = GetThreadState();
695 E : if (state != NULL)
696 E : thread_state_manager_.MarkForDeath(state);
697 E : }
698 :
// Given the location of a return address, follows any chain of our return
// thunks stored there and returns the location holding the true (non-thunk)
// return address. Takes lock_ to read the sorted thunk-page list.
RetAddr* Profiler::ResolveReturnAddressLocation(RetAddr* pc_location) {
  base::AutoLock lock(lock_);

  // In case of tail-call and tail recursion elimination, we can get chained
  // thunks, so we loop around here until we resolve to a non-thunk.
  while (true) {
    // See whether the return address is one of our thunks.
    RetAddr ret_addr = *pc_location;

    // Compute the page this return address lives in (4K page mask).
    const void* page = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(ret_addr) & ~0xFFF);
    // pages_ is kept sorted, so binary search applies.
    if (!std::binary_search(pages_.begin(), pages_.end(), page))
      return pc_location;

    // It's one of our own, redirect to the thunk's stash.
    ThreadState::Thunk* thunk =
        reinterpret_cast<ThreadState::Thunk*>(const_cast<void*>(ret_addr));

    ThreadState::ThunkData* data = ThreadState::DataFromThunk(thunk);

    // Update the PC location and go around again, in case this
    // thunk links to another one.
    pc_location = &data->caller;
  }
}
725 :
// Handles entry to a DllMain-like function: logs module events (once per
// module per process) before delegating to the regular function-entry path.
void Profiler::OnModuleEntry(EntryFrame* entry_frame,
                             FuncAddr function,
                             uint64 cycles) {
  // The function invoked has a DllMain-like signature.
  // Get the module and reason from its invocation record.
  HMODULE module = reinterpret_cast<HMODULE>(entry_frame->args[0]);
  DWORD reason = entry_frame->args[1];

  // Only log module additions.
  bool should_log_module = false;
  switch (reason) {
    case DLL_PROCESS_ATTACH:
    case DLL_THREAD_ATTACH:
      should_log_module = true;
      break;

    case DLL_PROCESS_DETACH:
    case DLL_THREAD_DETACH:
      break;

    default:
      LOG(WARNING) << "Unrecognized module event: " << reason << ".";
      break;
  }

  // Make sure we only log each module once per process.
  bool is_new_module = false;
  if (should_log_module) {
    base::AutoLock lock(lock_);

    is_new_module = logged_modules_.insert(module).second;
  }

  ThreadState* data = GetOrAllocateThreadState();
  DCHECK(data != NULL);
  if (data == NULL)
    return;

  if (is_new_module) {
    // Delegate the logging to our per-thread data. This logs all process
    // modules, not just the new one, and flushes the buffer.
    data->LogAllModules(module);
  }

  // Handle the function entry.
  data->OnFunctionEntry(entry_frame, function, cycles);
}
772 :
773 E : void Profiler::OnPageAdded(const void* page) {
774 E : base::AutoLock lock(lock_);
775 :
776 : PageVector::iterator it =
777 E : std::lower_bound(pages_.begin(), pages_.end(), page);
778 E : DCHECK(it == pages_.end() || *it != page);
779 E : pages_.insert(it, page);
780 E : }
781 :
782 E : void Profiler::OnPageRemoved(const void* page) {
783 E : base::AutoLock lock(lock_);
784 :
785 : PageVector::iterator it =
786 E : std::lower_bound(pages_.begin(), pages_.end(), page);
787 : // The page must be in our list.
788 E : DCHECK(it != pages_.end());
789 E : DCHECK_EQ(page, *it);
790 E : pages_.erase(it);
791 E : }
792 :
793 E : void Profiler::OnThreadName(const base::StringPiece& thread_name) {
794 E : ThreadState* state = GetOrAllocateThreadState();
795 E : if (state != NULL)
796 E : state->LogThreadName(thread_name);
797 E : }
798 :
// Vectored exception handler that intercepts the MSVC "set thread name"
// exception (0x406D1388) to capture thread names into the trace. Always
// continues the search so debuggers still see the exception.
LONG CALLBACK Profiler::ExceptionHandler(EXCEPTION_POINTERS* ex_info) {
  // Log the thread if this is the VC thread name exception.
  if (ex_info->ExceptionRecord->ExceptionCode == kVCThreadNameException &&
      ex_info->ExceptionRecord->NumberParameters ==
          sizeof(THREADNAME_INFO)/sizeof(DWORD)) {
    const THREADNAME_INFO* info =
        reinterpret_cast<const THREADNAME_INFO*>(
            &ex_info->ExceptionRecord->ExceptionInformation);

    // Per MSDN, dwType must be 0x1000 for a valid thread-name event.
    if (info->dwType == 0x1000) {
      Profiler* instance = Profiler::Instance();
      if (instance != NULL)
        instance->OnThreadName(info->szName);
    } else {
      LOG(WARNING) << "Unrecognised event type " << info->dwType;
    }
  }

  return EXCEPTION_CONTINUE_SEARCH;
}
819 :
820 :
// Returns the process-wide profiler singleton, constructing it lazily on
// first use.
Profiler* Profiler::Instance() {
  return static_profiler_instance.Pointer();
}
824 :
// Sets up the RPC session and the first thread's state, then installs the
// vectored exception handler used to capture thread names.
Profiler::Profiler() : handler_registration_(NULL) {
  // Create our RPC session and allocate our initial trace segment on first use.
  ThreadState* data = CreateFirstThreadStateAndSession();
  CHECK(data != NULL) << "Failed to allocate thread local state.";

  // TRUE makes this handler run ahead of previously installed handlers.
  handler_registration_ = ::AddVectoredExceptionHandler(TRUE, ExceptionHandler);
}
832 :
// Tears down the current thread's state and unregisters the vectored
// exception handler.
Profiler::~Profiler() {
  // Typically, this will happen on the last thread in the process. We must
  // explicitly clean up this thread's state as it will otherwise leak.
  FreeThreadState();

  // Unregister our VEH.
  if (handler_registration_ != NULL) {
    ::RemoveVectoredExceptionHandler(handler_registration_);
    handler_registration_ = NULL;
  }
}
844 :
// Allocates the calling thread's state and initializes the RPC session,
// which also acquires the first trace segment. Called once, from the
// profiler's constructor.
Profiler::ThreadState* Profiler::CreateFirstThreadStateAndSession() {
  Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();

  // Create the session (and allocate the first segment).
  trace::client::InitializeRpcSession(&session_, data->segment());

  return data;
}
853 :
// Returns this thread's state, creating it if necessary and lazily
// acquiring a trace buffer for it when the session is actively tracing.
Profiler::ThreadState* Profiler::GetOrAllocateThreadState() {
  Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
  if (!data->segment()->write_ptr && session_.IsTracing()) {
    session_.AllocateBuffer(data->segment());
  }
  return data;
}
861 :
// Returns this thread's state from TLS, creating and registering a new one
// on first use. Does not allocate a trace buffer (see the non-Impl variant).
Profiler::ThreadState* Profiler::GetOrAllocateThreadStateImpl() {
  ThreadState *data = tls_.Get();
  if (data != NULL)
    return data;

  // NOTE(review): a throwing operator new never returns NULL, so this check
  // only fires under a non-throwing new — presumably how this is built;
  // confirm against the toolchain settings.
  data = new ThreadState(this);
  if (data == NULL) {
    LOG(ERROR) << "Unable to allocate per-thread data";
    return NULL;
  }

  thread_state_manager_.Register(data);
  tls_.Set(data);

  return data;
}
878 :
// Returns this thread's state, or NULL if none has been allocated yet.
Profiler::ThreadState* Profiler::GetThreadState() const {
  return tls_.Get();
}
882 :
883 E : void Profiler::FreeThreadState() {
884 E : ThreadState *data = GetThreadState();
885 E : if (data != NULL) {
886 E : tls_.Set(NULL);
887 E : thread_state_manager_.Unregister(data);
888 E : delete data;
889 : }
890 E : }
891 :
892 : void WINAPI Profiler::DllMainEntryHook(EntryFrame* entry_frame,
893 : FuncAddr function,
894 E : uint64 cycles) {
895 E : ScopedLastErrorKeeper keep_last_error;
896 :
897 E : Profiler* profiler = Profiler::Instance();
898 E : profiler->OnModuleEntry(entry_frame, function, cycles);
899 E : }
900 :
901 : void WINAPI Profiler::FunctionEntryHook(EntryFrame* entry_frame,
902 : FuncAddr function,
903 E : uint64 cycles) {
904 E : ScopedLastErrorKeeper keep_last_error;
905 :
906 E : Profiler* profiler = Profiler::Instance();
907 E : ThreadState* data = profiler->GetOrAllocateThreadState();
908 E : DCHECK(data != NULL);
909 E : if (data != NULL)
910 E : data->OnFunctionEntry(entry_frame, function, cycles);
911 E : }
912 :
913 : void WINAPI Profiler::OnV8FunctionEntry(FuncAddr function,
914 : RetAddr* return_addr_location,
915 E : uint64 cycles) {
916 E : ScopedLastErrorKeeper keep_last_error;
917 :
918 E : Profiler* profiler = Profiler::Instance();
919 E : ThreadState* data = profiler->GetOrAllocateThreadState();
920 E : if (data != NULL)
921 E : data->OnV8FunctionEntry(function, return_addr_location, cycles);
922 E : }
923 :
924 : void Profiler::AddSymbol(const void* address, size_t length,
925 E : const char* name, size_t name_len) {
926 E : symbol_map_.AddSymbol(address, length, base::StringPiece(name, name_len));
927 E : }
928 :
929 E : void Profiler::MoveSymbol(const void* old_address, const void* new_address) {
930 E : symbol_map_.MoveSymbol(old_address, new_address);
931 E : }
932 :
933 : } // namespace profiler
934 : } // namespace agent
|