1 : // Copyright 2012 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the profiler DLL.
16 : #include "syzygy/agent/profiler/profiler.h"
17 :
18 : #include <windows.h>
19 : #include <algorithm>
20 :
21 : #include "base/at_exit.h"
22 : #include "base/command_line.h"
23 : #include "base/environment.h"
24 : #include "base/logging.h"
25 : #include "base/string_util.h"
26 : #include "base/utf_string_conversions.h"
27 : #include "base/memory/scoped_ptr.h"
28 : #include "base/win/pe_image.h"
29 : #include "base/win/scoped_handle.h"
30 : #include "syzygy/agent/common/dlist.h"
31 : #include "syzygy/agent/common/process_utils.h"
32 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
33 : #include "syzygy/agent/profiler/return_thunk_factory.h"
34 : #include "syzygy/common/logging.h"
35 : #include "syzygy/trace/client/client_utils.h"
36 : #include "syzygy/trace/protocol/call_trace_defs.h"
37 :
38 : namespace {
39 :
using agent::common::ScopedLastErrorKeeper;

// Identifies a unique call edge: (caller return address, callee function).
typedef std::pair<RetAddr, FuncAddr> InvocationKey;
43 :
44 : class HashInvocationKey {
45 : public:
46 : static const size_t bucket_size = 4;
47 : static const size_t min_buckets = 8;
48 :
49 E : size_t operator()(const InvocationKey& key) const {
50 : return reinterpret_cast<size_t>(key.first) ^
51 E : reinterpret_cast<size_t>(key.second);
52 E : }
53 :
54 E : bool operator()(const InvocationKey& a, const InvocationKey& b) const {
55 E : return a < b;
56 E : }
57 : };
58 :
using agent::profiler::SymbolMap;

// Cached per-invocation bookkeeping. The cached symbols are re-validated
// against their move counts on every use; if a symbol has moved since this
// entry was created the entry is discarded (see RecordInvocation).
struct InvocationValue {
  // This invocation entry's caller's dynamic symbol, if any.
  scoped_refptr<SymbolMap::Symbol> caller_symbol;
  // The last observed move count for caller_symbol.
  int32 caller_move_count;

  // This invocation entry's callee's dynamic symbol, if any.
  scoped_refptr<SymbolMap::Symbol> function_symbol;
  // The last observed move count for function_symbol.
  int32 function_move_count;

  // Points to the trace buffer entry for the respective function.
  InvocationInfo* info;
};
75 :
// Maps call edges to their tallied invocation data in the current
// trace buffer.
typedef base::hash_map<
    InvocationKey, InvocationValue, HashInvocationKey> InvocationMap;


// The information on how to set the thread name comes from
// a MSDN article: http://msdn2.microsoft.com/en-us/library/xcb2z8hs.aspx
const DWORD kVCThreadNameException = 0x406D1388;

// Layout mandated by the debugger thread-naming convention referenced
// above; delivered via an exception with code kVCThreadNameException.
typedef struct tagTHREADNAME_INFO {
  DWORD dwType;  // Must be 0x1000.
  LPCSTR szName;  // Pointer to name (in user addr space).
  DWORD dwThreadID;  // Thread ID (-1=caller thread).
  DWORD dwFlags;  // Reserved for future use, must be zero.
} THREADNAME_INFO;
90 :
91 : } // namespace
92 :
93 : // See client.cc for a description of the unconventional
94 : // calling conventions for this function.
// Entry thunk invoked in place of an instrumented function. On entry the
// stack holds (top first) the original function address, pushed by the
// instrumented import thunk, then the caller's return address. All
// registers and flags are preserved across the hook call.
extern "C" void __declspec(naked) _indirect_penter() {
  __asm {
    // Stash volatile registers.
    push eax
    push edx

    // Get the current cycle time ASAP.
    rdtsc

    // Save the value of eax for later, we need the register to stash the flags.
    push ecx
    mov ecx, eax

    // Save the low byte of the flags into AH.
    lahf
    // Save the overflow flag into AL.
    seto al

    // Stash the flags to stack.
    push eax

    // Push the cycle time arg.
    push edx
    push ecx

    // Retrieve the original function address, pushed by our caller.
    // The six pushes above put it at [esp + 0x18].
    mov eax, DWORD PTR[esp + 0x18]
    push eax

    // Calculate the position of the return address on stack, and
    // push it. This becomes the EntryFrame argument.
    lea eax, DWORD PTR[esp + 0x20]
    push eax
    call agent::profiler::Profiler::FunctionEntryHook

    // Restore volatile registers.
    pop eax
    // AL is set to 1 if the overflow flag was set before the call to
    // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
    // flag.
    add al, 0x7f
    // Restore the low byte of the flags.
    sahf

    pop ecx
    pop edx
    pop eax

    // Return to the address pushed by our caller.
    ret
  }
}
147 :
// Same thunk as _indirect_penter, but routed to DllMainEntryHook so that
// DllMain-like entry points additionally get module load/unload logging.
// The asm body is identical to _indirect_penter except for the hook called.
extern "C" void __declspec(naked) _indirect_penter_dllmain() {
  __asm {
    // Stash volatile registers.
    push eax
    push edx

    // Get the current cycle time ASAP.
    rdtsc

    // Save the value of eax for later, we need the register to stash the flags.
    push ecx
    mov ecx, eax

    // Save the low byte of the flags into AH.
    lahf
    // Save the overflow flag into AL.
    seto al

    // Stash the flags to stack.
    push eax

    // Push the cycle time arg.
    push edx
    push ecx

    // Retrieve the original function address, pushed by our caller.
    // The six pushes above put it at [esp + 0x18].
    mov eax, DWORD PTR[esp + 0x18]
    push eax

    // Calculate the position of the return address on stack, and
    // push it. This becomes the EntryFrame argument.
    lea eax, DWORD PTR[esp + 0x20]
    push eax
    call agent::profiler::Profiler::DllMainEntryHook

    // Restore volatile registers.
    pop eax
    // AL is set to 1 if the overflow flag was set before the call to
    // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
    // flag.
    add al, 0x7f
    // Restore the low byte of the flags.
    sahf

    pop ecx
    pop edx
    pop eax

    // Return to the address pushed by our caller.
    ret
  }
}
200 :
201 : // On entry, pc_location should point to a location on our own stack.
202 : extern "C" uintptr_t __cdecl ResolveReturnAddressLocation(
203 E : uintptr_t pc_location) {
204 : using agent::profiler::Profiler;
205 : return reinterpret_cast<uintptr_t>(
206 : Profiler::instance().ResolveReturnAddressLocation(
207 E : reinterpret_cast<RetAddr*>(pc_location)));
208 E : }
209 :
210 : // This function needs to match the declaration of FunctionEntryHook in the V8
211 : // API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
// This function needs to match the declaration of FunctionEntryHook in the V8
// API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
extern "C" __declspec(naked) void __cdecl OnDynamicFunctionEntry(
    uintptr_t function, uintptr_t return_addr_location) {
  __asm {
    // Grab the current time ASAP.
    rdtsc

    // Push the cycle time arg.
    push edx
    push eax

    // Duplicate the function and return_addr_location arguments.
    // After the two pushes above the incoming args sit at [esp + 0x0C]
    // and [esp + 0x10]; each push shifts esp by 4, so both loads read
    // from [esp + 0x10].
    push DWORD PTR[esp + 0x10]
    push DWORD PTR[esp + 0x10]

    call agent::profiler::Profiler::OnV8FunctionEntry

    // The hook is stdcall (WINAPI) so it cleaned its own args; eax/edx
    // are caller-save under cdecl so no restore is needed here.
    ret
  }
}
231 :
232 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
233 : using agent::profiler::Profiler;
234 :
235 : // Our AtExit manager required by base.
236 : static base::AtExitManager* at_exit = NULL;
237 :
238 E : switch (reason) {
239 : case DLL_PROCESS_ATTACH:
240 E : DCHECK(at_exit == NULL);
241 E : at_exit = new base::AtExitManager();
242 :
243 E : CommandLine::Init(0, NULL);
244 E : common::InitLoggingForDll(L"profiler");
245 E : break;
246 :
247 : case DLL_THREAD_DETACH:
248 E : Profiler::instance().OnThreadDetach();
249 E : break;
250 :
251 : case DLL_PROCESS_DETACH:
252 E : DCHECK(at_exit != NULL);
253 E : delete at_exit;
254 E : at_exit = NULL;
255 : break;
256 :
257 : default:
258 : break;
259 : }
260 :
261 E : return TRUE;
262 E : }
263 :
// C-linkage entry point for instrumented code (e.g. a JIT) to register a
// dynamically generated symbol spanning [address, address + length).
void __cdecl AddDynamicSymbol(const void* address, size_t length,
                              const char* name, size_t name_len) {
  using agent::profiler::Profiler;

  Profiler::instance().AddSymbol(address, length, name, name_len);
}
270 :
// C-linkage entry point notifying the profiler that a previously added
// dynamic symbol has been relocated from old_address to new_address.
void __cdecl MoveDynamicSymbol(const void* old_address,
                               const void* new_address) {
  using agent::profiler::Profiler;

  Profiler::instance().MoveSymbol(old_address, new_address);
}
277 :
278 : namespace agent {
279 : namespace profiler {
280 :
281 : // All tracing runs through this object.
// Note: a static with a non-trivial constructor, so the profiler (and its
// RPC session, see Profiler::Profiler) is set up during this module's
// static initialization.
agent::profiler::Profiler Profiler::instance_;
283 :
// Per-thread profiler state. Combines the return-thunk factory (which
// intercepts function exits) with this thread's trace buffer segment,
// invocation cache and logging helpers.
class Profiler::ThreadState
    : public ReturnThunkFactoryImpl<Profiler::ThreadState>,
      public agent::common::ThreadStateBase {
 public:
  explicit ThreadState(Profiler* profiler);
  ~ThreadState();

  // Logs @p module and all other modules in the process, then flushes
  // the current trace buffer.
  void LogAllModules(HMODULE module);

  // Logs @p module.
  void LogModule(HMODULE module);

  // Logs @p thread_name as the current thread's name.
  void LogThreadName(const base::StringPiece& thread_name);

  // Logs @p symbol into the trace.
  void LogSymbol(SymbolMap::Symbol* symbol);

  // Processes a single function entry.
  void OnFunctionEntry(EntryFrame* entry_frame,
                       FuncAddr function,
                       uint64 cycles);

  // Processes a single V8 function entry.
  void OnV8FunctionEntry(FuncAddr function,
                         RetAddr* return_address_location,
                         uint64 cycles);

  // @name Callback notification implementation.
  // @{
  virtual void OnPageAdded(const void* page) OVERRIDE;
  virtual void OnPageRemoved(const void* page) OVERRIDE;
  // @}

  // Function exit hook.
  void OnFunctionExit(const ThunkData* data, uint64 cycles_exit);

  // Accessor for this thread's trace file segment.
  trace::client::TraceFileSegment* segment() { return &segment_; }

 private:
  friend class Profiler;

  // Tallies @p cycles against the (@p caller, @p function) call edge,
  // allocating a new trace record for the edge if necessary.
  void RecordInvocation(RetAddr caller,
                        FuncAddr function,
                        uint64 cycles);

  void UpdateOverhead(uint64 entry_cycles);
  InvocationInfo* AllocateInvocationInfo();
  void ClearCache();
  bool FlushSegment();

  // The profiler we're attached to.
  Profiler* profiler_;

  // We keep a running tally of the rough amount of wall clock cycles spent
  // inside the profiler. We then subtract the profiler's overhead from the
  // wall clock cycle timer on each measurement. This results in a timer that
  // measures time exclusive of profiling overhead.
  uint64 cycles_overhead_;

  // The invocations we've recorded in our buffer.
  InvocationMap invocations_;

  // The trace file segment we're recording to.
  trace::client::TraceFileSegment segment_;

  // The current batch record we're writing to, if any.
  TraceBatchInvocationInfo* batch_;

  // The set of modules we've logged.
  ModuleSet logged_modules_;
};
358 :
Profiler::ThreadState::ThreadState(Profiler* profiler)
    : profiler_(profiler),
      cycles_overhead_(0LL),
      batch_(NULL) {
  // Sets up the return-thunk factory for this thread.
  Initialize();
}
365 :
Profiler::ThreadState::~ThreadState() {
  // Drop the invocation cache and batch pointer first; they point into
  // the trace segment we may return below.
  ClearCache();

  // If we have an outstanding buffer, let's deallocate it now.
  if (segment_.write_ptr != NULL)
    profiler_->session_.ReturnBuffer(&segment_);

  // Tears down the return-thunk factory.
  Uninitialize();
}
375 :
376 E : void Profiler::ThreadState::LogAllModules(HMODULE module) {
377 : // Bail early if we're disabled.
378 E : if (profiler_->session_.IsDisabled())
379 E : return;
380 :
381 E : agent::common::ModuleVector modules;
382 E : agent::common::GetProcessModules(&modules);
383 :
384 : // Our module should be in the process modules.
385 E : DCHECK(std::find(modules.begin(), modules.end(), module) != modules.end());
386 :
387 E : for (size_t i = 0; i < modules.size(); ++i) {
388 E : DCHECK(modules[i] != NULL);
389 E : LogModule(modules[i]);
390 E : }
391 :
392 : // We need to flush module events right away, so that the module is
393 : // defined in the trace file before events using that module start to
394 : // occur (in another thread).
395 E : FlushSegment();
396 E : }
397 :
// Writes a module record for @p module into the current trace segment.
void Profiler::ThreadState::LogModule(HMODULE module) {
  // This may flush our buffer, so let's clear our cache.
  ClearCache();
  agent::common::LogModule(module, &profiler_->session_, &segment_);
}
403 :
404 : void Profiler::ThreadState::LogThreadName(
405 E : const base::StringPiece& thread_name) {
406 E : if (thread_name.empty())
407 i : return;
408 :
409 : // Make sure the event we're about to write will fit.
410 E : if (!segment_.CanAllocate(thread_name.size() + 1) || !FlushSegment()) {
411 : // Failed to allocate a new segment.
412 i : return;
413 : }
414 :
415 E : DCHECK(segment_.CanAllocate(thread_name.size() + 1));
416 E : batch_ = NULL;
417 :
418 : // Allocate a record in the log.
419 : TraceThreadNameInfo* thread_name_event =
420 : reinterpret_cast<TraceThreadNameInfo*>(
421 : segment_.AllocateTraceRecordImpl(
422 E : TRACE_THREAD_NAME, thread_name.size() + 1));
423 E : DCHECK(thread_name_event != NULL);
424 : base::strlcpy(thread_name_event->thread_name,
425 E : thread_name.data(), thread_name.size() + 1);
426 E : }
427 :
428 E : void Profiler::ThreadState::LogSymbol(SymbolMap::Symbol* symbol) {
429 E : DCHECK(symbol != NULL);
430 E : DCHECK_NE(0, symbol->id());
431 :
432 : size_t symbol_size =
433 E : FIELD_OFFSET(TraceDynamicSymbol, symbol_name) + symbol->name().size() + 1;
434 :
435 E : if (!segment_.CanAllocate(symbol_size) || !FlushSegment()) {
436 : // Failed to allocate the symbol record.
437 i : return;
438 : }
439 :
440 E : DCHECK(segment_.CanAllocate(symbol_size));
441 E : batch_ = NULL;
442 :
443 : // Allocate a record in the log.
444 : TraceDynamicSymbol* dynamic_symbol_event =
445 : reinterpret_cast<TraceDynamicSymbol*>(
446 : segment_.AllocateTraceRecordImpl(
447 E : TRACE_DYNAMIC_SYMBOL, symbol_size));
448 E : DCHECK(dynamic_symbol_event != NULL);
449 E : dynamic_symbol_event->symbol_id = symbol->id();
450 : base::strlcpy(dynamic_symbol_event->symbol_name,
451 E : symbol->name().data(), symbol->name().size() + 1);
452 E : }
453 :
454 : void Profiler::ThreadState::OnFunctionEntry(EntryFrame* entry_frame,
455 : FuncAddr function,
456 E : uint64 cycles) {
457 E : if (profiler_->session_.IsDisabled())
458 E : return;
459 :
460 : // Record the details of the entry.
461 : // Note that on tail-recursion and tail-call elimination, the caller recorded
462 : // here will be a thunk. We cater for this case on exit as best we can.
463 E : ThunkData* data = MakeThunk(entry_frame->retaddr);
464 E : DCHECK(data != NULL);
465 E : data->caller = entry_frame->retaddr;
466 E : data->function = function;
467 E : data->cycles_entry = cycles - cycles_overhead_;
468 :
469 E : entry_frame->retaddr = data->thunk;
470 :
471 E : UpdateOverhead(cycles);
472 E : }
473 :
474 : void Profiler::ThreadState::OnV8FunctionEntry(FuncAddr function,
475 : RetAddr* return_address_location,
476 E : uint64 cycles) {
477 E : if (profiler_->session_.IsDisabled())
478 i : return;
479 :
480 : // Record the details of the entry.
481 :
482 : // TODO(siggi): Note that we want to do different exit processing here,
483 : // as we know "function" is a dynamic symbol - it might be better to
484 : // record the fact here than to force a lookup on RecordInvocation.
485 :
486 : // Note that on tail-recursion and tail-call elimination, the caller recorded
487 : // here will be a thunk. We cater for this case on exit as best we can.
488 E : ThunkData* data = MakeThunk(*return_address_location);
489 E : DCHECK(data != NULL);
490 E : data->caller = *return_address_location;
491 E : data->function = function;
492 E : data->cycles_entry = cycles - cycles_overhead_;
493 :
494 E : *return_address_location = data->thunk;
495 :
496 E : UpdateOverhead(cycles);
497 E : }
498 :
499 : void Profiler::ThreadState::OnFunctionExit(const ThunkData* data,
500 E : uint64 cycles_exit) {
501 : // Calculate the number of cycles in the invocation, exclusive our overhead.
502 E : uint64 cycles_executed = cycles_exit - cycles_overhead_ - data->cycles_entry;
503 :
504 : // See if the return address resolves to a thunk, which indicates
505 : // tail recursion or tail call elimination. In that case we record the
506 : // calling function as caller, which isn't totally accurate as that'll
507 : // attribute the cost to the first line of the calling function. In the
508 : // absence of more information, it's the best we can do, however.
509 : // TODO(siggi): Move this into RecordInvocation, as we can elide the lookup
510 : // on a cache hit.
511 E : Thunk* ret_thunk = CastToThunk(data->caller);
512 E : if (ret_thunk == NULL) {
513 E : RecordInvocation(data->caller, data->function, cycles_executed);
514 E : } else {
515 E : ThunkData* ret_data = DataFromThunk(ret_thunk);
516 E : RecordInvocation(ret_data->function, data->function, cycles_executed);
517 : }
518 :
519 E : UpdateOverhead(cycles_exit);
520 E : }
521 :
// Thunk-factory callback; forwards new thunk pages to the profiler's
// process-wide page registry.
void Profiler::ThreadState::OnPageAdded(const void* page) {
  profiler_->OnPageAdded(page);
}
525 :
// Thunk-factory callback; forwards removed thunk pages to the profiler's
// process-wide page registry.
void Profiler::ThreadState::OnPageRemoved(const void* page) {
  profiler_->OnPageRemoved(page);
}
529 :
// Tallies @p duration_cycles against the (@p caller, @p function) call
// edge. Fast path: a cached, still-valid entry is updated in place. Slow
// path: symbols are (re)resolved, logged on first sight, and a fresh
// InvocationInfo record is allocated in the trace buffer.
void Profiler::ThreadState::RecordInvocation(RetAddr caller,
                                             FuncAddr function,
                                             uint64 duration_cycles) {
  // See whether we've already recorded an entry for this function.
  InvocationKey key(caller, function);
  InvocationMap::iterator it = invocations_.find(key);
  if (it != invocations_.end()) {
    // Yup, we already have an entry, validate it.
    InvocationValue& value = it->second;

    // The cached entry is valid only if neither symbol has moved since it
    // was created.
    if ((value.caller_symbol == NULL ||
         value.caller_symbol->move_count() == value.caller_move_count) &&
        (value.function_symbol == NULL ||
         value.function_symbol->move_count() == value.function_move_count)) {
      // The entry is still good, tally the new data.
      ++(value.info->num_calls);
      value.info->cycles_sum += duration_cycles;
      if (duration_cycles < value.info->cycles_min) {
        value.info->cycles_min = duration_cycles;
      } else if (duration_cycles > value.info->cycles_max) {
        value.info->cycles_max = duration_cycles;
      }

      // Early out on success.
      return;
    } else {
      // The entry is not valid any more, discard it.
      DCHECK(value.caller_symbol != NULL || value.function_symbol != NULL);

      invocations_.erase(it);
    }
  }
  DCHECK(invocations_.find(key) == invocations_.end());

  // We don't have an entry, allocate a new one for this invocation.
  // The code below may touch last error.
  ScopedLastErrorKeeper keep_last_error;

  scoped_refptr<SymbolMap::Symbol> caller_symbol =
      profiler_->symbol_map_.FindSymbol(caller);

  // TODO(siggi): This can perhaps be optimized by keeping track of which
  //     entry hook was invoked. This will however require setting an extra
  //     bool on every entry, so will require measurement to see whether it's
  //     a win.
  scoped_refptr<SymbolMap::Symbol> function_symbol =
      profiler_->symbol_map_.FindSymbol(function);

  // Trace the symbols if this is the first time either one is observed.
  if (caller_symbol != NULL && caller_symbol->EnsureHasId()) {
    // TODO(siggi): It might lead to worst-case behavior to log symbols into
    //     the same trace buffer as we store invocations, as we're likely to
    //     alternate symbols and single-entry invocation batches. Fixme.
    LogSymbol(caller_symbol);
  }
  if (function_symbol != NULL && function_symbol->EnsureHasId()) {
    // TODO(siggi): See above.
    LogSymbol(function_symbol);
  }

  InvocationInfo* info = AllocateInvocationInfo();
  if (info != NULL) {
    InvocationValue& value = invocations_[key];
    value.info = info;
    value.caller_symbol = caller_symbol;
    if (caller_symbol != NULL)
      value.caller_move_count = caller_symbol->move_count();
    else
      value.caller_move_count = 0;

    value.function_symbol = function_symbol;
    if (function_symbol != NULL)
      value.function_move_count = function_symbol->move_count();
    else
      value.function_move_count = 0;

    if (function_symbol == NULL) {
      // We're not in a dynamic function, record the (conventional) function.
      info->function = function;
      info->flags = 0;
    } else {
      // We're in a dynamic function symbol, record the details.
      DCHECK(function_symbol->id() != 0);

      info->function_symbol_id = function_symbol->id();
      info->flags = kFunctionIsSymbol;
    }

    if (caller_symbol == NULL) {
      // We're not in a dynamic caller_symbol, record the (conventional) caller.
      info->caller = caller;
      info->caller_offset = 0;
    } else {
      // We're in a dynamic caller_symbol, record the details.
      DCHECK(caller_symbol->id() != 0);

      info->caller_symbol_id = caller_symbol->id();
      info->flags |= kCallerIsSymbol;
      info->caller_offset = reinterpret_cast<const uint8*>(caller) -
          reinterpret_cast<const uint8*>(caller_symbol->address());
    }

    info->num_calls = 1;
    info->cycles_min = info->cycles_max = info->cycles_sum = duration_cycles;
  }
}
636 :
// Adds the cycles elapsed since @p entry_cycles (the timestamp taken on
// hook entry) to the running profiler-overhead tally, which is subtracted
// from subsequent timings.
void Profiler::ThreadState::UpdateOverhead(uint64 entry_cycles) {
  // TODO(siggi): Measure the fixed overhead on setup,
  //     then add it on every update.
  cycles_overhead_ += (__rdtsc() - entry_cycles);
}
642 :
// Returns a fresh InvocationInfo slot in the trace buffer, either by
// growing the current batch record in place or by starting a new batch
// (flushing the segment first if needed). Returns NULL on failure.
InvocationInfo* Profiler::ThreadState::AllocateInvocationInfo() {
  // This is kind of self-evident for the moment, as an invocation info batch
  // contains at least one invocation info as currently declared.
  // If this fails, please reconsider your implementation, or else revisit
  // the allocation code below.
  COMPILE_ASSERT(sizeof(TraceBatchInvocationInfo) >= sizeof(InvocationInfo),
                 invocation_info_batch_must_be_larger_than_invocation_info);

  // Do we have a record that we can grow?
  if (batch_ != NULL && segment_.CanAllocateRaw(sizeof(InvocationInfo))) {
    // The new slot starts at the segment's current write pointer.
    InvocationInfo* invocation_info =
        reinterpret_cast<InvocationInfo*>(segment_.write_ptr);
    // Grow the enclosing batch record to cover the new slot.
    RecordPrefix* prefix = trace::client::GetRecordPrefix(batch_);
    prefix->size += sizeof(InvocationInfo);

    // Update the book-keeping.
    segment_.write_ptr += sizeof(InvocationInfo);
    segment_.header->segment_length += sizeof(InvocationInfo);

    return invocation_info;
  }

  // Do we need to scarf a new buffer?
  if (!segment_.CanAllocate(sizeof(TraceBatchInvocationInfo)) &&
      !FlushSegment()) {
    // We failed to allocate a new buffer.
    return NULL;
  }

  DCHECK(segment_.header != NULL);

  // Start a new batch and hand out its first invocation slot.
  batch_ = segment_.AllocateTraceRecord<TraceBatchInvocationInfo>();
  return &batch_->invocations[0];
}
677 :
// Returns the current segment to the session in exchange for a fresh one.
// Returns false if a new buffer could not be obtained.
bool Profiler::ThreadState::FlushSegment() {
  // The cache points into the outgoing buffer, so it must be cleared first.
  ClearCache();
  return profiler_->session_.ExchangeBuffer(&segment_);
}
682 :
// Drops all cached pointers into the current trace segment (the open batch
// record and the invocation map entries), e.g. ahead of a buffer exchange.
void Profiler::ThreadState::ClearCache() {
  batch_ = NULL;
  invocations_.clear();
}
687 :
688 E : void Profiler::OnThreadDetach() {
689 E : ThreadState* state = GetThreadState();
690 E : if (state != NULL)
691 E : thread_state_manager_.MarkForDeath(state);
692 E : }
693 :
// Given the address of a stashed return address, follows any chain of our
// return thunks and returns the location holding the real (non-thunk)
// return address. Used by external stack walkers via the exported
// ResolveReturnAddressLocation wrapper.
RetAddr* Profiler::ResolveReturnAddressLocation(RetAddr* pc_location) {
  base::AutoLock lock(lock_);

  // In case of tail-call and tail recursion elimination, we can get chained
  // thunks, so we loop around here until we resolve to a non-thunk.
  while (true) {
    // See whether the return address is one of our thunks.
    RetAddr ret_addr = *pc_location;

    // Compute the page this return address lives in; thunks are
    // recognized by page membership in the sorted pages_ list.
    // NOTE(review): assumes 4K pages (the ~0xFFF mask) — confirm against
    //     the thunk factory's page granularity.
    const void* page = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(ret_addr) & ~0xFFF);
    if (!std::binary_search(pages_.begin(), pages_.end(), page))
      return pc_location;

    // It's one of our own, redirect to the thunk's stash.
    ThreadState::Thunk* thunk =
        reinterpret_cast<ThreadState::Thunk*>(const_cast<void*>(ret_addr));

    ThreadState::ThunkData* data = ThreadState::DataFromThunk(thunk);

    // Update the PC location and go around again, in case this
    // thunk links to another one.
    pc_location = &data->caller;
  }
}
720 :
721 : void Profiler::OnModuleEntry(EntryFrame* entry_frame,
722 : FuncAddr function,
723 E : uint64 cycles) {
724 : // The function invoked has a DllMain-like signature.
725 : // Get the module and reason from its invocation record.
726 E : HMODULE module = reinterpret_cast<HMODULE>(entry_frame->args[0]);
727 E : DWORD reason = entry_frame->args[1];
728 :
729 : // Only log module additions.
730 E : bool should_log_module = false;
731 E : switch (reason) {
732 : case DLL_PROCESS_ATTACH:
733 : case DLL_THREAD_ATTACH:
734 E : should_log_module = true;
735 E : break;
736 :
737 : case DLL_PROCESS_DETACH:
738 : case DLL_THREAD_DETACH:
739 E : break;
740 :
741 : default:
742 i : LOG(WARNING) << "Unrecognized module event: " << reason << ".";
743 : break;
744 : }
745 :
746 : // Make sure we only log each module once per process.
747 E : bool is_new_module = false;
748 E : if (should_log_module) {
749 E : base::AutoLock lock(lock_);
750 :
751 E : is_new_module = logged_modules_.insert(module).second;
752 E : }
753 :
754 E : ThreadState* data = GetOrAllocateThreadState();
755 E : DCHECK(data != NULL);
756 E : if (data == NULL)
757 i : return;
758 :
759 E : if (is_new_module) {
760 : // Delegate the logging to our per-thread data.
761 E : data->LogAllModules(module);
762 : }
763 :
764 : // Handle the function entry.
765 E : data->OnFunctionEntry(entry_frame, function, cycles);
766 E : }
767 :
768 E : void Profiler::OnPageAdded(const void* page) {
769 E : base::AutoLock lock(lock_);
770 :
771 : PageVector::iterator it =
772 E : std::lower_bound(pages_.begin(), pages_.end(), page);
773 E : DCHECK(it == pages_.end() || *it != page);
774 E : pages_.insert(it, page);
775 E : }
776 :
777 E : void Profiler::OnPageRemoved(const void* page) {
778 E : base::AutoLock lock(lock_);
779 :
780 : PageVector::iterator it =
781 E : std::lower_bound(pages_.begin(), pages_.end(), page);
782 : // The page must be in our list.
783 E : DCHECK(it != pages_.end());
784 E : DCHECK_EQ(page, *it);
785 E : pages_.erase(it);
786 E : }
787 :
788 E : void Profiler::OnThreadName(const base::StringPiece& thread_name) {
789 E : ThreadState* state = GetOrAllocateThreadState();
790 E : if (state != NULL)
791 E : state->LogThreadName(thread_name);
792 E : }
793 :
794 E : LONG CALLBACK Profiler::ExceptionHandler(EXCEPTION_POINTERS* ex_info) {
795 : // Log the thread if this is the VC thread name exception.
796 : if (ex_info->ExceptionRecord->ExceptionCode == kVCThreadNameException &&
797 : ex_info->ExceptionRecord->NumberParameters ==
798 E : sizeof(THREADNAME_INFO)/sizeof(DWORD)) {
799 : const THREADNAME_INFO* info =
800 : reinterpret_cast<const THREADNAME_INFO*>(
801 E : &ex_info->ExceptionRecord->ExceptionInformation);
802 :
803 E : if (info->dwType == 0x1000) {
804 E : instance_.OnThreadName(info->szName);
805 E : } else {
806 i : LOG(WARNING) << "Unrecognised event type " << info->dwType;
807 : }
808 : }
809 :
810 E : return EXCEPTION_CONTINUE_SEARCH;
811 E : }
812 :
Profiler::Profiler() : handler_registration_(NULL) {
  // Create our RPC session and allocate our initial trace segment on first use.
  ThreadState* data = CreateFirstThreadStateAndSession();
  CHECK(data != NULL) << "Failed to allocate thread local state.";

  // Register first in line (the TRUE argument) so we observe the
  // thread-naming exception before other handlers.
  handler_registration_ = ::AddVectoredExceptionHandler(TRUE, ExceptionHandler);
}
820 :
Profiler::~Profiler() {
  // Typically, this will happen on the last thread in the process. We must
  // explicitly clean up this thread's state as it will otherwise leak.
  FreeThreadState();

  // Unregister our VEH.
  if (handler_registration_ != NULL) {
    ::RemoveVectoredExceptionHandler(handler_registration_);
    handler_registration_ = NULL;
  }
}
832 :
// One-time setup invoked from the constructor: allocates the constructing
// thread's state, then creates the RPC session seeded with that thread's
// trace segment.
Profiler::ThreadState* Profiler::CreateFirstThreadStateAndSession() {
  Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();

  // Create the session (and allocate the first segment).
  trace::client::InitializeRpcSession(&session_, data->segment());

  return data;
}
841 :
// Returns the calling thread's state, creating it if necessary and lazily
// attaching a trace buffer when the session is actively tracing.
Profiler::ThreadState* Profiler::GetOrAllocateThreadState() {
  Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
  if (!data->segment()->write_ptr && session_.IsTracing()) {
    session_.AllocateBuffer(data->segment());
  }
  return data;
}
849 :
// Returns the calling thread's state from TLS, creating, registering and
// caching a new one on first use. Does not touch the trace buffer.
Profiler::ThreadState* Profiler::GetOrAllocateThreadStateImpl() {
  ThreadState *data = tls_.Get();
  if (data != NULL)
    return data;

  data = new ThreadState(this);
  // NOTE(review): plain operator new only yields NULL when exceptions are
  //     disabled; otherwise this check is dead code — confirm build flags.
  if (data == NULL) {
    LOG(ERROR) << "Unable to allocate per-thread data";
    return NULL;
  }

  thread_state_manager_.Register(data);
  tls_.Set(data);

  return data;
}
866 :
// Returns the calling thread's state, or NULL if none has been created.
Profiler::ThreadState* Profiler::GetThreadState() const {
  return tls_.Get();
}
870 :
871 E : void Profiler::FreeThreadState() {
872 E : ThreadState *data = GetThreadState();
873 E : if (data != NULL) {
874 E : tls_.Set(NULL);
875 E : thread_state_manager_.Unregister(data);
876 E : delete data;
877 : }
878 E : }
879 :
// Entry hook reached from _indirect_penter_dllmain; handles DllMain-like
// entries, which include module logging on top of the function entry.
void WINAPI Profiler::DllMainEntryHook(EntryFrame* entry_frame,
                                       FuncAddr function,
                                       uint64 cycles) {
  // The code below may touch last error, so preserve it for the caller.
  ScopedLastErrorKeeper keep_last_error;

  instance_.OnModuleEntry(entry_frame, function, cycles);
}
887 :
// Entry hook reached from _indirect_penter for regular instrumented
// functions; forwards to the calling thread's state.
void WINAPI Profiler::FunctionEntryHook(EntryFrame* entry_frame,
                                        FuncAddr function,
                                        uint64 cycles) {
  // The code below may touch last error, so preserve it for the caller.
  ScopedLastErrorKeeper keep_last_error;

  ThreadState* data = instance_.GetOrAllocateThreadState();
  DCHECK(data != NULL);
  if (data != NULL)
    data->OnFunctionEntry(entry_frame, function, cycles);
}
898 :
899 : void WINAPI Profiler::OnV8FunctionEntry(FuncAddr function,
900 : RetAddr* return_addr_location,
901 E : uint64 cycles) {
902 E : ScopedLastErrorKeeper keep_last_error;
903 :
904 E : ThreadState* data = instance_.GetOrAllocateThreadState();
905 E : if (data != NULL)
906 E : data->OnV8FunctionEntry(function, return_addr_location, cycles);
907 E : }
908 :
// Registers a dynamic symbol named @p name spanning
// [address, address + length) with the process-wide symbol map.
void Profiler::AddSymbol(const void* address, size_t length,
                         const char* name, size_t name_len) {
  symbol_map_.AddSymbol(address, length, base::StringPiece(name, name_len));
}
913 :
// Notifies the symbol map that a dynamic symbol has been relocated; bumps
// the symbol's move count, invalidating cached invocation entries.
void Profiler::MoveSymbol(const void* old_address, const void* new_address) {
  symbol_map_.MoveSymbol(old_address, new_address);
}
917 :
918 : } // namespace profiler
919 : } // namespace agent
|