Coverage for /Syzygy/agent/profiler/profiler.cc

Coverage | Lines executed / instrumented / missing (exe / inst / miss) | Language | Group
83.2% | 321 / 386 / 0 | C++ | source

Line-by-line coverage:

   1    :  // Copyright 2012 Google Inc. All Rights Reserved.
   2    :  //
   3    :  // Licensed under the Apache License, Version 2.0 (the "License");
   4    :  // you may not use this file except in compliance with the License.
   5    :  // You may obtain a copy of the License at
   6    :  //
   7    :  //     http://www.apache.org/licenses/LICENSE-2.0
   8    :  //
   9    :  // Unless required by applicable law or agreed to in writing, software
  10    :  // distributed under the License is distributed on an "AS IS" BASIS,
  11    :  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12    :  // See the License for the specific language governing permissions and
  13    :  // limitations under the License.
  14    :  //
  15    :  // Implementation of the profiler DLL.
  16    :  #include "syzygy/agent/profiler/profiler.h"
  17    :  
  18    :  #include <windows.h>
  19    :  #include <algorithm>
  20    :  
  21    :  #include "base/at_exit.h"
  22    :  #include "base/command_line.h"
  23    :  #include "base/environment.h"
  24    :  #include "base/logging.h"
  25    :  #include "base/string_util.h"
  26    :  #include "base/utf_string_conversions.h"
  27    :  #include "base/memory/scoped_ptr.h"
  28    :  #include "base/win/pe_image.h"
  29    :  #include "base/win/scoped_handle.h"
  30    :  #include "syzygy/agent/common/dlist.h"
  31    :  #include "syzygy/agent/common/process_utils.h"
  32    :  #include "syzygy/agent/common/scoped_last_error_keeper.h"
  33    :  #include "syzygy/agent/profiler/return_thunk_factory.h"
  34    :  #include "syzygy/common/logging.h"
  35    :  #include "syzygy/trace/client/client_utils.h"
  36    :  #include "syzygy/trace/protocol/call_trace_defs.h"
  37    :  
  38    :  namespace {
  39    :  
  40    :  using agent::common::ScopedLastErrorKeeper;
  41    :  
  42    :  typedef std::pair<RetAddr, FuncAddr> InvocationKey;
  43    :  
  44    :  class HashInvocationKey {
  45    :   public:
  46    :    static const size_t bucket_size = 4;
  47    :    static const size_t min_buckets = 8;
  48    :  
  49  E :    size_t operator()(const InvocationKey& key) const {
  50    :      return reinterpret_cast<size_t>(key.first) ^
  51  E :          reinterpret_cast<size_t>(key.second);
  52  E :    }
  53    :  
  54  E :    bool operator()(const InvocationKey& a, const InvocationKey& b) const {
  55  E :      return a < b;
  56  E :    }
  57    :  };
  58    :  
  59    :  using agent::profiler::SymbolMap;
  60    :  
  61    :  struct InvocationValue {
  62    :    // This invocation entry's caller's dynamic symbol, if any.
  63    :    scoped_refptr<SymbolMap::Symbol> caller_symbol;
  64    :    // The last observed move count for caller_symbol.
  65    :    int32 caller_move_count;
  66    :  
  67    :    // This invocation entry's callee's dynamic symbol, if any.
  68    :    scoped_refptr<SymbolMap::Symbol> function_symbol;
  69    :    // The last observed move count for function_symbol.
  70    :    int32 function_move_count;
  71    :  
  72    :    // Points to the trace buffer entry for the respective function.
  73    :    InvocationInfo* info;
  74    :  };
  75    :  
  76    :  typedef base::hash_map<
  77    :      InvocationKey, InvocationValue, HashInvocationKey> InvocationMap;
  78    :  
  79    :  
  80    :  // The information on how to set the thread name comes from
  81    :  // a MSDN article: http://msdn2.microsoft.com/en-us/library/xcb2z8hs.aspx
  82    :  const DWORD kVCThreadNameException = 0x406D1388;
  83    :  
  84    :  typedef struct tagTHREADNAME_INFO {
  85    :    DWORD dwType;  // Must be 0x1000.
  86    :    LPCSTR szName;  // Pointer to name (in user addr space).
  87    :    DWORD dwThreadID;  // Thread ID (-1=caller thread).
  88    :    DWORD dwFlags;  // Reserved for future use, must be zero.
  89    :  } THREADNAME_INFO;
  90    :  
  91    :  }  // namespace
  92    :  
  93    :  // See client.cc for a description of the unconventional
  94    :  // calling conventions for this function.
  95  i :  extern "C" void __declspec(naked) _indirect_penter() {
  96    :    __asm {
  97    :      // Stash volatile registers.
  98  i :      push eax
  99  i :      push edx
 100    :  
 101    :      // Get the current cycle time ASAP.
 102  i :      rdtsc
 103    :  
 104    :      // Save the value of eax for later, we need the register to stash the flags.
 105  i :      push ecx
 106  i :      mov ecx, eax
 107    :  
 108    :      // Save the low byte of the flags into AH.
 109  i :      lahf
 110    :      // Save the overflow flag into AL.
 111  i :      seto al
 112    :  
 113    :      // Stash the flags to stack.
 114  i :      push eax
 115    :  
 116    :      // Push the cycle time arg.
 117  i :      push edx
 118  i :      push ecx
 119    :  
 120    :      // Retrieve the original function address, pushed by our caller.
 121  i :      mov eax, DWORD PTR[esp + 0x18]
 122  i :      push eax
 123    :  
 124    :      // Calculate the position of the return address on stack, and
 125    :      // push it. This becomes the EntryFrame argument.
 126  i :      lea eax, DWORD PTR[esp + 0x20]
 127  i :      push eax
 128  i :      call agent::profiler::Profiler::FunctionEntryHook
 129    :  
 130    :      // Restore volatile registers.
 131  i :      pop eax
 132    :      // AL is set to 1 if the overflow flag was set before the call to
 133    :      // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
 134    :      // flag.
 135  i :      add al, 0x7f
 136    :      // Restore the low byte of the flags.
 137  i :      sahf
 138    :  
 139  i :      pop ecx
 140  i :      pop edx
 141  i :      pop eax
 142    :  
 143    :      // Return to the address pushed by our caller.
 144  i :      ret
 145    :    }
 146    :  }
 147    :  
 148  i :  extern "C" void __declspec(naked) _indirect_penter_dllmain() {
 149    :    __asm {
 150    :      // Stash volatile registers.
 151  i :      push eax
 152  i :      push edx
 153    :  
 154    :      // Get the current cycle time ASAP.
 155  i :      rdtsc
 156    :  
 157    :      // Save the value of eax for later, we need the register to stash the flags.
 158  i :      push ecx
 159  i :      mov ecx, eax
 160    :  
 161    :      // Save the low byte of the flags into AH.
 162  i :      lahf
 163    :      // Save the overflow flag into AL.
 164  i :      seto al
 165    :  
 166    :      // Stash the flags to stack.
 167  i :      push eax
 168    :  
 169    :      // Push the cycle time arg.
 170  i :      push edx
 171  i :      push ecx
 172    :  
 173    :      // Retrieve the original function address, pushed by our caller.
 174  i :      mov eax, DWORD PTR[esp + 0x18]
 175  i :      push eax
 176    :  
 177    :      // Calculate the position of the return address on stack, and
 178    :      // push it. This becomes the EntryFrame argument.
 179  i :      lea eax, DWORD PTR[esp + 0x20]
 180  i :      push eax
 181  i :      call agent::profiler::Profiler::DllMainEntryHook
 182    :  
 183    :      // Restore volatile registers.
 184  i :      pop eax
 185    :      // AL is set to 1 if the overflow flag was set before the call to
 186    :      // our hook, 0 otherwise. We add 0x7f to it so it'll restore the
 187    :      // flag.
 188  i :      add al, 0x7f
 189    :      // Restore the low byte of the flags.
 190  i :      sahf
 191    :  
 192  i :      pop ecx
 193  i :      pop edx
 194  i :      pop eax
 195    :  
 196    :      // Return to the address pushed by our caller.
 197  i :      ret
 198    :    }
 199    :  }
 200    :  
 201    :  // On entry, pc_location should point to a location on our own stack.
 202    :  extern "C" uintptr_t __cdecl ResolveReturnAddressLocation(
 203  E :      uintptr_t pc_location) {
 204    :    using agent::profiler::Profiler;
 205    :    return reinterpret_cast<uintptr_t>(
 206    :        Profiler::instance().ResolveReturnAddressLocation(
 207  E :            reinterpret_cast<RetAddr*>(pc_location)));
 208  E :  }
 209    :  
 210    :  // This function needs to match the declaration of FunctionEntryHook in the V8
 211    :  // API. See http://v8.googlecode.com/svn/trunk/include/v8.h.
 212    :  extern "C" __declspec(naked) void __cdecl OnDynamicFunctionEntry(
 213  i :      uintptr_t function, uintptr_t return_addr_location) {
 214    :    __asm {
 215    :      // Grab the current time ASAP.
 216  i :      rdtsc
 217    :  
 218    :      // Push the cycle time arg.
 219  i :      push edx
 220  i :      push eax
 221    :  
 222    :      // Duplicate the function and return_addr_location arguments.
 223  i :      push DWORD PTR[esp + 0x10]
 224  i :      push DWORD PTR[esp + 0x10]
 225    :  
 226  i :      call agent::profiler::Profiler::OnV8FunctionEntry
 227    :  
 228  i :      ret
 229    :    }
 230    :  }
 231    :  
 232  E :  BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
 233    :    using agent::profiler::Profiler;
 234    :  
 235    :    // Our AtExit manager required by base.
 236    :    static base::AtExitManager* at_exit = NULL;
 237    :  
 238  E :    switch (reason) {
 239    :      case DLL_PROCESS_ATTACH:
 240  E :        DCHECK(at_exit == NULL);
 241  E :        at_exit = new base::AtExitManager();
 242    :  
 243  E :        CommandLine::Init(0, NULL);
 244  E :        common::InitLoggingForDll(L"profiler");
 245  E :        break;
 246    :  
 247    :      case DLL_THREAD_DETACH:
 248  E :        Profiler::instance().OnThreadDetach();
 249  E :        break;
 250    :  
 251    :      case DLL_PROCESS_DETACH:
 252  E :        DCHECK(at_exit != NULL);
 253  E :        delete at_exit;
 254  E :        at_exit = NULL;
 255    :        break;
 256    :  
 257    :      default:
 258    :        break;
 259    :    }
 260    :  
 261  E :    return TRUE;
 262  E :  }
 263    :  
 264    :  void __cdecl AddDynamicSymbol(const void* address, size_t length,
 265  E :                                const char* name, size_t name_len) {
 266    :    using agent::profiler::Profiler;
 267    :  
 268  E :    Profiler::instance().AddSymbol(address, length, name, name_len);
 269  E :  }
 270    :  
 271    :  void __cdecl MoveDynamicSymbol(const void* old_address,
 272  E :                                 const void* new_address) {
 273    :    using agent::profiler::Profiler;
 274    :  
 275  E :    Profiler::instance().MoveSymbol(old_address, new_address);
 276  E :  }
 277    :  
 278    :  namespace agent {
 279    :  namespace profiler {
 280    :  
 281    :  // All tracing runs through this object.
 282  E :  agent::profiler::Profiler Profiler::instance_;
 283    :  
 284    :  class Profiler::ThreadState
 285    :      : public ReturnThunkFactoryImpl<Profiler::ThreadState>,
 286    :        public agent::common::ThreadStateBase {
 287    :   public:
 288    :    explicit ThreadState(Profiler* profiler);
 289    :    ~ThreadState();
 290    :  
 291    :    // Logs @p module and all other modules in the process, then flushes
 292    :    // the current trace buffer.
 293    :    void LogAllModules(HMODULE module);
 294    :  
 295    :    // Logs @p module.
 296    :    void LogModule(HMODULE module);
 297    :  
 298    :    // Logs @p thread_name as the current thread's name.
 299    :    void LogThreadName(const base::StringPiece& thread_name);
 300    :  
 301    :    // Logs @p symbol into the trace.
 302    :    void LogSymbol(SymbolMap::Symbol* symbol);
 303    :  
 304    :    // Processes a single function entry.
 305    :    void OnFunctionEntry(EntryFrame* entry_frame,
 306    :                         FuncAddr function,
 307    :                         uint64 cycles);
 308    :  
 309    :    // Processes a single V8 function entry.
 310    :    void OnV8FunctionEntry(FuncAddr function,
 311    :                           RetAddr* return_address_location,
 312    :                           uint64 cycles);
 313    :  
 314    :    // @name Callback notification implementation.
 315    :    // @{
 316    :    virtual void OnPageAdded(const void* page) OVERRIDE;
 317    :    virtual void OnPageRemoved(const void* page) OVERRIDE;
 318    :    // @}
 319    :  
 320    :    // Function exit hook.
 321    :    void OnFunctionExit(const ThunkData* data, uint64 cycles_exit);
 322    :  
 323  E :    trace::client::TraceFileSegment* segment() { return &segment_; }
 324    :  
 325    :   private:
 326    :    friend class Profiler;
 327    :  
 328    :    void RecordInvocation(RetAddr caller,
 329    :                          FuncAddr function,
 330    :                          uint64 cycles);
 331    :  
 332    :    void UpdateOverhead(uint64 entry_cycles);
 333    :    InvocationInfo* AllocateInvocationInfo();
 334    :    void ClearCache();
 335    :    bool FlushSegment();
 336    :  
 337    :    // The profiler we're attached to.
 338    :    Profiler* profiler_;
 339    :  
 340    :    // We keep a running tally of the rough amount of wall clock cycles spent
 341    :    // inside the profiler. We then subtract the profiler's overhead from the
 342    :    // wall clock cycle timer on each measurement. This results in a timer that
 343    :    // measures time exclusive of profiling overhead.
 344    :    uint64 cycles_overhead_;
 345    :  
 346    :    // The invocations we've recorded in our buffer.
 347    :    InvocationMap invocations_;
 348    :  
 349    :    // The trace file segment we're recording to.
 350    :    trace::client::TraceFileSegment segment_;
 351    :  
 352    :    // The current batch record we're writing to, if any.
 353    :    TraceBatchInvocationInfo* batch_;
 354    :  
 355    :    // The set of modules we've logged.
 356    :    ModuleSet logged_modules_;
 357    :  };
 358    :  
 359    :  Profiler::ThreadState::ThreadState(Profiler* profiler)
 360    :      : profiler_(profiler),
 361    :        cycles_overhead_(0LL),
 362  E :        batch_(NULL) {
 363  E :    Initialize();
 364  E :  }
 365    :  
 366  E :  Profiler::ThreadState::~ThreadState() {
 367  E :    ClearCache();
 368    :  
 369    :    // If we have an outstanding buffer, let's deallocate it now.
 370  E :    if (segment_.write_ptr != NULL)
 371  E :      profiler_->session_.ReturnBuffer(&segment_);
 372    :  
 373  E :    Uninitialize();
 374  E :  }
 375    :  
 376  E :  void Profiler::ThreadState::LogAllModules(HMODULE module) {
 377    :    // Bail early if we're disabled.
 378  E :    if (profiler_->session_.IsDisabled())
 379  E :      return;
 380    :  
 381  E :    agent::common::ModuleVector modules;
 382  E :    agent::common::GetProcessModules(&modules);
 383    :  
 384    :    // Our module should be in the process modules.
 385  E :    DCHECK(std::find(modules.begin(), modules.end(), module) != modules.end());
 386    :  
 387  E :    for (size_t i = 0; i < modules.size(); ++i) {
 388  E :      DCHECK(modules[i] != NULL);
 389  E :      LogModule(modules[i]);
 390  E :    }
 391    :  
 392    :    // We need to flush module events right away, so that the module is
 393    :    // defined in the trace file before events using that module start to
 394    :    // occur (in another thread).
 395  E :    FlushSegment();
 396  E :  }
 397    :  
 398  E :  void Profiler::ThreadState::LogModule(HMODULE module) {
 399    :    // This may flush our buffer, so let's clear our cache.
 400  E :    ClearCache();
 401  E :    agent::common::LogModule(module, &profiler_->session_, &segment_);
 402  E :  }
 403    :  
 404    :  void Profiler::ThreadState::LogThreadName(
 405  E :      const base::StringPiece& thread_name) {
 406  E :    if (thread_name.empty())
 407  i :      return;
 408    :  
 409    :    // Make sure the event we're about to write will fit.
 410  E :    if (!segment_.CanAllocate(thread_name.size() + 1) || !FlushSegment()) {
 411    :      // Failed to allocate a new segment.
 412  i :      return;
 413    :    }
 414    :  
 415  E :    DCHECK(segment_.CanAllocate(thread_name.size() + 1));
 416  E :    batch_ = NULL;
 417    :  
 418    :    // Allocate a record in the log.
 419    :    TraceThreadNameInfo* thread_name_event =
 420    :        reinterpret_cast<TraceThreadNameInfo*>(
 421    :          segment_.AllocateTraceRecordImpl(
 422  E :              TRACE_THREAD_NAME, thread_name.size() + 1));
 423  E :    DCHECK(thread_name_event != NULL);
 424    :    base::strlcpy(thread_name_event->thread_name,
 425  E :                  thread_name.data(), thread_name.size() + 1);
 426  E :  }
 427    :  
 428  E :  void Profiler::ThreadState::LogSymbol(SymbolMap::Symbol* symbol) {
 429  E :    DCHECK(symbol != NULL);
 430  E :    DCHECK_NE(0, symbol->id());
 431    :  
 432    :    size_t symbol_size =
 433  E :        FIELD_OFFSET(TraceDynamicSymbol, symbol_name) + symbol->name().size() + 1;
 434    :  
 435  E :    if (!segment_.CanAllocate(symbol_size) || !FlushSegment()) {
 436    :      // Failed to allocate the symbol record.
 437  i :      return;
 438    :    }
 439    :  
 440  E :    DCHECK(segment_.CanAllocate(symbol_size));
 441  E :    batch_ = NULL;
 442    :  
 443    :    // Allocate a record in the log.
 444    :    TraceDynamicSymbol* dynamic_symbol_event =
 445    :        reinterpret_cast<TraceDynamicSymbol*>(
 446    :            segment_.AllocateTraceRecordImpl(
 447  E :                TRACE_DYNAMIC_SYMBOL, symbol_size));
 448  E :    DCHECK(dynamic_symbol_event != NULL);
 449  E :    dynamic_symbol_event->symbol_id = symbol->id();
 450    :    base::strlcpy(dynamic_symbol_event->symbol_name,
 451  E :                  symbol->name().data(), symbol->name().size() + 1);
 452  E :  }
 453    :  
 454    :  void Profiler::ThreadState::OnFunctionEntry(EntryFrame* entry_frame,
 455    :                                              FuncAddr function,
 456  E :                                              uint64 cycles) {
 457  E :    if (profiler_->session_.IsDisabled())
 458  E :      return;
 459    :  
 460    :    // Record the details of the entry.
 461    :    // Note that on tail-recursion and tail-call elimination, the caller recorded
 462    :    // here will be a thunk. We cater for this case on exit as best we can.
 463  E :    ThunkData* data = MakeThunk(entry_frame->retaddr);
 464  E :    DCHECK(data != NULL);
 465  E :    data->caller = entry_frame->retaddr;
 466  E :    data->function = function;
 467  E :    data->cycles_entry = cycles - cycles_overhead_;
 468    :  
 469  E :    entry_frame->retaddr = data->thunk;
 470    :  
 471  E :    UpdateOverhead(cycles);
 472  E :  }
 473    :  
 474    :  void Profiler::ThreadState::OnV8FunctionEntry(FuncAddr function,
 475    :                                                RetAddr* return_address_location,
 476  E :                                                uint64 cycles) {
 477  E :    if (profiler_->session_.IsDisabled())
 478  i :      return;
 479    :  
 480    :    // Record the details of the entry.
 481    :  
 482    :    // TODO(siggi): Note that we want to do different exit processing here,
 483    :    //    as we know "function" is a dynamic symbol - it might be better to
 484    :    //    record the fact here than to force a lookup on RecordInvocation.
 485    :  
 486    :    // Note that on tail-recursion and tail-call elimination, the caller recorded
 487    :    // here will be a thunk. We cater for this case on exit as best we can.
 488  E :    ThunkData* data = MakeThunk(*return_address_location);
 489  E :    DCHECK(data != NULL);
 490  E :    data->caller = *return_address_location;
 491  E :    data->function = function;
 492  E :    data->cycles_entry = cycles - cycles_overhead_;
 493    :  
 494  E :    *return_address_location = data->thunk;
 495    :  
 496  E :    UpdateOverhead(cycles);
 497  E :  }
 498    :  
 499    :  void Profiler::ThreadState::OnFunctionExit(const ThunkData* data,
 500  E :                                             uint64 cycles_exit) {
 501    :    // Calculate the number of cycles in the invocation, exclusive our overhead.
 502  E :    uint64 cycles_executed = cycles_exit - cycles_overhead_ - data->cycles_entry;
 503    :  
 504    :    // See if the return address resolves to a thunk, which indicates
 505    :    // tail recursion or tail call elimination. In that case we record the
 506    :    // calling function as caller, which isn't totally accurate as that'll
 507    :    // attribute the cost to the first line of the calling function. In the
 508    :    // absence of more information, it's the best we can do, however.
 509    :    // TODO(siggi): Move this into RecordInvocation, as we can elide the lookup
 510    :    //     on a cache hit.
 511  E :    Thunk* ret_thunk = CastToThunk(data->caller);
 512  E :    if (ret_thunk == NULL) {
 513  E :      RecordInvocation(data->caller, data->function, cycles_executed);
 514  E :    } else {
 515  E :      ThunkData* ret_data = DataFromThunk(ret_thunk);
 516  E :      RecordInvocation(ret_data->function, data->function, cycles_executed);
 517    :    }
 518    :  
 519  E :    UpdateOverhead(cycles_exit);
 520  E :  }
 521    :  
 522  E :  void Profiler::ThreadState::OnPageAdded(const void* page) {
 523  E :    profiler_->OnPageAdded(page);
 524  E :  }
 525    :  
 526  E :  void Profiler::ThreadState::OnPageRemoved(const void* page) {
 527  E :    profiler_->OnPageRemoved(page);
 528  E :  }
 529    :  
 530    :  void Profiler::ThreadState::RecordInvocation(RetAddr caller,
 531    :                                               FuncAddr function,
 532  E :                                               uint64 duration_cycles) {
 533    :    // See whether we've already recorded an entry for this function.
 534  E :    InvocationKey key(caller, function);
 535  E :    InvocationMap::iterator it = invocations_.find(key);
 536  E :    if (it != invocations_.end()) {
 537    :      // Yup, we already have an entry, validate it.
 538  E :      InvocationValue& value = it->second;
 539    :  
 540    :      if ((value.caller_symbol == NULL ||
 541    :           value.caller_symbol->move_count() == value.caller_move_count) &&
 542    :          (value.function_symbol == NULL ||
 543  E :           value.function_symbol->move_count() == value.function_move_count)) {
 544    :        // The entry is still good, tally the new data.
 545  E :        ++(value.info->num_calls);
 546  E :        value.info->cycles_sum += duration_cycles;
 547  E :        if (duration_cycles < value.info->cycles_min) {
 548  E :          value.info->cycles_min = duration_cycles;
 549  E :        } else if (duration_cycles > value.info->cycles_max) {
 550  E :          value.info->cycles_max = duration_cycles;
 551    :        }
 552    :  
 553    :        // Early out on success.
 554  E :        return;
 555  i :      } else {
 556    :        // The entry is not valid any more, discard it.
 557  E :        DCHECK(value.caller_symbol != NULL || value.function_symbol != NULL);
 558    :  
 559  E :        invocations_.erase(it);
 560    :      }
 561    :    }
 562  E :    DCHECK(invocations_.find(key) == invocations_.end());
 563    :  
 564    :    // We don't have an entry, allocate a new one for this invocation.
 565    :    // The code below may touch last error.
 566  E :    ScopedLastErrorKeeper keep_last_error;
 567    :  
 568    :    scoped_refptr<SymbolMap::Symbol> caller_symbol =
 569  E :        profiler_->symbol_map_.FindSymbol(caller);
 570    :  
 571    :    // TODO(siggi): This can perhaps be optimized by keeping track of which
 572    :    //     entry hook was invoked. This will however require setting an extra
 573    :    //     bool on every entry, so will require measurement to see whether it's
 574    :    //     a win.
 575    :    scoped_refptr<SymbolMap::Symbol> function_symbol =
 576  E :        profiler_->symbol_map_.FindSymbol(function);
 577    :  
 578    :    // Trace the symbols if this is the first time either one is observed.
 579  E :    if (caller_symbol != NULL && caller_symbol->EnsureHasId()) {
 580    :      // TODO(siggi): It might lead to worst-case behavior to log symbols into
 581    :      //    the same trace buffer as we store invocations, as we're likely to
 582    :      //    alternate symbols and single-entry invocation batches. Fixme.
 583  E :      LogSymbol(caller_symbol);
 584    :    }
 585  E :    if (function_symbol != NULL && function_symbol->EnsureHasId()) {
 586    :      // TODO(siggi): See above.
 587  E :      LogSymbol(function_symbol);
 588    :    }
 589    :  
 590  E :    InvocationInfo* info = AllocateInvocationInfo();
 591  E :    if (info != NULL) {
 592  E :      InvocationValue& value = invocations_[key];
 593  E :      value.info = info;
 594  E :      value.caller_symbol = caller_symbol;
 595  E :      if (caller_symbol != NULL)
 596  E :        value.caller_move_count = caller_symbol->move_count();
 597  E :      else
 598  E :        value.caller_move_count = 0;
 599    :  
 600  E :      value.function_symbol = function_symbol;
 601  E :      if (function_symbol != NULL)
 602  E :        value.function_move_count = function_symbol->move_count();
 603  E :      else
 604  E :        value.function_move_count = 0;
 605    :  
 606  E :      if (function_symbol == NULL) {
 607    :        // We're not in a dynamic function, record the (conventional) function.
 608  E :        info->function = function;
 609  E :        info->flags = 0;
 610  E :      } else {
 611    :        // We're in a dynamic function symbol, record the details.
 612  E :        DCHECK(function_symbol->id() != 0);
 613    :  
 614  E :        info->function_symbol_id = function_symbol->id();
 615  E :        info->flags = kFunctionIsSymbol;
 616    :      }
 617    :  
 618  E :      if (caller_symbol == NULL) {
 619    :        // We're not in a dynamic caller_symbol, record the (conventional) caller.
 620  E :        info->caller = caller;
 621  E :        info->caller_offset = 0;
 622  E :      } else {
 623    :        // We're in a dynamic caller_symbol, record the details.
 624  E :        DCHECK(caller_symbol->id() != 0);
 625    :  
 626  E :        info->caller_symbol_id = caller_symbol->id();
 627  E :        info->flags |= kCallerIsSymbol;
 628    :        info->caller_offset = reinterpret_cast<const uint8*>(caller) -
 629  E :            reinterpret_cast<const uint8*>(caller_symbol->address());
 630    :      }
 631    :  
 632  E :      info->num_calls = 1;
 633  E :      info->cycles_min = info->cycles_max = info->cycles_sum = duration_cycles;
 634    :    }
 635  E :  }
 636    :  
 637  E :  void Profiler::ThreadState::UpdateOverhead(uint64 entry_cycles) {
 638    :    // TODO(siggi): Measure the fixed overhead on setup,
 639    :    //     then add it on every update.
 640  E :    cycles_overhead_ += (__rdtsc() - entry_cycles);
 641  E :  }
 642    :  
 643  E :  InvocationInfo* Profiler::ThreadState::AllocateInvocationInfo() {
 644    :    // This is kind of self-evident for the moment, as an invocation info batch
 645    :    // contains at least one invocation info as currently declared.
 646    :    // If this fails, please reconsider your implementation, or else revisit
 647    :    // the allocation code below.
 648    :    COMPILE_ASSERT(sizeof(TraceBatchInvocationInfo) >= sizeof(InvocationInfo),
 649    :                   invocation_info_batch_must_be_larger_than_invocation_info);
 650    :  
 651    :    // Do we have a record that we can grow?
 652  E :    if (batch_ != NULL && segment_.CanAllocateRaw(sizeof(InvocationInfo))) {
 653    :      InvocationInfo* invocation_info =
 654  E :          reinterpret_cast<InvocationInfo*>(segment_.write_ptr);
 655  E :      RecordPrefix* prefix = trace::client::GetRecordPrefix(batch_);
 656  E :      prefix->size += sizeof(InvocationInfo);
 657    :  
 658    :      // Update the book-keeping.
 659  E :      segment_.write_ptr += sizeof(InvocationInfo);
 660  E :      segment_.header->segment_length += sizeof(InvocationInfo);
 661    :  
 662  E :      return invocation_info;
 663    :    }
 664    :  
 665    :    // Do we need to scarf a new buffer?
 666    :    if (!segment_.CanAllocate(sizeof(TraceBatchInvocationInfo)) &&
 667  E :        !FlushSegment()) {
 668    :      // We failed to allocate a new buffer.
 669  i :      return NULL;
 670    :    }
 671    :  
 672  E :    DCHECK(segment_.header != NULL);
 673    :  
 674  E :    batch_ = segment_.AllocateTraceRecord<TraceBatchInvocationInfo>();
 675  E :    return &batch_->invocations[0];
 676  E :  }
 677    :  
 678  E :  bool Profiler::ThreadState::FlushSegment() {
 679  E :    ClearCache();
 680  E :    return profiler_->session_.ExchangeBuffer(&segment_);
 681  E :  }
 682    :  
 683  E :  void Profiler::ThreadState::ClearCache() {
 684  E :    batch_ = NULL;
 685  E :    invocations_.clear();
 686  E :  }
 687    :  
 688  E :  void Profiler::OnThreadDetach() {
 689  E :    ThreadState* state = GetThreadState();
 690  E :    if (state != NULL)
 691  E :      thread_state_manager_.MarkForDeath(state);
 692  E :  }
 693    :  
 694  E :  RetAddr* Profiler::ResolveReturnAddressLocation(RetAddr* pc_location) {
 695  E :    base::AutoLock lock(lock_);
 696    :  
 697    :    // In case of tail-call and tail recursion elimination, we can get chained
 698    :    // thunks, so we loop around here until we resolve to a non-thunk.
 699  E :    while (true) {
 700    :      // See whether the return address is one of our thunks.
 701  E :      RetAddr ret_addr = *pc_location;
 702    :  
 703    :      // Compute the page this return address lives in.
 704    :      const void* page = reinterpret_cast<const void*>(
 705  E :          reinterpret_cast<uintptr_t>(ret_addr) & ~0xFFF);
 706  E :      if (!std::binary_search(pages_.begin(), pages_.end(), page))
 707  E :        return pc_location;
 708    :  
 709    :      // It's one of our own, redirect to the thunk's stash.
 710    :      ThreadState::Thunk* thunk =
 711  E :          reinterpret_cast<ThreadState::Thunk*>(const_cast<void*>(ret_addr));
 712    :  
 713  E :      ThreadState::ThunkData* data = ThreadState::DataFromThunk(thunk);
 714    :  
 715    :      // Update the PC location and go around again, in case this
 716    :      // thunk links to another one.
 717  E :      pc_location = &data->caller;
 718  E :    }
 719  E :  }
 720    :  
 721    :  void Profiler::OnModuleEntry(EntryFrame* entry_frame,
 722    :                               FuncAddr function,
 723  E :                               uint64 cycles) {
 724    :    // The function invoked has a DllMain-like signature.
 725    :    // Get the module and reason from its invocation record.
 726  E :    HMODULE module = reinterpret_cast<HMODULE>(entry_frame->args[0]);
 727  E :    DWORD reason = entry_frame->args[1];
 728    :  
 729    :    // Only log module additions.
 730  E :    bool should_log_module = false;
 731  E :    switch (reason) {
 732    :      case DLL_PROCESS_ATTACH:
 733    :      case DLL_THREAD_ATTACH:
 734  E :        should_log_module = true;
 735  E :        break;
 736    :  
 737    :      case DLL_PROCESS_DETACH:
 738    :      case DLL_THREAD_DETACH:
 739  E :        break;
 740    :  
 741    :      default:
 742  i :        LOG(WARNING) << "Unrecognized module event: " << reason << ".";
 743    :        break;
 744    :    }
 745    :  
 746    :    // Make sure we only log each module once per process.
 747  E :    bool is_new_module = false;
 748  E :    if (should_log_module) {
 749  E :      base::AutoLock lock(lock_);
 750    :  
 751  E :      is_new_module = logged_modules_.insert(module).second;
 752  E :    }
 753    :  
 754  E :    ThreadState* data = GetOrAllocateThreadState();
 755  E :    DCHECK(data != NULL);
 756  E :    if (data == NULL)
 757  i :      return;
 758    :  
 759  E :    if (is_new_module) {
 760    :      // Delegate the logging to our per-thread data.
 761  E :      data->LogAllModules(module);
 762    :    }
 763    :  
 764    :    // Handle the function entry.
 765  E :    data->OnFunctionEntry(entry_frame, function, cycles);
 766  E :  }
 767    :  
 768  E :  void Profiler::OnPageAdded(const void* page) {
 769  E :    base::AutoLock lock(lock_);
 770    :  
 771    :    PageVector::iterator it =
 772  E :        std::lower_bound(pages_.begin(), pages_.end(), page);
 773  E :    DCHECK(it == pages_.end() || *it != page);
 774  E :    pages_.insert(it, page);
 775  E :  }
 776    :  
 777  E :  void Profiler::OnPageRemoved(const void* page) {
 778  E :    base::AutoLock lock(lock_);
 779    :  
 780    :    PageVector::iterator it =
 781  E :        std::lower_bound(pages_.begin(), pages_.end(), page);
 782    :    // The page must be in our list.
 783  E :    DCHECK(it != pages_.end());
 784  E :    DCHECK_EQ(page, *it);
 785  E :    pages_.erase(it);
 786  E :  }
 787    :  
 788  E :  void Profiler::OnThreadName(const base::StringPiece& thread_name) {
 789  E :    ThreadState* state = GetOrAllocateThreadState();
 790  E :    if (state != NULL)
 791  E :      state->LogThreadName(thread_name);
 792  E :  }
 793    :  
 794  E :  LONG CALLBACK Profiler::ExceptionHandler(EXCEPTION_POINTERS* ex_info) {
 795    :    // Log the thread if this is the VC thread name exception.
 796    :    if (ex_info->ExceptionRecord->ExceptionCode == kVCThreadNameException &&
 797    :        ex_info->ExceptionRecord->NumberParameters ==
 798  E :            sizeof(THREADNAME_INFO)/sizeof(DWORD)) {
 799    :      const THREADNAME_INFO* info =
 800    :          reinterpret_cast<const THREADNAME_INFO*>(
 801  E :              &ex_info->ExceptionRecord->ExceptionInformation);
 802    :  
 803  E :      if (info->dwType == 0x1000) {
 804  E :        instance_.OnThreadName(info->szName);
 805  E :      } else {
 806  i :        LOG(WARNING) << "Unrecognised event type " << info->dwType;
 807    :      }
 808    :    }
 809    :  
 810  E :    return EXCEPTION_CONTINUE_SEARCH;
 811  E :  }
 812    :  
Profiler::Profiler() : handler_registration_(NULL) {
  // Create our RPC session and allocate our initial trace segment on first use.
  ThreadState* data = CreateFirstThreadStateAndSession();
  CHECK(data != NULL) << "Failed to allocate thread local state.";

  // Install a vectored exception handler (TRUE puts it first in line) so we
  // can observe the MSVC thread-naming exceptions raised in this process.
  handler_registration_ = ::AddVectoredExceptionHandler(TRUE, ExceptionHandler);
}
 820    :  
Profiler::~Profiler() {
  // Typically, this will happen on the last thread in the process. We must
  // explicitly clean up this thread's state as it will otherwise leak.
  FreeThreadState();

  // Unregister our VEH.
  if (handler_registration_ != NULL) {
    ::RemoveVectoredExceptionHandler(handler_registration_);
    handler_registration_ = NULL;
  }
}
 832    :  
 833  E :  Profiler::ThreadState* Profiler::CreateFirstThreadStateAndSession() {
 834  E :    Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
 835    :  
 836    :    // Create the session (and allocate the first segment).
 837  E :    trace::client::InitializeRpcSession(&session_, data->segment());
 838    :  
 839  E :    return data;
 840  E :  }
 841    :  
 842  E :  Profiler::ThreadState* Profiler::GetOrAllocateThreadState() {
 843  E :    Profiler::ThreadState* data = GetOrAllocateThreadStateImpl();
 844  E :    if (!data->segment()->write_ptr && session_.IsTracing()) {
 845  E :      session_.AllocateBuffer(data->segment());
 846    :    }
 847  E :    return data;
 848  E :  }
 849    :  
 850  E :  Profiler::ThreadState* Profiler::GetOrAllocateThreadStateImpl() {
 851  E :    ThreadState *data = tls_.Get();
 852  E :    if (data != NULL)
 853  E :      return data;
 854    :  
 855  E :    data = new ThreadState(this);
 856  E :    if (data == NULL) {
 857  i :      LOG(ERROR) << "Unable to allocate per-thread data";
 858  i :      return NULL;
 859    :    }
 860    :  
 861  E :    thread_state_manager_.Register(data);
 862  E :    tls_.Set(data);
 863    :  
 864  E :    return data;
 865  E :  }
 866    :  
// Returns the calling thread's state, or NULL if none has been allocated yet.
Profiler::ThreadState* Profiler::GetThreadState() const {
  return tls_.Get();
}
 870    :  
 871  E :  void Profiler::FreeThreadState() {
 872  E :    ThreadState *data = GetThreadState();
 873  E :    if (data != NULL) {
 874  E :      tls_.Set(NULL);
 875  E :      thread_state_manager_.Unregister(data);
 876  E :      delete data;
 877    :    }
 878  E :  }
 879    :  
 880    :  void WINAPI Profiler::DllMainEntryHook(EntryFrame* entry_frame,
 881    :                                         FuncAddr function,
 882  E :                                         uint64 cycles) {
 883  E :    ScopedLastErrorKeeper keep_last_error;
 884    :  
 885  E :    instance_.OnModuleEntry(entry_frame, function, cycles);
 886  E :  }
 887    :  
 888    :  void WINAPI Profiler::FunctionEntryHook(EntryFrame* entry_frame,
 889    :                                          FuncAddr function,
 890  E :                                          uint64 cycles) {
 891  E :    ScopedLastErrorKeeper keep_last_error;
 892    :  
 893  E :    ThreadState* data = instance_.GetOrAllocateThreadState();
 894  E :    DCHECK(data != NULL);
 895  E :    if (data != NULL)
 896  E :      data->OnFunctionEntry(entry_frame, function, cycles);
 897  E :  }
 898    :  
 899    :  void WINAPI Profiler::OnV8FunctionEntry(FuncAddr function,
 900    :                                          RetAddr* return_addr_location,
 901  E :                                          uint64 cycles) {
 902  E :    ScopedLastErrorKeeper keep_last_error;
 903    :  
 904  E :    ThreadState* data = instance_.GetOrAllocateThreadState();
 905  E :    if (data != NULL)
 906  E :      data->OnV8FunctionEntry(function, return_addr_location, cycles);
 907  E :  }
 908    :  
 909    :  void Profiler::AddSymbol(const void* address, size_t length,
 910  E :                           const char* name, size_t name_len) {
 911  E :    symbol_map_.AddSymbol(address, length, base::StringPiece(name, name_len));
 912  E :  }
 913    :  
 914  E :  void Profiler::MoveSymbol(const void* old_address, const void* new_address) {
 915  E :    symbol_map_.MoveSymbol(old_address, new_address);
 916  E :  }
 917    :  
 918    :  }  // namespace profiler
 919    :  }  // namespace agent

Coverage information generated Thu Jul 04 09:34:53 2013.