1 : // Copyright 2012 Google Inc.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Implementation of the basic-block entry counting agent library.
16 :
17 : #include "syzygy/agent/basic_block_entry/basic_block_entry.h"
18 :
19 : #include "base/at_exit.h"
20 : #include "base/command_line.h"
21 : #include "base/environment.h"
22 : #include "base/file_path.h"
23 : #include "base/lazy_instance.h"
24 : #include "base/stringprintf.h"
25 : #include "base/utf_string_conversions.h"
26 : #include "base/memory/scoped_ptr.h"
27 : #include "sawbuck/common/com_utils.h"
28 : #include "syzygy/agent/common/process_utils.h"
29 : #include "syzygy/agent/common/scoped_last_error_keeper.h"
30 : #include "syzygy/common/logging.h"
31 : #include "syzygy/trace/protocol/call_trace_defs.h"
32 :
// Basic-block entry stub invoked by instrumented code. It saves eax, ecx, edx
// and the flags, hands BasicBlockEntryHook a pointer to the stack slots holding
// {ret_addr, module_data, bb_id}, restores the saved state and returns to the
// caller while removing the two pushed arguments from the stack.
33 E : extern "C" void __declspec(naked) _basic_block_enter() {
34 : __asm {
35 : // This is expected to be called via instrumentation that looks like:
36 : // push bb_id
37 : // push module_data
38 : // call [_basic_block_enter]
39 : //
40 : // Stack: ... bb_id, module_data, ret_addr.
41 :
42 : // Save the volatile registers and the flags; they are restored below.
43 E : push eax
44 E : push ecx
45 E : push edx
46 E : pushfd
47 :
48 : // Stack: ... bb_id, module_data, ret_addr, eax, ecx, edx, fd.
49 :
50 : // Push the original esp value onto the stack as the entry-hook data.
51 : // This gives the entry-hook a pointer to ret_addr, module_data and bb_id.
// The 0x10 offset skips the four 4-byte values pushed above, so eax ends up
// holding the address of the ret_addr slot.
52 E : lea eax, DWORD PTR[esp + 0x10]
53 E : push eax
54 :
55 : // Stack: ..., bb_id, module_data, ret_addr, eax, ecx, edx, fd, &ret_addr.
// NOTE(review): the stack pictures here assume the hook removes its own
// argument (callee-cleans-stack, e.g. __stdcall) -- confirm against the
// declaration of BasicBlockEntryHook in the header.
56 E : call agent::basic_block_entry::BasicBlockEntry::BasicBlockEntryHook
57 :
58 : // Stack: ... bb_id, module_data, ret_addr, eax, ecx, edx, fd.
59 :
60 : // Restore the flags and registers in the reverse order of the saves above.
61 E : popfd
62 E : pop edx
63 E : pop ecx
64 E : pop eax
65 :
66 : // Stack: ... bb_id, module_data, ret_addr.
67 :
68 : // Return to the address pushed by our caller, popping off the bb_id and
69 : // module_data values (2 * 4 = 8 bytes) from the stack.
70 E : ret 8
71 :
72 : // Stack: ...
73 : }
74 : }
75 :
// DllMain entry stub reached through a per-module thunk. It saves eax, ecx,
// edx and the flags, hands DllMainEntryHook a pointer to the stack slots
// starting at `function`, restores the saved state and then "returns" into the
// thunked function, discarding module_data on the way.
76 E : extern "C" void __declspec(naked) _indirect_penter_dllmain() {
77 : __asm {
78 : // This is expected to be called via a thunk that looks like:
79 : // push module_data
80 : // push function
81 : // jmp [_indirect_penter_dllmain]
82 : //
83 : // Stack: ... reserved, reason, module, ret_addr, module_data, function.
84 :
85 : // Save the volatile registers and the flags; they are restored below.
86 E : push eax
87 E : push ecx
88 E : push edx
89 E : pushfd
90 :
91 : // Stack: ... reserved, reason, module, ret_addr, module_data, function,
92 : // eax, ecx, edx, fd.
93 :
94 : // Push the original esp value onto the stack as the entry-hook data.
95 : // This gives the entry-hook a pointer to function, module_data, ret_addr,
96 : // module, reason and reserved.
// The 0x10 offset skips the four 4-byte values pushed above, so eax ends up
// holding the address of the `function` slot.
97 E : lea eax, DWORD PTR[esp + 0x10]
98 E : push eax
99 :
100 : // Stack: ... reserved, reason, module, ret_addr, module_data, function,
101 : // eax, ecx, edx, fd, &function.
102 :
// NOTE(review): the stack pictures here assume the hook removes its own
// argument (callee-cleans-stack, e.g. __stdcall) -- confirm against the
// declaration of DllMainEntryHook in the header.
103 E : call agent::basic_block_entry::BasicBlockEntry::DllMainEntryHook
104 :
105 : // Stack: ... reserved, reason, module, ret_addr, module_data, function.
106 : // eax, ecx, edx, fd.
107 :
108 : // Restore the flags and registers in the reverse order of the saves above.
109 E : popfd
110 E : pop edx
111 E : pop ecx
112 E : pop eax
113 :
114 : // Stack: ... reserved, reason, module, ret_addr, module_data, function.
115 :
116 : // Return to the thunked function (its address is on top of the stack),
117 : // popping the 4-byte module_data off the stack as we go.
118 E : ret 4
119 :
120 : // Stack: ... reserved, reason, module, ret_addr.
121 : }
122 : }
123 :
124 E : BOOL WINAPI DllMain(HMODULE instance, DWORD reason, LPVOID reserved) {
125 : // Our AtExit manager required by base.
126 : static base::AtExitManager* at_exit = NULL;
127 :
128 E : switch (reason) {
129 : case DLL_PROCESS_ATTACH:
130 E : DCHECK(at_exit == NULL);
131 E : at_exit = new base::AtExitManager();
132 :
133 E : CommandLine::Init(0, NULL);
134 E : common::InitLoggingForDll(L"basic_block_entry");
135 E : LOG(INFO) << "Initialized basic-block entry counting agent library.";
136 E : break;
137 :
138 : case DLL_THREAD_ATTACH:
139 i : break;
140 :
141 : case DLL_THREAD_DETACH:
142 i : break;
143 :
144 : case DLL_PROCESS_DETACH:
145 E : DCHECK(at_exit != NULL);
146 E : delete at_exit;
147 E : at_exit = NULL;
148 E : break;
149 :
150 : default:
151 i : NOTREACHED();
152 : break;
153 : }
154 :
155 E : return TRUE;
156 E : }
157 :
158 : namespace agent {
159 : namespace basic_block_entry {
160 :
161 : namespace {
162 :
163 : using ::common::BasicBlockFrequencyData;
164 : using agent::common::ScopedLastErrorKeeper;
165 : using trace::client::TraceFileSegment;
166 :
167 : // All tracing runs through this object. It is the base::LazyInstance-managed
// BasicBlockEntry singleton that BasicBlockEntry::Instance() hands out.
168 : base::LazyInstance<BasicBlockEntry> static_coverage_instance =
169 : LAZY_INSTANCE_INITIALIZER;
170 :
171 : // Get the address of the module containing @p addr. We do this by querying
172 : // for the allocation that contains @p addr. This must lie within the
173 : // instrumented module, and be part of the single allocation in which the
174 : // image of the module lies. The base of the module will be the base address
175 : // of the allocation.
176 : // TODO(rogerm): Move to agent::common.
177 E : HMODULE GetModuleForAddr(const void* addr) {
178 E : MEMORY_BASIC_INFORMATION mem_info = {};
179 :
180 : // Lookup up the allocation in which addr is located.
181 E : if (::VirtualQuery(addr, &mem_info, sizeof(mem_info)) == 0) {
182 i : DWORD error = ::GetLastError();
183 i : LOG(ERROR) << "VirtualQuery failed: " << com::LogWe(error) << ".";
184 i : return NULL;
185 : }
186 :
187 : // Check that the allocation base has a valid PE header magic number.
188 E : base::win::PEImage image(reinterpret_cast<HMODULE>(mem_info.AllocationBase));
189 E : if (!image.VerifyMagic()) {
190 i : LOG(ERROR) << "Invalid module found for "
191 : << base::StringPrintf("0x%08X", addr) << ".";
192 i : return NULL;
193 : }
194 :
195 : // Then it's a module.
196 E : return image.module();
197 E : }
198 :
199 : } // namespace
200 :
201 : // The basic-block entry hook parameters. This overlays the stack slots that
// _basic_block_enter passes to BasicBlockEntryHook, lowest address first, so
// the field order and sizes must not change.
202 : struct BasicBlockEntry::BasicBlockEntryFrame {
203 : const void* ret_addr;  // Return address pushed by the call to the stub.
204 : BasicBlockFrequencyData* module_data;  // Pushed by the instrumentation.
205 : uint32 basic_block_id;  // The bb_id pushed by the instrumentation.
206 : };
207 :
208 : // The dllmain entry hook parameters. This overlays the stack slots that
// _indirect_penter_dllmain passes to DllMainEntryHook, lowest address first,
// so the field order and sizes must not change.
209 : struct BasicBlockEntry::DllMainEntryFrame {
210 : FuncAddr function;  // The thunked function, pushed by the thunk.
211 : BasicBlockFrequencyData* module_data;  // Pushed by the thunk.
212 : const void* ret_addr;  // Return address of the original call.
213 : HMODULE module;  // First argument of the thunked function.
214 : DWORD reason;  // Second argument; a DLL_* notification code.
215 : DWORD reserved;  // Third argument.
216 : };
217 :
218 : namespace {
219 :
// The frame structs mirror raw 4-byte stack slots (3 and 6 slots respectively),
// so fail the build if their sizes ever drift from that layout.
220 : COMPILE_ASSERT(sizeof(BasicBlockEntry::BasicBlockEntryFrame) == 12,
221 : BasicBlockEntry_BasicBlockEntryFrame_is_not_the_right_size);
222 :
223 : COMPILE_ASSERT(sizeof(BasicBlockEntry::DllMainEntryFrame) == 24,
224 : BasicBlockEntry_DllMainEntryFrame_is_not_the_right_size);
225 :
226 : }  // namespace
227 :
228 : // The per-thread-per-instrumented-module state managed by this agent.
229 : class BasicBlockEntry::ThreadState : public agent::common::ThreadStateBase {
230 : public:
231 : // Initialize a ThreadState instance.
232 : ThreadState(BasicBlockEntry* agent, void* buffer);
233 :
234 : // Destroy a ThreadState instance.
235 : ~ThreadState();
236 :
237 : // @name Accessors.
238 : // @{
239 : uint32* frequency_data() { return frequency_data_; }
240 E : TraceFileSegment* segment() { return &segment_; }
241 : TraceBasicBlockFrequencyData* trace_data() { return trace_data_; }
242 : // @}
243 :
244 : // @name Mutators.
245 : // @{
246 : void set_frequency_data(void* buffer);
247 : void set_trace_data(TraceBasicBlockFrequencyData* trace_data);
248 : // @}
249 :
250 : // A helper to return a ThreadState pointer given a TLS index.
251 : static ThreadState* Get(DWORD tls_index);
252 :
253 : // A helper to assign a ThreadState pointer to a TLS index.
254 : void Assign(DWORD tls_index);
255 :
256 : // Saturation increment the frequency record for @p basic_block_id. Note
257 : // that in Release mode, no range checking is performed on basic_block_id.
258 : void Increment(uint32 basic_block_id);
259 :
260 : protected:
261 : // As a shortcut, this points to the beginning of the array of basic-block
262 : // entry frequency values. With tracing enabled, this is equivalent to:
263 : // reinterpret_cast<uint32*>(this->trace_data->frequency_data)
264 : // If tracing is not enabled, this will be set to point to a static
265 : // allocation of BasicBlockFrequencyData::frequency_data.
266 : uint32* frequency_data_;
267 :
268 : // The basic-block entry agent this tread state belongs to.
269 : BasicBlockEntry* agent_;
270 :
271 : // The thread's current trace-file segment, if any.
272 : trace::client::TraceFileSegment segment_;
273 :
274 : // The basic-block frequency record we're populating. This will point into
275 : // the associated trace file segment's buffer.
276 : TraceBasicBlockFrequencyData* trace_data_;
277 :
278 : private:
279 : DISALLOW_COPY_AND_ASSIGN(ThreadState);
280 : };
281 :
282 : BasicBlockEntry::ThreadState::ThreadState(BasicBlockEntry* agent, void* buffer)
283 : : agent_(agent),
284 : frequency_data_(static_cast<uint32*>(buffer)),
285 E : trace_data_(NULL) {
286 E : DCHECK(agent != NULL);
287 E : DCHECK(buffer != NULL);
288 E : }
289 :
290 i : BasicBlockEntry::ThreadState::~ThreadState() {
291 : // If we have an outstanding buffer, let's deallocate it now.
292 i : if (segment_.write_ptr != NULL && !agent_->session_.IsDisabled())
293 i : agent_->session_.ReturnBuffer(&segment_);
294 i : }
295 :
296 E : void BasicBlockEntry::ThreadState::set_frequency_data(void* buffer) {
297 E : DCHECK(buffer != NULL);
298 E : frequency_data_ = static_cast<uint32*>(buffer);
299 E : }
300 :
301 : void BasicBlockEntry::ThreadState::set_trace_data(
302 : TraceBasicBlockFrequencyData* trace_data) {
303 : DCHECK(trace_data != NULL);
304 : trace_data_ = trace_data;
305 : }
306 :
307 : BasicBlockEntry::ThreadState* BasicBlockEntry::ThreadState::Get(
308 E : DWORD tls_index) {
309 E : DCHECK_NE(TLS_OUT_OF_INDEXES, tls_index);
310 E : return static_cast<ThreadState*>(::TlsGetValue(tls_index));
311 E : }
312 :
313 E : void BasicBlockEntry::ThreadState::Assign(DWORD tls_index) {
314 E : DCHECK_NE(TLS_OUT_OF_INDEXES, tls_index);
315 E : ::TlsSetValue(tls_index, this);
316 E : }
317 :
318 E : inline void BasicBlockEntry::ThreadState::Increment(uint32 basic_block_id) {
319 E : DCHECK(frequency_data_ != NULL);
320 E : DCHECK(trace_data_ == NULL || basic_block_id < trace_data_->num_basic_blocks);
321 E : uint32& element = frequency_data_[basic_block_id];
322 E : if (element != ~0U)
323 E : ++element;
324 E : }
325 :
326 E : BasicBlockEntry* BasicBlockEntry::Instance() {
327 E : return static_coverage_instance.Pointer();
328 E : }
329 :
330 E : BasicBlockEntry::BasicBlockEntry() {
331 E : scoped_ptr<base::Environment> env(base::Environment::Create());
332 E : std::string id;
333 E : env->GetVar(::kSyzygyRpcInstanceIdEnvVar, &id);
334 E : session_.set_instance_id(UTF8ToWide(id));
335 :
336 : // Create a session. We immediately return the buffer that gets allocated
337 : // to us. The client will perform thread-local buffer management on an as-
338 : // needed basis.
339 E : trace::client::TraceFileSegment dummy_segment;
340 E : if (session_.CreateSession(&dummy_segment)) {
341 E : CHECK(session_.ReturnBuffer(&dummy_segment));
342 : }
343 E : }
344 :
345 E : BasicBlockEntry::~BasicBlockEntry() {
346 E : }
347 :
348 E : void BasicBlockEntry::BasicBlockEntryHook(BasicBlockEntryFrame* entry_frame) {
349 E : ScopedLastErrorKeeper scoped_last_error_keeper;
350 E : DCHECK(entry_frame != NULL);
351 E : DCHECK(entry_frame->module_data != NULL);
352 : DCHECK_GT(entry_frame->module_data->num_basic_blocks,
353 E : entry_frame->basic_block_id);
354 :
355 : // TODO(rogerm): Consider extracting a fast path for state != NULL? Inline it
356 : // during instrumentation? Move it into the _basic_block_enter function?
357 E : ThreadState* state = ThreadState::Get(entry_frame->module_data->tls_index);
358 E : if (state == NULL)
359 E : state = Instance()->CreateThreadState(entry_frame);
360 E : state->Increment(entry_frame->basic_block_id);
361 E : }
362 :
363 E : void BasicBlockEntry::DllMainEntryHook(DllMainEntryFrame* entry_frame) {
364 E : ScopedLastErrorKeeper scoped_last_error_keeper;
365 E : DCHECK(entry_frame != NULL);
366 E : switch (entry_frame->reason) {
367 : case DLL_PROCESS_ATTACH:
368 E : Instance()->OnProcessAttach(entry_frame);
369 E : break;
370 :
371 : case DLL_THREAD_ATTACH:
372 : // We don't handle this event because the thread may never actually
373 : // call into an instrumented module, so we don't want to allocate
374 : // resources needlessly. Further, we won't get this event for thread
375 : // that were created before the agent was loaded. On first use of
376 : // an instrumented basic-block in a given thread, any thread specific
377 : // resources will be allocated.
378 i : break;
379 :
380 : case DLL_PROCESS_DETACH:
381 : case DLL_THREAD_DETACH:
382 E : Instance()->OnThreadDetach(entry_frame);
383 E : break;
384 :
385 : default:
386 i : NOTREACHED();
387 : }
388 E : }
389 :
390 E : void BasicBlockEntry::RegisterModule(const void* addr) {
391 E : DCHECK(addr != NULL);
392 :
393 : // Allocate a segment for the module information.
394 E : trace::client::TraceFileSegment module_info_segment;
395 E : CHECK(session_.AllocateBuffer(&module_info_segment));
396 :
397 : // Log the module. This is required in order to associate basic-block
398 : // frequency with a module and PDB file during post-processing.
399 E : HMODULE module = GetModuleForAddr(addr);
400 E : CHECK(module != NULL);
401 E : CHECK(agent::common::LogModule(module, &session_, &module_info_segment));
402 :
403 : // Commit the module information.
404 E : CHECK(session_.ReturnBuffer(&module_info_segment));
405 E : }
406 :
407 E : void BasicBlockEntry::OnProcessAttach(DllMainEntryFrame* entry_frame) {
408 E : DCHECK(entry_frame != NULL);
409 :
410 : // Exit if the magic number does not match.
411 : CHECK_EQ(::common::kBasicBlockEntryAgentId,
412 E : entry_frame->module_data->agent_id);
413 :
414 : // Exit if the version does not match.
415 : CHECK_EQ(::common::kBasicBlockFrequencyDataVersion,
416 E : entry_frame->module_data->version);
417 :
418 : // We allow for this hook to be called multiple times. We expect the first
419 : // time to occur under the loader lock, so we don't need to worry about
420 : // concurrency for this check.
421 E : if (entry_frame->module_data->initialization_attempted)
422 i : return;
423 :
424 : // Flag the module as initialized.
425 E : entry_frame->module_data->initialization_attempted = 1U;
426 :
427 : // We expect this to be executed exactly once for each module.
428 E : CHECK_EQ(TLS_OUT_OF_INDEXES, entry_frame->module_data->tls_index);
429 E : entry_frame->module_data->tls_index = ::TlsAlloc();
430 E : CHECK_NE(TLS_OUT_OF_INDEXES, entry_frame->module_data->tls_index);
431 :
432 : // Register this module with the call_trace if the session is not disabled.
433 E : if (!session_.IsDisabled())
434 E : RegisterModule(entry_frame->function);
435 E : }
436 :
437 E : void BasicBlockEntry::OnThreadDetach(DllMainEntryFrame* entry_frame) {
438 E : DCHECK(entry_frame != NULL);
439 E : DCHECK_EQ(1U, entry_frame->module_data->initialization_attempted);
440 E : DCHECK_NE(TLS_OUT_OF_INDEXES, entry_frame->module_data->tls_index);
441 :
442 E : ThreadState* state = ThreadState::Get(entry_frame->module_data->tls_index);
443 E : if (state != NULL)
444 E : thread_state_manager_.MarkForDeath(state);
445 E : }
446 :
447 : BasicBlockEntry::ThreadState* BasicBlockEntry::CreateThreadState(
448 E : BasicBlockEntryFrame* entry_frame) {
449 E : DCHECK(entry_frame != NULL);
450 :
451 : // Create the thread-local state for this thread. By default, just point the
452 : // counter array to the statically allocated fall-back area.
453 : ThreadState* state = new ThreadState(
454 E : this, entry_frame->module_data->frequency_data);
455 E : CHECK(state != NULL);
456 :
457 : // Associate the thread_state with the current thread.
458 E : state->Assign(entry_frame->module_data->tls_index);
459 :
460 : // Register the thread state with the thread state manager.
461 E : thread_state_manager_.Register(state);
462 :
463 : // If we're not actually tracing, then we're done.
464 E : if (session_.IsDisabled())
465 E : return state;
466 :
467 : // Nothing to allocate? We're done!
468 E : if (entry_frame->module_data->num_basic_blocks == 0) {
469 i : LOG(WARNING) << "Module contains no instrumented basic blocks, not "
470 : << "allocating basic-block trace data segment.";
471 i : return state;
472 : }
473 :
474 : // Determine the size of the basic block frequency table.
475 : size_t data_size =
476 E : entry_frame->module_data->num_basic_blocks * sizeof(uint32);
477 :
478 : // Determine the size of the basic block frequency record.
479 E : size_t record_size = sizeof(TraceBasicBlockFrequencyData) + data_size - 1;
480 :
481 : // Determine the size of the buffer we need. We need room for the basic block
482 : // frequency struct plus a single RecordPrefix header.
483 E : size_t segment_size = sizeof(RecordPrefix) + record_size;
484 :
485 : // Allocate the actual segment for the coverage data.
486 E : CHECK(session_.AllocateBuffer(segment_size, state->segment()));
487 :
488 : // Ensure it's big enough to allocate the basic-block frequency data
489 : // we want. This automatically accounts for the RecordPrefix overhead.
490 E : CHECK(state->segment()->CanAllocate(record_size));
491 :
492 : // Allocate the basic-block frequency data. We will leave this allocated and
493 : // let it get flushed during tear-down of the call-trace client.
494 : TraceBasicBlockFrequencyData* trace_data =
495 : reinterpret_cast<TraceBasicBlockFrequencyData*>(
496 : state->segment()->AllocateTraceRecordImpl(TRACE_BASIC_BLOCK_FREQUENCY,
497 E : record_size));
498 E : DCHECK(trace_data != NULL);
499 :
500 : // Initialize the basic block frequency data struct.
501 E : HMODULE module = GetModuleForAddr(entry_frame->ret_addr);
502 E : CHECK(module != NULL);
503 E : const base::win::PEImage image(module);
504 E : const IMAGE_NT_HEADERS* nt_headers = image.GetNTHeaders();
505 E : trace_data->module_base_addr = reinterpret_cast<ModuleAddr>(image.module());
506 E : trace_data->module_base_size = nt_headers->OptionalHeader.SizeOfImage;
507 E : trace_data->module_checksum = nt_headers->OptionalHeader.CheckSum;
508 E : trace_data->module_time_date_stamp = nt_headers->FileHeader.TimeDateStamp;
509 E : trace_data->frequency_size = sizeof(uint32);
510 E : trace_data->num_basic_blocks = entry_frame->module_data->num_basic_blocks;
511 :
512 : // Hook up the newly allocated buffer to the call-trace instrumentation.
513 E : state->set_frequency_data(trace_data->frequency_data);
514 :
515 E : return state;
516 E : }
517 :
518 : } // namespace basic_block_entry
519 : } // namespace agent
|