Browse Source
Add stopwatch timer mechanism to Trace2. Timers are an alternative to Trace2 Regions. Regions are useful for measuring the time spent in various computation phases, such as the time to read the index, time to scan for unstaged files, time to scan for untracked files, etc. However, regions are not appropriate in all places. For example, during a checkout, it would be very inefficient to use regions to measure the total time spent inflating objects from the ODB from across the entire lifetime of the process; a per-unzip() region would flood the output and significantly slow the command; and some form of post-processing would be required to compute the time spent in unzip(). Timers can be used to measure a series of timer intervals and emit a single summary event (at thread and/or process exit). Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> maint
Jeff Hostetler
2 years ago
committed by
Junio C Hamano
15 changed files with 786 additions and 0 deletions
@ -0,0 +1,182 @@
@@ -0,0 +1,182 @@
|
||||
#include "cache.h" |
||||
#include "thread-utils.h" |
||||
#include "trace2/tr2_tgt.h" |
||||
#include "trace2/tr2_tls.h" |
||||
#include "trace2/tr2_tmr.h" |
||||
|
||||
/*
 * Two-way maximum and minimum.  Each macro expands its arguments more
 * than once, so only pass expressions without side effects.
 */
#define MY_MAX(a, b) ((b) > (a) ? (b) : (a))
#define MY_MIN(a, b) ((b) < (a) ? (b) : (a))
||||
|
||||
/* |
||||
* A global timer block to aggregate values from the partial sums from |
||||
* each thread. |
||||
*/ |
||||
static struct tr2_timer_block final_timer_block; /* access under tr2tls_mutex */ |
||||
|
||||
/* |
||||
* Define metadata for each stopwatch timer. |
||||
* |
||||
* This array must match "enum trace2_timer_id" and the values |
||||
* in "struct tr2_timer_block.timer[*]". |
||||
*/ |
||||
static struct tr2_timer_metadata tr2_timer_metadata[TRACE2_NUMBER_OF_TIMERS] = { |
||||
[TRACE2_TIMER_ID_TEST1] = { |
||||
.category = "test", |
||||
.name = "test1", |
||||
.want_per_thread_events = 0, |
||||
}, |
||||
[TRACE2_TIMER_ID_TEST2] = { |
||||
.category = "test", |
||||
.name = "test2", |
||||
.want_per_thread_events = 1, |
||||
}, |
||||
|
||||
/* Add additional metadata before here. */ |
||||
}; |
||||
|
||||
void tr2_start_timer(enum trace2_timer_id tid) |
||||
{ |
||||
struct tr2tls_thread_ctx *ctx = tr2tls_get_self(); |
||||
struct tr2_timer *t = &ctx->timer_block.timer[tid]; |
||||
|
||||
t->recursion_count++; |
||||
if (t->recursion_count > 1) |
||||
return; /* ignore recursive starts */ |
||||
|
||||
t->start_ns = getnanotime(); |
||||
} |
||||
|
||||
void tr2_stop_timer(enum trace2_timer_id tid) |
||||
{ |
||||
struct tr2tls_thread_ctx *ctx = tr2tls_get_self(); |
||||
struct tr2_timer *t = &ctx->timer_block.timer[tid]; |
||||
uint64_t ns_now; |
||||
uint64_t ns_interval; |
||||
|
||||
assert(t->recursion_count > 0); |
||||
|
||||
t->recursion_count--; |
||||
if (t->recursion_count) |
||||
return; /* still in recursive call(s) */ |
||||
|
||||
ns_now = getnanotime(); |
||||
ns_interval = ns_now - t->start_ns; |
||||
|
||||
t->total_ns += ns_interval; |
||||
|
||||
/* |
||||
* min_ns was initialized to zero (in the xcalloc()) rather |
||||
* than UINT_MAX when the block of timers was allocated, |
||||
* so we should always set both the min_ns and max_ns values |
||||
* the first time that the timer is used. |
||||
*/ |
||||
if (!t->interval_count) { |
||||
t->min_ns = ns_interval; |
||||
t->max_ns = ns_interval; |
||||
} else { |
||||
t->min_ns = MY_MIN(ns_interval, t->min_ns); |
||||
t->max_ns = MY_MAX(ns_interval, t->max_ns); |
||||
} |
||||
|
||||
t->interval_count++; |
||||
|
||||
ctx->used_any_timer = 1; |
||||
if (tr2_timer_metadata[tid].want_per_thread_events) |
||||
ctx->used_any_per_thread_timer = 1; |
||||
} |
||||
|
||||
void tr2_update_final_timers(void) |
||||
{ |
||||
struct tr2tls_thread_ctx *ctx = tr2tls_get_self(); |
||||
enum trace2_timer_id tid; |
||||
|
||||
if (!ctx->used_any_timer) |
||||
return; |
||||
|
||||
/* |
||||
* Accessing `final_timer_block` requires holding `tr2tls_mutex`. |
||||
* We assume that our caller is holding the lock. |
||||
*/ |
||||
|
||||
for (tid = 0; tid < TRACE2_NUMBER_OF_TIMERS; tid++) { |
||||
struct tr2_timer *t_final = &final_timer_block.timer[tid]; |
||||
struct tr2_timer *t = &ctx->timer_block.timer[tid]; |
||||
|
||||
if (t->recursion_count) { |
||||
/* |
||||
* The current thread is exiting with |
||||
* timer[tid] still running. |
||||
* |
||||
* Technically, this is a bug, but I'm going |
||||
* to ignore it. |
||||
* |
||||
* I don't think it is worth calling die() |
||||
* for. I don't think it is worth killing the |
||||
* process for this bookkeeping error. We |
||||
* might want to call warning(), but I'm going |
||||
* to wait on that. |
||||
* |
||||
* The downside here is that total_ns won't |
||||
* include the current open interval (now - |
||||
* start_ns). I can live with that. |
||||
*/ |
||||
} |
||||
|
||||
if (!t->interval_count) |
||||
continue; /* this timer was not used by this thread */ |
||||
|
||||
t_final->total_ns += t->total_ns; |
||||
|
||||
/* |
||||
* final_timer_block.timer[tid].min_ns was initialized to |
||||
* was initialized to zero rather than UINT_MAX, so we should |
||||
* always set both the min_ns and max_ns values the first time |
||||
* that we add a partial sum into it. |
||||
*/ |
||||
if (!t_final->interval_count) { |
||||
t_final->min_ns = t->min_ns; |
||||
t_final->max_ns = t->max_ns; |
||||
} else { |
||||
t_final->min_ns = MY_MIN(t_final->min_ns, t->min_ns); |
||||
t_final->max_ns = MY_MAX(t_final->max_ns, t->max_ns); |
||||
} |
||||
|
||||
t_final->interval_count += t->interval_count; |
||||
} |
||||
} |
||||
|
||||
/*
 * Report the calling thread's own timer values through `fn_apply`.
 *
 * A timer is reported only when its metadata asks for per-thread
 * events and this thread completed at least one interval on it.
 * The callback's last argument is passed as 0 here.
 */
void tr2_emit_per_thread_timers(tr2_tgt_evt_timer_t *fn_apply)
{
	struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
	enum trace2_timer_id id;

	if (!ctx->used_any_per_thread_timer)
		return;

	for (id = 0; id < TRACE2_NUMBER_OF_TIMERS; id++) {
		if (!tr2_timer_metadata[id].want_per_thread_events)
			continue;
		if (!ctx->timer_block.timer[id].interval_count)
			continue;

		fn_apply(&tr2_timer_metadata[id],
			 &ctx->timer_block.timer[id],
			 0);
	}
}
||||
|
||||
/*
 * Report the accumulated values in `final_timer_block` through
 * `fn_apply`, skipping timers with no recorded intervals.  The
 * callback's last argument is passed as 1 here.
 *
 * `final_timer_block` is protected by `tr2tls_mutex`; this routine
 * does not take the lock itself and relies on the caller holding it.
 */
void tr2_emit_final_timers(tr2_tgt_evt_timer_t *fn_apply)
{
	enum trace2_timer_id id;

	for (id = 0; id < TRACE2_NUMBER_OF_TIMERS; id++) {
		if (!final_timer_block.timer[id].interval_count)
			continue;

		fn_apply(&tr2_timer_metadata[id],
			 &final_timer_block.timer[id],
			 1);
	}
}
@ -0,0 +1,140 @@
@@ -0,0 +1,140 @@
|
||||
#ifndef TR2_TMR_H |
||||
#define TR2_TMR_H |
||||
|
||||
#include "trace2.h" |
||||
#include "trace2/tr2_tgt.h" |
||||
|
||||
/* |
||||
* Define a mechanism to allow "stopwatch" timers. |
||||
* |
||||
* Timers can be used to measure "interesting" activity that does not |
||||
* fit the "region" model, such as code called from many different |
||||
* regions (like zlib) and/or where data for individual calls are not |
||||
* interesting or are too numerous to be efficiently logged. |
||||
* |
||||
* Timer values are accumulated during program execution and emitted |
||||
* to the Trace2 logs at program exit. |
||||
* |
||||
 * To make this model efficient, we define a compile-time fixed set of
 * timers and timer ids using a "timer block" array in thread-local
 * storage.  This gives us constant time access to each timer within
 * each thread, since we want start/stop operations to be as fast as
 * possible.  This lets us avoid the complexities of dynamically
 * allocating a timer on the first use by a thread and/or possibly
 * sharing that timer definition with other concurrent threads.
 * However, this does require that we define the set of timers at
 * compile time.
||||
* |
||||
* Each thread uses the timer block in its thread-local storage to |
||||
* compute partial sums for each timer (without locking). When a |
||||
* thread exits, those partial sums are (under lock) added to the |
||||
* global final sum. |
||||
* |
||||
* Using this "timer block" model costs ~48 bytes per timer per thread |
||||
* (we have about six uint64 fields per timer). This does increase |
||||
* the size of the thread-local storage block, but it is allocated (at |
||||
* thread create time) and not on the thread stack, so I'm not worried |
||||
* about the size. |
||||
* |
||||
* Partial sums for each timer are optionally emitted when a thread |
||||
* exits. |
||||
* |
||||
* Final sums for each timer are emitted between the "exit" and |
||||
* "atexit" events. |
||||
* |
||||
* A parallel "timer metadata" table contains the "category" and "name" |
||||
* fields for each timer. This eliminates the need to include those |
||||
* args in the various timer APIs. |
||||
*/ |
||||
|
||||
/*
 * State of one stopwatch timer as seen by one thread.
 */
struct tr2_timer {
	/* Sum of all completed intervals in this thread, in nanoseconds. */
	uint64_t total_ns;

	/*
	 * Shortest and longest completed interval observed for this
	 * timer in this thread.
	 */
	uint64_t min_ns;
	uint64_t max_ns;

	/*
	 * Clock value captured when the timer was started in this
	 * thread.  Meaningless while the timer is not running.
	 */
	uint64_t start_ns;

	/*
	 * How many start/stop pairs have completed in this thread.
	 * Recursive (nested) starts do not count.
	 */
	uint64_t interval_count;

	/*
	 * Current nesting depth of starts in this thread.  Nested
	 * starts are otherwise ignored; this counter only tracks when
	 * the outermost one is finally stopped.
	 */
	unsigned int recursion_count;
};
||||
|
||||
/*
 * Descriptive, per-timer data that goes with a "struct tr2_timer".
 */
struct tr2_timer_metadata {
	const char *category;
	const char *name;

	/*
	 * Non-zero when this timer should also be reported
	 * per-thread, at the time each individual thread exits.
	 */
	unsigned int want_per_thread_events:1;
};
||||
|
||||
/* |
||||
* A compile-time fixed-size block of timers to insert into |
||||
* thread-local storage. This wrapper is used to avoid quirks |
||||
* of C and the usual need to pass an array size argument. |
||||
*/ |
||||
struct tr2_timer_block { |
||||
struct tr2_timer timer[TRACE2_NUMBER_OF_TIMERS]; |
||||
}; |
||||
|
||||
/* |
||||
* Private routines used by trace2.c to actually start/stop an |
||||
* individual timer in the current thread. |
||||
*/ |
||||
void tr2_start_timer(enum trace2_timer_id tid); |
||||
void tr2_stop_timer(enum trace2_timer_id tid); |
||||
|
||||
/* |
||||
* Add the current thread's timer data to the global totals. |
||||
* This is called during thread-exit. |
||||
* |
||||
* Caller must be holding the tr2tls_mutex. |
||||
*/ |
||||
void tr2_update_final_timers(void); |
||||
|
||||
/* |
||||
* Emit per-thread timer data for the current thread. |
||||
* This is called during thread-exit. |
||||
*/ |
||||
void tr2_emit_per_thread_timers(tr2_tgt_evt_timer_t *fn_apply); |
||||
|
||||
/* |
||||
* Emit global total timer values. |
||||
* This is called during atexit handling. |
||||
* |
||||
* Caller must be holding the tr2tls_mutex. |
||||
*/ |
||||
void tr2_emit_final_timers(tr2_tgt_evt_timer_t *fn_apply); |
||||
|
||||
#endif /* TR2_TMR_H */ |
Loading…
Reference in new issue