| From 83be9b1c449f3bfa191a40067114f9bc41cbc996 Mon Sep 17 00:00:00 2001 |
| From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> |
| Date: Sun, 24 Jan 2021 15:31:35 +0000 |
| Subject: [PATCH] FROMGIT: drm/i915: Track context current active time |
| |
| Track context active (on hardware) status together with the start |
| timestamp. |
| |
| This will be used to provide better granularity of context |
| runtime reporting in conjunction with already tracked pphwsp accumulated |
| runtime. |
| |
| The latter is only updated on context save, so it does not give us |
| visibility into any currently executing work. |
| |
| As part of the patch the existing runtime tracking data is moved under the |
| new ce->stats member. The active timestamp is published with WRITE_ONCE() |
| and sampled with READ_ONCE(), so accumulated plus active runtime can be |
| read out together without taking a lock. |
| |
| v2: |
| * Rename and make __intel_context_get_active_time unlocked. |
| |
| Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> |
| Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com> # v1 |
| Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> |
| Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> |
| Link: https://patchwork.freedesktop.org/patch/msgid/20210123153733.18139-7-chris@chris-wilson.co.uk |
| Link: https://patchwork.freedesktop.org/patch/msgid/20210124153136.19124-7-chris@chris-wilson.co.uk |
| (cherry picked from commit c209da9764b9d5e63d143e7daf52a1fe148ff8be |
| https://anongit.freedesktop.org/git/drm-intel.git drm-intel-gt-next) |
| |
| BUG=b:179453336 |
| TEST=eDP comes up on brya when built with this commit https://crrev.com/i/3584703 |
| |
| Change-Id: I9dc4e25a73fa822cf9dbaa3e2137afc412825b2a |
| Signed-off-by: Azhar Shaikh <azhar.shaikh@intel.com> |
| --- |
| drivers/gpu/drm/i915/gt/intel_context.c | 27 ++++++++++++++++++- |
| drivers/gpu/drm/i915/gt/intel_context.h | 15 ++++------- |
| drivers/gpu/drm/i915/gt/intel_context_types.h | 24 +++++++++++------ |
| .../drm/i915/gt/intel_execlists_submission.c | 23 ++++++++++++---- |
| .../gpu/drm/i915/gt/intel_gt_clock_utils.c | 4 +++ |
| drivers/gpu/drm/i915/gt/intel_lrc.c | 27 ++++++++++--------- |
| drivers/gpu/drm/i915/gt/intel_lrc.h | 24 +++++++++++++++++ |
| drivers/gpu/drm/i915/gt/selftest_lrc.c | 10 +++---- |
| drivers/gpu/drm/i915/i915_gpu_error.c | 9 +++---- |
| drivers/gpu/drm/i915/i915_gpu_error.h | 2 +- |
| 10 files changed, 116 insertions(+), 49 deletions(-) |
| |
| diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c |
| --- a/drivers/gpu/drm/i915/gt/intel_context.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_context.c |
| @@ -382,7 +382,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) |
| ce->ring = NULL; |
| ce->ring_size = SZ_4K; |
| |
| - ewma_runtime_init(&ce->runtime.avg); |
| + ewma_runtime_init(&ce->stats.runtime.avg); |
| |
| ce->vm = i915_vm_get(engine->gt->vm); |
| |
| @@ -513,6 +513,31 @@ struct i915_request *intel_context_create_request(struct intel_context *ce) |
| return rq; |
| } |
| |
| +u64 intel_context_get_total_runtime_ns(const struct intel_context *ce) |
| +{ |
| + u64 total, active; |
| + |
| + total = ce->stats.runtime.total; |
| + if (ce->ops->flags & COPS_RUNTIME_CYCLES) |
| + total *= ce->engine->gt->clock_period_ns; |
| + |
| + active = READ_ONCE(ce->stats.active); |
| + if (active) |
| + active = intel_context_clock() - active; |
| + |
| + return total + active; |
| +} |
| + |
| +u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) |
| +{ |
| + u64 avg = ewma_runtime_read(&ce->stats.runtime.avg); |
| + |
| + if (ce->ops->flags & COPS_RUNTIME_CYCLES) |
| + avg *= ce->engine->gt->clock_period_ns; |
| + |
| + return avg; |
| +} |
| + |
| struct i915_request *intel_context_find_active_request(struct intel_context *ce) |
| { |
| struct i915_request *rq, *active = NULL; |
| diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h |
| --- a/drivers/gpu/drm/i915/gt/intel_context.h |
| +++ b/drivers/gpu/drm/i915/gt/intel_context.h |
| @@ -296,18 +296,13 @@ intel_context_clear_nopreempt(struct intel_context *ce) |
| clear_bit(CONTEXT_NOPREEMPT, &ce->flags); |
| } |
| |
| -static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce) |
| -{ |
| - const u32 period = ce->engine->gt->clock_period_ns; |
| - |
| - return READ_ONCE(ce->runtime.total) * period; |
| -} |
| +u64 intel_context_get_total_runtime_ns(const struct intel_context *ce); |
| +u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); |
| |
| -static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) |
| +static inline u64 intel_context_clock(void) |
| { |
| - const u32 period = ce->engine->gt->clock_period_ns; |
| - |
| - return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period); |
| + /* As we mix CS cycles with CPU clocks, use the raw monotonic clock. */ |
| + return ktime_get_raw_fast_ns(); |
| } |
| |
| #endif /* __INTEL_CONTEXT_H__ */ |
| diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h |
| --- a/drivers/gpu/drm/i915/gt/intel_context_types.h |
| +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h |
| @@ -35,6 +35,9 @@ struct intel_context_ops { |
| #define COPS_HAS_INFLIGHT_BIT 0 |
| #define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT) |
| |
| +#define COPS_RUNTIME_CYCLES_BIT 1 |
| +#define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT) |
| + |
| int (*alloc)(struct intel_context *ce); |
| |
| void (*ban)(struct intel_context *ce, struct i915_request *rq); |
| @@ -127,14 +130,19 @@ struct intel_context { |
| } lrc; |
| u32 tag; /* cookie passed to HW to track this context on submission */ |
| |
| - /* Time on GPU as tracked by the hw. */ |
| - struct { |
| - struct ewma_runtime avg; |
| - u64 total; |
| - u32 last; |
| - I915_SELFTEST_DECLARE(u32 num_underflow); |
| - I915_SELFTEST_DECLARE(u32 max_underflow); |
| - } runtime; |
| + /** stats: Context GPU engine busyness tracking. */ |
| + struct intel_context_stats { |
| + u64 active; |
| + |
| + /* Time on GPU as tracked by the hw. */ |
| + struct { |
| + struct ewma_runtime avg; |
| + u64 total; |
| + u32 last; |
| + I915_SELFTEST_DECLARE(u32 num_underflow); |
| + I915_SELFTEST_DECLARE(u32 max_underflow); |
| + } runtime; |
| + } stats; |
| |
| unsigned int active_count; /* protected by timeline->mutex */ |
| |
| diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c |
| --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c |
| @@ -621,8 +621,6 @@ static void __execlists_schedule_out(struct i915_request * const rq, |
| GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); |
| __set_bit(ccid - 1, &engine->context_tag); |
| } |
| - |
| - lrc_update_runtime(ce); |
| intel_engine_context_out(engine); |
| execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); |
| if (engine->fw_domain && !--engine->fw_active) |
| @@ -2003,8 +2001,23 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) |
| * and merits a fresh timeslice. We reinstall the timer after |
| * inspecting the queue to see if we need to resumbit. |
| */ |
| - if (*prev != *execlists->active) /* elide lite-restores */ |
| + if (*prev != *execlists->active) { /* elide lite-restores */ |
| + /* |
| + * Note the inherent discrepancy between the HW runtime, |
| + * recorded as part of the context switch, and the CPU |
| + * adjustment for active contexts. We have to hope that |
| + * the delay in processing the CS event is very small |
| + * and consistent. It works to our advantage to have |
| + * the CPU adjustment _undershoot_ (i.e. start later than) |
| + * the CS timestamp so we never overreport the runtime |
| + * and correct ourselves later when updating from HW. |
| + */ |
| + if (*prev) |
| + lrc_runtime_stop((*prev)->context); |
| + if (*execlists->active) |
| + lrc_runtime_start((*execlists->active)->context); |
| new_timeslice(execlists); |
| + } |
| |
| return inactive; |
| } |
| @@ -2602,7 +2615,7 @@ static void execlists_context_cancel_request(struct intel_context *ce, |
| } |
| |
| static const struct intel_context_ops execlists_context_ops = { |
| - .flags = COPS_HAS_INFLIGHT, |
| + .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES, |
| |
| .alloc = execlists_context_alloc, |
| |
| @@ -3620,7 +3633,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) |
| } |
| |
| static const struct intel_context_ops virtual_context_ops = { |
| - .flags = COPS_HAS_INFLIGHT, |
| + .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES, |
| |
| .alloc = virtual_context_alloc, |
| |
| diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c |
| --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c |
| @@ -159,6 +159,10 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt) |
| if (gt->clock_frequency) |
| gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1); |
| |
| + /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */ |
| + if (IS_GEN(gt->i915, 11)) |
| + gt->clock_period_ns = NSEC_PER_SEC / 13750000; |
| + |
| GT_TRACE(gt, |
| "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n", |
| gt->clock_frequency / 1000, |
| diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c |
| --- a/drivers/gpu/drm/i915/gt/intel_lrc.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c |
| @@ -693,7 +693,7 @@ static void init_common_regs(u32 * const regs, |
| CTX_CTRL_RS_CTX_ENABLE); |
| regs[CTX_CONTEXT_CONTROL] = ctl; |
| |
| - regs[CTX_TIMESTAMP] = ce->runtime.last; |
| + regs[CTX_TIMESTAMP] = ce->stats.runtime.last; |
| } |
| |
| static void init_wa_bb_regs(u32 * const regs, |
| @@ -1586,35 +1586,36 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) |
| } |
| } |
| |
| -static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) |
| +static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt) |
| { |
| #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
| - ce->runtime.num_underflow++; |
| - ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); |
| + stats->runtime.num_underflow++; |
| + stats->runtime.max_underflow = |
| + max_t(u32, stats->runtime.max_underflow, -dt); |
| #endif |
| } |
| |
| void lrc_update_runtime(struct intel_context *ce) |
| { |
| + struct intel_context_stats *stats = &ce->stats; |
| u32 old; |
| s32 dt; |
| |
| - if (intel_context_is_barrier(ce)) |
| + old = stats->runtime.last; |
| + stats->runtime.last = lrc_get_runtime(ce); |
| + dt = stats->runtime.last - old; |
| + if (!dt) |
| return; |
| |
| - old = ce->runtime.last; |
| - ce->runtime.last = lrc_get_runtime(ce); |
| - dt = ce->runtime.last - old; |
| - |
| if (unlikely(dt < 0)) { |
| CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", |
| - old, ce->runtime.last, dt); |
| - st_update_runtime_underflow(ce, dt); |
| + old, stats->runtime.last, dt); |
| + st_runtime_underflow(stats, dt); |
| return; |
| } |
| |
| - ewma_runtime_add(&ce->runtime.avg, dt); |
| - ce->runtime.total += dt; |
| + ewma_runtime_add(&stats->runtime.avg, dt); |
| + stats->runtime.total += dt; |
| } |
| |
| #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
| diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h |
| --- a/drivers/gpu/drm/i915/gt/intel_lrc.h |
| +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h |
| @@ -79,4 +79,28 @@ static inline u32 lrc_get_runtime(const struct intel_context *ce) |
| return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); |
| } |
| |
| +static inline void lrc_runtime_start(struct intel_context *ce) |
| +{ |
| + struct intel_context_stats *stats = &ce->stats; |
| + |
| + if (intel_context_is_barrier(ce)) |
| + return; |
| + |
| + if (stats->active) |
| + return; |
| + |
| + WRITE_ONCE(stats->active, intel_context_clock()); |
| +} |
| + |
| +static inline void lrc_runtime_stop(struct intel_context *ce) |
| +{ |
| + struct intel_context_stats *stats = &ce->stats; |
| + |
| + if (!stats->active) |
| + return; |
| + |
| + lrc_update_runtime(ce); |
| + WRITE_ONCE(stats->active, 0); |
| +} |
| + |
| #endif /* __INTEL_LRC_H__ */ |
| diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c |
| --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c |
| +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c |
| @@ -1751,8 +1751,8 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine) |
| if (IS_ERR(ce)) |
| return PTR_ERR(ce); |
| |
| - ce->runtime.num_underflow = 0; |
| - ce->runtime.max_underflow = 0; |
| + ce->stats.runtime.num_underflow = 0; |
| + ce->stats.runtime.max_underflow = 0; |
| |
| do { |
| unsigned int loop = 1024; |
| @@ -1790,11 +1790,11 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine) |
| intel_context_get_avg_runtime_ns(ce)); |
| |
| err = 0; |
| - if (ce->runtime.num_underflow) { |
| + if (ce->stats.runtime.num_underflow) { |
| pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", |
| engine->name, |
| - ce->runtime.num_underflow, |
| - ce->runtime.max_underflow); |
| + ce->stats.runtime.num_underflow, |
| + ce->stats.runtime.max_underflow); |
| GEM_TRACE_DUMP(); |
| err = -EOVERFLOW; |
| } |
| diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c |
| --- a/drivers/gpu/drm/i915/i915_gpu_error.c |
| +++ b/drivers/gpu/drm/i915/i915_gpu_error.c |
| @@ -484,13 +484,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m, |
| const char *header, |
| const struct i915_gem_context_coredump *ctx) |
| { |
| - const u32 period = m->i915->gt.clock_period_ns; |
| - |
| err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", |
| header, ctx->comm, ctx->pid, ctx->sched_attr.priority, |
| ctx->guilty, ctx->active, |
| - ctx->total_runtime * period, |
| - mul_u32_u32(ctx->avg_runtime, period)); |
| + ctx->total_runtime, ctx->avg_runtime); |
| } |
| |
| static struct i915_vma_coredump * |
| @@ -1292,8 +1289,8 @@ static bool record_context(struct i915_gem_context_coredump *e, |
| e->guilty = atomic_read(&ctx->guilty_count); |
| e->active = atomic_read(&ctx->active_count); |
| |
| - e->total_runtime = rq->context->runtime.total; |
| - e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg); |
| + e->total_runtime = intel_context_get_total_runtime_ns(rq->context); |
| + e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context); |
| |
| simulated = i915_gem_context_no_error_capture(ctx); |
| |
| diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h |
| --- a/drivers/gpu/drm/i915/i915_gpu_error.h |
| +++ b/drivers/gpu/drm/i915/i915_gpu_error.h |
| @@ -90,7 +90,7 @@ struct intel_engine_coredump { |
| char comm[TASK_COMM_LEN]; |
| |
| u64 total_runtime; |
| - u32 avg_runtime; |
| + u64 avg_runtime; |
| |
| pid_t pid; |
| int active; |
| -- |
| 2.33.0.685.g46640cef36-goog |
| |