/*
 * Copyright 2021 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vkr_ring.h"

#include <stdio.h>
#include <time.h>

#include "venus-protocol/vn_protocol_renderer_dispatches.h"
#include "vkr_context.h"
static inline void *
get_resource_pointer(const struct vkr_resource *res, size_t offset)
{
assert(offset < res->size);
return res->u.data + offset;
}
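
/* Record where the extra region lives within the backing resource. */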
static void
vkr_ring_init_extra(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
struct vkr_ring_extra *extra = &ring->extra;
extra->offset = layout->extra.begin;
extra->region = vkr_region_make_relative(&layout->extra);
}
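
/* Set up the command buffer region.  Its size must be a power of two so
 * that the monotonically increasing cursor can be masked into an offset.
 */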
static void
vkr_ring_init_buffer(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
struct vkr_ring_buffer *buf = &ring->buffer;
buf->size = vkr_region_size(&layout->buffer);
assert(util_is_power_of_two_nonzero(buf->size));
buf->mask = buf->size - 1;
buf->cur = 0;
buf->data = get_resource_pointer(layout->resource, layout->buffer.begin);
}
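
/* Map the head, tail, and status control words.  The renderer manages head
 * and status while the driver manages tail, so head and status must start
 * out as 0.
 */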
static bool
vkr_ring_init_control(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
struct vkr_ring_control *ctrl = &ring->control;
ctrl->head = get_resource_pointer(layout->resource, layout->head.begin);
ctrl->tail = get_resource_pointer(layout->resource, layout->tail.begin);
ctrl->status = get_resource_pointer(layout->resource, layout->status.begin);
/* we will manage head and status, and we expect them to be 0 initially */
if (*ctrl->head || *ctrl->status)
return false;
return true;
}

static void
vkr_ring_store_head(struct vkr_ring *ring, uint32_t ring_head)
{
/* the renderer is expected to load the head with memory_order_acquire,
* forming a release-acquire ordering
*/
atomic_store_explicit(ring->control.head, ring_head, memory_order_release);
}

static uint32_t
vkr_ring_load_tail(const struct vkr_ring *ring)
{
/* the driver is expected to store the tail with memory_order_release,
* forming a release-acquire ordering
*/
return atomic_load_explicit(ring->control.tail, memory_order_acquire);
}

/* The status word is shared with the driver; set and clear bits with atomic
 * read-modify-write operations.
 */
static void
vkr_ring_set_status_bits(struct vkr_ring *ring, uint32_t mask)
{
   atomic_fetch_or_explicit(ring->control.status, mask, memory_order_seq_cst);
}

static void
vkr_ring_unset_status_bits(struct vkr_ring *ring, uint32_t mask)
{
   atomic_fetch_and_explicit(ring->control.status, ~mask, memory_order_seq_cst);
}
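
/* Copy size bytes out of the ring buffer, handling wrap-around, and advance
 * the read cursor.
 */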
static void
vkr_ring_read_buffer(struct vkr_ring *ring, void *data, uint32_t size)
{
struct vkr_ring_buffer *buf = &ring->buffer;
const size_t offset = buf->cur & buf->mask;
assert(size <= buf->size);
if (offset + size <= buf->size) {
memcpy(data, buf->data + offset, size);
} else {
const size_t s = buf->size - offset;
memcpy(data, buf->data + offset, s);
memcpy((uint8_t *)data + s, buf->data, size - s);
}
   /* advance the read cursor; it increases monotonically and is masked into
    * a buffer offset on access
    */
   buf->cur += size;
}
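
/* Use the context dispatch table, but point it at the ring's own encoder
 * and decoder.
 */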
static inline void
vkr_ring_init_dispatch(struct vkr_ring *ring, struct vkr_context *ctx)
{
ring->dispatch = ctx->dispatch;
ring->dispatch.encoder = (struct vn_cs_encoder *)&ring->encoder;
ring->dispatch.decoder = (struct vn_cs_decoder *)&ring->decoder;
}
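
/*
 * Create a ring that decodes and dispatches commands from the layout's
 * backing resource.  The ring borrows the resource, so the caller must keep
 * the resource alive until vkr_ring_destroy().
 *
 * A rough lifecycle sketch (error handling elided; the exact call sites
 * depend on the embedding context):
 *
 *   struct vkr_ring *ring = vkr_ring_create(&layout, ctx, idle_timeout);
 *   if (ring) {
 *      vkr_ring_start(ring);
 *      ...
 *      if (vkr_ring_stop(ring))
 *         vkr_ring_destroy(ring);
 *   }
 */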
struct vkr_ring *
vkr_ring_create(const struct vkr_ring_layout *layout,
struct vkr_context *ctx,
uint64_t idle_timeout)
{
struct vkr_ring *ring = calloc(1, sizeof(*ring));
if (!ring)
return NULL;
ring->resource = layout->resource;
if (!vkr_ring_init_control(ring, layout))
goto err_init_control;
vkr_ring_init_buffer(ring, layout);
vkr_ring_init_extra(ring, layout);
ring->cmd = malloc(ring->buffer.size);
if (!ring->cmd)
goto err_cmd_malloc;
vkr_cs_decoder_init(&ring->decoder, &ctx->cs_fatal_error, ctx->object_table);
if (vkr_cs_encoder_init(&ring->encoder, &ctx->cs_fatal_error))
goto err_cs_encoder_init;
vkr_ring_init_dispatch(ring, ctx);
ring->idle_timeout = idle_timeout;
if (mtx_init(&ring->mutex, mtx_plain) != thrd_success)
goto err_mtx_init;
if (cnd_init(&ring->cond) != thrd_success)
goto err_cond_init;
return ring;
err_cond_init:
mtx_destroy(&ring->mutex);
err_mtx_init:
vkr_cs_encoder_fini(&ring->encoder);
err_cs_encoder_init:
free(ring->cmd);
err_cmd_malloc:
err_init_control:
free(ring);
return NULL;
}
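
/* The ring must have been stopped.  This also unlinks the ring from
 * whatever list the context added it to.
 */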
void
vkr_ring_destroy(struct vkr_ring *ring)
{
list_del(&ring->head);
assert(!ring->started);
mtx_destroy(&ring->mutex);
cnd_destroy(&ring->cond);
free(ring->cmd);
free(ring);
}
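
/* Current CLOCK_MONOTONIC time in nanoseconds, or 0 on failure. */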
static uint64_t
vkr_ring_now(void)
{
const uint64_t ns_per_sec = 1000000000llu;
struct timespec now;
if (clock_gettime(CLOCK_MONOTONIC, &now))
return 0;
return ns_per_sec * now.tv_sec + now.tv_nsec;
}
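
/* Back off while polling: yield for the first few iterations, then sleep
 * for exponentially increasing durations.
 */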
static void
vkr_ring_relax(uint32_t *iter)
{
/* TODO do better */
const uint32_t busy_wait_order = 4;
const uint32_t base_sleep_us = 10;
(*iter)++;
if (*iter < (1u << busy_wait_order)) {
thrd_yield();
return;
}
const uint32_t shift = util_last_bit(*iter) - busy_wait_order - 1;
const uint32_t us = base_sleep_us << shift;
const struct timespec ts = {
.tv_sec = us / 1000000,
.tv_nsec = (us % 1000000) * 1000,
};
clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
}
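
/* Decode and dispatch the commands in buffer, storing the updated ring head
 * as each command completes.  Returns false on a fatal decoder error.
 */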
static bool
vkr_ring_submit_cmd(struct vkr_ring *ring,
const uint8_t *buffer,
size_t size,
uint32_t ring_head)
{
struct vkr_cs_decoder *dec = &ring->decoder;
if (vkr_cs_decoder_get_fatal(dec)) {
vkr_log("ring_submit_cmd: early bail due to fatal decoder state");
return false;
}
vkr_cs_decoder_set_stream(dec, buffer, size);
while (vkr_cs_decoder_has_command(dec)) {
vn_dispatch_command(&ring->dispatch);
if (vkr_cs_decoder_get_fatal(dec)) {
vkr_log("ring_submit_cmd: vn_dispatch_command failed");
vkr_cs_decoder_reset(dec);
return false;
}
      /* update the ring head after each command so the driver can reclaim
       * the consumed ring space without waiting for the whole submission
       */
const uint32_t cur_ring_head = ring_head + (dec->cur - buffer);
vkr_ring_store_head(ring, cur_ring_head);
vkr_context_on_ring_seqno_update(ring->dispatch.data, ring->id, cur_ring_head);
}
vkr_cs_decoder_reset(dec);
return true;
}
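
/* Ring thread main loop: poll the tail for new commands, back off when
 * there is nothing to do, and go idle on the cond once idle_timeout passes
 * without a submission.
 */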
static int
vkr_ring_thread(void *arg)
{
struct vkr_ring *ring = arg;
struct vkr_context *ctx = ring->dispatch.data;
char thread_name[16];
snprintf(thread_name, ARRAY_SIZE(thread_name), "vkr-ring-%d", ctx->ctx_id);
u_thread_setname(thread_name);
uint64_t last_submit = vkr_ring_now();
uint32_t relax_iter = 0;
int ret = 0;
while (ring->started) {
bool wait = false;
if (vkr_ring_now() >= last_submit + ring->idle_timeout) {
ring->pending_notify = false;
vkr_ring_set_status_bits(ring, VK_RING_STATUS_IDLE_BIT_MESA);
wait = ring->buffer.cur == vkr_ring_load_tail(ring);
if (!wait)
vkr_ring_unset_status_bits(ring, VK_RING_STATUS_IDLE_BIT_MESA);
}
if (wait) {
TRACE_SCOPE("ring idle");
mtx_lock(&ring->mutex);
if (ring->started && !ring->pending_notify)
cnd_wait(&ring->cond, &ring->mutex);
vkr_ring_unset_status_bits(ring, VK_RING_STATUS_IDLE_BIT_MESA);
mtx_unlock(&ring->mutex);
if (!ring->started)
break;
last_submit = vkr_ring_now();
relax_iter = 0;
}
const uint32_t cmd_size = vkr_ring_load_tail(ring) - ring->buffer.cur;
if (cmd_size) {
if (cmd_size > ring->buffer.size) {
ret = -EINVAL;
break;
}
const uint32_t ring_head = ring->buffer.cur;
vkr_ring_read_buffer(ring, ring->cmd, cmd_size);
if (!vkr_ring_submit_cmd(ring, ring->cmd, cmd_size, ring_head)) {
ret = -EINVAL;
break;
}
last_submit = vkr_ring_now();
relax_iter = 0;
} else {
vkr_ring_relax(&relax_iter);
}
}
if (ret < 0)
vkr_ring_set_status_bits(ring, VK_RING_STATUS_FATAL_BIT_MESA);
return ret;
}
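
/* Start the ring thread.  On failure the ring is left stopped. */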
void
vkr_ring_start(struct vkr_ring *ring)
{
int ret;
assert(!ring->started);
ring->started = true;
ret = thrd_create(&ring->thread, vkr_ring_thread, ring);
if (ret != thrd_success)
ring->started = false;
}
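
/* Ask the ring thread to exit and join it.  Returns false when called from
 * the ring thread itself, which cannot join itself.
 */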
bool
vkr_ring_stop(struct vkr_ring *ring)
{
mtx_lock(&ring->mutex);
if (thrd_equal(ring->thread, thrd_current())) {
mtx_unlock(&ring->mutex);
return false;
}
assert(ring->started);
ring->started = false;
cnd_signal(&ring->cond);
mtx_unlock(&ring->mutex);
thrd_join(ring->thread, NULL);
return true;
}
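
/* Wake the ring thread if it has gone idle.  The driver is expected to
 * notify after writing commands while the idle status bit is set.
 */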
void
vkr_ring_notify(struct vkr_ring *ring)
{
mtx_lock(&ring->mutex);
ring->pending_notify = true;
cnd_signal(&ring->cond);
mtx_unlock(&ring->mutex);
{
TRACE_SCOPE("ring notify done");
}
}
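
/* Write val to the extra region at offset, bounds-checked against the
 * region.  The target pointer is cached since the offset rarely changes.
 */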
bool
vkr_ring_write_extra(struct vkr_ring *ring, size_t offset, uint32_t val)
{
struct vkr_ring_extra *extra = &ring->extra;
if (unlikely(extra->cached_offset != offset || !extra->cached_data)) {
const struct vkr_region access = VKR_REGION_INIT(offset, sizeof(val));
if (!vkr_region_is_valid(&access) || !vkr_region_is_within(&access, &extra->region))
return false;
      /* the Mesa driver always uses offset 0, so this cache effectively
       * always hits
       */
extra->cached_offset = offset;
extra->cached_data = get_resource_pointer(ring->resource, extra->offset + offset);
}
atomic_store_explicit(extra->cached_data, val, memory_order_release);
{
TRACE_SCOPE("ring extra done");
}
return true;
}
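
/* Publish the most recent virtqueue seqno and wake the ring thread in case
 * it is waiting on it.
 */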
void
vkr_ring_submit_virtqueue_seqno(struct vkr_ring *ring, uint64_t seqno)
{
mtx_lock(&ring->mutex);
ring->virtqueue_seqno = seqno;
   /* There are three cases:
    * 1. the ring is not waiting on the cond, making this a no-op
    * 2. the ring is idle and wakes up early
    * 3. the ring is waiting for a roundtrip and re-checks the seqno
    */
cnd_signal(&ring->cond);
mtx_unlock(&ring->mutex);
{
TRACE_SCOPE("submit vq seqno done");
}
}
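
/* Block until the published virtqueue seqno reaches seqno, the ring stops,
 * or waiting fails.
 */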
bool
vkr_ring_wait_virtqueue_seqno(struct vkr_ring *ring, uint64_t seqno)
{
TRACE_FUNC();
bool ok = true;
mtx_lock(&ring->mutex);
while (ok && ring->started && ring->virtqueue_seqno < seqno)
ok = cnd_wait(&ring->cond, &ring->mutex) == thrd_success;
mtx_unlock(&ring->mutex);
return ok;
}