| /* |
| * Copyright © 2016 Red Hat. |
| * Copyright © 2016 Bas Nieuwenhuizen |
| * |
| * based in part on anv driver which is: |
| * Copyright © 2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "radv_cs.h" |
| #include "radv_debug.h" |
| #include "radv_meta.h" |
| #include "radv_private.h" |
| #include "radv_radeon_winsys.h" |
| #include "radv_shader.h" |
| #include "sid.h" |
| #include "vk_format.h" |
| #include "vk_util.h" |
| #include "vk_enum_defines.h" |
| #include "vk_common_entrypoints.h" |
| #include "vk_render_pass.h" |
| #include "vk_framebuffer.h" |
| |
| #include "ac_debug.h" |
| #include "ac_shader_args.h" |
| |
| #include "util/fast_idiv_by_const.h" |
| |
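| /* Flags describing which shader binaries and descriptors still need to be |
| * prefetched into L2. RADV_PREFETCH_SHADERS covers all shader stages but |
| * not the vertex buffer descriptors, which are prefetched separately. |
| */ |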
| enum { |
| RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), |
| RADV_PREFETCH_VS = (1 << 1), |
| RADV_PREFETCH_TCS = (1 << 2), |
| RADV_PREFETCH_TES = (1 << 3), |
| RADV_PREFETCH_GS = (1 << 4), |
| RADV_PREFETCH_PS = (1 << 5), |
| RADV_PREFETCH_MS = (1 << 6), |
| RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | |
| RADV_PREFETCH_GS | RADV_PREFETCH_PS | RADV_PREFETCH_MS) |
| }; |
| |
| static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, |
| struct radv_image *image, |
| VkImageLayout src_layout, VkImageLayout dst_layout, |
| uint32_t src_family_index, uint32_t dst_family_index, |
| const VkImageSubresourceRange *range, |
| struct radv_sample_locations_state *sample_locs); |
| |
| static void radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size); |
| |
| const struct radv_dynamic_state default_dynamic_state = { |
| .viewport = |
| { |
| .count = 0, |
| }, |
| .scissor = |
| { |
| .count = 0, |
| }, |
| .line_width = 1.0f, |
| .depth_bias = |
| { |
| .bias = 0.0f, |
| .clamp = 0.0f, |
| .slope = 0.0f, |
| }, |
| .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f}, |
| .depth_bounds = |
| { |
| .min = 0.0f, |
| .max = 1.0f, |
| }, |
| .stencil_compare_mask = |
| { |
| .front = ~0u, |
| .back = ~0u, |
| }, |
| .stencil_write_mask = |
| { |
| .front = ~0u, |
| .back = ~0u, |
| }, |
| .stencil_reference = |
| { |
| .front = 0u, |
| .back = 0u, |
| }, |
| .line_stipple = |
| { |
| .factor = 0u, |
| .pattern = 0u, |
| }, |
| .cull_mode = 0u, |
| .front_face = 0u, |
| .primitive_topology = 0u, |
| .fragment_shading_rate = |
| { |
| .size = {1u, 1u}, |
| .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, |
| VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR}, |
| }, |
| .depth_bias_enable = 0u, |
| .primitive_restart_enable = 0u, |
| .rasterizer_discard_enable = 0u, |
| .logic_op = 0u, |
| .color_write_enable = 0u, |
| .patch_control_points = 0, |
| .polygon_mode = 0, |
| .tess_domain_origin = VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT, |
| .logic_op_enable = 0u, |
| .stippled_line_enable = 0u, |
| .alpha_to_coverage_enable = 0u, |
| .sample_mask = 0u, |
| .depth_clip_enable = 0u, |
| .conservative_rast_mode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, |
| .depth_clip_negative_one_to_one = 0u, |
| .provoking_vertex_mode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, |
| .depth_clamp_enable = 0u, |
| .color_write_mask = 0u, |
| .color_blend_enable = 0u, |
| .rasterization_samples = VK_SAMPLE_COUNT_1_BIT, |
| .line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT, |
| }; |
| |
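| /* Copy the given dynamic state into the command buffer, comparing each |
| * field first so that only state which actually changed is marked dirty |
| * and therefore re-emitted. |
| */ |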
| static void |
| radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src) |
| { |
| struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; |
| uint64_t copy_mask = src->mask; |
| uint64_t dest_mask = 0; |
| |
| dest->discard_rectangle.count = src->discard_rectangle.count; |
| dest->sample_location.count = src->sample_location.count; |
| |
| if (copy_mask & RADV_DYNAMIC_VIEWPORT) { |
| if (dest->viewport.count != src->viewport.count) { |
| dest->viewport.count = src->viewport.count; |
| dest_mask |= RADV_DYNAMIC_VIEWPORT; |
| } |
| |
| if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, |
| src->viewport.count * sizeof(VkViewport))) { |
| typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count); |
| typed_memcpy(dest->viewport.xform, src->viewport.xform, src->viewport.count); |
| dest_mask |= RADV_DYNAMIC_VIEWPORT; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_SCISSOR) { |
| if (dest->scissor.count != src->scissor.count) { |
| dest->scissor.count = src->scissor.count; |
| dest_mask |= RADV_DYNAMIC_SCISSOR; |
| } |
| |
| if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, |
| src->scissor.count * sizeof(VkRect2D))) { |
| typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count); |
| dest_mask |= RADV_DYNAMIC_SCISSOR; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { |
| if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) { |
| typed_memcpy(dest->blend_constants, src->blend_constants, 4); |
| dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { |
| if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles, |
| src->discard_rectangle.count * sizeof(VkRect2D))) { |
| typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles, |
| src->discard_rectangle.count); |
| dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { |
| if (dest->sample_location.per_pixel != src->sample_location.per_pixel || |
| dest->sample_location.grid_size.width != src->sample_location.grid_size.width || |
| dest->sample_location.grid_size.height != src->sample_location.grid_size.height || |
| memcmp(&dest->sample_location.locations, &src->sample_location.locations, |
| src->sample_location.count * sizeof(VkSampleLocationEXT))) { |
| dest->sample_location.per_pixel = src->sample_location.per_pixel; |
| dest->sample_location.grid_size = src->sample_location.grid_size; |
| typed_memcpy(dest->sample_location.locations, src->sample_location.locations, |
| src->sample_location.count); |
| dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; |
| } |
| } |
| |
| #define RADV_CMP_COPY(field, flag) \ |
| if (copy_mask & flag) { \ |
| if (dest->field != src->field) { \ |
| dest->field = src->field; \ |
| dest_mask |= flag; \ |
| } \ |
| } |
| |
| RADV_CMP_COPY(line_width, RADV_DYNAMIC_LINE_WIDTH); |
| |
| RADV_CMP_COPY(depth_bias.bias, RADV_DYNAMIC_DEPTH_BIAS); |
| RADV_CMP_COPY(depth_bias.clamp, RADV_DYNAMIC_DEPTH_BIAS); |
| RADV_CMP_COPY(depth_bias.slope, RADV_DYNAMIC_DEPTH_BIAS); |
| |
| RADV_CMP_COPY(depth_bounds.min, RADV_DYNAMIC_DEPTH_BOUNDS); |
| RADV_CMP_COPY(depth_bounds.max, RADV_DYNAMIC_DEPTH_BOUNDS); |
| |
| RADV_CMP_COPY(stencil_compare_mask.front, RADV_DYNAMIC_STENCIL_COMPARE_MASK); |
| RADV_CMP_COPY(stencil_compare_mask.back, RADV_DYNAMIC_STENCIL_COMPARE_MASK); |
| |
| RADV_CMP_COPY(stencil_write_mask.front, RADV_DYNAMIC_STENCIL_WRITE_MASK); |
| RADV_CMP_COPY(stencil_write_mask.back, RADV_DYNAMIC_STENCIL_WRITE_MASK); |
| |
| RADV_CMP_COPY(stencil_reference.front, RADV_DYNAMIC_STENCIL_REFERENCE); |
| RADV_CMP_COPY(stencil_reference.back, RADV_DYNAMIC_STENCIL_REFERENCE); |
| |
| RADV_CMP_COPY(line_stipple.factor, RADV_DYNAMIC_LINE_STIPPLE); |
| RADV_CMP_COPY(line_stipple.pattern, RADV_DYNAMIC_LINE_STIPPLE); |
| |
| RADV_CMP_COPY(cull_mode, RADV_DYNAMIC_CULL_MODE); |
| RADV_CMP_COPY(front_face, RADV_DYNAMIC_FRONT_FACE); |
| RADV_CMP_COPY(primitive_topology, RADV_DYNAMIC_PRIMITIVE_TOPOLOGY); |
| RADV_CMP_COPY(depth_test_enable, RADV_DYNAMIC_DEPTH_TEST_ENABLE); |
| RADV_CMP_COPY(depth_write_enable, RADV_DYNAMIC_DEPTH_WRITE_ENABLE); |
| RADV_CMP_COPY(depth_compare_op, RADV_DYNAMIC_DEPTH_COMPARE_OP); |
| RADV_CMP_COPY(depth_bounds_test_enable, RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE); |
| RADV_CMP_COPY(stencil_test_enable, RADV_DYNAMIC_STENCIL_TEST_ENABLE); |
| |
| RADV_CMP_COPY(stencil_op.front.fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.front.pass_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.front.depth_fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.front.compare_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.pass_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.depth_fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.compare_op, RADV_DYNAMIC_STENCIL_OP); |
| |
| RADV_CMP_COPY(fragment_shading_rate.size.width, RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| RADV_CMP_COPY(fragment_shading_rate.size.height, RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| RADV_CMP_COPY(fragment_shading_rate.combiner_ops[0], RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| RADV_CMP_COPY(fragment_shading_rate.combiner_ops[1], RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| |
| RADV_CMP_COPY(depth_bias_enable, RADV_DYNAMIC_DEPTH_BIAS_ENABLE); |
| |
| RADV_CMP_COPY(primitive_restart_enable, RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE); |
| |
| RADV_CMP_COPY(rasterizer_discard_enable, RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); |
| |
| RADV_CMP_COPY(logic_op, RADV_DYNAMIC_LOGIC_OP); |
| |
| RADV_CMP_COPY(color_write_enable, RADV_DYNAMIC_COLOR_WRITE_ENABLE); |
| |
| RADV_CMP_COPY(patch_control_points, RADV_DYNAMIC_PATCH_CONTROL_POINTS); |
| |
| RADV_CMP_COPY(polygon_mode, RADV_DYNAMIC_POLYGON_MODE); |
| |
| RADV_CMP_COPY(tess_domain_origin, RADV_DYNAMIC_TESS_DOMAIN_ORIGIN); |
| |
| RADV_CMP_COPY(logic_op_enable, RADV_DYNAMIC_LOGIC_OP_ENABLE); |
| |
| RADV_CMP_COPY(stippled_line_enable, RADV_DYNAMIC_LINE_STIPPLE_ENABLE); |
| |
| RADV_CMP_COPY(alpha_to_coverage_enable, RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE); |
| |
| RADV_CMP_COPY(sample_mask, RADV_DYNAMIC_SAMPLE_MASK); |
| |
| RADV_CMP_COPY(depth_clip_enable, RADV_DYNAMIC_DEPTH_CLIP_ENABLE); |
| |
| RADV_CMP_COPY(conservative_rast_mode, RADV_DYNAMIC_CONSERVATIVE_RAST_MODE); |
| |
| RADV_CMP_COPY(depth_clip_negative_one_to_one, RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE); |
| |
| RADV_CMP_COPY(provoking_vertex_mode, RADV_DYNAMIC_PROVOKING_VERTEX_MODE); |
| |
| RADV_CMP_COPY(depth_clamp_enable, RADV_DYNAMIC_DEPTH_CLAMP_ENABLE); |
| |
| RADV_CMP_COPY(color_write_mask, RADV_DYNAMIC_COLOR_WRITE_MASK); |
| |
| RADV_CMP_COPY(color_blend_enable, RADV_DYNAMIC_COLOR_BLEND_ENABLE); |
| |
| RADV_CMP_COPY(rasterization_samples, RADV_DYNAMIC_RASTERIZATION_SAMPLES); |
| |
| RADV_CMP_COPY(line_rasterization_mode, RADV_DYNAMIC_LINE_RASTERIZATION_MODE); |
| |
| #undef RADV_CMP_COPY |
| |
| cmd_buffer->state.dirty |= dest_mask; |
| } |
| |
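| /* Compute queues on GFX7+ are executed by the MEC (micro-engine compute) |
| * firmware, which accepts a slightly different packet format than the |
| * graphics ME. |
| */ |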
| bool |
| radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) |
| { |
| return cmd_buffer->qf == RADV_QUEUE_COMPUTE && |
| cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; |
| } |
| |
| enum amd_ip_type |
| radv_queue_family_to_ring(struct radv_physical_device *physical_device, |
| enum radv_queue_family f) |
| { |
| switch (f) { |
| case RADV_QUEUE_GENERAL: |
| return AMD_IP_GFX; |
| case RADV_QUEUE_COMPUTE: |
| return AMD_IP_COMPUTE; |
| case RADV_QUEUE_TRANSFER: |
| return AMD_IP_SDMA; |
| default: |
| unreachable("Unknown queue family"); |
| } |
| } |
| |
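| /* Emit a WRITE_DATA packet that stores `count` dwords from `data` at the |
| * given VA, with write confirmation (WR_CONFIRM) so that later packets |
| * observe the completed write. |
| */ |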
| static void |
| radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, |
| unsigned count, const uint32_t *data) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| |
| radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); |
| |
| radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); |
| radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| radeon_emit_array(cs, data, count); |
| } |
| |
| static void |
| radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, |
| unsigned size) |
| { |
| uint32_t *zeroes = alloca(size); |
| memset(zeroes, 0, size); |
| radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes); |
| } |
| |
| static void |
| radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) |
| { |
| struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk); |
| |
| list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) |
| { |
| cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); |
| list_del(&up->list); |
| free(up); |
| } |
| |
| if (cmd_buffer->upload.upload_bo) |
| cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo); |
| |
| if (cmd_buffer->cs) |
| cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); |
| if (cmd_buffer->ace_internal.cs) |
| cmd_buffer->device->ws->cs_destroy(cmd_buffer->ace_internal.cs); |
| |
| for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { |
| struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set; |
| free(set->mapped_ptr); |
| if (set->layout) |
| vk_descriptor_set_layout_unref(&cmd_buffer->device->vk, &set->layout->vk); |
| vk_object_base_finish(&set->base); |
| } |
| |
| vk_object_base_finish(&cmd_buffer->meta_push_descriptors.base); |
| |
| vk_command_buffer_finish(&cmd_buffer->vk); |
| vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); |
| } |
| |
| static VkResult |
| radv_create_cmd_buffer(struct vk_command_pool *pool, |
| struct vk_command_buffer **cmd_buffer_out) |
| { |
| struct radv_device *device = container_of(pool->base.device, struct radv_device, vk); |
| |
| struct radv_cmd_buffer *cmd_buffer; |
| unsigned ring; |
| cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (cmd_buffer == NULL) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| VkResult result = |
| vk_command_buffer_init(pool, &cmd_buffer->vk, &radv_cmd_buffer_ops, 0); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); |
| return result; |
| } |
| |
| cmd_buffer->device = device; |
| |
| cmd_buffer->qf = vk_queue_to_radv(device->physical_device, pool->queue_family_index); |
| |
| ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); |
| |
| cmd_buffer->cs = device->ws->cs_create(device->ws, ring); |
| if (!cmd_buffer->cs) { |
| radv_destroy_cmd_buffer(&cmd_buffer->vk); |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base, |
| VK_OBJECT_TYPE_DESCRIPTOR_SET); |
| |
| for (unsigned i = 0; i < MAX_BIND_POINTS; i++) |
| vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, |
| VK_OBJECT_TYPE_DESCRIPTOR_SET); |
| |
| *cmd_buffer_out = &cmd_buffer->vk; |
| |
| list_inithead(&cmd_buffer->upload.list); |
| |
| return VK_SUCCESS; |
| } |
| |
| void |
| radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer) |
| { |
| memset(&cmd_buffer->state.render, 0, sizeof(cmd_buffer->state.render)); |
| } |
| |
| static void |
| radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, |
| UNUSED VkCommandBufferResetFlags flags) |
| { |
| struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk); |
| |
| vk_command_buffer_reset(&cmd_buffer->vk); |
| |
| cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); |
| if (cmd_buffer->ace_internal.cs) |
| cmd_buffer->device->ws->cs_reset(cmd_buffer->ace_internal.cs); |
| |
| list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) |
| { |
| cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); |
| list_del(&up->list); |
| free(up); |
| } |
| |
| cmd_buffer->push_constant_stages = 0; |
| cmd_buffer->scratch_size_per_wave_needed = 0; |
| cmd_buffer->scratch_waves_wanted = 0; |
| cmd_buffer->compute_scratch_size_per_wave_needed = 0; |
| cmd_buffer->compute_scratch_waves_wanted = 0; |
| cmd_buffer->esgs_ring_size_needed = 0; |
| cmd_buffer->gsvs_ring_size_needed = 0; |
| cmd_buffer->tess_rings_needed = false; |
| cmd_buffer->task_rings_needed = false; |
| cmd_buffer->mesh_scratch_ring_needed = false; |
| cmd_buffer->gds_needed = false; |
| cmd_buffer->gds_oa_needed = false; |
| cmd_buffer->sample_positions_needed = false; |
| cmd_buffer->ace_internal.sem.gfx2ace_value = 0; |
| cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = 0; |
| cmd_buffer->ace_internal.sem.va = 0; |
| |
| if (cmd_buffer->upload.upload_bo) |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo); |
| cmd_buffer->upload.offset = 0; |
| |
| memset(cmd_buffer->vertex_binding_buffers, 0, sizeof(struct radv_buffer *) * cmd_buffer->used_vertex_bindings); |
| cmd_buffer->used_vertex_bindings = 0; |
| |
| for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { |
| cmd_buffer->descriptors[i].dirty = 0; |
| cmd_buffer->descriptors[i].valid = 0; |
| cmd_buffer->descriptors[i].push_dirty = false; |
| } |
| |
| radv_cmd_buffer_reset_rendering(cmd_buffer); |
| } |
| |
| const struct vk_command_buffer_ops radv_cmd_buffer_ops = { |
| .create = radv_create_cmd_buffer, |
| .reset = radv_reset_cmd_buffer, |
| .destroy = radv_destroy_cmd_buffer, |
| }; |
| |
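| /* Grow the upload buffer to at least min_needed bytes, and to at least |
| * double its current size. The old BO is kept on a list so it remains |
| * valid until the command buffer is reset or destroyed. |
| */ |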
| static bool |
| radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed) |
| { |
| uint64_t new_size; |
| struct radeon_winsys_bo *bo = NULL; |
| struct radv_cmd_buffer_upload *upload; |
| struct radv_device *device = cmd_buffer->device; |
| |
| new_size = MAX2(min_needed, 16 * 1024); |
| new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); |
| |
| VkResult result = |
| device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws), |
| RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | |
| RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, |
| RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo); |
| |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return false; |
| } |
| |
| radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); |
| if (cmd_buffer->upload.upload_bo) { |
| upload = malloc(sizeof(*upload)); |
| |
| if (!upload) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| device->ws->buffer_destroy(device->ws, bo); |
| return false; |
| } |
| |
| memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); |
| list_add(&upload->list, &cmd_buffer->upload.list); |
| } |
| |
| cmd_buffer->upload.upload_bo = bo; |
| cmd_buffer->upload.size = new_size; |
| cmd_buffer->upload.offset = 0; |
| cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); |
| |
| if (!cmd_buffer->upload.map) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| return false; |
| } |
| |
| return true; |
| } |
| |
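| /* Suballocate `size` bytes (must be a multiple of 4) from the upload |
| * buffer, growing it if necessary; returns the allocation offset and a |
| * CPU pointer to it. |
| */ |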
| bool |
| radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, |
| unsigned *out_offset, void **ptr) |
| { |
| assert(size % 4 == 0); |
| |
| struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; |
| |
| /* Align to the scalar cache line size if it results in this allocation |
| * being placed in fewer of them. |
| */ |
| unsigned offset = cmd_buffer->upload.offset; |
| unsigned line_size = rad_info->gfx_level >= GFX10 ? 64 : 32; |
| unsigned gap = align(offset, line_size) - offset; |
| if ((size & (line_size - 1)) > gap) |
| offset = align(offset, line_size); |
| |
| if (offset + size > cmd_buffer->upload.size) { |
| if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) |
| return false; |
| offset = 0; |
| } |
| |
| *out_offset = offset; |
| *ptr = cmd_buffer->upload.map + offset; |
| |
| cmd_buffer->upload.offset = offset + size; |
| return true; |
| } |
| |
| bool |
| radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, |
| unsigned *out_offset) |
| { |
| uint8_t *ptr; |
| |
| if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr)) |
| return false; |
| assert(ptr); |
| |
| memcpy(ptr, data, size); |
| return true; |
| } |
| |
| void |
| radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) |
| va += 4; |
| |
| ++cmd_buffer->state.trace_id; |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id); |
| |
| radeon_check_space(cmd_buffer->device->ws, cs, 2); |
| |
| radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); |
| radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); |
| } |
| |
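| /* Propagate the effects of a barrier to the internal ACE (async compute) |
| * cmdbuf which executes task shaders: inherit the compute flush bits from |
| * the main cmdbuf and bump the GFX->ACE semaphore when task shaders must |
| * be blocked. |
| */ |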
| static void |
| radv_ace_internal_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask, |
| VkPipelineStageFlags2 dst_stage_mask) |
| { |
| /* Update flush bits from the main cmdbuf, except the stage flush. */ |
| cmd_buffer->ace_internal.flush_bits |= |
| cmd_buffer->state.flush_bits & RADV_CMD_FLUSH_ALL_COMPUTE & ~RADV_CMD_FLAG_CS_PARTIAL_FLUSH; |
| |
| /* Add stage flush only when necessary. */ |
| if (src_stage_mask & |
| (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | |
| VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) |
| cmd_buffer->ace_internal.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; |
| |
| /* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */ |
| if (src_stage_mask & |
| (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | |
| VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) |
| dst_stage_mask |= cmd_buffer->state.dma_is_busy ? VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT : 0; |
| |
| /* Increment the GFX/ACE semaphore when task shaders are blocked. */ |
| if (dst_stage_mask & |
| (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | |
| VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT)) |
| cmd_buffer->ace_internal.sem.gfx2ace_value++; |
| } |
| |
| static void |
| radv_ace_internal_cache_flush(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; |
| const uint32_t flush_bits = cmd_buffer->ace_internal.flush_bits; |
| enum rgp_flush_bits sqtt_flush_bits = 0; |
| |
| si_cs_emit_cache_flush(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, |
| true, flush_bits, &sqtt_flush_bits, 0); |
| |
| cmd_buffer->ace_internal.flush_bits = 0; |
| } |
| |
| static uint64_t |
| radv_ace_internal_sem_create(struct radv_cmd_buffer *cmd_buffer) |
| { |
| /* DWORD 0: GFX->ACE semaphore (GFX blocks ACE, i.e. ACE waits for GFX) |
| * DWORD 1: ACE->GFX semaphore |
| */ |
| uint64_t sem_init = 0; |
| uint32_t va_off = 0; |
| if (!radv_cmd_buffer_upload_data(cmd_buffer, sizeof(uint64_t), &sem_init, &va_off)) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return 0; |
| } |
| |
| return radv_buffer_get_va(cmd_buffer->upload.upload_bo) + va_off; |
| } |
| |
| static bool |
| radv_ace_internal_sem_dirty(const struct radv_cmd_buffer *cmd_buffer) |
| { |
| return cmd_buffer->ace_internal.sem.gfx2ace_value != |
| cmd_buffer->ace_internal.sem.emitted_gfx2ace_value; |
| } |
| |
| ALWAYS_INLINE static bool |
| radv_flush_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer) |
| { |
| if (!radv_ace_internal_sem_dirty(cmd_buffer)) |
| return false; |
| |
| if (!cmd_buffer->ace_internal.sem.va) { |
| cmd_buffer->ace_internal.sem.va = radv_ace_internal_sem_create(cmd_buffer); |
| if (!cmd_buffer->ace_internal.sem.va) |
| return false; |
| } |
| |
| /* GFX writes a value to the semaphore which ACE can wait for. */ |
| si_cs_emit_write_event_eop( |
| cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, |
| radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, |
| EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->ace_internal.sem.va, |
| cmd_buffer->ace_internal.sem.gfx2ace_value, cmd_buffer->gfx9_eop_bug_va); |
| |
| cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = cmd_buffer->ace_internal.sem.gfx2ace_value; |
| return true; |
| } |
| |
| ALWAYS_INLINE static void |
| radv_wait_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer) |
| { |
| assert(cmd_buffer->ace_internal.sem.va); |
| struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; |
| radeon_check_space(cmd_buffer->device->ws, ace_cs, 7); |
| |
| /* ACE waits for the semaphore which GFX wrote. */ |
| radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->ace_internal.sem.va, |
| cmd_buffer->ace_internal.sem.gfx2ace_value, 0xffffffff); |
| } |
| |
| static struct radeon_cmdbuf * |
| radv_ace_internal_create(struct radv_cmd_buffer *cmd_buffer) |
| { |
| assert(!cmd_buffer->ace_internal.cs); |
| struct radv_device *device = cmd_buffer->device; |
| struct radeon_cmdbuf *ace_cs = device->ws->cs_create(device->ws, AMD_IP_COMPUTE); |
| |
| if (!ace_cs) |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| return ace_cs; |
| } |
| |
| static VkResult |
| radv_ace_internal_finalize(struct radv_cmd_buffer *cmd_buffer) |
| { |
| assert(cmd_buffer->ace_internal.cs); |
| struct radv_device *device = cmd_buffer->device; |
| struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; |
| |
| /* Emit pending cache flush. */ |
| radv_ace_internal_cache_flush(cmd_buffer); |
| |
| /* Clear the ACE semaphore if it exists. |
| * This is necessary in case the same cmd buffer is submitted again in the future. |
| */ |
| if (cmd_buffer->ace_internal.sem.va) { |
| struct radeon_cmdbuf *main_cs = cmd_buffer->cs; |
| uint64_t gfx2ace_va = cmd_buffer->ace_internal.sem.va; |
| uint64_t ace2gfx_va = cmd_buffer->ace_internal.sem.va + 4; |
| |
| /* ACE: write 1 to the ACE->GFX semaphore. */ |
| si_cs_emit_write_event_eop(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, |
| true, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, |
| EOP_DATA_SEL_VALUE_32BIT, ace2gfx_va, 1, |
| cmd_buffer->gfx9_eop_bug_va); |
| |
| /* Wait for ACE to finish, otherwise we may risk writing 0 to the semaphore |
| * when ACE is still waiting for it. This may not happen in practice, but |
| * better safe than sorry. |
| */ |
| radv_cp_wait_mem(main_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace2gfx_va, 1, 0xffffffff); |
| |
| /* GFX: clear GFX->ACE and ACE->GFX semaphores. */ |
| radv_emit_clear_data(cmd_buffer, V_370_ME, gfx2ace_va, 8); |
| } |
| |
| device->ws->cs_add_buffers(ace_cs, cmd_buffer->cs); |
| return device->ws->cs_finalize(ace_cs); |
| } |
| |
| static void |
| radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags) |
| { |
| if (unlikely(cmd_buffer->device->thread_trace.bo)) { |
| radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2); |
| |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
| radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); |
| } |
| |
| if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { |
| enum rgp_flush_bits sqtt_flush_bits = 0; |
| assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); |
| |
| ASSERTED const unsigned cdw_max = |
| radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); |
| |
| /* Force wait for graphics or compute engines to be idle. */ |
| si_cs_emit_cache_flush(cmd_buffer->cs, |
| cmd_buffer->device->physical_device->rad_info.gfx_level, |
| &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, |
| radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits, |
| cmd_buffer->gfx9_eop_bug_va); |
| |
| assert(cmd_buffer->cs->cdw <= cdw_max); |
| |
| if (cmd_buffer->state.graphics_pipeline && (flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) && |
| radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TASK)) { |
| /* Force wait for compute engines to be idle on the internal cmdbuf. */ |
| si_cs_emit_cache_flush(cmd_buffer->ace_internal.cs, |
| cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, |
| true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); |
| } |
| } |
| |
| if (unlikely(cmd_buffer->device->trace_bo)) |
| radv_cmd_buffer_trace_emit(cmd_buffer); |
| } |
| |
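| /* Layout of the trace BO written by the radv_save_* helpers below: |
| * 0: trace id (primary), 4: trace id (secondary), 8: graphics pipeline, |
| * 16: compute pipeline, 24: vertex descriptors VA, 32: VS prolog, |
| * 40: descriptor set pointers (two dwords per set). |
| */ |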
| static void |
| radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| enum amd_ip_type ring; |
| uint32_t data[2]; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| |
| ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); |
| |
| switch (ring) { |
| case AMD_IP_GFX: |
| va += 8; |
| break; |
| case AMD_IP_COMPUTE: |
| va += 16; |
| break; |
| default: |
| assert(!"invalid IP type"); |
| } |
| |
| uint64_t pipeline_address = (uintptr_t)pipeline; |
| data[0] = pipeline_address; |
| data[1] = pipeline_address >> 32; |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); |
| } |
| |
| static void |
| radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| uint32_t data[2]; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| va += 24; |
| |
| data[0] = vb_ptr; |
| data[1] = vb_ptr >> 32; |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); |
| } |
| |
| static void |
| radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader_part *prolog) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| uint32_t data[2]; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| va += 32; |
| |
| uint64_t prolog_address = (uintptr_t)prolog; |
| data[0] = prolog_address; |
| data[1] = prolog_address >> 32; |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); |
| } |
| |
| void |
| radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, |
| struct radv_descriptor_set *set, unsigned idx) |
| { |
| struct radv_descriptor_state *descriptors_state = |
| radv_get_descriptors_state(cmd_buffer, bind_point); |
| |
| descriptors_state->sets[idx] = set; |
| |
| descriptors_state->valid |= (1u << idx); /* active descriptors */ |
| descriptors_state->dirty |= (1u << idx); |
| } |
| |
| static void |
| radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) |
| { |
| struct radv_descriptor_state *descriptors_state = |
| radv_get_descriptors_state(cmd_buffer, bind_point); |
| struct radv_device *device = cmd_buffer->device; |
| uint32_t data[MAX_SETS * 2] = {0}; |
| uint64_t va; |
| va = radv_buffer_get_va(device->trace_bo) + 40; |
| |
| u_foreach_bit(i, descriptors_state->valid) |
| { |
| struct radv_descriptor_set *set = descriptors_state->sets[i]; |
| data[i * 2] = (uint64_t)(uintptr_t)set; |
| data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; |
| } |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data); |
| } |
| |
| struct radv_userdata_info * |
| radv_lookup_user_sgpr(const struct radv_pipeline *pipeline, gl_shader_stage stage, int idx) |
| { |
| struct radv_shader *shader = radv_get_shader(pipeline, stage); |
| return &shader->info.user_sgprs_locs.shader_data[idx]; |
| } |
| |
| static void |
| radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs, |
| struct radv_pipeline *pipeline, gl_shader_stage stage, int idx, |
| uint64_t va) |
| { |
| struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); |
| uint32_t base_reg = pipeline->user_data_0[stage]; |
| if (loc->sgpr_idx == -1) |
| return; |
| |
| assert(loc->num_sgprs == 1); |
| |
| radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false); |
| } |
| |
| static uint64_t |
| radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx) |
| { |
| struct radv_descriptor_set *set = descriptors_state->sets[set_idx]; |
| uint64_t va; |
| |
| if (set) { |
| va = set->header.va; |
| } else { |
| va = descriptors_state->descriptor_buffers[set_idx]; |
| } |
| |
| return va; |
| } |
| |
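| /* Emit the user SGPR pointers for all descriptor sets that are both dirty |
| * and used by the shader, coalescing consecutive set indices into a single |
| * SET_SH_REG packet. |
| */ |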
| static void |
| radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, |
| struct radv_pipeline *pipeline, |
| struct radv_descriptor_state *descriptors_state, |
| gl_shader_stage stage) |
| { |
| uint32_t sh_base = pipeline->user_data_0[stage]; |
| struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs; |
| unsigned mask = locs->descriptor_sets_enabled; |
| |
| mask &= descriptors_state->dirty & descriptors_state->valid; |
| |
| while (mask) { |
| int start, count; |
| |
| u_bit_scan_consecutive_range(&mask, &start, &count); |
| |
| struct radv_userdata_info *loc = &locs->descriptor_sets[start]; |
| unsigned sh_offset = sh_base + loc->sgpr_idx * 4; |
| |
| radv_emit_shader_pointer_head(cs, sh_offset, count, true); |
| for (int i = 0; i < count; i++) { |
| uint64_t va = radv_descriptor_get_va(descriptors_state, start + i); |
| |
| radv_emit_shader_pointer_body(device, cs, va, true); |
| } |
| } |
| } |
| |
| static ALWAYS_INLINE unsigned |
| radv_get_rasterization_samples(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| if (d->line_rasterization_mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT && |
| radv_rast_prim_is_line(pipeline->rast_prim)) { |
| /* From the Vulkan spec 1.3.221: |
| * |
| * "When Bresenham lines are being rasterized, sample locations may all be treated as being at |
| * the pixel center (this may affect attribute and depth interpolation)." |
| * |
| * "One consequence of this is that Bresenham lines cover the same pixels regardless of the |
| * number of rasterization samples, and cover all samples in those pixels (unless masked out |
| * or killed)." |
| */ |
| return 1; |
| } |
| |
| return MAX2(1, d->rasterization_samples); |
| } |
| |
| static ALWAYS_INLINE unsigned |
| radv_get_ps_iter_samples(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_rendering_state *render = &cmd_buffer->state.render; |
| unsigned ps_iter_samples = 1; |
| |
| if (pipeline->ms.sample_shading_enable) { |
| unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer); |
| unsigned color_samples = MAX2(render->color_samples, rasterization_samples); |
| |
| ps_iter_samples = ceilf(pipeline->ms.min_sample_shading * color_samples); |
| ps_iter_samples = util_next_power_of_two(ps_iter_samples); |
| } |
| |
| return ps_iter_samples; |
| } |
| |
| /** |
| * Convert the user sample locations to hardware sample locations (the values |
| * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). |
| */ |
| static void |
| radv_convert_user_sample_locs(const struct radv_sample_locations_state *state, |
| uint32_t x, uint32_t y, VkOffset2D *sample_locs) |
| { |
| uint32_t x_offset = x % state->grid_size.width; |
| uint32_t y_offset = y % state->grid_size.height; |
| uint32_t num_samples = (uint32_t)state->per_pixel; |
| uint32_t pixel_offset; |
| |
| pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples; |
| |
| assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); |
| const VkSampleLocationEXT *user_locs = &state->locations[pixel_offset]; |
| |
| for (uint32_t i = 0; i < num_samples; i++) { |
| float shifted_pos_x = user_locs[i].x - 0.5; |
| float shifted_pos_y = user_locs[i].y - 0.5; |
| |
| int32_t scaled_pos_x = floorf(shifted_pos_x * 16); |
| int32_t scaled_pos_y = floorf(shifted_pos_y * 16); |
| |
| sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7); |
| sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7); |
| } |
| } |
| |
| /** |
| * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample |
| * locations. |
| */ |
| static void |
| radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, |
| uint32_t *sample_locs_pixel) |
| { |
| for (uint32_t i = 0; i < num_samples; i++) { |
| uint32_t sample_reg_idx = i / 4; |
| uint32_t sample_loc_idx = i % 4; |
| int32_t pos_x = sample_locs[i].x; |
| int32_t pos_y = sample_locs[i].y; |
| |
| uint32_t shift_x = 8 * sample_loc_idx; |
| uint32_t shift_y = shift_x + 4; |
| |
| sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x; |
| sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y; |
| } |
| } |
| |
| /** |
| * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware |
| * sample locations. |
| */ |
| static uint64_t |
| radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, |
| uint32_t num_samples) |
| { |
| uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities)); |
| uint32_t sample_mask = num_samples - 1; |
| uint32_t *distances = alloca(num_samples * sizeof(*distances)); |
| uint64_t centroid_priority = 0; |
| |
| /* Compute the distances from center for each sample. */ |
| for (int i = 0; i < num_samples; i++) { |
| distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y); |
| } |
| |
| /* Compute the centroid priorities by looking at the distances array. */ |
| for (int i = 0; i < num_samples; i++) { |
| uint32_t min_idx = 0; |
| |
| for (int j = 1; j < num_samples; j++) { |
| if (distances[j] < distances[min_idx]) |
| min_idx = j; |
| } |
| |
| centroid_priorities[i] = min_idx; |
| distances[min_idx] = 0xffffffff; |
| } |
| |
| /* Compute the final centroid priority. */ |
| for (int i = 0; i < 8; i++) { |
| centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4); |
| } |
| |
| return centroid_priority << 32 | centroid_priority; |
| } |
| |
| /** |
| * Emit the sample locations that are specified with VK_EXT_sample_locations. |
| */ |
| static void |
| radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| uint32_t num_samples = (uint32_t)d->sample_location.per_pixel; |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint32_t sample_locs_pixel[4][2] = {0}; |
| VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */ |
| uint64_t centroid_priority; |
| |
| if (!d->sample_location.count) |
| return; |
| |
| /* Convert the user sample locations to hardware sample locations. */ |
| radv_convert_user_sample_locs(&d->sample_location, 0, 0, sample_locs[0]); |
| radv_convert_user_sample_locs(&d->sample_location, 1, 0, sample_locs[1]); |
| radv_convert_user_sample_locs(&d->sample_location, 0, 1, sample_locs[2]); |
| radv_convert_user_sample_locs(&d->sample_location, 1, 1, sample_locs[3]); |
| |
| /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */ |
| for (uint32_t i = 0; i < 4; i++) { |
| radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]); |
| } |
| |
| /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */ |
| centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples); |
| |
| /* Emit the specified user sample locations. */ |
| switch (num_samples) { |
| case 2: |
| case 4: |
| radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, |
| sample_locs_pixel[0][0]); |
| radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, |
| sample_locs_pixel[1][0]); |
| radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, |
| sample_locs_pixel[2][0]); |
| radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, |
| sample_locs_pixel[3][0]); |
| break; |
| case 8: |
| radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, |
| sample_locs_pixel[0][0]); |
| radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, |
| sample_locs_pixel[1][0]); |
| radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, |
| sample_locs_pixel[2][0]); |
| radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, |
| sample_locs_pixel[3][0]); |
| radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, |
| sample_locs_pixel[0][1]); |
| radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, |
| sample_locs_pixel[1][1]); |
| radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, |
| sample_locs_pixel[2][1]); |
| radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, |
| sample_locs_pixel[3][1]); |
| break; |
| default: |
| unreachable("invalid number of samples"); |
| } |
| |
| radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); |
| radeon_emit(cs, centroid_priority); |
| radeon_emit(cs, centroid_priority >> 32); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| static void |
| radv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, |
| struct radv_pipeline *pipeline, gl_shader_stage stage, int idx, |
| uint32_t *values) |
| { |
| struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); |
| uint32_t base_reg = pipeline->user_data_0[stage]; |
| if (loc->sgpr_idx == -1) |
| return; |
| |
| radeon_check_space(device->ws, cs, 2 + loc->num_sgprs); |
| |
| radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, loc->num_sgprs); |
| radeon_emit_array(cs, values, loc->num_sgprs); |
| } |
| |
| struct radv_bin_size_entry { |
| unsigned bpp; |
| VkExtent2D extent; |
| }; |
| |
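| /* Determine the primitive binning bin size on GFX10+ from the number of |
| * bytes each pixel consumes in the color, FMASK and depth/stencil tag |
| * caches: the heavier the per-pixel footprint, the smaller the bin. |
| */ |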
| static VkExtent2D |
| radv_gfx10_compute_bin_size(struct radv_graphics_pipeline *pipeline, |
| struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; |
| const struct radv_rendering_state *render = &cmd_buffer->state.render; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| VkExtent2D extent = {512, 512}; |
| |
| const unsigned db_tag_size = 64; |
| const unsigned db_tag_count = 312; |
| const unsigned color_tag_size = 1024; |
| const unsigned color_tag_count = 31; |
| const unsigned fmask_tag_size = 256; |
| const unsigned fmask_tag_count = 44; |
| |
| const unsigned rb_count = pdevice->rad_info.max_render_backends; |
| const unsigned pipe_count = MAX2(rb_count, pdevice->rad_info.num_tcc_blocks); |
| |
| const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count; |
| const unsigned color_tag_part = |
| (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count; |
| const unsigned fmask_tag_part = |
| (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count; |
| |
| const unsigned total_samples = radv_get_rasterization_samples(cmd_buffer); |
| const unsigned samples_log = util_logbase2_ceil(total_samples); |
| |
| unsigned color_bytes_per_pixel = 0; |
| unsigned fmask_bytes_per_pixel = 0; |
| |
| for (unsigned i = 0; i < render->color_att_count; ++i) { |
| struct radv_image_view *iview = render->color_att[i].iview; |
| |
| if (!iview) |
| continue; |
| |
| if (!((d->color_write_mask >> (i * 4)) & 0xf)) |
| continue; |
| |
| color_bytes_per_pixel += vk_format_get_blocksize(render->color_att[i].format); |
| |
| if (total_samples > 1) { |
| assert(samples_log <= 3); |
| const unsigned fmask_array[] = {0, 1, 1, 4}; |
| fmask_bytes_per_pixel += fmask_array[samples_log]; |
| } |
| } |
| |
| color_bytes_per_pixel *= total_samples; |
| color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1); |
| |
| const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel); |
| extent.width = 1ull << ((color_pixel_count_log + 1) / 2); |
| extent.height = 1ull << (color_pixel_count_log / 2); |
| |
| if (fmask_bytes_per_pixel) { |
| const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel); |
| |
| const VkExtent2D fmask_extent = |
| (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2), |
| .height = 1ull << (fmask_pixel_count_log / 2)}; |
| |
| if (fmask_extent.width * fmask_extent.height < extent.width * extent.height) |
| extent = fmask_extent; |
| } |
| |
| if (render->ds_att.iview) { |
| /* Coefficients taken from AMDVLK */ |
| unsigned depth_coeff = vk_format_has_depth(render->ds_att.format) ? 5 : 0; |
| unsigned stencil_coeff = vk_format_has_stencil(render->ds_att.format) ? 1 : 0; |
| unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples; |
| |
| const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel); |
| |
| const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2), |
| .height = 1ull << (db_pixel_count_log / 2)}; |
| |
| if (db_extent.width * db_extent.height < extent.width * extent.height) |
| extent = db_extent; |
| } |
| |
| extent.width = MAX2(extent.width, 128); |
| extent.height = MAX2(extent.width, 64); |
| |
| return extent; |
| } |
| |
| static VkExtent2D |
| radv_gfx9_compute_bin_size(struct radv_graphics_pipeline *pipeline, |
| struct radv_cmd_buffer *cmd_buffer) |
| |
| { |
| const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; |
| const struct radv_rendering_state *render = &cmd_buffer->state.render; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| static const struct radv_bin_size_entry color_size_table[][3][9] = { |
| { |
| /* One RB / SE */ |
| { |
| /* One shader engine */ |
| {0, {128, 128}}, |
| {1, {64, 128}}, |
| {2, {32, 128}}, |
| {3, {16, 128}}, |
| {17, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| /* Two shader engines */ |
| {0, {128, 128}}, |
| {2, {64, 128}}, |
| {3, {32, 128}}, |
| {5, {16, 128}}, |
| {17, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| /* Four shader engines */ |
| {0, {128, 128}}, |
| {3, {64, 128}}, |
| {5, {16, 128}}, |
| {17, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| }, |
| { |
| /* Two RB / SE */ |
| { |
| /* One shader engine */ |
| {0, {128, 128}}, |
| {2, {64, 128}}, |
| {3, {32, 128}}, |
| {5, {16, 128}}, |
| {33, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| /* Two shader engines */ |
| {0, {128, 128}}, |
| {3, {64, 128}}, |
| {5, {32, 128}}, |
| {9, {16, 128}}, |
| {33, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| /* Four shader engines */ |
| {0, {256, 256}}, |
| {2, {128, 256}}, |
| {3, {128, 128}}, |
| {5, {64, 128}}, |
| {9, {16, 128}}, |
| {33, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| }, |
| { |
| /* Four RB / SE */ |
| { |
| /* One shader engine */ |
| {0, {128, 256}}, |
| {2, {128, 128}}, |
| {3, {64, 128}}, |
| {5, {32, 128}}, |
| {9, {16, 128}}, |
| {33, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| /* Two shader engines */ |
| {0, {256, 256}}, |
| {2, {128, 256}}, |
| {3, {128, 128}}, |
| {5, {64, 128}}, |
| {9, {32, 128}}, |
| {17, {16, 128}}, |
| {33, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| /* Four shader engines */ |
| {0, {256, 512}}, |
| {2, {256, 256}}, |
| {3, {128, 256}}, |
| {5, {128, 128}}, |
| {9, {64, 128}}, |
| {17, {16, 128}}, |
| {33, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| }, |
| }; |
| static const struct radv_bin_size_entry ds_size_table[][3][9] = { |
| { |
| // One RB / SE |
| { |
| // One shader engine |
| {0, {128, 256}}, |
| {2, {128, 128}}, |
| {4, {64, 128}}, |
| {7, {32, 128}}, |
| {13, {16, 128}}, |
| {49, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| // Two shader engines |
| {0, {256, 256}}, |
| {2, {128, 256}}, |
| {4, {128, 128}}, |
| {7, {64, 128}}, |
| {13, {32, 128}}, |
| {25, {16, 128}}, |
| {49, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| // Four shader engines |
| {0, {256, 512}}, |
| {2, {256, 256}}, |
| {4, {128, 256}}, |
| {7, {128, 128}}, |
| {13, {64, 128}}, |
| {25, {16, 128}}, |
| {49, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| }, |
| { |
| // Two RB / SE |
| { |
| // One shader engine |
| {0, {256, 256}}, |
| {2, {128, 256}}, |
| {4, {128, 128}}, |
| {7, {64, 128}}, |
| {13, {32, 128}}, |
| {25, {16, 128}}, |
| {97, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| // Two shader engines |
| {0, {256, 512}}, |
| {2, {256, 256}}, |
| {4, {128, 256}}, |
| {7, {128, 128}}, |
| {13, {64, 128}}, |
| {25, {32, 128}}, |
| {49, {16, 128}}, |
| {97, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| // Four shader engines |
| {0, {512, 512}}, |
| {2, {256, 512}}, |
| {4, {256, 256}}, |
| {7, {128, 256}}, |
| {13, {128, 128}}, |
| {25, {64, 128}}, |
| {49, {16, 128}}, |
| {97, {0, 0}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| }, |
| { |
| // Four RB / SE |
| { |
| // One shader engine |
| {0, {256, 512}}, |
| {2, {256, 256}}, |
| {4, {128, 256}}, |
| {7, {128, 128}}, |
| {13, {64, 128}}, |
| {25, {32, 128}}, |
| {49, {16, 128}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| // Two shader engines |
| {0, {512, 512}}, |
| {2, {256, 512}}, |
| {4, {256, 256}}, |
| {7, {128, 256}}, |
| {13, {128, 128}}, |
| {25, {64, 128}}, |
| {49, {32, 128}}, |
| {97, {16, 128}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| { |
| // Four shader engines |
| {0, {512, 512}}, |
| {4, {256, 512}}, |
| {7, {256, 256}}, |
| {13, {128, 256}}, |
| {25, {128, 128}}, |
| {49, {64, 128}}, |
| {97, {16, 128}}, |
| {UINT_MAX, {0, 0}}, |
| }, |
| }, |
| }; |
| |
| VkExtent2D extent = {512, 512}; |
| |
| unsigned log_num_rb_per_se = |
| util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se); |
| unsigned log_num_se = util_logbase2_ceil(pdevice->rad_info.max_se); |
| |
| unsigned total_samples = radv_get_rasterization_samples(cmd_buffer); |
| unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer); |
| unsigned effective_samples = total_samples; |
| unsigned color_bytes_per_pixel = 0; |
| |
| for (unsigned i = 0; i < render->color_att_count; ++i) { |
| struct radv_image_view *iview = render->color_att[i].iview; |
| |
| if (!iview) |
| continue; |
| |
| if (!((d->color_write_mask >> (i * 4)) & 0xf)) |
| continue; |
| |
| color_bytes_per_pixel += vk_format_get_blocksize(render->color_att[i].format); |
| } |
| |
| /* MSAA images typically don't use all samples all the time, so assume a |
| * cost of at most 2 samples per pixel when per-sample shading is off. |
| */ |
| if (effective_samples >= 2 && ps_iter_samples <= 1) |
| effective_samples = 2; |
| color_bytes_per_pixel *= effective_samples; |
| |
| const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se]; |
| while (color_entry[1].bpp <= color_bytes_per_pixel) |
| ++color_entry; |
| |
| extent = color_entry->extent; |
| |
| if (render->ds_att.iview) { |
| /* Coefficients taken from AMDVLK */ |
| unsigned depth_coeff = vk_format_has_depth(render->ds_att.format) ? 5 : 0; |
| unsigned stencil_coeff = vk_format_has_stencil(render->ds_att.format) ? 1 : 0; |
| unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples; |
| |
| const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se]; |
| while (ds_entry[1].bpp <= ds_bytes_per_pixel) |
| ++ds_entry; |
| |
| if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height) |
| extent = ds_entry->extent; |
| } |
| |
| return extent; |
| } |
| |
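| /* Compute PA_SC_BINNER_CNTL_0 for the binning-disabled case. On GFX10+ a |
| * bin size hint is still programmed, based on the smallest color |
| * attachment format that is written. |
| */ |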
| static unsigned |
| radv_get_disabled_binning_state(struct radv_graphics_pipeline *pipeline, |
| struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; |
| const struct radv_rendering_state *render = &cmd_buffer->state.render; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| uint32_t pa_sc_binner_cntl_0; |
| |
| if (pdevice->rad_info.gfx_level >= GFX10) { |
| unsigned min_bytes_per_pixel = 0; |
| |
| for (unsigned i = 0; i < render->color_att_count; ++i) { |
| struct radv_image_view *iview = render->color_att[i].iview; |
| |
| if (!iview) |
| continue; |
| |
| if (!((d->color_write_mask >> (i * 4)) & 0xf)) |
| continue; |
| |
| unsigned bytes = vk_format_get_blocksize(render->color_att[i].format); |
| if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel) |
| min_bytes_per_pixel = bytes; |
| } |
| |
| pa_sc_binner_cntl_0 = |
| S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) | |
| S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */ |
| S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */ |
| S_028C44_DISABLE_START_OF_PRIM(1) | |
| S_028C44_FLUSH_ON_BINNING_TRANSITION(1); |
| } else { |
| pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | |
| S_028C44_DISABLE_START_OF_PRIM(1) | |
| S_028C44_FLUSH_ON_BINNING_TRANSITION(pdevice->rad_info.family == CHIP_VEGA12 || |
| pdevice->rad_info.family == CHIP_VEGA20 || |
| pdevice->rad_info.family >= CHIP_RAVEN2); |
| } |
| |
| return pa_sc_binner_cntl_0; |
| } |
| |
| static unsigned |
| radv_get_binning_state(struct radv_graphics_pipeline *pipeline, struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_device *device = pipeline->base.device; |
| unsigned pa_sc_binner_cntl_0; |
| VkExtent2D bin_size; |
| |
| if (device->physical_device->rad_info.gfx_level >= GFX10) { |
| bin_size = radv_gfx10_compute_bin_size(pipeline, cmd_buffer); |
| } else { |
| assert(device->physical_device->rad_info.gfx_level == GFX9); |
| bin_size = radv_gfx9_compute_bin_size(pipeline, cmd_buffer); |
| } |
| |
| if (device->pbb_allowed && bin_size.width && bin_size.height) { |
| struct radv_binning_settings *settings = &device->physical_device->binning_settings; |
| |
| pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | |
| S_028C44_BIN_SIZE_X(bin_size.width == 16) | |
| S_028C44_BIN_SIZE_Y(bin_size.height == 16) | |
| S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | |
| S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | |
| S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) | |
| S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) | |
| S_028C44_DISABLE_START_OF_PRIM(1) | |
| S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) | |
| S_028C44_OPTIMAL_BIN_SELECTION(1) | |
| S_028C44_FLUSH_ON_BINNING_TRANSITION(device->physical_device->rad_info.family == CHIP_VEGA12 || |
| device->physical_device->rad_info.family == CHIP_VEGA20 || |
| device->physical_device->rad_info.family >= CHIP_RAVEN2); |
| } else { |
| pa_sc_binner_cntl_0 = radv_get_disabled_binning_state(pipeline, cmd_buffer); |
| } |
| |
| return pa_sc_binner_cntl_0; |
| } |
| |
| static void |
| radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline) |
| { |
| unsigned pa_sc_binner_cntl_0; |
| |
| if (pipeline->base.device->physical_device->rad_info.gfx_level < GFX9) |
| return; |
| |
| pa_sc_binner_cntl_0 = radv_get_binning_state(pipeline, cmd_buffer); |
| |
| if (pa_sc_binner_cntl_0 == cmd_buffer->state.last_pa_sc_binner_cntl_0) |
| return; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, pa_sc_binner_cntl_0); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| |
| cmd_buffer->state.last_pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0; |
| } |
| |
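| /* Prefetch the shader binary into L2 using CP DMA, so that waves are less |
| * likely to stall on instruction cache misses when the shader first runs. |
| */ |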
| static void |
| radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader) |
| { |
| uint64_t va; |
| |
| if (!shader) |
| return; |
| |
| va = radv_shader_get_va(shader); |
| |
| si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); |
| } |
| |
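| /* Prefetch pending shaders and vertex buffer descriptors into L2. With |
| * first_stage_only, only the first stage (VS or MS) and the vertex buffer |
| * descriptors are prefetched so the draw can start sooner; the remaining |
| * stages are prefetched afterwards. |
| */ |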
| static void |
| radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, |
| struct radv_graphics_pipeline *pipeline, bool first_stage_only) |
| { |
| struct radv_cmd_state *state = &cmd_buffer->state; |
| uint32_t mask = state->prefetch_L2_mask; |
| |
| /* Fast prefetch path for starting draws as soon as possible. */ |
| if (first_stage_only) |
| mask &= RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS | RADV_PREFETCH_MS; |
| |
| if (mask & RADV_PREFETCH_VS) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_VERTEX]); |
| |
| if (mask & RADV_PREFETCH_MS) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_MESH]); |
| |
| if (mask & RADV_PREFETCH_VBO_DESCRIPTORS) |
| si_cp_dma_prefetch(cmd_buffer, state->vb_va, pipeline->vb_desc_alloc_size); |
| |
| if (mask & RADV_PREFETCH_TCS) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_CTRL]); |
| |
| if (mask & RADV_PREFETCH_TES) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_EVAL]); |
| |
| if (mask & RADV_PREFETCH_GS) { |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_GEOMETRY]); |
| if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.gs_copy_shader); |
| } |
| |
| if (mask & RADV_PREFETCH_PS) { |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_FRAGMENT]); |
| if (pipeline->ps_epilog) { |
| struct radv_shader_part *ps_epilog = pipeline->ps_epilog; |
| |
| si_cp_dma_prefetch(cmd_buffer, ps_epilog->va, ps_epilog->code_size); |
| } |
| } |
| |
| state->prefetch_L2_mask &= ~mask; |
| } |
| |
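| /* Compute and emit the RB+ registers (SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, |
| * SX_BLEND_OPT_CONTROL) from the bound color attachments, the pipeline SPI export formats and |
| * the dynamic color write mask. Down-conversion is only enabled for 32bpp and smaller formats, |
| * and the emit is skipped when nothing changed. |
| */ |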
| static void |
| radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) |
| { |
| if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed) |
| return; |
| |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| struct radv_rendering_state *render = &cmd_buffer->state.render; |
| |
| unsigned sx_ps_downconvert = 0; |
| unsigned sx_blend_opt_epsilon = 0; |
| unsigned sx_blend_opt_control = 0; |
| |
| for (unsigned i = 0; i < render->color_att_count; i++) { |
| unsigned format, swap; |
| bool has_alpha, has_rgb; |
| if (render->color_att[i].iview == NULL) { |
| /* We don't set the DISABLE bits, because the HW can't have holes, |
| * so the SPI color format is set to 32-bit 1-component. */ |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); |
| continue; |
| } |
| |
| struct radv_color_buffer_info *cb = &render->color_att[i].cb; |
| |
| format = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 |
| ? G_028C70_FORMAT_GFX11(cb->cb_color_info) |
| : G_028C70_FORMAT_GFX6(cb->cb_color_info); |
| swap = G_028C70_COMP_SWAP(cb->cb_color_info); |
| has_alpha = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 |
| ? !G_028C74_FORCE_DST_ALPHA_1_GFX11(cb->cb_color_attrib) |
| : !G_028C74_FORCE_DST_ALPHA_1_GFX6(cb->cb_color_attrib); |
| |
| uint32_t spi_format = (pipeline->col_format_non_compacted >> (i * 4)) & 0xf; |
| uint32_t colormask = (d->color_write_mask >> (i * 4)) & 0xf; |
| |
| if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32) |
| has_rgb = !has_alpha; |
| else |
| has_rgb = true; |
| |
| /* Check the colormask and export format. */ |
| if (!(colormask & 0x7)) |
| has_rgb = false; |
| if (!(colormask & 0x8)) |
| has_alpha = false; |
| |
| if (spi_format == V_028714_SPI_SHADER_ZERO) { |
| has_rgb = false; |
| has_alpha = false; |
| } |
| |
| /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha |
| * optimization, even though it has no alpha. */ |
| if (has_rgb && format == V_028C70_COLOR_5_9_9_9) |
| has_alpha = true; |
| |
| /* Disable value checking for disabled channels. */ |
| if (!has_rgb) |
| sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); |
| if (!has_alpha) |
| sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); |
| |
| /* Enable down-conversion for 32bpp and smaller formats. */ |
| switch (format) { |
| case V_028C70_COLOR_8: |
| case V_028C70_COLOR_8_8: |
| case V_028C70_COLOR_8_8_8_8: |
| /* For 1 and 2-channel formats, use the superset thereof. */ |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || |
| spi_format == V_028714_SPI_SHADER_UINT16_ABGR || |
| spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_5_6_5: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_1_5_5_5: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_4_4_4_4: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_32: |
| if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); |
| else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); |
| break; |
| |
| case V_028C70_COLOR_16: |
| case V_028C70_COLOR_16_16: |
| /* For 1-channel formats, use the superset thereof. */ |
| if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || |
| spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || |
| spi_format == V_028714_SPI_SHADER_UINT16_ABGR || |
| spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { |
| if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); |
| else |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_10_11_11: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); |
| break; |
| |
| case V_028C70_COLOR_2_10_10_10: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); |
| } |
| break; |
| case V_028C70_COLOR_5_9_9_9: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); |
| break; |
| } |
| } |
| |
| /* Do not set the DISABLE bits for the unused attachments, as that |
| * breaks dual source blending in SkQP and does not seem to improve |
| * performance. */ |
| |
| if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert && |
| sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon && |
| sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control) |
| return; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); |
| radeon_emit(cmd_buffer->cs, sx_ps_downconvert); |
| radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); |
| radeon_emit(cmd_buffer->cs, sx_blend_opt_control); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| |
| cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert; |
| cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon; |
| cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control; |
| } |
| |
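| /* Emit the PS epilog pointer (a user SGPR that holds the epilog VA) when the pipeline uses a |
| * separately compiled fragment shader epilog. |
| */ |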
| static void |
| radv_emit_ps_epilog(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_shader *ps_shader = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; |
| struct radv_shader_part *ps_epilog = pipeline->ps_epilog; |
| |
| if (!ps_epilog) |
| return; |
| |
| /* The main shader must not use fewer VGPRs than the epilog, otherwise shared VGPRs might |
| * not work. |
| */ |
| assert(G_00B848_VGPRS(ps_shader->config.rsrc1) >= G_00B848_VGPRS(ps_epilog->rsrc1)); |
| |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, ps_epilog->bo); |
| |
| assert((ps_epilog->va >> 32) == cmd_buffer->device->physical_device->rad_info.address32_hi); |
| |
| struct radv_userdata_info *loc = |
| &ps_shader->info.user_sgprs_locs.shader_data[AC_UD_PS_EPILOG_PC]; |
| uint32_t base_reg = pipeline->base.user_data_0[MESA_SHADER_FRAGMENT]; |
| assert(loc->sgpr_idx != -1); |
| assert(loc->num_sgprs == 1); |
| radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, |
| ps_epilog->va, false); |
| } |
| |
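| /* Bind a new graphics pipeline: update the scratch requirements, mark the dynamic states that |
| * must be re-emitted, emit the pre-built pipeline command stream (and the context register |
| * stream only when it differs from the previous pipeline), and track all shader BOs. |
| */ |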
| static void |
| radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_device *device = cmd_buffer->device; |
| |
| if (cmd_buffer->state.emitted_graphics_pipeline == pipeline) |
| return; |
| |
| cmd_buffer->scratch_size_per_wave_needed = |
| MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave); |
| cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->base.max_waves); |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | |
| RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP | |
| RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS | |
| RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | |
| RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | |
| RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | |
| RADV_CMD_DIRTY_DYNAMIC_POLYGON_MODE | |
| RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | |
| RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != radv_rast_prim_is_points_or_lines(pipeline->rast_prim)) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->disable_dual_quad != pipeline->disable_dual_quad || |
| cmd_buffer->state.emitted_graphics_pipeline->custom_blend_mode != pipeline->custom_blend_mode) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | |
| RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->vgt_tf_param != pipeline->vgt_tf_param) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| memcmp(cmd_buffer->state.emitted_graphics_pipeline->cb_blend_control, |
| pipeline->cb_blend_control, sizeof(pipeline->cb_blend_control)) || |
| memcmp(cmd_buffer->state.emitted_graphics_pipeline->sx_mrt_blend_opt, |
| pipeline->sx_mrt_blend_opt, sizeof(pipeline->sx_mrt_blend_opt))) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->ms.sample_shading_enable != pipeline->ms.sample_shading_enable || |
| cmd_buffer->state.emitted_graphics_pipeline->ms.min_sample_shading != pipeline->ms.min_sample_shading || |
| cmd_buffer->state.emitted_graphics_pipeline->pa_sc_mode_cntl_1 != pipeline->pa_sc_mode_cntl_1 || |
| cmd_buffer->state.emitted_graphics_pipeline->db_render_control != pipeline->db_render_control || |
| cmd_buffer->state.emitted_graphics_pipeline->rast_prim != pipeline->rast_prim) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES; |
| |
| radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw); |
| |
| if (pipeline->has_ngg_culling && |
| pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY && |
| !cmd_buffer->state.last_nggc_settings) { |
| /* The already emitted RSRC2 contains the LDS required for NGG culling. |
| * Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage. |
| * API GS always needs LDS, so this isn't useful there. |
| */ |
| struct radv_shader *v = pipeline->base.shaders[pipeline->last_vgt_api_stage]; |
| radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, |
| (v->config.rsrc2 & C_00B22C_LDS_SIZE) | |
| S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling)); |
| } |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.cdw != pipeline->base.ctx_cs.cdw || |
| cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs_hash != pipeline->base.ctx_cs_hash || |
| memcmp(cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.buf, |
| pipeline->base.ctx_cs.cdw * 4)) { |
| radeon_emit_array(cmd_buffer->cs, pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.cdw); |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| if (device->pbb_allowed) { |
| struct radv_binning_settings *settings = &device->physical_device->binning_settings; |
| |
| if ((!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] != |
| cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) && |
| (settings->context_states_per_bin > 1 || settings->persistent_states_per_bin > 1)) { |
| /* Break the batch on PS changes. */ |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
| radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); |
| } |
| } |
| |
| radv_emit_ps_epilog(cmd_buffer); |
| |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo); |
| |
| /* With graphics pipeline libraries, binaries are uploaded from a library and each shader |
| * holds a pointer to its own slab BO, so every shader BO must be added to the buffer list. |
| */ |
| for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { |
| struct radv_shader *shader = pipeline->base.shaders[s]; |
| |
| if (!shader || !shader->bo) |
| continue; |
| |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, shader->bo); |
| } |
| |
| if (pipeline->base.gs_copy_shader && pipeline->base.gs_copy_shader->bo) { |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.gs_copy_shader->bo); |
| } |
| |
| if (unlikely(cmd_buffer->device->trace_bo)) |
| radv_save_pipeline(cmd_buffer, &pipeline->base); |
| |
| cmd_buffer->state.emitted_graphics_pipeline = pipeline; |
| |
| cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; |
| } |
| |
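| /* Depth is clamped to the viewport range unless the app disabled depth clamp, in which case |
| * clamping is fully disabled when depth clip is also disabled (or with |
| * VK_EXT_depth_range_unrestricted) and otherwise forced to [0.0, 1.0]. |
| */ |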
| static enum radv_depth_clamp_mode |
| radv_get_depth_clamp_mode(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| const struct radv_device *device = cmd_buffer->device; |
| enum radv_depth_clamp_mode mode; |
| |
| mode = RADV_DEPTH_CLAMP_MODE_VIEWPORT; |
| if (!d->depth_clamp_enable) { |
| /* For optimal performance, depth clamping should always be enabled except if the application |
| * disables clamping explicitly or uses depth values outside of the [0.0, 1.0] range. |
| */ |
| if (!d->depth_clip_enable || device->vk.enabled_extensions.EXT_depth_range_unrestricted) { |
| mode = RADV_DEPTH_CLAMP_MODE_DISABLED; |
| } else { |
| mode = RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE; |
| } |
| } |
| |
| return mode; |
| } |
| |
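| /* Emit the viewport transforms (PA_CL_VPORT_*) and the depth ranges (PA_SC_VPORT_ZMIN/ZMAX). |
| * For VK_EXT_depth_clip_control's [-1, 1] mode, the Z scale/translate are adjusted to map the |
| * larger NDC range onto the same depth range. |
| */ |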
| static void |
| radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| enum radv_depth_clamp_mode depth_clamp_mode = radv_get_depth_clamp_mode(cmd_buffer); |
| |
| assert(d->viewport.count); |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, d->viewport.count * 6); |
| |
| for (unsigned i = 0; i < d->viewport.count; i++) { |
| radeon_emit(cmd_buffer->cs, fui(d->viewport.xform[i].scale[0])); |
| radeon_emit(cmd_buffer->cs, fui(d->viewport.xform[i].translate[0])); |
| radeon_emit(cmd_buffer->cs, fui(d->viewport.xform[i].scale[1])); |
| radeon_emit(cmd_buffer->cs, fui(d->viewport.xform[i].translate[1])); |
| |
| double scale_z, translate_z; |
| if (d->depth_clip_negative_one_to_one) { |
| scale_z = d->viewport.xform[i].scale[2] * 0.5f; |
| translate_z = (d->viewport.xform[i].translate[2] + d->viewport.viewports[i].maxDepth) * 0.5f; |
| } else { |
| scale_z = d->viewport.xform[i].scale[2]; |
| translate_z = d->viewport.xform[i].translate[2]; |
| } |
| radeon_emit(cmd_buffer->cs, fui(scale_z)); |
| radeon_emit(cmd_buffer->cs, fui(translate_z)); |
| } |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, d->viewport.count * 2); |
| for (unsigned i = 0; i < d->viewport.count; i++) { |
| float zmin, zmax; |
| |
| if (depth_clamp_mode == RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE) { |
| zmin = 0.0f; |
| zmax = 1.0f; |
| } else { |
| zmin = MIN2(d->viewport.viewports[i].minDepth, d->viewport.viewports[i].maxDepth); |
| zmax = MAX2(d->viewport.viewports[i].minDepth, d->viewport.viewports[i].maxDepth); |
| } |
| |
| radeon_emit(cmd_buffer->cs, fui(zmin)); |
| radeon_emit(cmd_buffer->cs, fui(zmax)); |
| } |
| } |
| |
| void |
| radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| si_write_scissors(cs, d->scissor.count, d->scissor.scissors, d->viewport.viewports); |
| } |
| |
| static void |
| radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer) |
| { |
| radv_write_scissors(cmd_buffer, cmd_buffer->cs); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = false; |
| } |
| |
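| /* Emit the discard rectangles (VK_EXT_discard_rectangles) as PA_SC_CLIPRECT_* rectangles. */ |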
| static void |
| radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| if (!d->discard_rectangle.count) |
| return; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, |
| d->discard_rectangle.count * 2); |
| for (unsigned i = 0; i < d->discard_rectangle.count; ++i) { |
| VkRect2D rect = d->discard_rectangle.rectangles[i]; |
| radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); |
| radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) | |
| S_028214_BR_Y(rect.offset.y + rect.extent.height)); |
| } |
| } |
| |
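| /* Emit the line width. The hardware register takes the half-width in 12.4 fixed point, hence |
| * the multiplication by 8 (line_width / 2 * 16). |
| */ |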
| static void |
| radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL, |
| S_028A08_WIDTH(CLAMP(d->line_width * 8, 0, 0xFFFF))); |
| } |
| |
| static void |
| radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4); |
| radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4); |
| } |
| |
| static void |
| radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2); |
| radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) | |
| S_028430_STENCILMASK(d->stencil_compare_mask.front) | |
| S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) | |
| S_028430_STENCILOPVAL(1)); |
| radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) | |
| S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) | |
| S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) | |
| S_028434_STENCILOPVAL_BF(1)); |
| } |
| |
| static void |
| radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, 2); |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.min)); |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.max)); |
| } |
| |
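| /* Emit the depth bias state. The slope factor is scaled by 16.0f because the hardware expects |
| * the slope scale in 1/16th units. |
| */ |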
| static void |
| radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned slope = fui(d->depth_bias.slope * 16.0f); |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */ |
| radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */ |
| radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */ |
| } |
| |
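| /* Emit the line stipple state. Line strips use a different auto-reset mode so the pattern |
| * continues across the segments of a strip instead of resetting for every line. |
| */ |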
| static void |
| radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| uint32_t auto_reset_cntl = 1; |
| |
| if (d->primitive_topology == V_008958_DI_PT_LINESTRIP) |
| auto_reset_cntl = 2; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE, |
| S_028A0C_LINE_PATTERN(d->line_stipple.pattern) | |
| S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) | |
| S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl)); |
| } |
| |
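| /* Build PA_SU_SC_MODE_CNTL from dynamic state: cull mode, front face, depth bias enables, |
| * polygon mode and provoking vertex. GFX10+ additionally needs KEEP_TOGETHER_ENABLE for |
| * non-filled polygon modes. |
| */ |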
| uint32_t |
| radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer) |
| { |
| enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned pa_su_sc_mode_cntl; |
| |
| pa_su_sc_mode_cntl = S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT)) | |
| S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT)) | |
| S_028814_FACE(d->front_face) | |
| S_028814_POLY_OFFSET_FRONT_ENABLE(d->depth_bias_enable) | |
| S_028814_POLY_OFFSET_BACK_ENABLE(d->depth_bias_enable) | |
| S_028814_POLY_OFFSET_PARA_ENABLE(d->depth_bias_enable) | |
| S_028814_POLY_MODE(d->polygon_mode != V_028814_X_DRAW_TRIANGLES) | |
| S_028814_POLYMODE_FRONT_PTYPE(d->polygon_mode) | |
| S_028814_POLYMODE_BACK_PTYPE(d->polygon_mode) | |
| S_028814_PROVOKING_VTX_LAST(d->provoking_vertex_mode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT); |
| |
| if (gfx_level >= GFX10) { |
| pa_su_sc_mode_cntl |= |
| S_028814_KEEP_TOGETHER_ENABLE(d->polygon_mode != V_028814_X_DRAW_TRIANGLES); |
| } |
| |
| return pa_su_sc_mode_cntl; |
| } |
| |
| static void |
| radv_emit_culling(struct radv_cmd_buffer *cmd_buffer) |
| { |
| unsigned pa_su_sc_mode_cntl = radv_get_pa_su_sc_mode_cntl(cmd_buffer); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl); |
| } |
| |
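| /* Emit the provoking vertex index as a user SGPR for NGG. In last-vertex mode, the index is |
| * derived from the primitive topology (VS) or from the number of GS input vertices. |
| */ |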
| static void |
| radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const unsigned stage = pipeline->last_vgt_api_stage; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| const struct radv_userdata_info *loc = &pipeline->last_vgt_api_stage_locs[AC_UD_NGG_PROVOKING_VTX]; |
| unsigned provoking_vtx = 0; |
| uint32_t base_reg; |
| |
| if (loc->sgpr_idx == -1) |
| return; |
| |
| if (d->provoking_vertex_mode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) { |
| if (stage == MESA_SHADER_VERTEX) { |
| provoking_vtx = si_conv_prim_to_gs_out(d->primitive_topology); |
| } else { |
| assert(stage == MESA_SHADER_GEOMETRY); |
| struct radv_shader *gs = pipeline->base.shaders[stage]; |
| provoking_vtx = gs->info.gs.vertices_in - 1; |
| } |
| } |
| |
| base_reg = pipeline->base.user_data_0[stage]; |
| radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, provoking_vtx); |
| } |
| |
| static void |
| radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| assert(!cmd_buffer->state.mesh_shading); |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { |
| radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, |
| R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology); |
| } else { |
| radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology); |
| } |
| } |
| |
| static void |
| radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, |
| S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0) | |
| S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0) | |
| S_028800_ZFUNC(d->depth_compare_op) | |
| S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0) | |
| S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0) | |
| S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0) | |
| S_028800_STENCILFUNC(d->stencil_op.front.compare_op) | |
| S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op)); |
| } |
| |
| static void |
| radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg( |
| cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, |
| S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) | |
| S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) | |
| S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) | |
| S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) | |
| S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) | |
| S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op))); |
| } |
| |
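| /* Emit the VRS state (GFX10.3+): the per-draw rate (GE_VRS_RATE) and the combiner modes |
| * (PA_CL_VRS_CNTL). Without a VRS attachment, the HTILE rate is known to be 1x1, which allows |
| * the combiners to be simplified. |
| */ |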
| static void |
| radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1; |
| uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1; |
| uint32_t pa_cl_vrs_cntl = pipeline->vrs.pa_cl_vrs_cntl; |
| uint32_t pipeline_comb_mode = d->fragment_shading_rate.combiner_ops[0]; |
| uint32_t htile_comb_mode = d->fragment_shading_rate.combiner_ops[1]; |
| |
| assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3); |
| |
| if (!cmd_buffer->state.render.vrs_att.iview) { |
| /* When the current subpass has no VRS attachment, the VRS rates are expected to be 1x1, so we |
| * can cheat by tweaking the different combiner modes. |
| */ |
| switch (htile_comb_mode) { |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR: |
| /* The result of min(A, 1x1) is always 1x1. */ |
| FALLTHROUGH; |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR: |
| /* Force the per-draw VRS rate to 1x1. */ |
| rate_x = rate_y = 0; |
| |
| /* As the result of min(A, 1x1) or replace(A, 1x1) are always 1x1, set the vertex rate |
| * combiner mode as passthrough. |
| */ |
| pipeline_comb_mode = V_028848_VRS_COMB_MODE_PASSTHRU; |
| break; |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR: |
| /* The result of max(A, 1x1) is always A. */ |
| FALLTHROUGH; |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR: |
| /* Nothing to do here because the SAMPLE_ITER combiner mode should already be passthrough. */ |
| break; |
| default: |
| break; |
| } |
| } |
| |
| /* Emit per-draw VRS rate which is the first combiner. */ |
| radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, |
| S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y)); |
| |
| /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the |
| * draw rate and the vertex rate. |
| */ |
| if (cmd_buffer->state.mesh_shading) { |
| pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) | |
| S_028848_PRIMITIVE_RATE_COMBINER_MODE(pipeline_comb_mode); |
| } else { |
| pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(pipeline_comb_mode) | |
| S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU); |
| } |
| |
| /* HTILE_RATE_COMBINER_MODE controls the combiner mode between the primitive rate and the HTILE |
| * rate. |
| */ |
| pa_cl_vrs_cntl |= S_028848_HTILE_RATE_COMBINER_MODE(htile_comb_mode); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl); |
| } |
| |
| static void |
| radv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, |
| d->primitive_restart_enable); |
| } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { |
| radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, |
| d->primitive_restart_enable); |
| } else { |
| radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, |
| d->primitive_restart_enable); |
| } |
| } |
| |
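| /* Emit PA_CL_CLIP_CNTL from dynamic state (rasterizer discard, depth clip and clip space). */ |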
| static void |
| radv_emit_clipping(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, |
| S_028810_DX_RASTERIZATION_KILL(d->rasterizer_discard_enable) | |
| S_028810_ZCLIP_NEAR_DISABLE(!d->depth_clip_enable) | |
| S_028810_ZCLIP_FAR_DISABLE(!d->depth_clip_enable) | |
| S_028810_DX_CLIP_SPACE_DEF(!d->depth_clip_negative_one_to_one) | |
| S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); |
| } |
| |
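| /* Emit CB_COLOR_CONTROL: the logic op, the RB+ dual-quad disable, and the CB mode (a custom |
| * blend mode for internal meta operations, or CB_DISABLE when all color writes are masked). |
| */ |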
| static void |
| radv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned cb_color_control = 0; |
| |
| if (d->logic_op_enable) { |
| cb_color_control |= S_028808_ROP3(d->logic_op); |
| } else { |
| cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.has_rbplus) { |
| cb_color_control |= |
| S_028808_DISABLE_DUAL_QUAD(pipeline->disable_dual_quad || d->logic_op_enable); |
| } |
| |
| if (pipeline->custom_blend_mode) { |
| cb_color_control |= S_028808_MODE(pipeline->custom_blend_mode); |
| } else if (d->color_write_mask) { |
| cb_color_control |= S_028808_MODE(V_028808_CB_NORMAL); |
| } else { |
| cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE); |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028808_CB_COLOR_CONTROL, cb_color_control); |
| } |
| |
| static void |
| radv_emit_color_write(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, |
| d->color_write_mask & d->color_write_enable); |
| } |
| |
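| /* Emit the tessellation state that depends on the dynamic number of patch control points: |
| * VGT_LS_HS_CONFIG, the LDS size of the TCS (of the LS on GFX6-8), and the user SGPRs that |
| * describe the patch layout when the shaders expose them. |
| */ |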
| static void |
| radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned ls_hs_config, base_reg; |
| struct radv_userdata_info *loc; |
| |
| ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) | |
| S_028B58_HS_NUM_INPUT_CP(d->patch_control_points) | |
| S_028B58_HS_NUM_OUTPUT_CP(tcs->info.tcs.tcs_vertices_out); |
| |
| if (pdevice->rad_info.gfx_level >= GFX7) { |
| radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); |
| } else { |
| radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); |
| } |
| |
| if (pdevice->rad_info.gfx_level >= GFX9) { |
| unsigned hs_rsrc2 = tcs->config.rsrc2; |
| |
| if (pdevice->rad_info.gfx_level >= GFX10) { |
| hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(cmd_buffer->state.tess_lds_size); |
| } else { |
| hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(cmd_buffer->state.tess_lds_size); |
| } |
| |
| radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2); |
| } else { |
| struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX]; |
| unsigned ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size); |
| |
| radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); |
| } |
| |
| /* Emit user SGPRs for dynamic patch control points. */ |
| loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT); |
| if (loc->sgpr_idx == -1) |
| return; |
| assert(loc->num_sgprs == 1); |
| |
| base_reg = pipeline->base.user_data_0[MESA_SHADER_TESS_CTRL]; |
| radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, |
| (cmd_buffer->state.tess_num_patches << 6) | d->patch_control_points); |
| |
| loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_TESS_EVAL, AC_UD_TES_NUM_PATCHES); |
| assert(loc->sgpr_idx != -1 && loc->num_sgprs == 1); |
| |
| base_reg = pipeline->base.user_data_0[MESA_SHADER_TESS_EVAL]; |
| radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, |
| cmd_buffer->state.tess_num_patches); |
| } |
| |
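| /* Emit PA_SC_CONSERVATIVE_RASTERIZATION_CNTL (GFX9+) for the over/underestimation modes of |
| * VK_EXT_conservative_rasterization. |
| */ |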
| static void |
| radv_emit_conservative_rast_mode(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| if (pdevice->rad_info.gfx_level >= GFX9) { |
| uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); |
| |
| if (d->conservative_rast_mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { |
| pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | |
| S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); |
| |
| if (d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) { |
| pa_sc_conservative_rast |= |
| S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) | |
| S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | |
| S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); |
| } else { |
| assert(d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT); |
| pa_sc_conservative_rast |= |
| S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | |
| S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) | |
| S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0); |
| } |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, |
| pa_sc_conservative_rast); |
| } |
| } |
| |
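| /* Emit DB_RENDER_OVERRIDE: viewport depth clamping is only turned off when the clamp mode is |
| * RADV_DEPTH_CLAMP_MODE_DISABLED. |
| */ |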
| static void |
| radv_emit_depth_clamp_enable(struct radv_cmd_buffer *cmd_buffer) |
| { |
| enum radv_depth_clamp_mode mode = radv_get_depth_clamp_mode(cmd_buffer); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_02800C_DB_RENDER_OVERRIDE, |
| S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | |
| S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | |
| S_02800C_DISABLE_VIEWPORT_CLAMP(mode == RADV_DEPTH_CLAMP_MODE_DISABLED)); |
| } |
| |
| static unsigned |
| radv_get_pa_sc_mode_cntl_1(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer); |
| unsigned pa_sc_mode_cntl_1 = pipeline->pa_sc_mode_cntl_1; |
| |
| if (rasterization_samples) { |
| unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer); |
| |
| pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); |
| } |
| |
| return pa_sc_mode_cntl_1; |
| } |
| |
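| /* Emit the state that depends on the number of rasterization samples: the default sample |
| * locations, SPI_BARYC_CNTL, DB_RENDER_CONTROL (including the GFX11 MAX_ALLOWED_TILES_IN_WAVE |
| * workaround), PA_SC_MODE_CNTL_1 and the sample count user SGPR for the fragment shader. |
| */ |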
| static void |
| radv_emit_rasterization_samples(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; |
| unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer); |
| const struct radv_rendering_state *render = &cmd_buffer->state.render; |
| unsigned pa_sc_mode_cntl_1 = radv_get_pa_sc_mode_cntl_1(cmd_buffer); |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); |
| unsigned db_render_control = pipeline->db_render_control; |
| |
| if (!d->sample_location.count) |
| radv_emit_default_sample_locations(cmd_buffer->cs, rasterization_samples); |
| |
| if (rasterization_samples > 1) { |
| unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer); |
| |
| if (ps_iter_samples > 1) |
| spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); |
| } |
| |
| if (pdevice->rad_info.gfx_level >= GFX11) { |
| unsigned num_samples = render->max_samples; |
| unsigned max_allowed_tiles_in_wave = 0; |
| |
| if (pdevice->rad_info.has_dedicated_vram) { |
| if (num_samples == 8) |
| max_allowed_tiles_in_wave = 7; |
| else if (num_samples == 4) |
| max_allowed_tiles_in_wave = 14; |
| } else { |
| if (num_samples == 8) |
| max_allowed_tiles_in_wave = 8; |
| } |
| |
| /* TODO: We may want to disable this workaround for future chips. */ |
| if (num_samples >= 4) { |
| if (max_allowed_tiles_in_wave) |
| max_allowed_tiles_in_wave--; |
| else |
| max_allowed_tiles_in_wave = 15; |
| } |
| |
| db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) | |
| S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave); |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028000_DB_RENDER_CONTROL, db_render_control); |
| radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); |
| radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); |
| |
| /* Pass the number of samples to the fragment shader because it might be needed. */ |
| struct radv_userdata_info *loc = |
| radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_FRAGMENT, AC_UD_PS_NUM_SAMPLES); |
| if (loc->sgpr_idx != -1) { |
| uint32_t base_reg = pipeline->base.user_data_0[MESA_SHADER_FRAGMENT]; |
| radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, rasterization_samples); |
| } |
| } |
| |
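| /* Emit the CB registers for one color attachment. The DCC/FMASK compression bits are cleared |
| * when the image layout does not allow compressed access, and FMASK_COMPRESS_1FRAG_ONLY is |
| * cleared during FMASK/DCC decompression of TC-compatible CMASK images so the decompression |
| * actually happens. |
| */ |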
| static void |
| radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, |
| struct radv_color_buffer_info *cb, struct radv_image_view *iview, |
| VkImageLayout layout) |
| { |
| bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8; |
| uint32_t cb_fdcc_control = cb->cb_dcc_control; |
| uint32_t cb_color_info = cb->cb_color_info; |
| struct radv_image *image = iview->image; |
| |
| if (!radv_layout_dcc_compressed( |
| cmd_buffer->device, image, iview->vk.base_mip_level, layout, |
| radv_image_queue_family_mask(image, cmd_buffer->qf, |
| cmd_buffer->qf))) { |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| cb_fdcc_control &= C_028C78_FDCC_ENABLE; |
| } else { |
| cb_color_info &= C_028C70_DCC_ENABLE; |
| } |
| } |
| |
| if (!radv_layout_fmask_compressed( |
| cmd_buffer->device, image, layout, |
| radv_image_queue_family_mask(image, cmd_buffer->qf, |
| cmd_buffer->qf))) { |
| cb_color_info &= C_028C70_COMPRESSION; |
| } |
| |
| if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) || |
| radv_is_dcc_decompress_pipeline(cmd_buffer))) { |
| /* If this bit is set, the FMASK decompression operation |
| * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS). |
| */ |
| cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY; |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ |
| radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */ |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */ |
| radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, cb->cb_color_base); |
| radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3); |
| } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_base); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_view); |
| radeon_emit(cmd_buffer->cs, cb_color_info); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); |
| radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); |
| radeon_emit(cmd_buffer->cs, 0); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, |
| cb->cb_color_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, |
| cb->cb_color_cmask >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, |
| cb->cb_color_fmask >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, |
| cb->cb_dcc_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, |
| cb->cb_color_attrib2); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, |
| cb->cb_color_attrib3); |
| } else if (cmd_buffer->device->physical_device->rad_info.gf
|