From d9266ae66c9db12a9c2578a33bbd5ebd131b489f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 28 Aug 2017 15:57:20 -0700 Subject: [PATCH 3/5] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper. I want to be able to copy between buffer objects using BLORP in the i965 driver. Anvil already had code to do this, in a reasonably efficient manner - first using large bpp copies, then smaller bpp copies. This patch moves that logic into BLORP as blorp_buffer_copy(), so we can use it in both drivers. Reviewed-by: Jason Ekstrand Signed-off-by: Lyude --- src/intel/blorp/blorp.h | 6 +++ src/intel/blorp/blorp_blit.c | 119 +++++++++++++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_blorp.c | 117 +++++++----------------------------------- 3 files changed, 143 insertions(+), 99 deletions(-) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index d19920e87f..e712b4fbb3 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -133,6 +133,12 @@ blorp_copy(struct blorp_batch *batch, uint32_t dst_x, uint32_t dst_y, uint32_t src_width, uint32_t src_height); +void +blorp_buffer_copy(struct blorp_batch *batch, + struct blorp_address src, + struct blorp_address dst, + uint64_t size); + void blorp_fast_clear(struct blorp_batch *batch, const struct blorp_surf *surf, enum isl_format format, diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 35008cbbb0..b012a0a0b3 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -2513,3 +2513,122 @@ blorp_copy(struct blorp_batch *batch, do_blorp_blit(batch, &params, &wm_prog_key, &coords); } + +static enum isl_format +isl_format_for_size(unsigned size_B) +{ + switch (size_B) { + case 1: return ISL_FORMAT_R8_UINT; + case 2: return ISL_FORMAT_R8G8_UINT; + case 4: return ISL_FORMAT_R8G8B8A8_UINT; + case 8: return ISL_FORMAT_R16G16B16A16_UINT; + case 16: return ISL_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Not a power-of-two format size"); + } +} 
+ +/** + * Returns the greatest common divisor of a and b that is a power of two. + */ +static uint64_t +gcd_pow2_u64(uint64_t a, uint64_t b) +{ + assert(a > 0 || b > 0); + + unsigned a_log2 = ffsll(a) - 1; + unsigned b_log2 = ffsll(b) - 1; + + /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which + * case, the MIN2() will take the other one. If both are 0 then we will + * hit the assert above. + */ + return 1 << MIN2(a_log2, b_log2); +} + +static void +do_buffer_copy(struct blorp_batch *batch, + struct blorp_address *src, + struct blorp_address *dst, + int width, int height, int block_size) +{ + /* The actual format we pick doesn't matter as blorp will throw it away. + * The only thing that actually matters is the size. + */ + enum isl_format format = isl_format_for_size(block_size); + + UNUSED bool ok; + struct isl_surf surf; + ok = isl_surf_init(batch->blorp->isl_dev, &surf, + .dim = ISL_SURF_DIM_2D, + .format = format, + .width = width, + .height = height, + .depth = 1, + .levels = 1, + .array_len = 1, + .samples = 1, + .row_pitch = width * block_size, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_RENDER_TARGET_BIT, + .tiling_flags = ISL_TILING_LINEAR_BIT); + assert(ok); + + struct blorp_surf src_blorp_surf = { + .surf = &surf, + .addr = *src, + }; + + struct blorp_surf dst_blorp_surf = { + .surf = &surf, + .addr = *dst, + }; + + blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, + 0, 0, 0, 0, width, height); +} + +/* This is the maximum possible width/height our HW can handle */ +#define MAX_SURFACE_DIM (1ull << 14) + +void +blorp_buffer_copy(struct blorp_batch *batch, + struct blorp_address src, + struct blorp_address dst, + uint64_t size) +{ + uint64_t copy_size = size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. 
+ */ + int bs = 16; + bs = gcd_pow2_u64(bs, src.offset); + bs = gcd_pow2_u64(bs, dst.offset); + bs = gcd_pow2_u64(bs, size); + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); + copy_size -= max_copy_size; + src.offset += max_copy_size; + dst.offset += max_copy_size; + } + + /* Now make a max-width copy */ + uint64_t height = copy_size / (MAX_SURFACE_DIM * bs); + assert(height < MAX_SURFACE_DIM); + if (height != 0) { + uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs; + do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, height, bs); + copy_size -= rect_copy_size; + src.offset += rect_copy_size; + dst.offset += rect_copy_size; + } + + /* Finally, make a small copy to finish it off */ + if (copy_size != 0) { + do_buffer_copy(batch, &src, &dst, copy_size / bs, 1, bs); + } +} diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index c00d38b52c..3a64b60178 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -546,56 +546,6 @@ isl_format_for_size(unsigned size_B) } } -static void -do_buffer_copy(struct blorp_batch *batch, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dst, uint64_t dst_offset, - int width, int height, int block_size) -{ - struct anv_device *device = batch->blorp->driver_ctx; - - /* The actual format we pick doesn't matter as blorp will throw it away. - * The only thing that actually matters is the size. 
- */ - enum isl_format format = isl_format_for_size(block_size); - - UNUSED bool ok; - struct isl_surf surf; - ok = isl_surf_init(&device->isl_dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = format, - .width = width, - .height = height, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .row_pitch = width * block_size, - .usage = ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_RENDER_TARGET_BIT, - .tiling_flags = ISL_TILING_LINEAR_BIT); - assert(ok); - - struct blorp_surf src_blorp_surf = { - .surf = &surf, - .addr = { - .buffer = src, - .offset = src_offset, - }, - }; - - struct blorp_surf dst_blorp_surf = { - .surf = &surf, - .addr = { - .buffer = dst, - .offset = dst_offset, - }, - }; - - blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, - 0, 0, 0, 0, width, height); -} - /** * Returns the greatest common divisor of a and b that is a power of two. */ @@ -632,48 +582,16 @@ void anv_CmdCopyBuffer( blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); for (unsigned r = 0; r < regionCount; r++) { - uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset; - uint64_t copy_size = pRegions[r].size; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. 
- */ - int bs = 16; - bs = gcd_pow2_u64(bs, src_offset); - bs = gcd_pow2_u64(bs, dst_offset); - bs = gcd_pow2_u64(bs, pRegions[r].size); - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(&batch, src_buffer->bo, src_offset, - dst_buffer->bo, dst_offset, - MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dst_offset += max_copy_size; - } - - /* Now make a max-width copy */ - uint64_t height = copy_size / (MAX_SURFACE_DIM * bs); - assert(height < MAX_SURFACE_DIM); - if (height != 0) { - uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs; - do_buffer_copy(&batch, src_buffer->bo, src_offset, - dst_buffer->bo, dst_offset, - MAX_SURFACE_DIM, height, bs); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dst_offset += rect_copy_size; - } + struct blorp_address src = { + .buffer = src_buffer->bo, + .offset = src_buffer->offset + pRegions[r].srcOffset, + }; + struct blorp_address dst = { + .buffer = dst_buffer->bo, + .offset = dst_buffer->offset + pRegions[r].dstOffset, + }; - /* Finally, make a small copy to finish it off */ - if (copy_size != 0) { - do_buffer_copy(&batch, src_buffer->bo, src_offset, - dst_buffer->bo, dst_offset, - copy_size / bs, 1, bs); - } + blorp_buffer_copy(&batch, src, dst, pRegions[r].size); } blorp_batch_finish(&batch); @@ -715,15 +633,16 @@ void anv_CmdUpdateBuffer( anv_state_flush(cmd_buffer->device, tmp_data); - int bs = 16; - bs = gcd_pow2_u64(bs, dstOffset); - bs = gcd_pow2_u64(bs, copy_size); + struct blorp_address src = { + .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = tmp_data.offset, + }; + struct blorp_address dst = { + .buffer = dst_buffer->bo, + .offset = dst_buffer->offset + dstOffset, + }; - do_buffer_copy(&batch, - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, - tmp_data.offset, - dst_buffer->bo, 
dst_buffer->offset + dstOffset, - copy_size / bs, 1, bs); + blorp_buffer_copy(&batch, src, dst, copy_size); dataSize -= copy_size; dstOffset += copy_size; -- 2.14.3