gui/SOURCES/0003-blorp-Turn-anv_CmdCopy...

From d9266ae66c9db12a9c2578a33bbd5ebd131b489f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 28 Aug 2017 15:57:20 -0700
Subject: [PATCH 3/5] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy()
 helper.

I want to be able to copy between buffer objects using BLORP in the i965
driver.  Anvil already had code to do this, in a reasonably efficient
manner - first using large bpp copies, then smaller bpp copies.

This patch moves that logic into BLORP as blorp_buffer_copy(), so we
can use it in both drivers.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Lyude <lyude@redhat.com>
---
 src/intel/blorp/blorp.h      |   6 +++
 src/intel/blorp/blorp_blit.c | 119 +++++++++++++++++++++++++++++++++++++++++++
 src/intel/vulkan/anv_blorp.c | 117 +++++++-----------------------------------
 3 files changed, 143 insertions(+), 99 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index d19920e87f..e712b4fbb3 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -133,6 +133,12 @@ blorp_copy(struct blorp_batch *batch,
            uint32_t dst_x, uint32_t dst_y,
            uint32_t src_width, uint32_t src_height);
 
+void
+blorp_buffer_copy(struct blorp_batch *batch,
+                  struct blorp_address src,
+                  struct blorp_address dst,
+                  uint64_t size);
+
 void
 blorp_fast_clear(struct blorp_batch *batch,
                  const struct blorp_surf *surf, enum isl_format format,
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 35008cbbb0..b012a0a0b3 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -2513,3 +2513,122 @@ blorp_copy(struct blorp_batch *batch,
 
    do_blorp_blit(batch, &params, &wm_prog_key, &coords);
 }
+
+static enum isl_format
+isl_format_for_size(unsigned size_B)
+{
+   switch (size_B) {
+   case 1:  return ISL_FORMAT_R8_UINT;
+   case 2:  return ISL_FORMAT_R8G8_UINT;
+   case 4:  return ISL_FORMAT_R8G8B8A8_UINT;
+   case 8:  return ISL_FORMAT_R16G16B16A16_UINT;
+   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
+   default:
+      unreachable("Not a power-of-two format size");
+   }
+}
+
+/**
+ * Returns the largest power of two dividing both a and b.  A zero argument
+ * imposes no constraint; at least one argument must be non-zero (asserted).
+ */
+static uint64_t
+gcd_pow2_u64(uint64_t a, uint64_t b)
+{
+   assert(a > 0 || b > 0);
+
+   unsigned a_log2 = ffsll(a) - 1;
+   unsigned b_log2 = ffsll(b) - 1;
+
+   /* ffsll(0) is 0, so a zero input gives a log2 of UINT_MAX and MIN2() then
+    * picks the other one.  Shift a 64-bit one so exponents >= 31 are defined.
+    */
+   return 1ull << MIN2(a_log2, b_log2);
+}
+
+static void
+do_buffer_copy(struct blorp_batch *batch,
+               struct blorp_address *src,
+               struct blorp_address *dst,
+               int width, int height, int block_size)
+{
+   /* The actual format we pick doesn't matter as blorp will throw it away.
+    * The only thing that actually matters is the size.
+    */
+   enum isl_format format = isl_format_for_size(block_size);
+
+   UNUSED bool ok;
+   struct isl_surf surf;
+   ok = isl_surf_init(batch->blorp->isl_dev, &surf,
+                      .dim = ISL_SURF_DIM_2D,
+                      .format = format,
+                      .width = width,
+                      .height = height,
+                      .depth = 1,
+                      .levels = 1,
+                      .array_len = 1,
+                      .samples = 1,
+                      .row_pitch = width * block_size,
+                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
+                               ISL_SURF_USAGE_RENDER_TARGET_BIT,
+                      .tiling_flags = ISL_TILING_LINEAR_BIT);
+   assert(ok);
+
+   struct blorp_surf src_blorp_surf = {
+      .surf = &surf,
+      .addr = *src,
+   };
+
+   struct blorp_surf dst_blorp_surf = {
+      .surf = &surf,
+      .addr = *dst,
+   };
+
+   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
+              0, 0, 0, 0, width, height);
+}
+
+/* This is the maximum possible width/height our HW can handle */
+#define MAX_SURFACE_DIM (1ull << 14)
+
+void
+blorp_buffer_copy(struct blorp_batch *batch,
+                  struct blorp_address src,
+                  struct blorp_address dst,
+                  uint64_t size)
+{
+   uint64_t copy_size = size;
+
+   /* First, we compute the biggest format that can be used with the
+    * given offsets and size.
+    */
+   int bs = 16;
+   bs = gcd_pow2_u64(bs, src.offset);
+   bs = gcd_pow2_u64(bs, dst.offset);
+   bs = gcd_pow2_u64(bs, size);
+
+   /* First, we make a bunch of max-sized copies */
+   uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
+   while (copy_size >= max_copy_size) {
+      do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
+      copy_size -= max_copy_size;
+      src.offset += max_copy_size;
+      dst.offset += max_copy_size;
+   }
+
+   /* Now make a max-width copy */
+   uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
+   assert(height < MAX_SURFACE_DIM);
+   if (height != 0) {
+      uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
+      do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, height, bs);
+      copy_size -= rect_copy_size;
+      src.offset += rect_copy_size;
+      dst.offset += rect_copy_size;
+   }
+
+   /* Finally, make a small copy to finish it off */
+   if (copy_size != 0) {
+      do_buffer_copy(batch, &src, &dst, copy_size / bs, 1, bs);
+   }
+}
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index c00d38b52c..3a64b60178 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -546,56 +546,6 @@ isl_format_for_size(unsigned size_B)
    }
 }
 
-static void
-do_buffer_copy(struct blorp_batch *batch,
-               struct anv_bo *src, uint64_t src_offset,
-               struct anv_bo *dst, uint64_t dst_offset,
-               int width, int height, int block_size)
-{
-   struct anv_device *device = batch->blorp->driver_ctx;
-
-   /* The actual format we pick doesn't matter as blorp will throw it away.
-    * The only thing that actually matters is the size.
-    */
-   enum isl_format format = isl_format_for_size(block_size);
-
-   UNUSED bool ok;
-   struct isl_surf surf;
-   ok = isl_surf_init(&device->isl_dev, &surf,
-                      .dim = ISL_SURF_DIM_2D,
-                      .format = format,
-                      .width = width,
-                      .height = height,
-                      .depth = 1,
-                      .levels = 1,
-                      .array_len = 1,
-                      .samples = 1,
-                      .row_pitch = width * block_size,
-                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
-                               ISL_SURF_USAGE_RENDER_TARGET_BIT,
-                      .tiling_flags = ISL_TILING_LINEAR_BIT);
-   assert(ok);
-
-   struct blorp_surf src_blorp_surf = {
-      .surf = &surf,
-      .addr = {
-         .buffer = src,
-         .offset = src_offset,
-      },
-   };
-
-   struct blorp_surf dst_blorp_surf = {
-      .surf = &surf,
-      .addr = {
-         .buffer = dst,
-         .offset = dst_offset,
-      },
-   };
-
-   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
-              0, 0, 0, 0, width, height);
-}
-
 /**
  * Returns the greatest common divisor of a and b that is a power of two.
  */
@@ -632,48 +582,16 @@ void anv_CmdCopyBuffer(
    blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
 
    for (unsigned r = 0; r < regionCount; r++) {
-      uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
-      uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;
-      uint64_t copy_size = pRegions[r].size;
-
-      /* First, we compute the biggest format that can be used with the
-       * given offsets and size.
-       */
-      int bs = 16;
-      bs = gcd_pow2_u64(bs, src_offset);
-      bs = gcd_pow2_u64(bs, dst_offset);
-      bs = gcd_pow2_u64(bs, pRegions[r].size);
-
-      /* First, we make a bunch of max-sized copies */
-      uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
-      while (copy_size >= max_copy_size) {
-         do_buffer_copy(&batch, src_buffer->bo, src_offset,
-                        dst_buffer->bo, dst_offset,
-                        MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
-         copy_size -= max_copy_size;
-         src_offset += max_copy_size;
-         dst_offset += max_copy_size;
-      }
-
-      /* Now make a max-width copy */
-      uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
-      assert(height < MAX_SURFACE_DIM);
-      if (height != 0) {
-         uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
-         do_buffer_copy(&batch, src_buffer->bo, src_offset,
-                        dst_buffer->bo, dst_offset,
-                        MAX_SURFACE_DIM, height, bs);
-         copy_size -= rect_copy_size;
-         src_offset += rect_copy_size;
-         dst_offset += rect_copy_size;
-      }
+      struct blorp_address src = {
+         .buffer = src_buffer->bo,
+         .offset = src_buffer->offset + pRegions[r].srcOffset,
+      };
+      struct blorp_address dst = {
+         .buffer = dst_buffer->bo,
+         .offset = dst_buffer->offset + pRegions[r].dstOffset,
+      };
 
-      /* Finally, make a small copy to finish it off */
-      if (copy_size != 0) {
-         do_buffer_copy(&batch, src_buffer->bo, src_offset,
-                        dst_buffer->bo, dst_offset,
-                        copy_size / bs, 1, bs);
-      }
+      blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
    }
 
    blorp_batch_finish(&batch);
@@ -715,15 +633,16 @@ void anv_CmdUpdateBuffer(
 
       anv_state_flush(cmd_buffer->device, tmp_data);
 
-      int bs = 16;
-      bs = gcd_pow2_u64(bs, dstOffset);
-      bs = gcd_pow2_u64(bs, copy_size);
+      struct blorp_address src = {
+         .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+         .offset = tmp_data.offset,
+      };
+      struct blorp_address dst = {
+         .buffer = dst_buffer->bo,
+         .offset = dst_buffer->offset + dstOffset,
+      };
 
-      do_buffer_copy(&batch,
-                     &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
-                     tmp_data.offset,
-                     dst_buffer->bo, dst_buffer->offset + dstOffset,
-                     copy_size / bs, 1, bs);
+      blorp_buffer_copy(&batch, src, dst, copy_size);
 
       dataSize -= copy_size;
       dstOffset += copy_size;
-- 
2.14.3
mesa package update Signed-off-by: guibuilder_pel7x64builder0 <guibuilder@powerel.org> 7 years ago			`From d9266ae66c9db12a9c2578a33bbd5ebd131b489f Mon Sep 17 00:00:00 2001`
			`From: Kenneth Graunke <kenneth@whitecape.org>`
			`Date: Mon, 28 Aug 2017 15:57:20 -0700`
			`Subject: [PATCH 3/5] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy()`
			`helper.`

			`I want to be able to copy between buffer objects using BLORP in the i965`
			`driver. Anvil already had code to do this, in a reasonably efficient`
			`manner - first using large bpp copies, then smaller bpp copies.`

			`This patch moves that logic into BLORP as blorp_buffer_copy(), so we`
			`can use it in both drivers.`

			`Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>`
			`Signed-off-by: Lyude <lyude@redhat.com>`
			`---`
			`src/intel/blorp/blorp.h \| 6 +++`
			`src/intel/blorp/blorp_blit.c \| 119 +++++++++++++++++++++++++++++++++++++++++++`
			`src/intel/vulkan/anv_blorp.c \| 117 +++++++-----------------------------------`
			`3 files changed, 143 insertions(+), 99 deletions(-)`

			`diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h`
			`index d19920e87f..e712b4fbb3 100644`
			`--- a/src/intel/blorp/blorp.h`
			`+++ b/src/intel/blorp/blorp.h`
			`@@ -133,6 +133,12 @@ blorp_copy(struct blorp_batch *batch,`
			`uint32_t dst_x, uint32_t dst_y,`
			`uint32_t src_width, uint32_t src_height);`

			`+void`
			`+blorp_buffer_copy(struct blorp_batch *batch,`
			`+ struct blorp_address src,`
			`+ struct blorp_address dst,`
			`+ uint64_t size);`
			`+`
			`void`
			`blorp_fast_clear(struct blorp_batch *batch,`
			`const struct blorp_surf *surf, enum isl_format format,`
			`diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c`
			`index 35008cbbb0..b012a0a0b3 100644`
			`--- a/src/intel/blorp/blorp_blit.c`
			`+++ b/src/intel/blorp/blorp_blit.c`
			`@@ -2513,3 +2513,122 @@ blorp_copy(struct blorp_batch *batch,`

			`do_blorp_blit(batch, &params, &wm_prog_key, &coords);`
			`}`
			`+`
			`+static enum isl_format`
			`+isl_format_for_size(unsigned size_B)`
			`+{`
			`+ switch (size_B) {`
			`+ case 1: return ISL_FORMAT_R8_UINT;`
			`+ case 2: return ISL_FORMAT_R8G8_UINT;`
			`+ case 4: return ISL_FORMAT_R8G8B8A8_UINT;`
			`+ case 8: return ISL_FORMAT_R16G16B16A16_UINT;`
			`+ case 16: return ISL_FORMAT_R32G32B32A32_UINT;`
			`+ default:`
			`+ unreachable("Not a power-of-two format size");`
			`+ }`
			`+}`
			`+`
			`+/**`
			`+ * Returns the greatest common divisor of a and b that is a power of two.`
			`+ */`
			`+static uint64_t`
			`+gcd_pow2_u64(uint64_t a, uint64_t b)`
			`+{`
			`+ assert(a > 0 \|\| b > 0);`
			`+`
			`+ unsigned a_log2 = ffsll(a) - 1;`
			`+ unsigned b_log2 = ffsll(b) - 1;`
			`+`
			`+ /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which`
			`+ * case, the MIN2() will take the other one. If both are 0 then we will`
			`+ * hit the assert above.`
			`+ */`
			`+ return 1 << MIN2(a_log2, b_log2);`
			`+}`
			`+`
			`+static void`
			`+do_buffer_copy(struct blorp_batch *batch,`
			`+ struct blorp_address *src,`
			`+ struct blorp_address *dst,`
			`+ int width, int height, int block_size)`
			`+{`
			`+ /* The actual format we pick doesn't matter as blorp will throw it away.`
			`+ * The only thing that actually matters is the size.`
			`+ */`
			`+ enum isl_format format = isl_format_for_size(block_size);`
			`+`
			`+ UNUSED bool ok;`
			`+ struct isl_surf surf;`
			`+ ok = isl_surf_init(batch->blorp->isl_dev, &surf,`
			`+ .dim = ISL_SURF_DIM_2D,`
			`+ .format = format,`
			`+ .width = width,`
			`+ .height = height,`
			`+ .depth = 1,`
			`+ .levels = 1,`
			`+ .array_len = 1,`
			`+ .samples = 1,`
			`+ .row_pitch = width * block_size,`
			`+ .usage = ISL_SURF_USAGE_TEXTURE_BIT \|`
			`+ ISL_SURF_USAGE_RENDER_TARGET_BIT,`
			`+ .tiling_flags = ISL_TILING_LINEAR_BIT);`
			`+ assert(ok);`
			`+`
			`+ struct blorp_surf src_blorp_surf = {`
			`+ .surf = &surf,`
			`+ .addr = *src,`
			`+ };`
			`+`
			`+ struct blorp_surf dst_blorp_surf = {`
			`+ .surf = &surf,`
			`+ .addr = *dst,`
			`+ };`
			`+`
			`+ blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,`
			`+ 0, 0, 0, 0, width, height);`
			`+}`
			`+`
			`+/* This is the maximum possible width/height our HW can handle */`
			`+#define MAX_SURFACE_DIM (1ull << 14)`
			`+`
			`+void`
			`+blorp_buffer_copy(struct blorp_batch *batch,`
			`+ struct blorp_address src,`
			`+ struct blorp_address dst,`
			`+ uint64_t size)`
			`+{`
			`+ uint64_t copy_size = size;`
			`+`
			`+ /* First, we compute the biggest format that can be used with the`
			`+ * given offsets and size.`
			`+ */`
			`+ int bs = 16;`
			`+ bs = gcd_pow2_u64(bs, src.offset);`
			`+ bs = gcd_pow2_u64(bs, dst.offset);`
			`+ bs = gcd_pow2_u64(bs, size);`
			`+`
			`+ /* First, we make a bunch of max-sized copies */`
			`+ uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;`
			`+ while (copy_size >= max_copy_size) {`
			`+ do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);`
			`+ copy_size -= max_copy_size;`
			`+ src.offset += max_copy_size;`
			`+ dst.offset += max_copy_size;`
			`+ }`
			`+`
			`+ /* Now make a max-width copy */`
			`+ uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);`
			`+ assert(height < MAX_SURFACE_DIM);`
			`+ if (height != 0) {`
			`+ uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;`
			`+ do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, height, bs);`
			`+ copy_size -= rect_copy_size;`
			`+ src.offset += rect_copy_size;`
			`+ dst.offset += rect_copy_size;`
			`+ }`
			`+`
			`+ /* Finally, make a small copy to finish it off */`
			`+ if (copy_size != 0) {`
			`+ do_buffer_copy(batch, &src, &dst, copy_size / bs, 1, bs);`
			`+ }`
			`+}`
			`diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c`
			`index c00d38b52c..3a64b60178 100644`
			`--- a/src/intel/vulkan/anv_blorp.c`
			`+++ b/src/intel/vulkan/anv_blorp.c`
			`@@ -546,56 +546,6 @@ isl_format_for_size(unsigned size_B)`
			`}`
			`}`

			`-static void`
			`-do_buffer_copy(struct blorp_batch *batch,`
			`- struct anv_bo *src, uint64_t src_offset,`
			`- struct anv_bo *dst, uint64_t dst_offset,`
			`- int width, int height, int block_size)`
			`-{`
			`- struct anv_device *device = batch->blorp->driver_ctx;`
			`-`
			`- /* The actual format we pick doesn't matter as blorp will throw it away.`
			`- * The only thing that actually matters is the size.`
			`- */`
			`- enum isl_format format = isl_format_for_size(block_size);`
			`-`
			`- UNUSED bool ok;`
			`- struct isl_surf surf;`
			`- ok = isl_surf_init(&device->isl_dev, &surf,`
			`- .dim = ISL_SURF_DIM_2D,`
			`- .format = format,`
			`- .width = width,`
			`- .height = height,`
			`- .depth = 1,`
			`- .levels = 1,`
			`- .array_len = 1,`
			`- .samples = 1,`
			`- .row_pitch = width * block_size,`
			`- .usage = ISL_SURF_USAGE_TEXTURE_BIT \|`
			`- ISL_SURF_USAGE_RENDER_TARGET_BIT,`
			`- .tiling_flags = ISL_TILING_LINEAR_BIT);`
			`- assert(ok);`
			`-`
			`- struct blorp_surf src_blorp_surf = {`
			`- .surf = &surf,`
			`- .addr = {`
			`- .buffer = src,`
			`- .offset = src_offset,`
			`- },`
			`- };`
			`-`
			`- struct blorp_surf dst_blorp_surf = {`
			`- .surf = &surf,`
			`- .addr = {`
			`- .buffer = dst,`
			`- .offset = dst_offset,`
			`- },`
			`- };`
			`-`
			`- blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,`
			`- 0, 0, 0, 0, width, height);`
			`-}`
			`-`
			`/**`
			`* Returns the greatest common divisor of a and b that is a power of two.`
			`*/`
			`@@ -632,48 +582,16 @@ void anv_CmdCopyBuffer(`
			`blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);`

			`for (unsigned r = 0; r < regionCount; r++) {`
			`- uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;`
			`- uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;`
			`- uint64_t copy_size = pRegions[r].size;`
			`-`
			`- /* First, we compute the biggest format that can be used with the`
			`- * given offsets and size.`
			`- */`
			`- int bs = 16;`
			`- bs = gcd_pow2_u64(bs, src_offset);`
			`- bs = gcd_pow2_u64(bs, dst_offset);`
			`- bs = gcd_pow2_u64(bs, pRegions[r].size);`
			`-`
			`- /* First, we make a bunch of max-sized copies */`
			`- uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;`
			`- while (copy_size >= max_copy_size) {`
			`- do_buffer_copy(&batch, src_buffer->bo, src_offset,`
			`- dst_buffer->bo, dst_offset,`
			`- MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);`
			`- copy_size -= max_copy_size;`
			`- src_offset += max_copy_size;`
			`- dst_offset += max_copy_size;`
			`- }`
			`-`
			`- /* Now make a max-width copy */`
			`- uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);`
			`- assert(height < MAX_SURFACE_DIM);`
			`- if (height != 0) {`
			`- uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;`
			`- do_buffer_copy(&batch, src_buffer->bo, src_offset,`
			`- dst_buffer->bo, dst_offset,`
			`- MAX_SURFACE_DIM, height, bs);`
			`- copy_size -= rect_copy_size;`
			`- src_offset += rect_copy_size;`
			`- dst_offset += rect_copy_size;`
			`- }`
			`+ struct blorp_address src = {`
			`+ .buffer = src_buffer->bo,`
			`+ .offset = src_buffer->offset + pRegions[r].srcOffset,`
			`+ };`
			`+ struct blorp_address dst = {`
			`+ .buffer = dst_buffer->bo,`
			`+ .offset = dst_buffer->offset + pRegions[r].dstOffset,`
			`+ };`

			`- /* Finally, make a small copy to finish it off */`
			`- if (copy_size != 0) {`
			`- do_buffer_copy(&batch, src_buffer->bo, src_offset,`
			`- dst_buffer->bo, dst_offset,`
			`- copy_size / bs, 1, bs);`
			`- }`
			`+ blorp_buffer_copy(&batch, src, dst, pRegions[r].size);`
			`}`

			`blorp_batch_finish(&batch);`
			`@@ -715,15 +633,16 @@ void anv_CmdUpdateBuffer(`

			`anv_state_flush(cmd_buffer->device, tmp_data);`

			`- int bs = 16;`
			`- bs = gcd_pow2_u64(bs, dstOffset);`
			`- bs = gcd_pow2_u64(bs, copy_size);`
			`+ struct blorp_address src = {`
			`+ .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo,`
			`+ .offset = tmp_data.offset,`
			`+ };`
			`+ struct blorp_address dst = {`
			`+ .buffer = dst_buffer->bo,`
			`+ .offset = dst_buffer->offset + dstOffset,`
			`+ };`

			`- do_buffer_copy(&batch,`
			`- &cmd_buffer->device->dynamic_state_pool.block_pool.bo,`
			`- tmp_data.offset,`
			`- dst_buffer->bo, dst_buffer->offset + dstOffset,`
			`- copy_size / bs, 1, bs);`
			`+ blorp_buffer_copy(&batch, src, dst, copy_size);`

			`dataSize -= copy_size;`
			`dstOffset += copy_size;`
			`--`
			`2.14.3`