Re: [Mesa-dev] [PATCH v3 06/16] anv/gpu_memcpy: Add a lighter-weight GPU memcpy function

2017-07-10 Thread Nanley Chery
On Mon, Jul 10, 2017 at 09:35:25AM -0700, Jason Ekstrand wrote:
> On Wed, Jun 28, 2017 at 2:14 PM, Nanley Chery  wrote:
> 
> > We'll be performing a GPU memcpy in more places to copy small amounts of
> > data. Add an alternate function that thrashes less state.
> >
> > v2:
> > - Make a new function (Jason Ekstrand).
> > - Move the #define into the function.
> > v3:
> > - Update the function name (Jason).
> > - Update comments.
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_genX.h        |  5 +++++
> >  src/intel/vulkan/genX_gpu_memcpy.c | 40
> > ++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 45 insertions(+)
> >
> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> > index 8da5e075dc..0b7322e281 100644
> > --- a/src/intel/vulkan/anv_genX.h
> > +++ b/src/intel/vulkan/anv_genX.h
> > @@ -69,5 +69,10 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer
> > *cmd_buffer,
> >  struct anv_bo *src, uint32_t src_offset,
> >  uint32_t size);
> >
> > +void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> > +struct anv_bo *dst, uint32_t dst_offset,
> > +struct anv_bo *src, uint32_t src_offset,
> > +uint32_t size);
> > +
> >  void genX(blorp_exec)(struct blorp_batch *batch,
> >const struct blorp_params *params);
> > diff --git a/src/intel/vulkan/genX_gpu_memcpy.c
> > b/src/intel/vulkan/genX_gpu_memcpy.c
> > index 5ef35e6283..9c6b46de94 100644
> > --- a/src/intel/vulkan/genX_gpu_memcpy.c
> > +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> > @@ -52,6 +52,46 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
> >  }
> >
> >  void
> > +genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> > +   struct anv_bo *dst, uint32_t dst_offset,
> > +   struct anv_bo *src, uint32_t src_offset,
> > +   uint32_t size)
> > +{
> > +   /* This memcpy operates in units of dwords. */
> > +   assert(size % 4 == 0);
> > +   assert(dst_offset % 4 == 0);
> > +   assert(src_offset % 4 == 0);
> > +
> > +   for (uint32_t i = 0; i < size; i += 4) {
> > +  const struct anv_address src_addr =
> > + (struct anv_address) { src, src_offset + i};
> > +  const struct anv_address dst_addr =
> > + (struct anv_address) { dst, dst_offset + i};
> > +#if GEN_GEN >= 8
> > +  anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
> > + cp.DestinationMemoryAddress = dst_addr;
> > + cp.SourceMemoryAddress = src_addr;
> > +  }
> > +#else
> > +  /* IVB does not have a general purpose register for command streamer
> > +   * commands. Therefore, we use an alternate temporary register.
> > +   */
> > +#define TEMP_REG 0x2400 /* MI_PREDICATE_SRC0 */
> >
> 
> Using the predicate register seems a bit sketchy.  Vulkan doesn't support
> predication today so it's probably safe but I don't know what form
> predication will take in the future (there's a decent chance it'll get
> added) so I have no idea if this will end up being safe.  Why not use one
> of the indirect dispatch/draw registers?  Those will be safe because we
> only ever set them immediately before 3DPRIMITIVE or GPGPU_WALKER.
> 
> --Jason
> 
> 

I don't mind using any alternate register, so long as it doesn't lose
any bits (like the SO_WRITE_OFFSET registers). The register, Load
Indirect Base Vertex (0x2440), looks like it will work just fine.

Thanks,
Nanley

> > +  anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM),
> > load) {
> > + load.RegisterAddress = TEMP_REG;
> > + load.MemoryAddress = src_addr;
> > +  }
> > +  anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
> > store) {
> > + store.RegisterAddress = TEMP_REG;
> > + store.MemoryAddress = dst_addr;
> > +  }
> > +#undef TEMP_REG
> > +#endif
> > +   }
> > +   return;
> > +}
> > +
> > +void
> >  genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> > struct anv_bo *dst, uint32_t dst_offset,
> > struct anv_bo *src, uint32_t src_offset,
> > --
> > 2.13.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 06/16] anv/gpu_memcpy: Add a lighter-weight GPU memcpy function

2017-07-10 Thread Jason Ekstrand
On Wed, Jun 28, 2017 at 2:14 PM, Nanley Chery  wrote:

> We'll be performing a GPU memcpy in more places to copy small amounts of
> data. Add an alternate function that thrashes less state.
>
> v2:
> - Make a new function (Jason Ekstrand).
> - Move the #define into the function.
> v3:
> - Update the function name (Jason).
> - Update comments.
>
> Signed-off-by: Nanley Chery 
> ---
>  src/intel/vulkan/anv_genX.h        |  5 +++++
>  src/intel/vulkan/genX_gpu_memcpy.c | 40
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 45 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index 8da5e075dc..0b7322e281 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -69,5 +69,10 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer
> *cmd_buffer,
>  struct anv_bo *src, uint32_t src_offset,
>  uint32_t size);
>
> +void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> +struct anv_bo *dst, uint32_t dst_offset,
> +struct anv_bo *src, uint32_t src_offset,
> +uint32_t size);
> +
>  void genX(blorp_exec)(struct blorp_batch *batch,
>const struct blorp_params *params);
> diff --git a/src/intel/vulkan/genX_gpu_memcpy.c
> b/src/intel/vulkan/genX_gpu_memcpy.c
> index 5ef35e6283..9c6b46de94 100644
> --- a/src/intel/vulkan/genX_gpu_memcpy.c
> +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> @@ -52,6 +52,46 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
>  }
>
>  void
> +genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> +   struct anv_bo *dst, uint32_t dst_offset,
> +   struct anv_bo *src, uint32_t src_offset,
> +   uint32_t size)
> +{
> +   /* This memcpy operates in units of dwords. */
> +   assert(size % 4 == 0);
> +   assert(dst_offset % 4 == 0);
> +   assert(src_offset % 4 == 0);
> +
> +   for (uint32_t i = 0; i < size; i += 4) {
> +  const struct anv_address src_addr =
> + (struct anv_address) { src, src_offset + i};
> +  const struct anv_address dst_addr =
> + (struct anv_address) { dst, dst_offset + i};
> +#if GEN_GEN >= 8
> +  anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
> + cp.DestinationMemoryAddress = dst_addr;
> + cp.SourceMemoryAddress = src_addr;
> +  }
> +#else
> +  /* IVB does not have a general purpose register for command streamer
> +   * commands. Therefore, we use an alternate temporary register.
> +   */
> +#define TEMP_REG 0x2400 /* MI_PREDICATE_SRC0 */
>

Using the predicate register seems a bit sketchy.  Vulkan doesn't support
predication today so it's probably safe but I don't know what form
predication will take in the future (there's a decent chance it'll get
added) so I have no idea if this will end up being safe.  Why not use one
of the indirect dispatch/draw registers?  Those will be safe because we
only ever set them immediately before 3DPRIMITIVE or GPGPU_WALKER.

--Jason


> +  anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM),
> load) {
> + load.RegisterAddress = TEMP_REG;
> + load.MemoryAddress = src_addr;
> +  }
> +  anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
> store) {
> + store.RegisterAddress = TEMP_REG;
> + store.MemoryAddress = dst_addr;
> +  }
> +#undef TEMP_REG
> +#endif
> +   }
> +   return;
> +}
> +
> +void
>  genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> struct anv_bo *dst, uint32_t dst_offset,
> struct anv_bo *src, uint32_t src_offset,
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 06/16] anv/gpu_memcpy: Add a lighter-weight GPU memcpy function

2017-06-28 Thread Nanley Chery
We'll be performing a GPU memcpy in more places to copy small amounts of
data. Add an alternate function that thrashes less state.

v2:
- Make a new function (Jason Ekstrand).
- Move the #define into the function.
v3:
- Update the function name (Jason).
- Update comments.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_genX.h        |  5 +++++
 src/intel/vulkan/genX_gpu_memcpy.c | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 8da5e075dc..0b7322e281 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -69,5 +69,10 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer 
*cmd_buffer,
 struct anv_bo *src, uint32_t src_offset,
 uint32_t size);
 
+void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
+struct anv_bo *dst, uint32_t dst_offset,
+struct anv_bo *src, uint32_t src_offset,
+uint32_t size);
+
 void genX(blorp_exec)(struct blorp_batch *batch,
   const struct blorp_params *params);
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c 
b/src/intel/vulkan/genX_gpu_memcpy.c
index 5ef35e6283..9c6b46de94 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -52,6 +52,46 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
 }
 
 void
+genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
+   struct anv_bo *dst, uint32_t dst_offset,
+   struct anv_bo *src, uint32_t src_offset,
+   uint32_t size)
+{
+   /* This memcpy operates in units of dwords. */
+   assert(size % 4 == 0);
+   assert(dst_offset % 4 == 0);
+   assert(src_offset % 4 == 0);
+
+   for (uint32_t i = 0; i < size; i += 4) {
+  const struct anv_address src_addr =
+ (struct anv_address) { src, src_offset + i};
+  const struct anv_address dst_addr =
+ (struct anv_address) { dst, dst_offset + i};
+#if GEN_GEN >= 8
+  anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
+ cp.DestinationMemoryAddress = dst_addr;
+ cp.SourceMemoryAddress = src_addr;
+  }
+#else
+  /* IVB does not have a general purpose register for command streamer
+   * commands. Therefore, we use an alternate temporary register.
+   */
+#define TEMP_REG 0x2400 /* MI_PREDICATE_SRC0 */
+  anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
+ load.RegisterAddress = TEMP_REG;
+ load.MemoryAddress = src_addr;
+  }
+  anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
+ store.RegisterAddress = TEMP_REG;
+ store.MemoryAddress = dst_addr;
+  }
+#undef TEMP_REG
+#endif
+   }
+   return;
+}
+
+void
 genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_bo *dst, uint32_t dst_offset,
struct anv_bo *src, uint32_t src_offset,
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev