On Tue, Dec 18, 2018 at 9:46 AM Ilia Mirkin <imir...@alum.mit.edu> wrote:
>
> On Tue, Dec 18, 2018 at 9:42 AM Eduardo Lima Mitev <el...@igalia.com> wrote:
> >
> > emit_intrinsic_store_image() is always using 4 components when
> > collecting registers for the value. When image has less than
> > 4 components (e.g, r32f, r32i, r32ui) this results in extra mov
> > instructions.
> >
> > This patch uses the actual number of components from the image format.
> >
> > For example, in a shader like:
> >
> > layout (r32f, binding=0) writeonly uniform imageBuffer u_image;
> > ...
> > void main(void) {
> >    ...
> >    imageStore (u_image, some_offset, vec4(1.0));
> >    ...
> > }
> >
> > instruction count is reduced in at least 3 instructions (note image
> > format is r32f, 1 component only).
> >
> > This obviously reduces register pressure as well.
> >
> > v2: - Added support for image formats from NV_image_format extension
> >     (Ilia Mirkin).
> >     - Return 4 components by default instead of asserting. (Rob Clark).
> >
> > v3: Added more missing formats (Ilia Mirkin).
> > ---
> >  src/freedreno/ir3/ir3_compiler_nir.c | 69 +++++++++++++++++++++++++++-
> >  1 file changed, 67 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/freedreno/ir3/ir3_compiler_nir.c 
> > b/src/freedreno/ir3/ir3_compiler_nir.c
> > index 85f14f354d2..80f6f70475d 100644
> > --- a/src/freedreno/ir3/ir3_compiler_nir.c
> > +++ b/src/freedreno/ir3/ir3_compiler_nir.c
> > @@ -1251,6 +1251,70 @@ emit_intrinsic_load_image(struct ir3_context *ctx, 
> > nir_intrinsic_instr *intr,
> >         ir3_split_dest(b, dst, sam, 0, 4);
> >  }
> >
> > +/* Returns the number of components for the different image formats
> > + * supported by the GLES 3.1 spec, plus those added by the
> > + * GL_NV_image_formats extension.
> > + */
> > +static unsigned
> > +get_num_components_for_glformat(GLuint format)
> > +{
> > +       switch (format) {
> > +       case GL_R32F:
> > +       case GL_R32I:
> > +       case GL_R32UI:
> > +       case GL_R16F:
> > +       case GL_R16I:
> > +       case GL_R16UI:
> > +       case GL_R16:
> > +       case GL_R16_SNORM:
> > +       case GL_R8I:
> > +       case GL_R8UI:
> > +       case GL_R8:
> > +       case GL_R8_SNORM:
> > +               return 1;
> > +
> > +       case GL_RG32F:
> > +       case GL_RG32I:
> > +       case GL_RG32UI:
> > +       case GL_RG16F:
> > +       case GL_RG16I:
> > +       case GL_RG16UI:
> > +       case GL_RG16:
> > +       case GL_RG16_SNORM:
> > +       case GL_RG8I:
> > +       case GL_RG8UI:
> > +       case GL_RG8:
> > +       case GL_RG8_SNORM:
> > +               return 2;
> > +
> > +       case GL_R11F_G11F_B10F:
> > +               return 3;
> > +
> > +       case GL_RGBA32F:
> > +       case GL_RGBA32I:
> > +       case GL_RGBA32UI:
> > +       case GL_RGBA16F:
> > +       case GL_RGBA16I:
> > +       case GL_RGBA16UI:
> > +       case GL_RGBA16:
> > +       case GL_RGBA16_SNORM:
> > +       case GL_RGBA8:
>
> You don't have to resend for this, but all the others go
>
> F (if exists)
> I
> UI
> (UNORM)
> SNORM
>
> While here you have the UNORM first. So please move it down right above SNORM.
>
> > +       case GL_RGBA8I:
> > +       case GL_RGBA8UI:
> > +       case GL_RGBA8_SNORM:
> > +       case GL_RGB10_A2UI:
> > +       case GL_RGB10_A2:
> > +               return 4;
> > +
> > +       default:

also, if we drop the assert, then maybe at least a debug_printf() so
that we know we are seeing an unknown format with debug builds?

With that and Ilia's suggestion, r-b

BR,
-R

> > +               /* Return 4 components also for all other formats we don't 
> > know
> > +                * about. This is always safe. Also, the format should have 
> > been
> > +                * validated already by the higher level API.
> > +                */
> > +               return 4;
> > +       }
> > +}
> > +
> >  /* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
> >  static void
> >  emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr 
> > *intr)
> > @@ -1262,6 +1326,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, 
> > nir_intrinsic_instr *intr)
> >         struct ir3_instruction * const *coords = ir3_get_src(ctx, 
> > &intr->src[1]);
> >         unsigned ncoords = get_image_coords(var, NULL);
> >         unsigned tex_idx = get_image_slot(ctx, 
> > nir_src_as_deref(intr->src[0]));
> > +       unsigned ncomp = 
> > get_num_components_for_glformat(var->data.image.format);
> >
> >         /* src0 is value
> >          * src1 is coords
> > @@ -1276,10 +1341,10 @@ emit_intrinsic_store_image(struct ir3_context *ctx, 
> > nir_intrinsic_instr *intr)
> >          */
> >
> >         stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
> > -                       ir3_create_collect(ctx, value, 4), 0,
> > +                       ir3_create_collect(ctx, value, ncomp), 0,
> >                         ir3_create_collect(ctx, coords, ncoords), 0,
> >                         offset, 0);
> > -       stib->cat6.iim_val = 4;
> > +       stib->cat6.iim_val = ncomp;
> >         stib->cat6.d = ncoords;
> >         stib->cat6.type = get_image_type(var);
> >         stib->cat6.typed = true;
>
> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to