Re: [Mesa-dev] [PATCH v02 27/37] i965: Port gen8+ 3DSTATE_PS_EXTRA to genxml.

2017-04-27 Thread Kenneth Graunke
On Monday, April 24, 2017 3:19:22 PM PDT Rafael Antognolli wrote:
[snip]
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 0f7a222..7ed79b2 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -1668,6 +1668,99 @@ static const struct brw_tracked_state 
> genX(raster_state) = {
> },
> .emit = genX(upload_raster),
>  };
> +
> +/* -- */
> +
> +static void
> +genX(upload_ps_extra)(struct brw_context *brw)
> +{
> +   const struct brw_wm_prog_data *prog_data =
> +  brw_wm_prog_data(brw->wm.base.prog_data);
> +
> +   brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), pse) {

Could we call this psx?  That's the abbreviation I used in the old code.

> +  pse.PixelShaderValid = true;
> +  pse.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
> +  pse.PixelShaderKillsPixel = prog_data->uses_kill;
> +  pse.AttributeEnable = prog_data->num_varying_inputs != 0;
> +  pse.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
> +  pse.PixelShaderUsesSourceW = prog_data->uses_src_w;
> +  pse.PixelShaderIsPerSample = prog_data->persample_dispatch;
> +
> +  /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
> +  if (prog_data->uses_sample_mask) {
> +#if GEN_GEN >= 9
> + struct gl_context *ctx = >ctx;

Let's just move ctx to the top and mark it UNUSED to shut up warnings.
(That way, if we introduce something else that uses ctx, it'll already
be in place...plus, that's just where it normally is...)

> +
> + if (prog_data->post_depth_coverage)
> +pse.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
> + else if (prog_data->inner_coverage && 
> ctx->IntelConservativeRasterization)
> +pse.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
> + else
> +pse.InputCoverageMaskState = ICMS_NORMAL;
> +#else
> + pse.PixelShaderUsesInputCoverageMask = true;
> +#endif
> +  }
> +
> +  pse.oMaskPresenttoRenderTarget = prog_data->uses_omask;
> +#if GEN_GEN >= 9
> +  pse.PixelShaderPullsBary = prog_data->pulls_bary;

Let's move the computed stencil stuff here to combine blocks:

  psx.PixelShaderComputesStencil = prog_data->computed_stencil;

That's 1 line instead of 6.

(I'm not worried about losing the assert - if anything, we could
assert gen >= 9 in the compiler when setting the prog_data field.)

> +#endif
> +
> +  /* The stricter cross-primitive coherency guarantees that the hardware
> +   * gives us with the "Accesses UAV" bit set for at least one shader 
> stage
> +   * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
> +   * are redundant within the current image, atomic counter and SSBO GL
> +   * APIs, which all have very loose ordering and coherency requirements
> +   * and generally rely on the application to insert explicit barriers 
> when
> +   * a shader invocation is expected to see the memory writes performed 
> by
> +   * the invocations of some previous primitive.  Regardless of the value
> +   * of "UAV coherency required", the "Accesses UAV" bits will implicitly
> +   * cause an in most cases useless DC flush when the lowermost stage 
> with
> +   * the bit set finishes execution.
> +   *
> +   * It would be nice to disable it, but in some cases we can't because 
> on
> +   * Gen8+ it also has an influence on rasterization via the PS UAV-only
> +   * signal (which could be set independently from the coherency 
> mechanism
> +   * in the 3DSTATE_WM command on Gen7), and because in some cases it 
> will
> +   * determine whether the hardware skips execution of the fragment 
> shader
> +   * or not via the ThreadDispatchEnable signal.  However if we know that
> +   * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
> +   * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
> +   * difference so we may just disable it here.
> +   *
> +   * Gen8 hardware tries to compute ThreadDispatchEnable for us but 
> doesn't
> +   * take into account KillPixels when no depth or stencil writes are
> +   * enabled.  In order for occlusion queries to work correctly with no
> +   * attachments, we need to force-enable here.
> +   *
> +   * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
> +   * _NEW_COLOR
> +   */
> +  if ((prog_data->has_side_effects || prog_data->uses_kill) &&
> +  !brw_color_buffer_write_enabled(brw))
> + pse.PixelShaderHasUAV = true;;

One too many semicolons here

Reviewed-by: Kenneth Graunke 

> +
> +  if (prog_data->computed_stencil) {
> + assert(brw->gen >= 9);
> +#if GEN_GEN >= 9
> + pse.PixelShaderComputesStencil = true;
> +#endif

[Mesa-dev] [PATCH v02 27/37] i965: Port gen8+ 3DSTATE_PS_EXTRA to genxml.

2017-04-24 Thread Rafael Antognolli
Emit 3DSTATE_PS_EXTRA on Gen8+ using brw_batch_emit helper, that uses
pack structs from genxml.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/Makefile.sources|   1 +-
 src/mesa/drivers/dri/i965/brw_state.h |  10 +-
 src/mesa/drivers/dri/i965/gen8_ps_state.c | 138 +---
 src/mesa/drivers/dri/i965/genX_state_upload.c |  95 -
 4 files changed, 94 insertions(+), 150 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/gen8_ps_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index da09df8..7f25ae1 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -109,7 +109,6 @@ i965_FILES = \
gen8_gs_state.c \
gen8_hs_state.c \
gen8_multisample_state.c \
-   gen8_ps_state.c \
gen8_surface_state.c \
gen8_viewport_state.c \
gen8_vs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 5010237..a87bf3a 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -146,7 +146,6 @@ extern const struct brw_tracked_state gen8_index_buffer;
 extern const struct brw_tracked_state gen8_multisample_state;
 extern const struct brw_tracked_state gen8_pma_fix;
 extern const struct brw_tracked_state gen8_ps_blend;
-extern const struct brw_tracked_state gen8_ps_extra;
 extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
@@ -284,15 +283,6 @@ void brw_update_renderbuffer_surfaces(struct brw_context 
*brw,
 void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
 void gen7_init_vtable_surface_functions(struct brw_context *brw);
 
-/* gen8_ps_state.c */
-void gen8_upload_ps_state(struct brw_context *brw,
-  const struct brw_stage_state *stage_state,
-  const struct brw_wm_prog_data *prog_data,
-  uint32_t fast_clear_op);
-
-void gen8_upload_ps_extra(struct brw_context *brw,
-  const struct brw_wm_prog_data *prog_data);
-
 /* gen8_surface_state.c */
 
 void gen8_init_vtable_surface_functions(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
deleted file mode 100644
index 1a4a680..000
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include 
-#include "program/program.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_wm.h"
-#include "intel_batchbuffer.h"
-
-void
-gen8_upload_ps_extra(struct brw_context *brw,
- const struct brw_wm_prog_data *prog_data)
-{
-   struct gl_context *ctx = >ctx;
-   uint32_t dw1 = 0;
-
-   dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
-   dw1 |= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT;
-
-   if (prog_data->uses_kill)
-  dw1 |= GEN8_PSX_KILL_ENABLE;
-
-   if (prog_data->num_varying_inputs != 0)
-  dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
-
-   if (prog_data->uses_src_depth)
-  dw1 |= GEN8_PSX_USES_SOURCE_DEPTH;
-
-   if (prog_data->uses_src_w)
-  dw1 |= GEN8_PSX_USES_SOURCE_W;
-
-   if (prog_data->persample_dispatch)
-  dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
-
-   /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
-   if (prog_data->uses_sample_mask) {
-  if (brw->gen >= 9) {
- if (prog_data->post_depth_coverage)
-dw1 |= BRW_PCICMS_DEPTH << 
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
- else if (prog_data->inner_coverage && 
ctx->IntelConservativeRasterization)
-