[Mesa-dev] [PATCH] gallium/swr: Fix multi-context sync fence deadlock.

2019-01-04 Thread Bruce Cherniak
Various recreation scenarios lead to the API thread getting stuck in
swr_fence_finish().  This is a multi-context issue, whereby one context
overwrites the fence read-value with a previous sync's lesser value.
The fence sync value is supposed to be always increasing.

In swr_fence_cb(), only update the "read" value if the new value is
greater.

(This may seem like we're not waiting on the other context to finish, but
had we needed it to finish, there would have been a wait prior to
submitting a new sync.)

cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/swr/swr_fence.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_fence.cpp 
b/src/gallium/drivers/swr/swr_fence.cpp
index b05ac8cec0..074d82a3b4 100644
--- a/src/gallium/drivers/swr/swr_fence.cpp
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -50,7 +50,9 @@ swr_fence_cb(uint64_t userData, uint64_t userData2, uint64_t 
userData3)
swr_fence_do_work(fence);
 
/* Correct value is in SwrSync data, and not the fence write field. */
-   fence->read = userData2;
+   /* Contexts may not finish in order, but fence value always increases */
+   if (fence->read < userData2)
+  fence->read = userData2;
 }
 
 /*
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.

2017-12-12 Thread Bruce Cherniak
Environment variable KNOB_MAX_WORKER_THREADS allows the user to override
default thread creation and thread binding.  A previous commit that adjusted
the Linux CPU topology caused setting this KNOB to bind all threads to a
single core.

This patch restores correct functionality of the override.

Cc: 
---
 src/gallium/drivers/swr/rasterizer/core/threads.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index f4ddc21226..6242cb3fc7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -213,8 +213,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
uint32_t& out_numThread
 {
 for (auto &core : node.cores)
 {
-out_numThreadsPerProcGroup = 
std::max((size_t)out_numThreadsPerProcGroup,
-  core.threadIds.size());
+out_numThreadsPerProcGroup += core.threadIds.size();
 }
 }
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] swr: Correct texture allocation and limit max size to 2GB

2017-11-30 Thread Bruce Cherniak
This patch fixes piglit tex3d-maxsize by correcting 4 things:

The total_size calculation was using 32-bit math, therefore a >4GB
allocation request overflowed and was not returning false (unsupported).

Changed AlignedMalloc arguments from "unsigned int" to size_t, to handle
>4GB allocations.

Added error checking on texture allocations to fail gracefully.

Finally, temporarily decreased supported max texture size from 4GB to 2GB.
The gallivm texture-sampler needs some additional work to correctly handle
larger than 2GB textures (offsets to LLVMBuildGEP are signed).

I'm working on a follow-on patch to allow up to 4GB textures, as this is
useful in HPC visualization applications.

Fixes piglit tex3d-maxsize.

v2: Updated patch description to clarify ">4GB".
---
 src/gallium/drivers/swr/rasterizer/common/os.h |  2 +-
 src/gallium/drivers/swr/swr_screen.cpp | 12 +---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h 
b/src/gallium/drivers/swr/rasterizer/common/os.h
index 4ed6b88e45..358cb33b6e 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -210,7 +210,7 @@ unsigned char _BitScanReverse(unsigned int *Index, unsigned 
int Mask)
 }
 
 inline
-void *AlignedMalloc(unsigned int size, unsigned int alignment)
+void *AlignedMalloc(size_t size, size_t alignment)
 {
 void *ret;
 if (posix_memalign(&ret, alignment, size))
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 3f2433e65a..71a07ebe8d 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -50,7 +50,7 @@
  * Max texture sizes
  * XXX Check max texture size values against core and sampler.
  */
-#define SWR_MAX_TEXTURE_SIZE (4 * 1024 * 1024 * 1024ULL) /* 4GB */
+#define SWR_MAX_TEXTURE_SIZE (2 * 1024 * 1024 * 1024ULL) /* 2GB */
 #define SWR_MAX_TEXTURE_2D_LEVELS 14  /* 8K x 8K for now */
 #define SWR_MAX_TEXTURE_3D_LEVELS 12  /* 2K x 2K x 2K for now */
 #define SWR_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
@@ -821,13 +821,15 @@ swr_texture_layout(struct swr_screen *screen,
  ComputeSurfaceOffset(0, 0, 0, 0, 0, level, &res->swr);
}
 
-   size_t total_size = res->swr.depth * res->swr.qpitch * res->swr.pitch *
-   res->swr.numSamples;
+   size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch *
+ res->swr.pitch * res->swr.numSamples;
if (total_size > SWR_MAX_TEXTURE_SIZE)
   return false;
 
if (allocate) {
   res->swr.xpBaseAddress = (gfxptr_t)AlignedMalloc(total_size, 64);
+  if (!res->swr.xpBaseAddress)
+ return false;
 
   if (res->has_depth && res->has_stencil) {
  res->secondary = res->swr;
@@ -843,6 +845,10 @@ swr_texture_layout(struct swr_screen *screen,
   res->secondary.pitch * res->secondary.numSamples;
 
  res->secondary.xpBaseAddress = (gfxptr_t) AlignedMalloc(total_size, 
64);
+ if (!res->secondary.xpBaseAddress) {
+AlignedFree((void *)res->swr.xpBaseAddress);
+return false;
+ }
   }
}
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Correct texture allocation and limit max size to 2GB

2017-11-20 Thread Bruce Cherniak
This patch fixes piglit tex3d-maxsize by correcting 4 things:

The total_size calculation was using 32-bit math, therefore a 4GB
allocation request overflowed and was not returning false (unsupported).

Changed AlignedMalloc arguments from "unsigned int" to size_t, to handle
4GB allocations.

Added error checking on texture allocations to fail gracefully.

Finally, temporarily decreased supported max texture size from 4GB to 2GB.
The gallivm texture-sampler needs some additional work to correctly handle
larger than 2GB textures (offsets to LLVMBuildGEP are signed).

I'm working on a follow-on patch to allow 4GB textures, as this is useful
in HPC visualization applications.

Fixes piglit tex3d-maxsize.
---
 src/gallium/drivers/swr/rasterizer/common/os.h |  2 +-
 src/gallium/drivers/swr/swr_screen.cpp | 12 +---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h 
b/src/gallium/drivers/swr/rasterizer/common/os.h
index 4ed6b88e45..358cb33b6e 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -210,7 +210,7 @@ unsigned char _BitScanReverse(unsigned int *Index, unsigned 
int Mask)
 }
 
 inline
-void *AlignedMalloc(unsigned int size, unsigned int alignment)
+void *AlignedMalloc(size_t size, size_t alignment)
 {
 void *ret;
 if (posix_memalign(&ret, alignment, size))
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 3f2433e65a..71a07ebe8d 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -50,7 +50,7 @@
  * Max texture sizes
  * XXX Check max texture size values against core and sampler.
  */
-#define SWR_MAX_TEXTURE_SIZE (4 * 1024 * 1024 * 1024ULL) /* 4GB */
+#define SWR_MAX_TEXTURE_SIZE (2 * 1024 * 1024 * 1024ULL) /* 2GB */
 #define SWR_MAX_TEXTURE_2D_LEVELS 14  /* 8K x 8K for now */
 #define SWR_MAX_TEXTURE_3D_LEVELS 12  /* 2K x 2K x 2K for now */
 #define SWR_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
@@ -821,13 +821,15 @@ swr_texture_layout(struct swr_screen *screen,
  ComputeSurfaceOffset(0, 0, 0, 0, 0, level, &res->swr);
}
 
-   size_t total_size = res->swr.depth * res->swr.qpitch * res->swr.pitch *
-   res->swr.numSamples;
+   size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch *
+ res->swr.pitch * res->swr.numSamples;
if (total_size > SWR_MAX_TEXTURE_SIZE)
   return false;
 
if (allocate) {
   res->swr.xpBaseAddress = (gfxptr_t)AlignedMalloc(total_size, 64);
+  if (!res->swr.xpBaseAddress)
+ return false;
 
   if (res->has_depth && res->has_stencil) {
  res->secondary = res->swr;
@@ -843,6 +845,10 @@ swr_texture_layout(struct swr_screen *screen,
   res->secondary.pitch * res->secondary.numSamples;
 
  res->secondary.xpBaseAddress = (gfxptr_t) AlignedMalloc(total_size, 
64);
+ if (!res->secondary.xpBaseAddress) {
+AlignedFree((void *)res->swr.xpBaseAddress);
+return false;
+ }
   }
}
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Fixed an uncommon freed-memory access during state validation

2017-11-08 Thread Bruce Cherniak
State validation is performed during clear and draw calls.  Validation
during clear was still accessing vertex buffer state.  When the currently
set vertex buffers are client arrays, this could lead to accessing freed
memory.  Such is the case with the VMD application.

Previously, vertex buffer validation depended on a dirty bit or the
draw info indicating an indexed draw.  This required special handling for
clears.  But vertex buffer validation still occurred, which was unnecessary
and wrong.

Now, only minimal validation is performed during clear, deferring the
remainder to the next draw.  And, by setting the dirty bit in swr_draw_vbo
for indexed draws, vertex buffer validation is only dependent upon a
single dirty bit.

This fixes a bug exposed by the VMD application when changing models.
---
 src/gallium/drivers/swr/swr_draw.cpp  |  7 ++-
 src/gallium/drivers/swr/swr_state.cpp | 35 +++
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
b/src/gallium/drivers/swr/swr_draw.cpp
index 57660c7464..a94cdd6da0 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -52,7 +52,12 @@ swr_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
   return;
}
 
-   /* Update derived state, pass draw info to update function */
+   /* If indexed draw, force vertex validation since index buffer comes
+* from draw info. */
+   if (info->index_size)
+  ctx->dirty |= SWR_NEW_VERTEX;
+
+   /* Update derived state, pass draw info to update function. */
swr_update_derived(pipe, info);
 
swr_update_draw_context(ctx);
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index c6da4fcb8e..4530d377ee 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1204,11 +1204,6 @@ swr_update_derived(struct pipe_context *pipe,
   ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState);
}
 
-   /* Scissor */
-   if (ctx->dirty & SWR_NEW_SCISSOR) {
-  ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
-   }
-
/* Viewport */
if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
  | SWR_NEW_RASTERIZER)) {
@@ -1249,18 +1244,26 @@ swr_update_derived(struct pipe_context *pipe,
   ctx->api.pfnSwrSetViewports(ctx->swrContext, 1, vp, vpm);
}
 
-   /* Set vertex & index buffers
-* (using draw info if called by swr_draw_vbo)
-* If indexed draw, revalidate since index buffer comes from
-* pipe_draw_info.
-*/
-   if (ctx->dirty & SWR_NEW_VERTEX ||
-  (p_draw_info && p_draw_info->index_size)) {
+   /* When called from swr_clear (p_draw_info = null), render targets,
+* rasterState and viewports (dependent on render targets) are the only
+* necessary validation.  Defer remaining validation by setting
+* post_update_dirty_flags and clear all dirty flags.  BackendState is
+* still unconditionally validated below */
+   if (!p_draw_info) {
+  post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER |
+   SWR_NEW_RASTERIZER |
+   SWR_NEW_VIEWPORT);
+  ctx->dirty = 0;
+   }
+
+   /* Scissor */
+   if (ctx->dirty & SWR_NEW_SCISSOR) {
+  ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
+   }
 
-  /* If being called by swr_draw_vbo, copy draw details */
-  struct pipe_draw_info info = {0};
-  if (p_draw_info)
- info = *p_draw_info;
+   /* Set vertex & index buffers */
+   if (ctx->dirty & SWR_NEW_VERTEX) {
+  const struct pipe_draw_info &info = *p_draw_info;
 
   /* vertex buffers */
   SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] st/mesa: only try to create 1x msaa surfaces for "fake" msaa drivers

2017-08-25 Thread Bruce Cherniak
From: Brian Paul 

For software drivers where we want "fake" msaa support for GL 3.x, we
treat 1 sample as being msaa.

For drivers with real msaa support, start format probing at 2x msaa.
For drivers with fake msaa support, start format probing at 1x msaa.

This also tweaks the MaxSamples code in st_init_extensions() so that
we use MaxSamples=1 for fake msaa.  This allows the format probe loops
to run at least one iteration.

This fixes a llvmpipe/VTK regression from commit 6839d3369905eb02151.
And for drivers with fake msaa support, calls such as
glTexImage2DMultisample(samples=1) will now succeed.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102038
---
 src/mesa/state_tracker/st_cb_fbo.c | 13 ++---
 src/mesa/state_tracker/st_cb_texture.c | 11 ---
 src/mesa/state_tracker/st_extensions.c | 14 ++
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
b/src/mesa/state_tracker/st_cb_fbo.c
index afc7700306..a7c286bcc5 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -155,12 +155,19 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx,
 *   to  and no more than the next larger sample count supported
 *   by the implementation.
 *
-* So let's find the supported number of samples closest to NumSamples.
+* Find the supported number of samples >= rb->NumSamples
 */
if (rb->NumSamples > 0) {
-  unsigned i;
+  unsigned start, i;
 
-  for (i = MAX2(2, rb->NumSamples); i <= ctx->Const.MaxSamples; i++) {
+  if (ctx->Const.MaxSamples > 1 &&  rb->NumSamples == 1) {
+ /* don't try num_samples = 1 with drivers that support real msaa */
+ start = 2;
+  } else {
+ start = rb->NumSamples;
+  }
+
+  for (i = start; i <= ctx->Const.MaxSamples; i++) {
  format = st_choose_renderbuffer_format(st, internalFormat, i);
 
  if (format != PIPE_FORMAT_NONE) {
diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index af2052db24..b5006b05a7 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -2739,13 +2739,18 @@ st_texture_storage(struct gl_context *ctx,
 
bindings = default_bindings(st, fmt);
 
-   /* Raise the sample count if the requested one is unsupported. */
if (num_samples > 0) {
+  /* Find msaa sample count which is actually supported.  For example,
+   * if the user requests 1x but only 4x or 8x msaa is supported, we'll
+   * choose 4x here.
+   */
   enum pipe_texture_target ptarget = gl_target_to_pipe(texObj->Target);
   boolean found = FALSE;
 
-  /* start the query with at least two samples */
-  num_samples = MAX2(num_samples, 2);
+  if (ctx->Const.MaxSamples > 1 && num_samples == 1) {
+ /* don't try num_samples = 1 with drivers that support real msaa */
+ num_samples = 2;
+  }
 
   for (; num_samples <= ctx->Const.MaxSamples; num_samples++) {
  if (screen->is_format_supported(screen, fmt, ptarget,
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 904d9cd834..2008e28250 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -1046,17 +1046,15 @@ void st_init_extensions(struct pipe_screen *screen,
  void_formats, 32,
  PIPE_BIND_RENDER_TARGET);
}
-   if (consts->MaxSamples == 1) {
-  /* one sample doesn't really make sense */
-  consts->MaxSamples = 0;
-   }
-   else if (consts->MaxSamples >= 2) {
+
+   if (consts->MaxSamples >= 2) {
+  /* Real MSAA support */
   extensions->EXT_framebuffer_multisample = GL_TRUE;
   extensions->EXT_framebuffer_multisample_blit_scaled = GL_TRUE;
}
-
-   if (consts->MaxSamples == 0 &&
-   screen->get_param(screen, PIPE_CAP_FAKE_SW_MSAA)) {
+   else if (consts->MaxSamples > 0 &&
+screen->get_param(screen, PIPE_CAP_FAKE_SW_MSAA)) {
+  /* fake MSAA support */
   consts->FakeSWMSAA = GL_TRUE;
   extensions->EXT_framebuffer_multisample = GL_TRUE;
   extensions->EXT_framebuffer_multisample_blit_scaled = GL_TRUE;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] swr: Report format max_samples=1 to maintain support for "fake" msaa.

2017-08-25 Thread Bruce Cherniak
Accompanying patch "st/mesa: only try to create 1x msaa surfaces for
'fake' msaa" requires driver to report max_samples=1 to enable "fake"
msaa. Previously, 0 and 1 were treated equivalently in st_init_extensions()
and either could enable "fake" msaa.

This patch raises the swr default msaa_max_count from 0 to 1, so that
swr_is_format_supported will report max_samples=1.

Real msaa can still be enabled by exporting SWR_MSAA_MAX_COUNT with a
pow2 value between 2 and 16.

This patch is necessary to prevent an OpenSWR regression resulting from
the st/mesa patch.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102038
---
 src/gallium/drivers/swr/swr_screen.cpp | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 3287bc6fee..cc8d9955b8 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -255,13 +255,13 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
   return 1;
 
/* MSAA support
-* If user has explicitly set max_sample_count = 0 (via SWR_MSAA_MAX_COUNT)
-* then disable all MSAA support and go back to old caps. */
+* If user has explicitly set max_sample_count = 1 (via SWR_MSAA_MAX_COUNT)
+* then disable all MSAA support and go back to old (FAKE_SW_MSAA) caps. */
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
-  return swr_screen(screen)->msaa_max_count ? 1 : 0;
+  return (swr_screen(screen)->msaa_max_count > 1) ? 1 : 0;
case PIPE_CAP_FAKE_SW_MSAA:
-  return swr_screen(screen)->msaa_max_count ? 0 : 1;
+  return (swr_screen(screen)->msaa_max_count > 1) ? 0 : 1;
 
   /* unsupported features */
case PIPE_CAP_ANISOTROPIC_FILTER:
@@ -1079,22 +1079,22 @@ swr_validate_env_options(struct swr_screen *screen)
   screen->client_copy_limit = client_copy_limit;
 
/* XXX msaa under development, disable by default for now */
-   screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
+   screen->msaa_max_count = 1; /* was SWR_MAX_NUM_MULTISAMPLES; */
 
/* validate env override values, within range and power of 2 */
-   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0);
-   if (msaa_max_count) {
-  if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
+   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 1);
+   if (msaa_max_count != 1) {
+  if ((msaa_max_count < 1) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
 || !util_is_power_of_two(msaa_max_count)) {
  fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
  fprintf(stderr, "must be power of 2 between 1 and %d" \
- " (or 0 to disable msaa)\n",
+ " (or 1 to disable msaa)\n",
SWR_MAX_NUM_MULTISAMPLES);
- msaa_max_count = 0;
+ msaa_max_count = 1;
   }
 
   fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
-  if (!msaa_max_count)
+  if (msaa_max_count == 1)
  fprintf(stderr, "(msaa disabled)\n");
 
   screen->msaa_max_count = msaa_max_count;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: add osmesa framebuffer iface hash table per st manager

2017-08-02 Thread Bruce Cherniak
Commit bbc29393d3 didn't include osmesa state_tracker.  This patch adds
necessary initialization.

Fixes crash in OSMesa initialization.

Created-by: Charmaine Lee 
Tested-by: Bruce Cherniak 

Cc: Charmaine Lee 
Cc: 17.2 
---
 src/gallium/state_trackers/osmesa/osmesa.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/state_trackers/osmesa/osmesa.c 
b/src/gallium/state_trackers/osmesa/osmesa.c
index 18f1b88128..751d255c54 100644
--- a/src/gallium/state_trackers/osmesa/osmesa.c
+++ b/src/gallium/state_trackers/osmesa/osmesa.c
@@ -439,6 +439,7 @@ osmesa_st_framebuffer_validate(struct st_context_iface 
*stctx,
return TRUE;
 }
 
+static uint32_t osmesa_fb_ID = 0;
 
 static struct st_framebuffer_iface *
 osmesa_create_st_framebuffer(void)
@@ -448,6 +449,8 @@ osmesa_create_st_framebuffer(void)
   stfbi->flush_front = osmesa_st_framebuffer_flush_front;
   stfbi->validate = osmesa_st_framebuffer_validate;
   p_atomic_set(&stfbi->stamp, 1);
+  stfbi->ID = p_atomic_inc_return(&osmesa_fb_ID);
+  stfbi->state_manager = get_st_manager();
}
return stfbi;
 }
@@ -508,6 +511,14 @@ osmesa_find_buffer(enum pipe_format color_format,
 static void
 osmesa_destroy_buffer(struct osmesa_buffer *osbuffer)
 {
+   struct st_api *stapi = get_st_api();
+
+   /*
+* Notify the state manager that the associated framebuffer interface
+* is no longer valid.
+*/
+   stapi->destroy_drawable(stapi, osbuffer->stfb);
+
FREE(osbuffer->stfb);
FREE(osbuffer);
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: add osmesa framebuffer iface hash table per st manager

2017-08-02 Thread Bruce Cherniak
Commit bbc29393d3 didn't include osmesa state_tracker.  This patch adds
necessary initialization.

Fixes crash in OSMesa initialization.

Created-by: Charmaine Lee 
Tested-by: Bruce Cherniak 

Cc: 17.2 
---
 src/gallium/state_trackers/osmesa/osmesa.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/state_trackers/osmesa/osmesa.c 
b/src/gallium/state_trackers/osmesa/osmesa.c
index 18f1b88128..751d255c54 100644
--- a/src/gallium/state_trackers/osmesa/osmesa.c
+++ b/src/gallium/state_trackers/osmesa/osmesa.c
@@ -439,6 +439,7 @@ osmesa_st_framebuffer_validate(struct st_context_iface 
*stctx,
return TRUE;
 }
 
+static uint32_t osmesa_fb_ID = 0;
 
 static struct st_framebuffer_iface *
 osmesa_create_st_framebuffer(void)
@@ -448,6 +449,8 @@ osmesa_create_st_framebuffer(void)
   stfbi->flush_front = osmesa_st_framebuffer_flush_front;
   stfbi->validate = osmesa_st_framebuffer_validate;
   p_atomic_set(&stfbi->stamp, 1);
+  stfbi->ID = p_atomic_inc_return(&osmesa_fb_ID);
+  stfbi->state_manager = get_st_manager();
}
return stfbi;
 }
@@ -508,6 +511,14 @@ osmesa_find_buffer(enum pipe_format color_format,
 static void
 osmesa_destroy_buffer(struct osmesa_buffer *osbuffer)
 {
+   struct st_api *stapi = get_st_api();
+
+   /*
+* Notify the state manager that the associated framebuffer interface
+* is no longer valid.
+*/
+   stapi->destroy_drawable(stapi, osbuffer->stfb);
+
FREE(osbuffer->stfb);
FREE(osbuffer);
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/3] swr: Add path to draw directly from client memory without copy.

2017-07-12 Thread Bruce Cherniak
If size of client memory copy is too large, don't copy. The draw will
access user-buffer directly and then block.  This is faster and more
efficient than queuing many large client draws.

Applications that still use large client arrays benefit from this.  VMD
is an example.

The threshold for this path defaults to 32KB.  This value can be
overridden by setting environment variable SWR_CLIENT_COPY_LIMIT.

v2: Use #define for default value, rather than hard-coded constant.
---
 src/gallium/drivers/swr/swr_context.h  |  1 +
 src/gallium/drivers/swr/swr_draw.cpp   |  9 +
 src/gallium/drivers/swr/swr_screen.cpp | 13 
 src/gallium/drivers/swr/swr_screen.h   |  2 ++
 src/gallium/drivers/swr/swr_state.cpp  | 37 --
 5 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.h 
b/src/gallium/drivers/swr/swr_context.h
index 3ff4bf3e2f..ab3057af96 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -51,6 +51,7 @@
 #define SWR_NEW_FRAMEBUFFER (1 << 15)
 #define SWR_NEW_CLIP (1 << 16)
 #define SWR_NEW_SO (1 << 17)
+#define SWR_LARGE_CLIENT_DRAW (1<<18) // Indicates client draw will block
 
 namespace std
 {
diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
b/src/gallium/drivers/swr/swr_draw.cpp
index f26b8e873c..cbd1558624 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -188,6 +188,15 @@ swr_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
info->instance_count,
info->start,
info->start_instance);
+
+   /* On large client-buffer draw, we used client buffer directly, without
+* copy.  Block until draw is finished.
+* VMD is an example application that benefits from this. */
+   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
+  struct swr_screen *screen = swr_screen(pipe->screen);
+  swr_fence_submit(ctx, screen->flush_fence);
+  swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
+   }
 }
 
 
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 9b3897ce6b..3c183629c2 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -61,6 +61,9 @@
 #define SWR_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
 #define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
 
+/* Default max client_copy_limit */
+#define SWR_CLIENT_COPY_LIMIT 32768
+
 /* Flag indicates creation of alternate surface, to prevent recursive loop
  * in resource creation when msaa_force_enable is set. */
 #define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
@@ -1066,6 +1069,16 @@ swr_destroy_screen(struct pipe_screen *p_screen)
 static void
 swr_validate_env_options(struct swr_screen *screen)
 {
+   /* The client_copy_limit sets a maximum on the amount of user-buffer memory
+* copied to scratch space on a draw.  Past this, the draw will access
+* user-buffer directly and then block.  This is faster than queuing many
+* large client draws. */
+   screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT;
+   int client_copy_limit =
+  debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT);
+   if (client_copy_limit > 0)
+  screen->client_copy_limit = client_copy_limit;
+
/* XXX msaa under development, disable by default for now */
screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
 
diff --git a/src/gallium/drivers/swr/swr_screen.h 
b/src/gallium/drivers/swr/swr_screen.h
index dc1bb47f02..6d6d1cb87d 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -43,8 +43,10 @@ struct swr_screen {
 
struct sw_winsys *winsys;
 
+   /* Configurable environment settings */
boolean msaa_force_enable;
uint8_t msaa_max_count;
+   uint32_t client_copy_limit;
 
HANDLE hJitMgr;
 };
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 45c9c213e5..6c406a37ec 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1267,12 +1267,20 @@ swr_update_derived(struct pipe_context *pipe,
 partial_inbounds = 0;
 min_vertex_index = info.min_index;
 
-/* Copy only needed vertices to scratch space */
 size = AlignUp(size, 4);
-const void *ptr = (const uint8_t *) vb->buffer.user + base;
-ptr = (uint8_t *)swr_copy_to_scratch_space(
-   ctx, &ctx->scratch->vertex_buffer, ptr, size);
-p_data = (const uint8_t *)ptr - base;
+/* If size of client memory copy is too large, don't copy. The
+ * draw will access user-buffer directly and then block.  This is
+ * faster than queuing many large client draws. */
+if (size >= screen->client_copy_limit) {
+

[Mesa-dev] [PATCH v2 1/3] swr: Remove hard-coded constant and "todo" comment.

2017-07-12 Thread Bruce Cherniak
Removed the hard-coded constant in favor of a #define.  Also removed
TODO comment.  The constant value doesn't need an environment
configurable option.
---
 src/gallium/drivers/swr/swr_scratch.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_scratch.cpp 
b/src/gallium/drivers/swr/swr_scratch.cpp
index db095dea7e..ea49bbefba 100644
--- a/src/gallium/drivers/swr/swr_scratch.cpp
+++ b/src/gallium/drivers/swr/swr_scratch.cpp
@@ -28,6 +28,7 @@
 #include "swr_fence_work.h"
 #include "api.h"
 
+#define SCRATCH_SINGLE_ALLOCATION_LIMIT 2048
 
 void *
 swr_copy_to_scratch_space(struct swr_context *ctx,
@@ -39,7 +40,7 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
assert(space);
assert(size);
 
-   if (size >= 2048) { /* XXX TODO create KNOB_ for this */
+   if (size >= SCRATCH_SINGLE_ALLOCATION_LIMIT) {
   /* Use per draw SwrAllocDrawContextMemory for larger copies */
   ptr = SwrAllocDrawContextMemory(ctx->swrContext, size, 4);
} else {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/3] swr: Move environment config options into separate function.

2017-07-12 Thread Bruce Cherniak
Moved reading of environment config options out of
swr_create_screen_internal, into a separate swr_validate_env_options.
This is to keep from cluttering create_screen.
---
 src/gallium/drivers/swr/swr_screen.cpp | 60 +++---
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 53b5dadec9..9b3897ce6b 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1062,6 +1062,39 @@ swr_destroy_screen(struct pipe_screen *p_screen)
FREE(screen);
 }
 
+
+static void
+swr_validate_env_options(struct swr_screen *screen)
+{
+   /* XXX msaa under development, disable by default for now */
+   screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
+
+   /* validate env override values, within range and power of 2 */
+   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0);
+   if (msaa_max_count) {
+  if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
+|| !util_is_power_of_two(msaa_max_count)) {
+ fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
+ fprintf(stderr, "must be power of 2 between 1 and %d" \
+ " (or 0 to disable msaa)\n",
+   SWR_MAX_NUM_MULTISAMPLES);
+ msaa_max_count = 0;
+  }
+
+  fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
+  if (!msaa_max_count)
+ fprintf(stderr, "(msaa disabled)\n");
+
+  screen->msaa_max_count = msaa_max_count;
+   }
+
+   screen->msaa_force_enable = debug_get_bool_option(
+ "SWR_MSAA_FORCE_ENABLE", false);
+   if (screen->msaa_force_enable)
+  fprintf(stderr, "SWR_MSAA_FORCE_ENABLE: true\n");
+}
+
+
 PUBLIC
 struct pipe_screen *
 swr_create_screen_internal(struct sw_winsys *winsys)
@@ -1099,32 +1132,7 @@ swr_create_screen_internal(struct sw_winsys *winsys)
 
util_format_s3tc_init();
 
-   /* XXX msaa under development, disable by default for now */
-   screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
-
-   /* validate env override values, within range and power of 2 */
-   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0);
-   if (msaa_max_count) {
-  if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
-|| !util_is_power_of_two(msaa_max_count)) {
- fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
- fprintf(stderr, "must be power of 2 between 1 and %d" \
- " (or 0 to disable msaa)\n",
-   SWR_MAX_NUM_MULTISAMPLES);
- msaa_max_count = 0;
-  }
-
-  fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
-  if (!msaa_max_count)
- fprintf(stderr, "(msaa disabled)\n");
-
-  screen->msaa_max_count = msaa_max_count;
-   }
-
-   screen->msaa_force_enable = debug_get_bool_option(
- "SWR_MSAA_FORCE_ENABLE", false);
-   if (screen->msaa_force_enable)
-  fprintf(stderr, "SWR_MSAA_FORCE_ENABLE: true\n");
+   swr_validate_env_options(screen);
 
return &screen->base;
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/3] swr: Optimize large draws from client arrays.

2017-07-12 Thread Bruce Cherniak
If the size of a client memory copy is too large, don't copy.  The draw
will access the user buffer directly and then block.  This is faster and
more efficient than queuing many large client draws.

Applications that use large draws from client arrays benefit from this.
VMD is an example.

The threshold for this path defaults to 32KB.  This value can be
overridden by setting environment variable SWR_CLIENT_COPY_LIMIT.

v2: Use #define for default value, rather than hard-coded constant.



Bruce Cherniak (3):
  swr: Remove hard-coded constant and "todo" comment.
  swr: Move environment config options into separate function.
  swr: Add path to draw directly from client memory without copy.

 src/gallium/drivers/swr/swr_context.h   |  1 +
 src/gallium/drivers/swr/swr_draw.cpp|  9 
 src/gallium/drivers/swr/swr_scratch.cpp |  3 +-
 src/gallium/drivers/swr/swr_screen.cpp  | 73 +
 src/gallium/drivers/swr/swr_screen.h|  2 +
 src/gallium/drivers/swr/swr_state.cpp   | 37 -
 6 files changed, 87 insertions(+), 38 deletions(-)

-- 
2.11.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] swr: Add path to draw directly from client memory without copy.

2017-07-11 Thread Bruce Cherniak
If the size of a client memory copy is too large, don't copy.  The draw
will access the user buffer directly and then block.  This is faster and
more efficient than queuing many large client draws.

Applications that use large draws from client arrays benefit from this.
VMD is an example.

The threshold for this path defaults to 32KB.  This value can be
overridden by setting environment variable SWR_CLIENT_COPY_LIMIT.
---
 src/gallium/drivers/swr/swr_context.h  |  1 +
 src/gallium/drivers/swr/swr_draw.cpp   |  9 +
 src/gallium/drivers/swr/swr_screen.cpp | 10 +
 src/gallium/drivers/swr/swr_screen.h   |  2 ++
 src/gallium/drivers/swr/swr_state.cpp  | 37 --
 5 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.h 
b/src/gallium/drivers/swr/swr_context.h
index 3ff4bf3e2f..ab3057af96 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -51,6 +51,7 @@
 #define SWR_NEW_FRAMEBUFFER (1 << 15)
 #define SWR_NEW_CLIP (1 << 16)
 #define SWR_NEW_SO (1 << 17)
+#define SWR_LARGE_CLIENT_DRAW (1<<18) // Indicates client draw will block
 
 namespace std
 {
diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
b/src/gallium/drivers/swr/swr_draw.cpp
index f26b8e873c..cbd1558624 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -188,6 +188,15 @@ swr_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
info->instance_count,
info->start,
info->start_instance);
+
+   /* On large client-buffer draw, we used client buffer directly, without
+* copy.  Block until draw is finished.
+* VMD is an example application that benefits from this. */
+   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
+  struct swr_screen *screen = swr_screen(pipe->screen);
+  swr_fence_submit(ctx, screen->flush_fence);
+  swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
+   }
 }
 
 
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 9b3897ce6b..8be09697e6 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1066,6 +1066,16 @@ swr_destroy_screen(struct pipe_screen *p_screen)
 static void
 swr_validate_env_options(struct swr_screen *screen)
 {
+   /* The client_copy_limit sets a maximum on the amount of user-buffer memory
+* copied to scratch space on a draw.  Past this, the draw will access
+* user-buffer directly and then block.  This is faster than queuing many
+* large client draws. */
+   screen->client_copy_limit = 32768;
+   int client_copy_limit =
+  debug_get_num_option("SWR_CLIENT_COPY_LIMIT", 32768);
+   if (client_copy_limit > 0)
+  screen->client_copy_limit = client_copy_limit;
+
/* XXX msaa under development, disable by default for now */
screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
 
diff --git a/src/gallium/drivers/swr/swr_screen.h 
b/src/gallium/drivers/swr/swr_screen.h
index dc1bb47f02..6d6d1cb87d 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -43,8 +43,10 @@ struct swr_screen {
 
struct sw_winsys *winsys;
 
+   /* Configurable environment settings */
boolean msaa_force_enable;
uint8_t msaa_max_count;
+   uint32_t client_copy_limit;
 
HANDLE hJitMgr;
 };
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 45c9c213e5..6c406a37ec 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1267,12 +1267,20 @@ swr_update_derived(struct pipe_context *pipe,
 partial_inbounds = 0;
 min_vertex_index = info.min_index;
 
-/* Copy only needed vertices to scratch space */
 size = AlignUp(size, 4);
-const void *ptr = (const uint8_t *) vb->buffer.user + base;
-ptr = (uint8_t *)swr_copy_to_scratch_space(
-   ctx, &ctx->scratch->vertex_buffer, ptr, size);
-p_data = (const uint8_t *)ptr - base;
+/* If size of client memory copy is too large, don't copy. The
+ * draw will access user-buffer directly and then block.  This is
+ * faster than queuing many large client draws. */
+if (size >= screen->client_copy_limit) {
+   post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+   p_data = (const uint8_t *) vb->buffer.user;
+} else {
+   /* Copy only needed vertices to scratch space */
+   const void *ptr = (const uint8_t *) vb->buffer.user + base;
+   ptr = (uint8_t *)swr_copy_to_scratch_space(
+ ctx, &ctx->scratch->vertex_buffer, ptr, size);
+   p_data = (const uint8_t *)ptr - base;
+}
  }
 
  swrVertexBuffers[i] = {0};
@@ -

[Mesa-dev] [PATCH 0/3] swr: Optimize large draws from client arrays.

2017-07-11 Thread Bruce Cherniak
If the size of a client memory copy is too large, don't copy.  The draw
will access the user buffer directly and then block.  This is faster and
more efficient than queuing many large client draws.

Applications that use large draws from client arrays benefit from this.
VMD is an example.

The threshold for this path defaults to 32KB.  This value can be
overridden by setting environment variable SWR_CLIENT_COPY_LIMIT.

Bruce Cherniak (3):
  swr: Remove hard-coded constant and "todo" comment.
  swr: Move environment config options into separate function.
  swr: Add path to draw directly from client memory without copy.

 src/gallium/drivers/swr/swr_context.h   |  1 +
 src/gallium/drivers/swr/swr_draw.cpp|  9 +
 src/gallium/drivers/swr/swr_scratch.cpp |  3 +-
 src/gallium/drivers/swr/swr_screen.cpp  | 70 +
 src/gallium/drivers/swr/swr_screen.h|  2 +
 src/gallium/drivers/swr/swr_state.cpp   | 37 +++--
 6 files changed, 84 insertions(+), 38 deletions(-)

-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] swr: Move environment config options into separate function.

2017-07-11 Thread Bruce Cherniak
Moved reading of environment config options out of
swr_create_screen_internal, into a separate swr_validate_env_options.
This is to keep from cluttering create_screen.
---
 src/gallium/drivers/swr/swr_screen.cpp | 60 +++---
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 53b5dadec9..9b3897ce6b 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1062,6 +1062,39 @@ swr_destroy_screen(struct pipe_screen *p_screen)
FREE(screen);
 }
 
+
+static void
+swr_validate_env_options(struct swr_screen *screen)
+{
+   /* XXX msaa under development, disable by default for now */
+   screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
+
+   /* validate env override values, within range and power of 2 */
+   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0);
+   if (msaa_max_count) {
+  if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
+|| !util_is_power_of_two(msaa_max_count)) {
+ fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
+ fprintf(stderr, "must be power of 2 between 1 and %d" \
+ " (or 0 to disable msaa)\n",
+   SWR_MAX_NUM_MULTISAMPLES);
+ msaa_max_count = 0;
+  }
+
+  fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
+  if (!msaa_max_count)
+ fprintf(stderr, "(msaa disabled)\n");
+
+  screen->msaa_max_count = msaa_max_count;
+   }
+
+   screen->msaa_force_enable = debug_get_bool_option(
+ "SWR_MSAA_FORCE_ENABLE", false);
+   if (screen->msaa_force_enable)
+  fprintf(stderr, "SWR_MSAA_FORCE_ENABLE: true\n");
+}
+
+
 PUBLIC
 struct pipe_screen *
 swr_create_screen_internal(struct sw_winsys *winsys)
@@ -1099,32 +1132,7 @@ swr_create_screen_internal(struct sw_winsys *winsys)
 
util_format_s3tc_init();
 
-   /* XXX msaa under development, disable by default for now */
-   screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
-
-   /* validate env override values, within range and power of 2 */
-   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0);
-   if (msaa_max_count) {
-  if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
-|| !util_is_power_of_two(msaa_max_count)) {
- fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
- fprintf(stderr, "must be power of 2 between 1 and %d" \
- " (or 0 to disable msaa)\n",
-   SWR_MAX_NUM_MULTISAMPLES);
- msaa_max_count = 0;
-  }
-
-  fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
-  if (!msaa_max_count)
- fprintf(stderr, "(msaa disabled)\n");
-
-  screen->msaa_max_count = msaa_max_count;
-   }
-
-   screen->msaa_force_enable = debug_get_bool_option(
- "SWR_MSAA_FORCE_ENABLE", false);
-   if (screen->msaa_force_enable)
-  fprintf(stderr, "SWR_MSAA_FORCE_ENABLE: true\n");
+   swr_validate_env_options(screen);
 
return &screen->base;
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] swr: Remove hard-coded constant and "todo" comment.

2017-07-11 Thread Bruce Cherniak
Removed the hard-coded constant in favor of a #define.  Also removed
the TODO comment, since the constant value doesn't need an
environment-configurable option.
---
 src/gallium/drivers/swr/swr_scratch.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_scratch.cpp 
b/src/gallium/drivers/swr/swr_scratch.cpp
index db095dea7e..ea49bbefba 100644
--- a/src/gallium/drivers/swr/swr_scratch.cpp
+++ b/src/gallium/drivers/swr/swr_scratch.cpp
@@ -28,6 +28,7 @@
 #include "swr_fence_work.h"
 #include "api.h"
 
+#define SCRATCH_SINGLE_ALLOCATION_LIMIT 2048
 
 void *
 swr_copy_to_scratch_space(struct swr_context *ctx,
@@ -39,7 +40,7 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
assert(space);
assert(size);
 
-   if (size >= 2048) { /* XXX TODO create KNOB_ for this */
+   if (size >= SCRATCH_SINGLE_ALLOCATION_LIMIT) {
   /* Use per draw SwrAllocDrawContextMemory for larger copies */
   ptr = SwrAllocDrawContextMemory(ctx->swrContext, size, 4);
} else {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Limit memory held by defer deleted resources.

2017-06-30 Thread Bruce Cherniak
This patch limits the number of items on the fence work queue (the
deferred deletion list) by submitting a sync fence when the queue size
exceeds a threshold.  This initiates deferred deletion of all resources
on the list and decreases the total amount of memory held waiting for
"deferred deletion".

This resolves bug 101467 filed against swr for the piglit
streaming-texture-leak test.  For those running on smaller-memory
(16GB?) systems, this will keep the oom-killer from being triggered.

Thus far, we have not seen any real-world applications that exhibit
behavior like the streaming-texture-leak test, since any form of pipeline
flush will trigger the defer queue and properly free any retained
allocations.  Still, this patch covers such applications as well.

Cc: "17.1" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/swr/swr_screen.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index a80ec2adba..16a314c28a 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -992,6 +992,12 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct 
pipe_resource *pt)
   swr_fence_work_free(screen->flush_fence, spr->swr.pBaseAddress, true);
   swr_fence_work_free(screen->flush_fence,
   spr->secondary.pBaseAddress, true);
+
+  /* If work queue grows too large, submit a fence to force queue to
+   * drain.  This is mainly to decrease the amount of memory used by the
+   * piglit streaming-texture-leak test */
+  if (screen->pipe && swr_fence(screen->flush_fence)->work.count > 64)
+ swr_fence_submit(swr_context(screen->pipe), screen->flush_fence);
}
 
FREE(spr);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Minor cleanup of variable usage, no functional change.

2017-06-29 Thread Bruce Cherniak
In swr_update_derived, for consistency, index buffer validation should
be using the p_draw_info copy "info" rather than referencing
p_draw_info.

No functional change.
---
 src/gallium/drivers/swr/swr_state.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 7a8786d96f..03dc324afe 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1293,7 +1293,7 @@ swr_update_derived(struct pipe_context *pipe,
  const uint8_t *p_data;
  uint32_t size, pitch;
 
- pitch = p_draw_info->index_size ? p_draw_info->index_size : 
sizeof(uint32_t);
+ pitch = info.index_size ? info.index_size : sizeof(uint32_t);
  index_type = swr_convert_index_type(pitch);
 
  if (!info.has_user_indices) {
@@ -1319,7 +1319,7 @@ swr_update_derived(struct pipe_context *pipe,
  }
 
  SWR_INDEX_BUFFER_STATE swrIndexBuffer;
- swrIndexBuffer.format = 
swr_convert_index_type(p_draw_info->index_size);
+ swrIndexBuffer.format = swr_convert_index_type(info.index_size);
  swrIndexBuffer.pIndices = p_data;
  swrIndexBuffer.size = size;
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Remove need to allocate vertex buffer scratch space all in one go.

2017-06-28 Thread Bruce Cherniak
Deferred deletion (via "fence_work") has obsoleted the need to allocate
all client vertex buffer scratch space in a single chunk.  Scratch
allocations are now valid until the referenced fence is complete.
---
 src/gallium/drivers/swr/swr_state.cpp | 25 ++---
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 6dc06ed156..7a8786d96f 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1219,32 +1219,12 @@ swr_update_derived(struct pipe_context *pipe,
 */
if (ctx->dirty & SWR_NEW_VERTEX ||
   (p_draw_info && p_draw_info->index_size)) {
-  uint32_t scratch_total;
-  uint8_t *scratch = NULL;
 
   /* If being called by swr_draw_vbo, copy draw details */
   struct pipe_draw_info info = {0};
   if (p_draw_info)
  info = *p_draw_info;
 
-  /* We must get all the scratch space in one go */
-  scratch_total = 0;
-  for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
- struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
-
- if (!vb->is_user_buffer)
-continue;
-
- uint32_t elems, base, size;
- swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
- scratch_total += AlignUp(size, 4);
-  }
-
-  if (scratch_total) {
- scratch = (uint8_t *)swr_copy_to_scratch_space(
-   ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
-  }
-
   /* vertex buffers */
   SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
   for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
@@ -1289,9 +1269,8 @@ swr_update_derived(struct pipe_context *pipe,
 /* Copy only needed vertices to scratch space */
 size = AlignUp(size, 4);
 const void *ptr = (const uint8_t *) vb->buffer.user + base;
-memcpy(scratch, ptr, size);
-ptr = scratch;
-scratch += size;
+ptr = (uint8_t *)swr_copy_to_scratch_space(
+   ctx, &ctx->scratch->vertex_buffer, ptr, size);
 p_data = (const uint8_t *)ptr - base;
  }
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: conditionally validate vertex buffer state

2017-06-27 Thread Bruce Cherniak
Vertex buffer state doesn't need to be validated on every call,
only on dirty _NEW_VERTEX or indexed draws.

Unconditional validation was introduced as part of patch 330d0607ed6,
"remove pipe_index_buffer and set_index_buffer", with the expectation
we'd optimize later.
---
 src/gallium/drivers/swr/swr_state.cpp | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index f65e642753..6dc06ed156 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1212,12 +1212,13 @@ swr_update_derived(struct pipe_context *pipe,
   SwrSetViewports(ctx->swrContext, 1, vp, vpm);
}
 
-   /* Set vertex & index buffers */
-   /* (using draw info if called by swr_draw_vbo) */
-   /* TODO: This is always true, because the index buffer comes from
+   /* Set vertex & index buffers
+* (using draw info if called by swr_draw_vbo)
+* If indexed draw, revalidate since index buffer comes from
 * pipe_draw_info.
 */
-   if (1 || ctx->dirty & SWR_NEW_VERTEX) {
+   if (ctx->dirty & SWR_NEW_VERTEX ||
+  (p_draw_info && p_draw_info->index_size)) {
   uint32_t scratch_total;
   uint8_t *scratch = NULL;
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: set an explicit clear_rect if scissor is not enabled.

2017-06-26 Thread Bruce Cherniak
Fix regression of "no rendering" on simple apps like glxgears by
setting an explicit full surface clear_rect when scissor is not
enabled.

This regressed with commit 00173d91 "st/mesa: don't set 16
scissors and 16 viewports if they're unused" due to an assumption
that a default scissor rect is always set, which was the case prior
to this optimization.
---
 src/gallium/drivers/swr/swr_clear.cpp | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_clear.cpp 
b/src/gallium/drivers/swr/swr_clear.cpp
index 53f4e02d45..3a35805a7a 100644
--- a/src/gallium/drivers/swr/swr_clear.cpp
+++ b/src/gallium/drivers/swr/swr_clear.cpp
@@ -68,11 +68,19 @@ swr_clear(struct pipe_context *pipe,
((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your 
const'd-ness */
 #endif
 
+   SWR_RECT clear_rect;
+   /* If enabled, clear to scissor; otherwise clear full surface */
+   if (ctx->rasterizer && ctx->rasterizer->scissor) {
+  clear_rect = ctx->swr_scissor;
+   } else {
+  clear_rect = {0, 0, (int32_t)fb->width, (int32_t)fb->height};
+   }
+
for (unsigned i = 0; i < layers; ++i) {
   swr_update_draw_context(ctx);
   SwrClearRenderTarget(ctx->swrContext, clearMask, i,
color->f, depth, stencil,
-   ctx->swr_scissor);
+   clear_rect);
 
   // Mask out the attachments that are out of layers.
   if (fb->zsbuf &&
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] swr: Don't crash when encountering a VBO with stride = 0.

2017-06-15 Thread Bruce Cherniak
The swr driver uses vertex_buffer->stride to determine the number
of elements in a VBO. A recent change to the state-tracker made it
possible for VBOs to have stride=0. This resulted in a divide by zero
crash in the driver. The solution is to use the pre-calculated vertex
element stream_pitch in this case.

This patch fixes the crash in a number of piglit and VTK tests introduced
by 17f776c27be266f2.

There are several VTK tests that still crash and need proper handling of
vertex_buffer_index.  This will come in a follow-on patch.

v2: Correctly update all parameters for VBO constants (stride = 0).
Also fixes the remaining crashes/regressions that v1 did
not address, without touching vertex_buffer_index.
---
 src/gallium/drivers/swr/swr_state.cpp | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 08549e51a1..316872581d 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1247,13 +1247,24 @@ swr_update_derived(struct pipe_context *pipe,
 
  pitch = vb->stride;
  if (!vb->is_user_buffer) {
-/* VBO
- * size is based on buffer->width0 rather than info.max_index
- * to prevent having to validate VBO on each draw */
-size = vb->buffer.resource->width0;
-elems = size / pitch;
-partial_inbounds = size % pitch;
-min_vertex_index = 0;
+/* VBO */
+if (!pitch) {
+   /* If pitch=0 (ie vb->stride), buffer contains a single
+* constant attribute.  Use the stream_pitch which was
+* calculated during creation of vertex_elements_state for the
+* size of the attribute. */
+   size = ctx->velems->stream_pitch[i];
+   elems = 1;
+   partial_inbounds = 0;
+   min_vertex_index = 0;
+} else {
+   /* size is based on buffer->width0 rather than info.max_index
+* to prevent having to validate VBO on each draw. */
+   size = vb->buffer.resource->width0;
+   elems = size / pitch;
+   partial_inbounds = size % pitch;
+   min_vertex_index = 0;
+}
 
 p_data = swr_resource_data(vb->buffer.resource) + 
vb->buffer_offset;
  } else {
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Don't crash when encountering a VBO with stride = 0.

2017-06-13 Thread Bruce Cherniak
The swr driver uses vertex_buffer->stride to determine the number
of elements in a VBO. A recent change to the state-tracker made it
possible for VBOs to have stride=0. This resulted in a divide by zero
crash in the driver. The solution is to use the pre-calculated vertex
element stream_pitch in this case.

This patch fixes the crash in a number of piglit and VTK tests introduced
by 17f776c27be266f2.

There are several VTK tests that still crash and need proper handling of
vertex_buffer_index.  This will come in a follow-on patch.
---
 src/gallium/drivers/swr/swr_state.cpp | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 08549e51a1..0641699c4b 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1249,7 +1249,12 @@ swr_update_derived(struct pipe_context *pipe,
  if (!vb->is_user_buffer) {
 /* VBO
  * size is based on buffer->width0 rather than info.max_index
- * to prevent having to validate VBO on each draw */
+ * to prevent having to validate VBO on each draw.
+ *
+ * If pitch=0, use the stream_pitch which is calculated during
+ * creation of vertex_elements_state. */
+if (!pitch)
+   pitch = ctx->velems->stream_pitch[i];
 size = vb->buffer.resource->width0;
 elems = size / pitch;
 partial_inbounds = size % pitch;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] swr: move msaa resolve to generalized StoreTile

2017-05-04 Thread Bruce Cherniak
v3: list piglit tests fixed by this patch. Fixed typo Tim pointed out.
v2: Reword commit message to more closely adhere to community
guidelines.

This patch moves msaa resolve down into core/StoreTiles where the
surface format conversion routines are available.  The previous
"experimental" resolve was limited to 8-bit unsigned render targets.

This fixes a number of piglit msaa tests by adding resolve support for
all the render target formats we support.

Specifically:
layered-rendering/gl-layer-render: fail->pass
layered-rendering/gl-layer-render-storage: fail->pass
multisample-formats *[2,4,8,16] gl_arb_texture_rg: crash->pass
multisample-formats *[2,4,8,16] gl_ext_texture_snorm: crash->pass
multisample-formats *[2,4,8,16] gl_arb_texture_float: fail->pass
multisample-formats *[2,4,8,16] gl_arb_texture_rg-float: fail->pass

MSAA is still disabled by default, but can be enabled with
"export SWR_MSAA_MAX_COUNT=4" (1,2,4,8,16 are options)
The default is 0, which is disabled.

This patch improves the number of multisample-formats supported by swr,
and fixes several crashes currently in the 17.1 branch.  Therefore, it
should be considered for inclusion in the 17.1 stable release.  Being
disabled by default, it poses no risk to most users of swr.

cc: mesa-sta...@lists.freedesktop.org
---
 .../drivers/swr/rasterizer/memory/StoreTile.h  | 75 +
 src/gallium/drivers/swr/swr_context.cpp| 77 +-
 src/gallium/drivers/swr/swr_screen.cpp | 10 +--
 3 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h 
b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
index ffde574c03..12a5f3d8ce 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
 }
 }
 }
+
+//
+/// @brief Resolves an 8x8 raster tile to the resolve destination surface.
+/// @param pSrc - Pointer to raster tile.
+/// @param pDstSurface - Destination surface state
+/// @param x, y - Coordinates to raster tile.
+/// @param sampleOffset - Offset between adjacent multisamples
+INLINE static void Resolve(
+uint8_t *pSrc,
+SWR_SURFACE_STATE* pDstSurface,
+uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t 
renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
+{
+uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 
1U);
+uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 
1U);
+
+float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
+
+// For each raster tile pixel (rx, ry)
+for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
+{
+for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
+{
+// Perform bounds checking.
+if (((x + rx) < lodWidth) &&
+((y + ry) < lodHeight))
+{
+// Sum across samples
+float resolveColor[4] = {0};
+for (uint32_t sampleNum = 0; sampleNum < 
pDstSurface->numSamples; sampleNum++)
+{
+float sampleColor[4] = {0};
+uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
+GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
+resolveColor[0] += sampleColor[0];
+resolveColor[1] += sampleColor[1];
+resolveColor[2] += sampleColor[2];
+resolveColor[3] += sampleColor[3];
+}
+
+// Divide by numSamples to average
+resolveColor[0] *= oneOverNumSamples;
+resolveColor[1] *= oneOverNumSamples;
+resolveColor[2] *= oneOverNumSamples;
+resolveColor[3] *= oneOverNumSamples;
+
+// Use the resolve surface state
+SWR_SURFACE_STATE* pResolveSurface = 
(SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
+uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress((x + rx), (y + ry),
+pResolveSurface->arrayIndex + renderTargetArrayIndex, 
pResolveSurface->arrayIndex + renderTargetArrayIndex,
+0, pResolveSurface->lod, pResolveSurface);
+{
+ConvertPixelFromFloat(pDst, resolveColor);
+}
+}
+}
+}
+}
+
 };
 
 template
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
 pfnStore[sampleNum] = (bForceGeneric || 
KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile::Store : OptStoreRasterTile::Store;
 }
 
+// Save

[Mesa-dev] [PATCH v2] swr: move msaa resolve to generalized StoreTile

2017-04-27 Thread Bruce Cherniak
v2: Reword commit message to more closely adhere to community
guidelines.

This patch moves msaa resolve down into core/StoreTiles where the
surface format conversion routines are available.  The previous
"experimental" resolve was limited to 8-bit unsigned render targets.

This fixes a number of piglit msaa tests by adding resolve support for
all the render target formats we support.

MSAA is still disabled by default, but can be enabled with
"export SWR_MSAA_MAX_COUNT=4" (1,2,4,8,16 are options)
The default is 0, which is disabled.

Because it fixes a number of piglit tests, I kindly request inclusion
into 17.1 stable.

cc: mesa-sta...@lists.freedesktop.org
---
 .../drivers/swr/rasterizer/memory/StoreTile.h  | 75 +
 src/gallium/drivers/swr/swr_context.cpp| 77 +-
 src/gallium/drivers/swr/swr_screen.cpp | 10 +--
 3 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h 
b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
index ffde574c03..12a5f3d8ce 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
 }
 }
 }
+
+//
+/// @brief Resolves an 8x8 raster tile to the resolve destination surface.
+/// @param pSrc - Pointer to raster tile.
+/// @param pDstSurface - Destination surface state
+/// @param x, y - Coordinates to raster tile.
+/// @param sampleOffset - Offset between adjacent multisamples
+INLINE static void Resolve(
+uint8_t *pSrc,
+SWR_SURFACE_STATE* pDstSurface,
+uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t 
renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
+{
+uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 
1U);
+uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 
1U);
+
+float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
+
+// For each raster tile pixel (rx, ry)
+for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
+{
+for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
+{
+// Perform bounds checking.
+if (((x + rx) < lodWidth) &&
+((y + ry) < lodHeight))
+{
+// Sum across samples
+float resolveColor[4] = {0};
+for (uint32_t sampleNum = 0; sampleNum < 
pDstSurface->numSamples; sampleNum++)
+{
+float sampleColor[4] = {0};
+uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
+GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
+resolveColor[0] += sampleColor[0];
+resolveColor[1] += sampleColor[1];
+resolveColor[2] += sampleColor[2];
+resolveColor[3] += sampleColor[3];
+}
+
+// Divide by numSamples to average
+resolveColor[0] *= oneOverNumSamples;
+resolveColor[1] *= oneOverNumSamples;
+resolveColor[2] *= oneOverNumSamples;
+resolveColor[3] *= oneOverNumSamples;
+
+// Use the resolve surface state
+SWR_SURFACE_STATE* pResolveSurface = 
(SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
+uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress((x + rx), (y + ry),
+pResolveSurface->arrayIndex + renderTargetArrayIndex, 
pResolveSurface->arrayIndex + renderTargetArrayIndex,
+0, pResolveSurface->lod, pResolveSurface);
+{
+ConvertPixelFromFloat(pDst, resolveColor);
+}
+}
+}
+}
+}
+
 };
 
 template
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
 pfnStore[sampleNum] = (bForceGeneric || 
KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile::Store : OptStoreRasterTile::Store;
 }
 
+// Save original for pSrcHotTile resolve.
+uint8_t *pResolveSrcHotTile = pSrcHotTile;
+
 // Store each raster tile from the hot tile to the destination surface.
 for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += 
KNOB_TILE_Y_DIM)
 {
@@ -2328,6 +2389,20 @@ struct StoreMacroTile
 }
 }
 }
+
+if (pDstSurface->pAuxBaseAddress)
+{
+uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * 
(FormatTraits::bpp / 8);
+// Store each raster tile from the hot tile to the destination 
surface.
+for(uint32_t row = 0; ro

[Mesa-dev] [PATCH] swr: MSAA fixes: piglit crashes, additional formats, improve perf.

2017-04-26 Thread Bruce Cherniak
This patch moves msaa resolve down into core/StoreTiles where it can
take advantage of all the surface formats - previous resolve
was limited to 8-bit unsigned.  This fixes a number of piglit msaa
tests that were crashing.  MSAA performance is also greatly improved
because resolve is done in parallel.

MSAA is still disabled by default, but can be enabled with
"export SWR_MSAA_MAX_COUNT=4" (1,2,4,8,16 are options)
The default is 0, which is disabled.

Because it fixes piglit crashes, this should be included in 17.1 stable.

cc: mesa-sta...@lists.freedesktop.org

---
 .../drivers/swr/rasterizer/memory/StoreTile.h  | 75 +
 src/gallium/drivers/swr/swr_context.cpp| 77 +-
 src/gallium/drivers/swr/swr_screen.cpp | 10 +--
 3 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h 
b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
index ffde574c03..12a5f3d8ce 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
 }
 }
 }
+
+//
+/// @brief Resolves an 8x8 raster tile to the resolve destination surface.
+/// @param pSrc - Pointer to raster tile.
+/// @param pDstSurface - Destination surface state
+/// @param x, y - Coordinates to raster tile.
+/// @param sampleOffset - Offset between adjacent multisamples
+INLINE static void Resolve(
+uint8_t *pSrc,
+SWR_SURFACE_STATE* pDstSurface,
+uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t 
renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
+{
+uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 
1U);
+uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 
1U);
+
+float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
+
+// For each raster tile pixel (rx, ry)
+for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
+{
+for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
+{
+// Perform bounds checking.
+if (((x + rx) < lodWidth) &&
+((y + ry) < lodHeight))
+{
+// Sum across samples
+float resolveColor[4] = {0};
+for (uint32_t sampleNum = 0; sampleNum < 
pDstSurface->numSamples; sampleNum++)
+{
+float sampleColor[4] = {0};
+uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
+GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
+resolveColor[0] += sampleColor[0];
+resolveColor[1] += sampleColor[1];
+resolveColor[2] += sampleColor[2];
+resolveColor[3] += sampleColor[3];
+}
+
+// Divide by numSamples to average
+resolveColor[0] *= oneOverNumSamples;
+resolveColor[1] *= oneOverNumSamples;
+resolveColor[2] *= oneOverNumSamples;
+resolveColor[3] *= oneOverNumSamples;
+
+// Use the resolve surface state
+SWR_SURFACE_STATE* pResolveSurface = 
(SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
+uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress((x + rx), (y + ry),
+pResolveSurface->arrayIndex + renderTargetArrayIndex, 
pResolveSurface->arrayIndex + renderTargetArrayIndex,
+0, pResolveSurface->lod, pResolveSurface);
+{
+ConvertPixelFromFloat(pDst, resolveColor);
+}
+}
+}
+}
+}
+
 };
 
 template
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
 pfnStore[sampleNum] = (bForceGeneric || 
KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile::Store : OptStoreRasterTile::Store;
 }
 
+// Save original for pSrcHotTile resolve.
+uint8_t *pResolveSrcHotTile = pSrcHotTile;
+
 // Store each raster tile from the hot tile to the destination surface.
 for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += 
KNOB_TILE_Y_DIM)
 {
@@ -2328,6 +2389,20 @@ struct StoreMacroTile
 }
 }
 }
+
+if (pDstSurface->pAuxBaseAddress)
+{
+uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * 
(FormatTraits::bpp / 8);
+// Store each raster tile from the hot tile to the destination 
surface.
+for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += 
KNOB_TILE_Y_DIM)
+{
+for(uint32_t col

[Mesa-dev] [PATCH] swr: Enable MSAA in OpenSWR software renderer

2017-04-13 Thread Bruce Cherniak
This patch enables multisample antialiasing in the OpenSWR software renderer.

MSAA is a proof-of-concept/work-in-progress with bug fixes and performance
on the way.  We wanted to get the changes out now to allow several customers
to begin experimenting with MSAA in a software renderer.  So as not to
impact current customers, MSAA is turned off by default - previous
functionality and performance remain intact.  It is easily enabled via
environment variables, as described below.

It has only been tested with the glx-lib winsys.  The intention is to
enable other state-trackers, on both Windows and Linux, and to more fully
support FBOs.

There are 2 environment variables that affect behavior:

* SWR_MSAA_FORCE_ENABLE - force MSAA on, for apps that are not designed
  for MSAA... Beware, results will vary.  This is mainly for testing.

* SWR_MSAA_MAX_SAMPLE_COUNT - sets maximum supported number of
  samples (1,2,4,8,16), or 0 to disable MSAA altogether.
  (The default is currently 0.)


---
 src/gallium/drivers/swr/swr_context.cpp |  90 +-
 src/gallium/drivers/swr/swr_context.h   |   3 +
 src/gallium/drivers/swr/swr_resource.h  |   4 +
 src/gallium/drivers/swr/swr_screen.cpp  | 159 +---
 src/gallium/drivers/swr/swr_screen.h|   8 ++
 src/gallium/drivers/swr/swr_state.cpp   |  74 +--
 6 files changed, 313 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 6f46d66..aa5cca8 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -267,20 +267,104 @@ swr_resource_copy(struct pipe_context *pipe,
 }
 
 
+/* XXX: This resolve is incomplete and suboptimal. It will be removed once the
+ * pipelined resolve blit works. */
+void
+swr_do_msaa_resolve(struct pipe_resource *src_resource,
+struct pipe_resource *dst_resource)
+{
+   /* This is a pretty dumb inline resolve.  It only supports 8-bit formats
+* (ex RGBA8/BGRA8) - which are most common display formats anyway.
+*/
+
+   /* quick check for 8-bit and number of components */
+   uint8_t bits_per_component =
+  util_format_get_component_bits(src_resource->format,
+UTIL_FORMAT_COLORSPACE_RGB, 0);
+
+   /* Unsupported resolve format */
+   assert(src_resource->format == dst_resource->format);
+   assert(bits_per_component == 8);
+   if ((src_resource->format != dst_resource->format) ||
+   (bits_per_component != 8)) {
+  return;
+   }
+
+   uint8_t src_num_comps = util_format_get_nr_components(src_resource->format);
+
+   SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr;
+   SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr;
+
+   uint32_t *src, *dst, offset;
+   uint32_t num_samples = src_surface->numSamples;
+   float recip_num_samples = 1.0f / num_samples;
+   for (uint32_t y = 0; y < src_surface->height; y++) {
+  for (uint32_t x = 0; x < src_surface->width; x++) {
+ float r = 0.0f;
+ float g = 0.0f;
+ float b = 0.0f;
+ float a = 0.0f;
+ for (uint32_t sampleNum = 0;  sampleNum < num_samples; sampleNum++) {
+offset = ComputeSurfaceOffset(x, y, 0, 0, sampleNum, 0, 
src_surface);
+src = (uint32_t *) src_surface->pBaseAddress + 
offset/src_num_comps;
+const uint32_t sample = *src;
+r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples;
+g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples;
+b += (float)((sample >>  8) & 0xff) / 255.0f * recip_num_samples;
+a += (float)((sample  ) & 0xff) / 255.0f * recip_num_samples;
+ }
+ uint32_t result = 0;
+ result  = ((uint8_t)(r * 255.0f) & 0xff) << 24;
+ result |= ((uint8_t)(g * 255.0f) & 0xff) << 16;
+ result |= ((uint8_t)(b * 255.0f) & 0xff) <<  8;
+ result |= ((uint8_t)(a * 255.0f) & 0xff);
+ offset = ComputeSurfaceOffset(x, y, 0, 0, 0, 0, src_surface);
+ dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps;
+ *dst = result;
+  }
+   }
+}
+
+
 static void
 swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
 {
struct swr_context *ctx = swr_context(pipe);
+   /* Make a copy of the const blit_info, so we can modify it */
struct pipe_blit_info info = *blit_info;
 
-   if (blit_info->render_condition_enable && !swr_check_render_cond(pipe))
+   if (info.render_condition_enable && !swr_check_render_cond(pipe))
   return;
 
if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1
&& !util_format_is_depth_or_stencil(info.src.resource->format)
&& !util_format_is_pure_integer(info.src.resource->format)) {
-  debug_printf("swr: color resolve unimplemented\n");
-  return;
+  debug_printf("swr_blit: color resolve : %d -> %d\n",
+info.src.resou

[Mesa-dev] [PATCH] swr: Removed unnecessary PIPE_BIND flags from swr_is_format_supported

2017-04-12 Thread Bruce Cherniak
Removed unnecessary and probably wrong PIPE_BIND_SCANOUT and PIPE_BIND_SHARED
flags in favor of check on single PIPE_BIND_DISPLAY_TARGET flag.

Reference llvmpipe change 

---
 src/gallium/drivers/swr/swr_screen.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 3d3d103..87fd898 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -103,8 +103,7 @@ swr_is_format_supported(struct pipe_screen *screen,
if (sample_count > 1)
   return FALSE;
 
-   if (bind
-   & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
+   if (bind & PIPE_BIND_DISPLAY_TARGET) {
   if (!winsys->is_displaytarget_format_supported(winsys, bind, format))
  return FALSE;
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Align swr_context allocation to SIMD alignment.

2017-04-12 Thread Bruce Cherniak
The context now contains SIMD vectors which must be aligned (specifically
samplePositions in the rastState in the derived state).  Failure to align
can result in segv crash on unaligned memory access in vector
instructions.

---
 src/gallium/drivers/swr/swr_context.cpp | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 8c5a269..6f46d66 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -386,7 +386,7 @@ swr_destroy(struct pipe_context *pipe)
if (screen->pipe == pipe)
   screen->pipe = NULL;
 
-   FREE(ctx);
+   AlignedFree(ctx);
 }
 
 
@@ -452,7 +452,10 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const 
SWR_STATS_FE *pStats)
 struct pipe_context *
 swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
 {
-   struct swr_context *ctx = CALLOC_STRUCT(swr_context);
+   struct swr_context *ctx = (struct swr_context *)
+  AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);
+   memset(ctx, 0, sizeof(struct swr_context));
+
ctx->blendJIT =
   new std::unordered_map;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/glx: Add awareness for multisample pixel formats to st/glx-xlib.

2017-04-07 Thread Bruce Cherniak
In preparation for enabling MSAA in OpenSWR, the state trackers need to
be aware of multisample pixel formats for software renderers.  This patch
allows glx-xlib to query the renderer for support of pixel
formats with multisample, and create multisample resources.

This change is benign to softpipe and llvmpipe, as is_format_supported
returns FALSE for any sample_count > 1.  OpenSWR does the same at the
moment, but that will change soon.
---
 src/gallium/state_trackers/glx/xlib/glx_api.c | 25 +++--
 src/gallium/state_trackers/glx/xlib/xm_api.c  | 19 ++-
 src/gallium/state_trackers/glx/xlib/xm_st.c   |  1 +
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c 
b/src/gallium/state_trackers/glx/xlib/glx_api.c
index 642ece7..c473a0f 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -181,7 +181,7 @@ save_glx_visual( Display *dpy, XVisualInfo *vinfo,
  GLint depth_size, GLint stencil_size,
  GLint accumRedSize, GLint accumGreenSize,
  GLint accumBlueSize, GLint accumAlphaSize,
- GLint level, GLint numAuxBuffers )
+ GLint level, GLint numAuxBuffers, GLint num_samples )
 {
GLboolean ximageFlag = GL_TRUE;
XMesaVisual xmvis;
@@ -229,6 +229,7 @@ save_glx_visual( Display *dpy, XVisualInfo *vinfo,
   if (v->display == dpy
   && v->mesa_visual.level == level
   && v->mesa_visual.numAuxBuffers == numAuxBuffers
+  && v->mesa_visual.samples == num_samples
   && v->ximage_flag == ximageFlag
   && v->mesa_visual.rgbMode == rgbFlag
   && v->mesa_visual.doubleBufferMode == dbFlag
@@ -254,7 +255,7 @@ save_glx_visual( Display *dpy, XVisualInfo *vinfo,
   stereoFlag, ximageFlag,
   depth_size, stencil_size,
   accumRedSize, accumBlueSize,
-  accumBlueSize, accumAlphaSize, 0, level,
+  accumBlueSize, accumAlphaSize, num_samples, 
level,
   GLX_NONE_EXT );
if (xmvis) {
   /* Save a copy of the pointer now so we can find this visual again
@@ -344,7 +345,8 @@ create_glx_visual( Display *dpy, XVisualInfo *visinfo )
   accBits, /* b */
   accBits, /* a */
   0, /* level */
-  0  /* numAux */
+  0, /* numAux */
+  0  /* numSamples */
  );
}
else {
@@ -739,6 +741,7 @@ choose_visual( Display *dpy, int screen, const int *list, 
GLboolean fbConfig )
XMesaVisual xmvis = NULL;
int desiredVisualID = -1;
int numAux = 0;
+   GLint num_samples = 0;
 
xmesa_init( dpy );
 
@@ -905,12 +908,13 @@ choose_visual( Display *dpy, int screen, const int *list, 
GLboolean fbConfig )
   * GLX_ARB_multisample
   */
  case GLX_SAMPLE_BUFFERS_ARB:
+/* ignore */
+parselist++;
+parselist++;
+break;
  case GLX_SAMPLES_ARB:
 parselist++;
-if (*parselist++ != 0) {
-   /* ms not supported */
-   return NULL;
-}
+num_samples = *parselist++;
 break;
 
  /*
@@ -1067,7 +1071,8 @@ choose_visual( Display *dpy, int screen, const int *list, 
GLboolean fbConfig )
   xmvis = save_glx_visual( dpy, vis, rgb_flag, alpha_flag, double_flag,
stereo_flag, depth_size, stencil_size,
accumRedSize, accumGreenSize,
-   accumBlueSize, accumAlphaSize, level, numAux );
+   accumBlueSize, accumAlphaSize, level, numAux,
+   num_samples );
}
 
return xmvis;
@@ -1602,10 +1607,10 @@ get_config( XMesaVisual xmvis, int attrib, int *value, 
GLboolean fbconfig )
* GLX_ARB_multisample
*/
   case GLX_SAMPLE_BUFFERS_ARB:
- *value = 0;
+ *value = xmvis->mesa_visual.sampleBuffers;
  return 0;
   case GLX_SAMPLES_ARB:
- *value = 0;
+ *value = xmvis->mesa_visual.samples;
  return 0;
 
   /*
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c 
b/src/gallium/state_trackers/glx/xlib/xm_api.c
index 398152e..881dd44 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -453,11 +453,11 @@ choose_pixel_format(XMesaVisual v)
  * stencil sizes.
  */
 static enum pipe_format
-choose_depth_stencil_format(XMesaDisplay xmdpy, int depth, int stencil)
+choose_depth_stencil_format(XMesaDisplay xmdpy, int depth, int stencil,
+int sample_coun

[Mesa-dev] [PATCH] swr: Fix crash in swr_update_derived following st/mesa state changes.

2017-03-01 Thread Bruce Cherniak
Recent change to st/mesa state update logic caused major regressions to
swr validation code.

swr uses the same validation logic (swr_update_derived) for both draw
and Clear calls.  New st/mesa state update logic results in certain state
objects not being set/bound during Clear.  This was causing null ptr
exceptions.  Creation of static dummy state objects allows setting these
pointers during Clear validation, without interfering with relevant state
validation.

Once fixed, new logic also highlighted an error in dirty bit checking for
fragment shader and clip validation.

(The alternative is to have a simplified validation routine for Clear.
We may do that at some point.)
---
 src/gallium/drivers/swr/swr_shader.cpp |  6 +
 src/gallium/drivers/swr/swr_state.cpp  | 43 +++---
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_shader.cpp 
b/src/gallium/drivers/swr/swr_shader.cpp
index 676938c..9169f6d 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -366,6 +366,9 @@ BuilderSWR::CompileVS(struct swr_context *ctx, 
swr_jit_vs_key &key)
 PFN_VERTEX_FUNC
 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
 {
+   if (!ctx->vs->pipe.tokens)
+  return NULL;
+
BuilderSWR builder(
   reinterpret_cast(swr_screen(ctx->pipe.screen)->hJitMgr),
   "VS");
@@ -726,6 +729,9 @@ BuilderSWR::CompileFS(struct swr_context *ctx, 
swr_jit_fs_key &key)
 PFN_PIXEL_KERNEL
 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
 {
+   if (!ctx->fs->pipe.tokens)
+  return NULL;
+
BuilderSWR builder(
   reinterpret_cast(swr_screen(ctx->pipe.screen)->hJitMgr),
   "FS");
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 5e3d58d..e1f1734 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -914,6 +914,39 @@ swr_update_derived(struct pipe_context *pipe,
struct swr_context *ctx = swr_context(pipe);
struct swr_screen *screen = swr_screen(pipe->screen);
 
+   /* When called from swr_clear (p_draw_info = null), set any null
+* state-objects to the dummy state objects to prevent nullptr dereference
+* in validation below.
+*
+* Important that this remains static for zero initialization.  These
+* aren't meant to be proper state objects, just empty structs. They will
+* not be written to.
+*
+* Shaders can't be part of the union since they contain std::unordered_map
+*/
+   static struct {
+  union {
+ struct pipe_rasterizer_state rasterizer;
+ struct pipe_depth_stencil_alpha_state depth_stencil;
+ struct swr_blend_state blend;
+  } state;
+  struct swr_vertex_shader vs;
+  struct swr_fragment_shader fs;
+   } swr_dummy;
+
+   if (!p_draw_info) {
+  if (!ctx->rasterizer)
+ ctx->rasterizer = &swr_dummy.state.rasterizer;
+  if (!ctx->depth_stencil)
+ ctx->depth_stencil = &swr_dummy.state.depth_stencil;
+  if (!ctx->blend)
+ ctx->blend = &swr_dummy.state.blend;
+  if (!ctx->vs)
+ ctx->vs = &swr_dummy.vs;
+  if (!ctx->fs)
+ ctx->fs = &swr_dummy.fs;
+   }
+
/* Update screen->pipe to current pipe context. */
if (screen->pipe != pipe)
   screen->pipe = pipe;
@@ -1236,8 +1269,12 @@ swr_update_derived(struct pipe_context *pipe,
}
 
/* FragmentShader */
-   if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
- | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
+   if (ctx->dirty & (SWR_NEW_FS |
+ SWR_NEW_VS |
+ SWR_NEW_RASTERIZER |
+ SWR_NEW_SAMPLER |
+ SWR_NEW_SAMPLER_VIEW |
+ SWR_NEW_FRAMEBUFFER)) {
   swr_jit_fs_key key;
   swr_generate_fs_key(key, ctx, ctx->fs);
   auto search = ctx->fs->map.find(key);
@@ -1505,7 +1542,7 @@ swr_update_derived(struct pipe_context *pipe,
   }
}
 
-   if (ctx->dirty & SWR_NEW_CLIP) {
+   if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
   // shader exporting clip distances overrides all user clip planes
   if (ctx->rasterizer->clip_plane_enable &&
   !ctx->vs->info.base.num_written_clipdistance)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs: update features.txt for GL_ARB_clear_texture with swr

2017-02-25 Thread Bruce Cherniak
---
 docs/features.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/features.txt b/docs/features.txt
index d9528e9..c42581a 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -192,7 +192,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, radeonsi
 
   GL_MAX_VERTEX_ATTRIB_STRIDE   DONE (all drivers)
   GL_ARB_buffer_storage DONE (i965, nv50, r600)
-  GL_ARB_clear_texture  DONE (i965, nv50, 
r600, llvmpipe, softpipe)
+  GL_ARB_clear_texture  DONE (i965, nv50, 
r600, llvmpipe, softpipe, swr)
   GL_ARB_enhanced_layouts   DONE (i965, nv50, 
llvmpipe, softpipe)
   - compile-time constant expressions   DONE
   - explicit byte offsets for blocksDONE
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: enable clear_texture with util_clear_texture

2017-02-25 Thread Bruce Cherniak
Passes corresponding piglit tests.
---
 src/gallium/drivers/swr/swr_context.cpp | 1 +
 src/gallium/drivers/swr/swr_screen.cpp  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 3e17edc..b89ce1b 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -486,6 +486,7 @@ swr_create_context(struct pipe_screen *p_screen, void 
*priv, unsigned flags)
ctx->pipe.buffer_subdata = u_default_buffer_subdata;
ctx->pipe.texture_subdata = u_default_texture_subdata;
 
+   ctx->pipe.clear_texture = util_clear_texture;
ctx->pipe.resource_copy_region = swr_resource_copy;
ctx->pipe.render_condition = swr_render_condition;
 
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index f4fe1f3..f2ad4dd 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -244,6 +244,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_CULL_DISTANCE:
@@ -284,7 +285,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
-   case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: [rasterizer core] Removed unused clip code.

2017-02-03 Thread Bruce Cherniak
Removed unused Clip() and FRUSTUM_CLIP_MASK define.
---
 src/gallium/drivers/swr/rasterizer/core/clip.cpp | 22 --
 src/gallium/drivers/swr/rasterizer/core/clip.h   |  4 
 2 files changed, 26 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp 
b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index 7b1e09d..0a6afe5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -157,28 +157,6 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
 return i;
 }
 
-
-
-void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float 
*pOutTriangles, int *numVerts, float *pOutAttribs)
-{
-// temp storage to hold at least 6 sets of vertices, the max number that 
can be created during clipping
-OSALIGNSIMD(float) tempPts[6 * 4];
-OSALIGNSIMD(float) tempAttribs[6 * KNOB_NUM_ATTRIBUTES * 4];
-
-// we opt to clip to viewport frustum to produce smaller triangles for 
rasterization precision
-int NumOutPts = ClipTriToPlane(pTriangle, 3, pAttribs, 
numAttribs, tempPts, tempAttribs);
-NumOutPts = ClipTriToPlane(tempPts, NumOutPts, tempAttribs, 
numAttribs, pOutTriangles, pOutAttribs);
-NumOutPts = ClipTriToPlane(pOutTriangles, NumOutPts, 
pOutAttribs, numAttribs, tempPts, tempAttribs);
-NumOutPts = ClipTriToPlane(tempPts, NumOutPts, tempAttribs, 
numAttribs, pOutTriangles, pOutAttribs);
-NumOutPts = ClipTriToPlane(pOutTriangles, NumOutPts, 
pOutAttribs, numAttribs, tempPts, tempAttribs);
-NumOutPts = ClipTriToPlane(tempPts, NumOutPts, tempAttribs, 
numAttribs, pOutTriangles, pOutAttribs);
-
-SWR_ASSERT(NumOutPts <= 6);
-
-*numVerts = NumOutPts;
-return;
-}
-
 void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, 
simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari 
viewportIdx)
 {
 SWR_CONTEXT *pContext = pDC->pContext;
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h 
b/src/gallium/drivers/swr/rasterizer/core/clip.h
index f19858f..23a768f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -56,12 +56,8 @@ enum SWR_CLIPCODES
 GUARDBAND_BOTTOM = (0x80 << CLIPCODE_SHIFT | 0x8)
 };
 
-#define FRUSTUM_CLIP_MASK 
(FRUSTUM_LEFT|FRUSTUM_TOP|FRUSTUM_RIGHT|FRUSTUM_BOTTOM|FRUSTUM_NEAR|FRUSTUM_FAR)
 #define GUARDBAND_CLIP_MASK 
(FRUSTUM_NEAR|FRUSTUM_FAR|GUARDBAND_LEFT|GUARDBAND_TOP|GUARDBAND_RIGHT|GUARDBAND_BOTTOM|NEGW)
 
-void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float 
*pOutTriangles, 
-  int *numVerts, float *pOutAttribs);
-
 INLINE
 void ComputeClipCodes(const API_STATE& state, const simdvector& vertex, 
simdscalar& clipCodes, simdscalari viewportIndexes)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] swr: [rasterizer core] Remove dead code Clipper::ClipScalar()

2017-02-02 Thread Bruce Cherniak
v2: includes bugzilla reference, same code change


Clipper::ClipScalar() is dead code and should be removed.  It is causing
an error with gcc-7 because it references a now defunct member.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99633
CC: "13.0 17.0" 
---
 src/gallium/drivers/swr/rasterizer/core/clip.h | 39 --
 1 file changed, 39 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h 
b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 085e4a9..f19858f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -262,45 +262,6 @@ public:
 return _simd_movemask_ps(vClipCullMask);
 }
 
-// clip a single primitive
-int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* 
pOutAttribs)
-{
-OSALIGNSIMD(float) inVerts[3 * 4];
-OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
-
-// transpose primitive position
-__m128 verts[3];
-pa.AssembleSingle(VERTEX_POSITION_SLOT, primIndex, verts);
-_mm_store_ps(&inVerts[0], verts[0]);
-_mm_store_ps(&inVerts[4], verts[1]);
-_mm_store_ps(&inVerts[8], verts[2]);
-
-// transpose attribs
-uint32_t numScalarAttribs = this->state.linkageCount * 4;
-
-int idx = 0;
-DWORD slot = 0;
-uint32_t mapIdx = 0;
-uint32_t tmpLinkage = uint32_t(this->state.linkageMask);
-while (_BitScanForward(&slot, tmpLinkage))
-{
-tmpLinkage &= ~(1 << slot);
-// Compute absolute attrib slot in vertex array
-uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + 
this->state.linkageMap[mapIdx++];
-__m128 attrib[3];// triangle attribs (always 4 wide)
-pa.AssembleSingle(inputSlot, primIndex, attrib);
-_mm_store_ps(&inAttribs[idx], attrib[0]);
-_mm_store_ps(&inAttribs[idx + numScalarAttribs], attrib[1]);
-_mm_store_ps(&inAttribs[idx + numScalarAttribs * 2], attrib[2]);
-idx += 4;
-}
-
-int numVerts;
-Clip(inVerts, inAttribs, numScalarAttribs, pOutPos, &numVerts, 
pOutAttribs);
-
-return numVerts;
-}
-
 // clip SIMD primitives
 void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, 
PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: [rasterizer core] Remove dead code Clipper::ClipScalar()

2017-02-02 Thread Bruce Cherniak
Clipper::ClipScalar() is dead code and should be removed.  It is causing
an error with gcc-7 because it references a now defunct member.

CC: "13.0 17.0" 
---
 src/gallium/drivers/swr/rasterizer/core/clip.h | 39 --
 1 file changed, 39 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h 
b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 085e4a9..f19858f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -262,45 +262,6 @@ public:
 return _simd_movemask_ps(vClipCullMask);
 }
 
-// clip a single primitive
-int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* 
pOutAttribs)
-{
-OSALIGNSIMD(float) inVerts[3 * 4];
-OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
-
-// transpose primitive position
-__m128 verts[3];
-pa.AssembleSingle(VERTEX_POSITION_SLOT, primIndex, verts);
-_mm_store_ps(&inVerts[0], verts[0]);
-_mm_store_ps(&inVerts[4], verts[1]);
-_mm_store_ps(&inVerts[8], verts[2]);
-
-// transpose attribs
-uint32_t numScalarAttribs = this->state.linkageCount * 4;
-
-int idx = 0;
-DWORD slot = 0;
-uint32_t mapIdx = 0;
-uint32_t tmpLinkage = uint32_t(this->state.linkageMask);
-while (_BitScanForward(&slot, tmpLinkage))
-{
-tmpLinkage &= ~(1 << slot);
-// Compute absolute attrib slot in vertex array
-uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + 
this->state.linkageMap[mapIdx++];
-__m128 attrib[3];// triangle attribs (always 4 wide)
-pa.AssembleSingle(inputSlot, primIndex, attrib);
-_mm_store_ps(&inAttribs[idx], attrib[0]);
-_mm_store_ps(&inAttribs[idx + numScalarAttribs], attrib[1]);
-_mm_store_ps(&inAttribs[idx + numScalarAttribs * 2], attrib[2]);
-idx += 4;
-}
-
-int numVerts;
-Clip(inVerts, inAttribs, numScalarAttribs, pOutPos, &numVerts, 
pOutAttribs);
-
-return numVerts;
-}
-
 // clip SIMD primitives
 void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, 
PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium: Reduce trace_dump_box_bytes size by box->x.

2017-02-01 Thread Bruce Cherniak
If stride is supplied (as either stride or slice_stride),
trace_dump_box_bytes will try to read stride bytes, regardless of whether
start address is offset by box->x.  This causes access outside mapped
region, and possible segv. (transfer_map stride and layer_stride are not
adjusted for box dimensions)

Note:  trace_dump_box_bytes only dumps PIPE_BUFFER resources, so there
shouldn't be any complicated boxes.  trace_dump_bytes doesn't handle them
anyway.
---
 src/gallium/drivers/trace/tr_dump.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/trace/tr_dump.c 
b/src/gallium/drivers/trace/tr_dump.c
index b173b8a..591e273 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -510,11 +510,13 @@ void trace_dump_box_bytes(const void *data,
   size = 0;
} else {
   enum pipe_format format = resource->format;
-  if (slice_stride)
+  if (slice_stride) {
+ slice_stride -= util_format_get_blockwidth(format) * box->x;
  size = box->depth * slice_stride;
-  else if (stride)
+  } else if (stride) {
+ stride -= util_format_get_blockwidth(format) * box->x;
  size = util_format_get_nblocksy(format, box->height) * stride;
-  else {
+  } else {
  size = util_format_get_nblocksx(format, box->width) * 
util_format_get_blocksize(format);
   }
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Prune empty nodes in CalculateProcessorTopology.

2017-01-19 Thread Bruce Cherniak
CalculateProcessorTopology tries to figure out system topology by
parsing /proc/cpuinfo to determine the number of threads, cores, and
NUMA nodes.  There are some architectures where the "physical id" begins
with 1 rather than 0, which was creating an empty "0" node and causing a
crash in CreateThreadPool.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97102
---
 src/gallium/drivers/swr/rasterizer/core/threads.cpp | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index ee12612..f1c3030 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -217,6 +217,15 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
uint32_t& out_numThread
 out_numThreadsPerProcGroup++;
 }
 
+/* Prune empty numa nodes */
+for (auto it = out_nodes.begin(); it != out_nodes.end(); ) {
+   if ((*it).cores.size() == 0)
+  it = out_nodes.erase(it);
+   else
+  ++it;
+}
+
+/* Prune empty core nodes */
 for (uint32_t node = 0; node < out_nodes.size(); node++) {
 auto& numaNode = out_nodes[node];
 auto it = numaNode.cores.begin();
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Fix BugID 9919 compile error (icc-only).

2016-12-22 Thread Bruce Cherniak
ICC doesn't like the use of a nullptr (std::nullptr_t) argument in
p_atomic_set.  GCC and clang don't complain.
---
 src/gallium/drivers/swr/swr_fence_work.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_fence_work.cpp 
b/src/gallium/drivers/swr/swr_fence_work.cpp
index 3f83e61..1fd2a83 100644
--- a/src/gallium/drivers/swr/swr_fence_work.cpp
+++ b/src/gallium/drivers/swr/swr_fence_work.cpp
@@ -39,7 +39,7 @@ swr_fence_do_work(struct swr_fence *fence)
   work = fence->work.head.next;
   /* Immediately clear the head so any new work gets added to a new work
* queue */
-  p_atomic_set(&fence->work.head.next, nullptr);
+  p_atomic_set(&fence->work.head.next, 0);
   p_atomic_set(&fence->work.tail, &fence->work.head);
   p_atomic_set(&fence->work.count, 0);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Implement fence attached work queues for deferred deletion.

2016-12-12 Thread Bruce Cherniak
Work can now be added to fences and triggered by fence completion. This
allows for deferred resource deletion, and other asynchronous tasks.
---
 src/gallium/drivers/swr/Makefile.sources   |   2 +
 src/gallium/drivers/swr/swr_context.cpp|   7 +-
 src/gallium/drivers/swr/swr_fence.cpp  |  14 ++-
 src/gallium/drivers/swr/swr_fence.h|   8 ++
 src/gallium/drivers/swr/swr_fence_work.cpp | 148 +
 src/gallium/drivers/swr/swr_fence_work.h   |  47 +
 src/gallium/drivers/swr/swr_scratch.cpp|  32 +++
 src/gallium/drivers/swr/swr_screen.cpp |  35 ---
 src/gallium/drivers/swr/swr_state.cpp  |  16 ++--
 9 files changed, 255 insertions(+), 54 deletions(-)
 create mode 100644 src/gallium/drivers/swr/swr_fence_work.cpp
 create mode 100644 src/gallium/drivers/swr/swr_fence_work.h

diff --git a/src/gallium/drivers/swr/Makefile.sources 
b/src/gallium/drivers/swr/Makefile.sources
index d81d458..1afb532 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -42,6 +42,8 @@ CXX_SOURCES := \
swr_memory.h \
swr_fence.h \
swr_fence.cpp \
+   swr_fence_work.h \
+   swr_fence_work.cpp \
swr_query.h \
swr_query.cpp
 
diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index b8c87fa..8933085 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -355,9 +355,6 @@ swr_destroy(struct pipe_context *pipe)
if (ctx->blitter)
   util_blitter_destroy(ctx->blitter);
 
-   /* Idle core before deleting context */
-   SwrWaitForIdle(ctx->swrContext);
-
for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
   pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);
}
@@ -372,6 +369,10 @@ swr_destroy(struct pipe_context *pipe)
   pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], 
NULL);
}
 
+   /* Idle core after destroying buffer resources, but before deleting
+* context.  Destroying resources has potentially called StoreTiles.*/
+   SwrWaitForIdle(ctx->swrContext);
+
if (ctx->swrContext)
   SwrDestroyContext(ctx->swrContext);
 
diff --git a/src/gallium/drivers/swr/swr_fence.cpp 
b/src/gallium/drivers/swr/swr_fence.cpp
index 7fe2470..c73bbbf 100644
--- a/src/gallium/drivers/swr/swr_fence.cpp
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -38,10 +38,13 @@
  * to SwrSync call.
  */
 static void
-swr_sync_cb(uint64_t userData, uint64_t userData2, uint64_t userData3)
+swr_fence_cb(uint64_t userData, uint64_t userData2, uint64_t userData3)
 {
struct swr_fence *fence = (struct swr_fence *)userData;
 
+   /* Complete all work attached to the fence */
+   swr_fence_do_work(fence);
+
/* Correct value is in SwrSync data, and not the fence write field. */
fence->read = userData2;
 }
@@ -56,7 +59,7 @@ swr_fence_submit(struct swr_context *ctx, struct 
pipe_fence_handle *fh)
 
fence->write++;
fence->pending = TRUE;
-   SwrSync(ctx->swrContext, swr_sync_cb, (uint64_t)fence, fence->write, 0);
+   SwrSync(ctx->swrContext, swr_fence_cb, (uint64_t)fence, fence->write, 0);
 }
 
 /*
@@ -72,6 +75,7 @@ swr_fence_create()
 
pipe_reference_init(&fence->reference, 1);
fence->id = fence_id++;
+   fence->work.tail = &fence->work.head;
 
return (struct pipe_fence_handle *)fence;
 }
@@ -80,6 +84,8 @@ swr_fence_create()
 static void
 swr_fence_destroy(struct swr_fence *fence)
 {
+   /* Complete any work left if fence was not submitted */
+   swr_fence_do_work(fence);
FREE(fence);
 }
 
@@ -101,8 +107,10 @@ swr_fence_reference(struct pipe_screen *screen,
   old = NULL;
}
 
-   if (pipe_reference(&old->reference, &fence->reference))
+   if (pipe_reference(&old->reference, &fence->reference)) {
+  swr_fence_finish(screen, NULL, (struct pipe_fence_handle *) old, 0);
   swr_fence_destroy(old);
+   }
 }
 
 
diff --git a/src/gallium/drivers/swr/swr_fence.h 
b/src/gallium/drivers/swr/swr_fence.h
index 80a4345..4766b5b 100644
--- a/src/gallium/drivers/swr/swr_fence.h
+++ b/src/gallium/drivers/swr/swr_fence.h
@@ -25,6 +25,8 @@
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
 
+#include "swr_fence_work.h"
+
 struct pipe_screen;
 
 struct swr_fence {
@@ -36,6 +38,12 @@ struct swr_fence {
unsigned pending;
 
unsigned id; /* Just for reference */
+   
+   struct {
+  uint32_t count;
+  struct swr_fence_work head;
+  struct swr_fence_work *tail;
+   } work;
 };
 
 
diff --git a/src/gallium/drivers/swr/swr_fence_work.cpp 
b/src/gallium/drivers/swr/swr_fence_work.cpp
new file mode 100644
index 000..3f83e61
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_fence_work.cpp
@@ -0,0 +1,148 @@
+/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtain

[Mesa-dev] [PATCH] swr: Fix active_queries count

2016-12-01 Thread Bruce Cherniak
The active_queries count was incorrect for query types that don't require
a begin_query.  Removed the unnecessary assert.
---
 src/gallium/drivers/swr/swr_query.cpp | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_query.cpp 
b/src/gallium/drivers/swr/swr_query.cpp
index a95e0d8..6eb0781 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -165,8 +165,9 @@ swr_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
/* Initialize Results */
memset(&pq->result, 0, sizeof(pq->result));
switch (pq->type) {
+   case PIPE_QUERY_GPU_FINISHED:
case PIPE_QUERY_TIMESTAMP:
-  /* nothing to do */
+  /* nothing to do, but don't want the default */
   break;
case PIPE_QUERY_TIME_ELAPSED:
   pq->result.timestamp_start = swr_get_timestamp(pipe->screen);
@@ -181,10 +182,10 @@ swr_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
  SwrEnableStatsFE(ctx->swrContext, TRUE);
  SwrEnableStatsBE(ctx->swrContext, TRUE);
   }
+  ctx->active_queries++;
   break;
}
 
-   ctx->active_queries++;
 
return true;
 }
@@ -195,11 +196,10 @@ swr_end_query(struct pipe_context *pipe, struct 
pipe_query *q)
struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
 
-   assert(ctx->active_queries
-  && "swr_end_query, there are no active queries!");
-   ctx->active_queries--;
-
switch (pq->type) {
+   case PIPE_QUERY_GPU_FINISHED:
+  /* nothing to do, but don't want the default */
+  break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
   pq->result.timestamp_end = swr_get_timestamp(pipe->screen);
@@ -214,6 +214,7 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query 
*q)
   swr_fence_submit(ctx, pq->fence);
 
   /* Only change stat collection if there are no active queries */
+  ctx->active_queries--;
   if (ctx->active_queries == 0) {
  SwrEnableStatsFE(ctx->swrContext, FALSE);
  SwrEnableStatsBE(ctx->swrContext, FALSE);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Removed stalling SwrWaitForIdle from queries.

2016-09-27 Thread Bruce Cherniak
A previous fundamental change in stats gathering added a temporary
SwrWaitForIdle to begin_query and end_query.  The code has been reworked to
remove the stall.
---
 src/gallium/drivers/swr/swr_context.cpp |  33 +++
 src/gallium/drivers/swr/swr_context.h   |  11 ++-
 src/gallium/drivers/swr/swr_query.cpp   | 152 +---
 src/gallium/drivers/swr/swr_query.h |  10 +--
 4 files changed, 87 insertions(+), 119 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 15e60cd..cbc60e0 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -24,6 +24,7 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_atomic.h"
 
 extern "C" {
 #include "util/u_transfer.h"
@@ -352,9 +353,9 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS 
*pStats)
if (!pDC)
   return;
 
-   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+   struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;
 
-   SWR_STATS *pSwrStats = &ctx->stats;
+   SWR_STATS *pSwrStats = &pqr->core;
 
pSwrStats->DepthPassCount += pStats->DepthPassCount;
pSwrStats->PsInvocations += pStats->PsInvocations;
@@ -369,22 +370,24 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const 
SWR_STATS_FE *pStats)
if (!pDC)
   return;
 
-   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+   struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;
 
-   SWR_STATS_FE *pSwrStats = &ctx->statsFE;
-   pSwrStats->IaVertices += pStats->IaVertices;
-   pSwrStats->IaPrimitives += pStats->IaPrimitives;
-   pSwrStats->VsInvocations += pStats->VsInvocations;
-   pSwrStats->HsInvocations += pStats->HsInvocations;
-   pSwrStats->DsInvocations += pStats->DsInvocations;
-   pSwrStats->GsInvocations += pStats->GsInvocations;
-   pSwrStats->CInvocations += pStats->CInvocations;
-   pSwrStats->CPrimitives += pStats->CPrimitives;
-   pSwrStats->GsPrimitives += pStats->GsPrimitives;
+   SWR_STATS_FE *pSwrStats = &pqr->coreFE;
+   p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);
+   p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);
+   p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);
+   p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);
+   p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);
+   p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);
+   p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);
+   p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);
+   p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);
 
for (unsigned i = 0; i < 4; i++) {
-  pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i];
-  pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i];
+  p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],
+pStats->SoPrimStorageNeeded[i]);
+  p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],
+pStats->SoNumPrimsWritten[i]);
}
 }
 
diff --git a/src/gallium/drivers/swr/swr_context.h 
b/src/gallium/drivers/swr/swr_context.h
index 6854d69..eecfe0d 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -92,7 +92,7 @@ struct swr_draw_context {
float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
 
SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
-   void *swr_ctx;
+   void *pStats;
 };
 
 /* gen_llvm_types FINI */
@@ -159,9 +159,6 @@ struct swr_context {
/* SWR private state - draw context */
struct swr_draw_context swrDC;
 
-   SWR_STATS stats;
-   SWR_STATS_FE statsFE;
-
unsigned dirty; /**< Mask of SWR_NEW_x flags */
 };
 
@@ -172,11 +169,13 @@ swr_context(struct pipe_context *pipe)
 }
 
 static INLINE void
-swr_update_draw_context(struct swr_context *ctx)
+swr_update_draw_context(struct swr_context *ctx,
+  struct swr_query_result *pqr = nullptr)
 {
swr_draw_context *pDC =
   (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
-   ctx->swrDC.swr_ctx = ctx;
+   if (pqr)
+  ctx->swrDC.pStats = pqr;
memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));
 }
 
diff --git a/src/gallium/drivers/swr/swr_query.cpp 
b/src/gallium/drivers/swr/swr_query.cpp
index c51c529..8bb0b16 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -71,48 +71,6 @@ swr_destroy_query(struct pipe_context *pipe, struct 
pipe_query *q)
 }
 
 
-static void
-swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
-{
-   struct swr_context *ctx = swr_context(pipe);
-
-   assert(pq->result);
-   struct swr_query_result *result = pq->result;
-   boolean enable_stats = pq->enable_stats;
-
-   /* A few results don't require the core, so don't involve it */
-   switch (pq->type) {
-   case PIPE_QUERY_TIMESTAMP:
-   case PIPE_QUERY_TIME_ELAPSED:
-  result->timestamp

[Mesa-dev] [PATCH v2] swr: Update screen->context pointer with multiple contexts.

2016-06-17 Thread Bruce Cherniak
A pipe pointer in the screen allows for access to the current device context
in flush_frontbuffer and resource_destroy.  This wasn't tracking the current
context in multi-context situations.

v2: More caffeine.  Corrected compare, removed unnecessary set of
screen->pipe in create_context, and added a few comments.
---
 src/gallium/drivers/swr/swr_context.cpp |6 +++---
 src/gallium/drivers/swr/swr_state.cpp   |4 
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 3a5d9e0..1f3a14c 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -322,8 +322,10 @@ swr_destroy(struct pipe_context *pipe)
 
swr_destroy_scratch_buffers(ctx);
 
+   /* Only update screen->pipe if current context is being destroyed */
assert(screen);
-   screen->pipe = NULL;
+   if (screen->pipe == pipe)
+  screen->pipe = NULL;
 
FREE(ctx);
 }
@@ -346,7 +348,6 @@ struct pipe_context *
 swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
 {
struct swr_context *ctx = CALLOC_STRUCT(swr_context);
-   struct swr_screen *screen = swr_screen(p_screen);
ctx->blendJIT =
   new std::unordered_map;
 
@@ -366,7 +367,6 @@ swr_create_context(struct pipe_screen *p_screen, void 
*priv, unsigned flags)
if (ctx->swrContext == NULL)
   goto fail;
 
-   screen->pipe = &ctx->pipe;
ctx->pipe.screen = p_screen;
ctx->pipe.destroy = swr_destroy;
ctx->pipe.priv = priv;
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 3eeb98d..1f34365 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -776,6 +776,10 @@ swr_update_derived(struct pipe_context *pipe,
struct swr_context *ctx = swr_context(pipe);
struct swr_screen *screen = swr_screen(ctx->pipe.screen);
 
+   /* Update screen->pipe to current pipe context. */
+   if (screen->pipe != pipe)
+  screen->pipe = pipe;
+
/* Any state that requires dirty flags to be re-triggered sets this mask */
/* For example, user_buffer vertex and index buffers. */
unsigned post_update_dirty_flags = 0;
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Update screen->context pointer with multiple contexts.

2016-06-17 Thread Bruce Cherniak
A pipe pointer in the screen allows for access to the current device context
in flush_frontbuffer and resource_destroy.  This wasn't tracking the current
context in multi-context situations.
---
 src/gallium/drivers/swr/swr_context.cpp |6 --
 src/gallium/drivers/swr/swr_state.cpp   |4 
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 3a5d9e0..df9e53e 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -323,7 +323,8 @@ swr_destroy(struct pipe_context *pipe)
swr_destroy_scratch_buffers(ctx);
 
assert(screen);
-   screen->pipe = NULL;
+   if (screen->pipe == pipe)
+  screen->pipe = NULL;
 
FREE(ctx);
 }
@@ -366,7 +367,8 @@ swr_create_context(struct pipe_screen *p_screen, void 
*priv, unsigned flags)
if (ctx->swrContext == NULL)
   goto fail;
 
-   screen->pipe = &ctx->pipe;
+   if (!screen->pipe)
+  screen->pipe = &ctx->pipe;
ctx->pipe.screen = p_screen;
ctx->pipe.destroy = swr_destroy;
ctx->pipe.priv = priv;
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index 3eeb98d..8162fff 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -776,6 +776,10 @@ swr_update_derived(struct pipe_context *pipe,
struct swr_context *ctx = swr_context(pipe);
struct swr_screen *screen = swr_screen(ctx->pipe.screen);
 
+   /* Switch current screen->pipe context */
+   if (screen->pipe != screen->pipe)
+  screen->pipe = pipe;
+
/* Any state that requires dirty flags to be re-triggered sets this mask */
/* For example, user_buffer vertex and index buffers. */
unsigned post_update_dirty_flags = 0;
-- 
1.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: [rasterizer] Correctly select optimized primitive assembly.

2016-05-24 Thread Bruce Cherniak
Indexed primitives were always using cut-aware primitive assembly,
whether primitive_restart was enabled or not.  Correctly pass down
primitive_restart and select optimized PA when possible.
---
 src/gallium/drivers/swr/rasterizer/core/api.cpp|2 ++
 .../drivers/swr/rasterizer/core/frontend.cpp   |6 --
 src/gallium/drivers/swr/rasterizer/core/frontend.h |1 +
 src/gallium/drivers/swr/rasterizer/core/pa.h   |4 ++--
 src/gallium/drivers/swr/rasterizer/core/state.h|3 ++-
 src/gallium/drivers/swr/swr_draw.cpp   |6 ++
 src/gallium/drivers/swr/swr_state.cpp  |4 
 7 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp 
b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 8e0c1e1..2e6f8b3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -1069,6 +1069,7 @@ void DrawInstanced(
 pDC->FeWork.type = DRAW;
 pDC->FeWork.pfnWork = GetProcessDrawFunc(
 false,  // IsIndexed
+false, // bEnableCutIndex
 pState->tsState.tsEnable,
 pState->gsState.gsEnable,
 pState->soState.soEnable,
@@ -1202,6 +1203,7 @@ void DrawIndexedInstance(
 pDC->FeWork.type = DRAW;
 pDC->FeWork.pfnWork = GetProcessDrawFunc(
 true,   // IsIndexed
+pState->frontendState.bEnableCutIndex,
 pState->tsState.tsEnable,
 pState->gsState.gsEnable,
 pState->soState.soEnable,
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index d6643c6..ef90a24 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1159,6 +1159,7 @@ static void TessellationStages(
 /// @param pUserData - Pointer to DRAW_WORK
 template <
 typename IsIndexedT,
+typename IsCutIndexEnabledT,
 typename HasTessellationT,
 typename HasGeometryShaderT,
 typename HasStreamOutT,
@@ -1283,7 +1284,7 @@ void ProcessDraw(
 }
 
 // choose primitive assembler
-PA_FACTORY paFactory(pDC, state.topology, work.numVerts);
+PA_FACTORY paFactory(pDC, state.topology, 
work.numVerts);
 PA_STATE& pa = paFactory.GetPA();
 
 /// @todo: temporarily move instance loop in the FE to ensure SO ordering
@@ -1434,12 +1435,13 @@ struct FEDrawChooser
 // Selector for correct templated Draw front-end function
 PFN_FE_WORK_FUNC GetProcessDrawFunc(
 bool IsIndexed,
+bool IsCutIndexEnabled,
 bool HasTessellation,
 bool HasGeometryShader,
 bool HasStreamOut,
 bool HasRasterization)
 {
-return TemplateArgUnroller::GetFunc(IsIndexed, 
HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
+return TemplateArgUnroller::GetFunc(IsIndexed, 
IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, 
HasRasterization);
 }
 
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h 
b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index e1b0400..dfd3987 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -322,6 +322,7 @@ uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool 
includeAdjVerts);
 // ProcessDraw front-end function.  All combinations of parameter values are 
available
 PFN_FE_WORK_FUNC GetProcessDrawFunc(
 bool IsIndexed,
+bool IsCutIndexEnabled,
 bool HasTessellation,
 bool HasGeometryShader,
 bool HasStreamOut,
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h 
b/src/gallium/drivers/swr/rasterizer/core/pa.h
index c98ea14..6aa73c1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1149,14 +1149,14 @@ private:
 
 // Primitive Assembler factory class, responsible for creating and 
initializing the correct assembler
 // based on state.
-template 
+template 
 struct PA_FACTORY
 {
 PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t 
numVerts) : topo(in_topo)
 {
 #if KNOB_ENABLE_CUT_AWARE_PA == TRUE
 const API_STATE& state = GetApiState(pDC);
-if ((IsIndexedT::value && (
+if ((IsIndexedT::value && IsCutIndexEnabledT::value && (
 topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST ||
 topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP ||
 topo == TOP_TRIANGLE_LIST || topo == TOP_LINE_LIST_ADJ ||
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h 
b/src/gallium/drivers/swr/rasterizer/core/state.h
index f4813e4..5156c6b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -799,6 +799,7 @@ struct SWR_FRONTEND_STATE
 // skip clip test, perspective divide, and viewport transform
 // intended for verts in screen s