[Mesa-dev] [PATCH] a5xx: fix primitive restart

2017-07-07 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/freedreno/a5xx/a5xx.xml.h |  3 ++-
 src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 32 ++-
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h 
b/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h
index 22259d74ccb..611eecdaab0 100644
--- a/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h
+++ b/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h
@@ -8,7 +8,7 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/ilia/src/freedreno/envytools/rnndb/adreno/a5xx.xml  ( 141876 
bytes, from 2017-07-07 04:12:33)
+- /home/ilia/src/freedreno/envytools/rnndb/adreno/a5xx.xml  ( 141938 
bytes, from 2017-07-08 01:02:47)
 - /home/ilia/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1572 
bytes, from 2016-02-11 01:04:14)
 - /home/ilia/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  13324 
bytes, from 2017-07-04 02:59:47)
 - /home/ilia/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml(  31866 
bytes, from 2017-07-04 02:59:47)
@@ -3706,6 +3706,7 @@ static inline uint32_t 
A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val)
 {
return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & 
A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK;
 }
+#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART   0x0100
 #define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST  0x0400
 
 #define REG_A5XX_PC_PRIM_VTX_CNTL  0xe385
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c 
b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
index bede05e9812..2939aaca868 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -580,15 +580,9 @@ fd5_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
if (dirty & FD_DIRTY_PROG)
fd5_program_emit(ctx, ring, emit);
 
-   /* note: must come after program emit.. because there is some overlap
-* in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached
-* values from fd5_program_emit() to avoid having to re-emit the prog
-* every time rast state changes.
-*/
-   if (dirty & (FD_DIRTY_PROG | FD_DIRTY_RASTERIZER)) {
+   if (dirty & FD_DIRTY_RASTERIZER) {
struct fd5_rasterizer_stateobj *rasterizer =
fd5_rasterizer_stateobj(ctx->rasterizer);
-   unsigned max_loc = fd5_context(ctx)->max_loc;
 
OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1);
OUT_RING(ring, rasterizer->gras_su_cntl);
@@ -602,10 +596,6 @@ fd5_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);
 
-   OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
-   OUT_RING(ring, rasterizer->pc_primitive_cntl |
-A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc));
-
OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1);
OUT_RING(ring, rasterizer->pc_raster_cntl);
 
@@ -613,6 +603,26 @@ fd5_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
}
 
+   /* note: must come after program emit.. because there is some overlap
+* in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached
+* values from fd5_program_emit() to avoid having to re-emit the prog
+* every time rast state changes.
+*
+* Since the primitive restart state is not part of a tracked object, we
+* re-emit this register every time.
+*/
+   if (emit->info && ctx->rasterizer) {
+   struct fd5_rasterizer_stateobj *rasterizer =
+   fd5_rasterizer_stateobj(ctx->rasterizer);
+   unsigned max_loc = fd5_context(ctx)->max_loc;
+
+   OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
+   OUT_RING(ring, rasterizer->pc_primitive_cntl |
+A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(max_loc) |
+COND(emit->info->primitive_restart && 
emit->info->index_size,
+ 
A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART));
+   }
+
if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER)) {
uint32_t posz_regid = ir3_find_output_regid(fp, 
FRAG_RESULT_DEPTH);
unsigned nr = pfb->nr_cbufs;
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/10] radeonsi: emit param exports after position exports

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 71e84ce..4b2995f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2341,23 +2341,20 @@ static void si_llvm_export_vs(struct 
lp_build_tgsi_context *bld_base,
break;
case TGSI_SEMANTIC_CLIPVERTEX:
if (!shader->key.opt.clip_disable) {
si_llvm_emit_clipvertex(bld_base, pos_args,
outputs[i].values);
}
break;
}
}
 
-   /* Build parameter exports. */
-   si_build_param_exports(ctx, outputs, noutput);
-
/* We need to add the position output manually if it's missing. */
if (!pos_args[0].out[0]) {
pos_args[0].enabled_channels = 0xf; /* writemask */
pos_args[0].valid_mask = 0; /* EXEC mask */
pos_args[0].done = 0; /* last export? */
pos_args[0].target = V_008DFC_SQ_EXP_POS;
pos_args[0].compr = 0; /* COMPR flag */
pos_args[0].out[0] = base->zero; /* X */
pos_args[0].out[1] = base->zero; /* Y */
pos_args[0].out[2] = base->zero; /* Z */
@@ -2442,20 +2439,23 @@ static void si_llvm_export_vs(struct 
lp_build_tgsi_context *bld_base,
 
/* Specify the target we are exporting */
pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
 
if (pos_idx == shader->info.nr_pos_exports)
/* Specify that this is the last export */
pos_args[i].done = 1;
 
ac_build_export(&ctx->ac, &pos_args[i]);
}
+
+   /* Build parameter exports. */
+   si_build_param_exports(ctx, outputs, noutput);
 }
 
 /**
  * Forward all outputs from the vertex shader to the TES. This is only used
  * for the fixed function TCS.
  */
 static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] radeonsi/gfx9: add workarounds to avoid VGPR indexing completely

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

For inputs and outputs, indirect indexing is lowered by the GLSL compiler.
For temporaries, use alloca and disable the "promote-alloca" pass.

In the future, we could switch all codepaths to alloca permanently and
just rely on the "promote-alloca" pass.
---
 src/gallium/drivers/radeonsi/si_pipe.c | 25 --
 src/gallium/drivers/radeonsi/si_pipe.h |  1 +
 .../drivers/radeonsi/si_shader_tgsi_setup.c|  3 +--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index afb2bcb..8a4bc41 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -134,22 +134,23 @@ static void si_emit_string_marker(struct pipe_context 
*ctx,
dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
 }
 
 static LLVMTargetMachineRef
 si_create_llvm_target_machine(struct si_screen *sscreen)
 {
const char *triple = "amdgcn--";
char features[256];
 
snprintf(features, sizeof(features),
-"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s",
+
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s",
 sscreen->b.chip_class >= GFX9 ? ",+xnack" : ",-xnack",
+sscreen->llvm_has_working_vgpr_indexing ? "" : 
",-promote-alloca",
 sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
 
return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
   
r600_get_llvm_processor_name(sscreen->b.family),
   features,
   LLVMCodeGenLevelDefault,
   LLVMRelocDefault,
   LLVMCodeModelDefault);
 }
 
@@ -750,34 +751,41 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen,
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
return 3;
 
/* Supported boolean features. */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
return 1;
 
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
-   /* TODO: Indirection of geometry shader input dimension is not
-* handled yet
-*/
-   return shader != PIPE_SHADER_GEOMETRY;
+   /* TODO: Indirect indexing of GS inputs is unimplemented. */
+   return shader != PIPE_SHADER_GEOMETRY &&
+  (sscreen->llvm_has_working_vgpr_indexing ||
+   /* TCS and TES load inputs directly from LDS or
+* offchip memory, so indirect indexing is trivial. */
+   shader == PIPE_SHADER_TESS_CTRL ||
+   shader == PIPE_SHADER_TESS_EVAL);
+
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   return sscreen->llvm_has_working_vgpr_indexing ||
+  /* TCS stores outputs directly to memory. */
+  shader == PIPE_SHADER_TESS_CTRL;
 
/* Unsupported boolean features. */
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_SUPPORTED_IRS:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 0;
}
return 0;
 }
@@ -999,20 +1007,25 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
 sscreen->b.info.me_fw_version >= 173) ||
(sscreen->b.chip_class == SI &&
 sscreen->b.info.pfp_fw_version >= 121 &&
 sscreen->b.info.me_fw_version >= 87);
 
sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 
&&
sscreen->b.family <= 
CHIP_POLARIS12) ||
   sscreen->b.family == CHIP_VEGA10 ||
   sscreen->b.family == CHIP_RAVEN;
+   /* While it would be nice not to have this flag, we are constrained
+* by the reality that LLVM 5.0 doesn't have working VGPR indexing
+* on GFX9.
+*/
+   sscreen->llvm_has_working_vgpr_indexing = sscreen->b.chip_class <= VI;

[Mesa-dev] [PATCH 06/10] radeonsi: add si_build_fs_interp helper

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

This is much simpler.
---
 src/gallium/drivers/radeonsi/si_shader.c | 100 ---
 1 file changed, 39 insertions(+), 61 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 349e57b..ad1fb7b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1195,20 +1195,38 @@ static int lookup_interp_param_index(unsigned 
interpolate, unsigned location)
return SI_PARAM_PERSP_CENTROID;
else
return SI_PARAM_PERSP_CENTER;
break;
default:
fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
return -1;
}
 }
 
+static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
+  unsigned attr_index, unsigned chan,
+  LLVMValueRef prim_mask,
+  LLVMValueRef i, LLVMValueRef j)
+{
+   if (i || j) {
+   return ac_build_fs_interp(&ctx->ac,
+ LLVMConstInt(ctx->i32, chan, 0),
+ LLVMConstInt(ctx->i32, attr_index, 0),
+ prim_mask, i, j);
+   }
+   return ac_build_fs_interp_mov(&ctx->ac,
+ LLVMConstInt(ctx->i32, 2, 0), /* P0 */
+ LLVMConstInt(ctx->i32, chan, 0),
+ LLVMConstInt(ctx->i32, attr_index, 0),
+ prim_mask);
+}
+
 /**
  * Interpolate a fragment shader input.
  *
  * @param ctx  context
  * @param input_index  index of the input in hardware
  * @param semantic_nameTGSI_SEMANTIC_*
  * @param semantic_index   semantic index
  * @param num_interp_inputsnumber of all interpolated inputs (= BCOLOR 
offset)
  * @param colors_read_mask color components read (4 bits for each color, 8 
bits in total)
  * @param interp_param interpolation weights (i,j)
@@ -1221,119 +1239,89 @@ static void interp_fs_input(struct si_shader_context 
*ctx,
unsigned semantic_name,
unsigned semantic_index,
unsigned num_interp_inputs,
unsigned colors_read_mask,
LLVMValueRef interp_param,
LLVMValueRef prim_mask,
LLVMValueRef face,
LLVMValueRef result[4])
 {
struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMValueRef attr_number;
-   LLVMValueRef i, j;
-
+   LLVMValueRef i = NULL, j = NULL;
unsigned chan;
 
/* fs.constant returns the param from the middle vertex, so it's not
 * really useful for flat shading. It's meant to be used for custom
 * interpolation (but the intrinsic can't fetch from the other two
 * vertices).
 *
 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
 * to do the right thing. The only reason we use fs.constant is that
 * fs.interp cannot be used on integers, because they can be equal
 * to NaN.
 *
 * When interp is false we will use fs.constant or for newer llvm,
  * amdgcn.interp.mov.
 */
bool interp = interp_param != NULL;
 
-   attr_number = LLVMConstInt(ctx->i32, input_index, 0);
-
if (interp) {
interp_param = LLVMBuildBitCast(gallivm->builder, interp_param,
LLVMVectorType(ctx->f32, 2), 
"");
 
i = LLVMBuildExtractElement(gallivm->builder, interp_param,
ctx->i32_0, "");
j = LLVMBuildExtractElement(gallivm->builder, interp_param,
ctx->i32_1, "");
}
 
if (semantic_name == TGSI_SEMANTIC_COLOR &&
ctx->shader->key.part.ps.prolog.color_two_side) {
LLVMValueRef is_face_positive;
-   LLVMValueRef back_attr_number;
 
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
 * otherwise it's at offset "num_inputs".
 */
unsigned back_attr_offset = num_interp_inputs;
if (semantic_index == 1 && colors_read_mask & 0xf)
back_attr_offset += 1;
 
-   back_attr_number = LLVMConstInt(ctx->i32, back_attr_offset, 0);
-
is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
 face, ctx->i32_0, "");
 
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-   

[Mesa-dev] [PATCH 05/10] radeonsi: merge si_llvm_get_amdgpu_target into ac_get_llvm_target

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_util.c  | 18 +-
 src/amd/common/ac_llvm_util.h  |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c |  2 +-
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  2 --
 .../drivers/radeonsi/si_shader_tgsi_setup.c| 42 --
 5 files changed, 12 insertions(+), 53 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index d14057f..7dde2ef 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -33,35 +33,37 @@
 #include 
 #include 
 
 static void ac_init_llvm_target()
 {
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmPrinter();
 
-   /*
-* Workaround for bug in llvm 4.0 that causes image intrinsics
+   /* For inline assembly. */
+   LLVMInitializeAMDGPUAsmParser();
+
+   /* Workaround for bug in llvm 4.0 that causes image intrinsics
 * to disappear.
 * https://reviews.llvm.org/D26348
 */
-#if HAVE_LLVM >= 0x0400
-   const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
-   LLVMParseCommandLineOptions(2, argv, NULL);
-#endif
-
+   if (HAVE_LLVM >= 0x0400) {
+   /* "mesa" is the prefix for error messages */
+   const char *argv[2] = { "mesa", 
"-simplifycfg-sink-common=false" };
+   LLVMParseCommandLineOptions(2, argv, NULL);
+   }
 }
 
 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
 
-static LLVMTargetRef ac_get_llvm_target(const char *triple)
+LLVMTargetRef ac_get_llvm_target(const char *triple)
 {
LLVMTargetRef target = NULL;
char *err_message = NULL;
 
call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
 
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
fprintf(stderr, "Cannot find target for triple %s ", triple);
if (err_message) {
fprintf(stderr, "%s\n", err_message);
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 21f3e83..cc4fe3b 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -53,20 +53,21 @@ enum ac_func_attr {
 */
AC_FUNC_ATTR_LEGACY   = (1u << 31),
 };
 
 enum ac_target_machine_options {
AC_TM_SUPPORTS_SPILL = (1 << 0),
AC_TM_SISCHED = (1 << 1),
 };
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum 
ac_target_machine_options tm_options);
 
+LLVMTargetRef ac_get_llvm_target(const char *triple);
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
 void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
   int attr_idx, enum ac_func_attr attr);
 void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
unsigned attrib_mask);
 void ac_dump_module(LLVMModuleRef module);
 
 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
 bool ac_llvm_is_function(LLVMValueRef v);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 5f3b7e1..afb2bcb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -138,21 +138,21 @@ static LLVMTargetMachineRef
 si_create_llvm_target_machine(struct si_screen *sscreen)
 {
const char *triple = "amdgcn--";
char features[256];
 
snprintf(features, sizeof(features),
 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s",
 sscreen->b.chip_class >= GFX9 ? ",+xnack" : ",-xnack",
 sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
 
-   return LLVMCreateTargetMachine(si_llvm_get_amdgpu_target(triple), 
triple,
+   return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
   
r600_get_llvm_processor_name(sscreen->b.family),
   features,
   LLVMCodeGenLevelDefault,
   LLVMRelocDefault,
   LLVMCodeModelDefault);
 }
 
 static struct pipe_context *si_create_context(struct pipe_screen *screen,
   unsigned flags)
 {
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 3556e69..6e86e0b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -239,22 +239,20 @@ struct si_shader_context {
 };
 
 static inline struct si_shader_context *
 si_shader_context(struct lp_build_tgsi_context *bld_base)
 {
return (struct 

[Mesa-dev] [PATCH 03/10] radeonsi: don't call gallivm_init_llvm_targets

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

It's for initializing the native (x86) target.
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index d5e988e..3f20cd1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -61,21 +61,20 @@ enum si_llvm_calling_convention {
 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
 {
char str[16];
 
snprintf(str, sizeof(str), "%i", value);
LLVMAddTargetDependentFunctionAttr(F, name, str);
 }
 
 static void init_amdgpu_target()
 {
-   gallivm_init_llvm_targets();
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmPrinter();
 
/* For inline assembly. */
LLVMInitializeAMDGPUAsmParser();
 
if (HAVE_LLVM >= 0x0400) {
/*
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] gallium/radeon: reallocate suballocated buffers when exported

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

This should fix exports of suballocated buffers.
---
 src/gallium/drivers/radeon/r600_buffer_common.c |  3 ++-
 src/gallium/drivers/radeon/r600_texture.c   | 26 +
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index 40d763b..dd1c209 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -281,27 +281,28 @@ void r600_replace_buffer_storage(struct pipe_context *ctx,
 struct pipe_resource *dst,
 struct pipe_resource *src)
 {
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_resource *rdst = r600_resource(dst);
struct r600_resource *rsrc = r600_resource(src);
uint64_t old_gpu_address = rdst->gpu_address;
 
pb_reference(&rdst->buf, rsrc->buf);
rdst->gpu_address = rsrc->gpu_address;
+   rdst->b.b.bind = rsrc->b.b.bind;
+   rdst->flags = rsrc->flags;
 
assert(rdst->vram_usage == rsrc->vram_usage);
assert(rdst->gart_usage == rsrc->gart_usage);
assert(rdst->bo_size == rsrc->bo_size);
assert(rdst->bo_alignment == rsrc->bo_alignment);
assert(rdst->domains == rsrc->domains);
-   assert(rdst->flags == rsrc->flags);
 
rctx->rebind_buffer(ctx, dst, old_gpu_address);
 }
 
 void r600_invalidate_resource(struct pipe_context *ctx,
  struct pipe_resource *resource)
 {
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_resource *rbuffer = r600_resource(resource);
 
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index f5f7d10..3aac3c7 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -611,20 +611,46 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
 rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
 rtex->surface.bpe;
slice_size = rtex->surface.u.legacy.level[0].slice_size;
}
} else {
+   /* Move a suballocated buffer into a non-suballocated 
allocation. */
+   if (rscreen->ws->buffer_is_suballocated(res->buf)) {
+   assert(!res->b.is_shared);
+
+   /* Allocate a new buffer with PIPE_BIND_SHARED. */
+   struct pipe_resource templ = res->b.b;
+   templ.bind |= PIPE_BIND_SHARED;
+
+   struct pipe_resource *newb =
+   screen->resource_create(screen, &templ);
+   if (!newb)
+   return false;
+
+   /* Copy the old buffer contents to the new one. */
+   struct pipe_box box;
+   u_box_1d(0, newb->width0, &box);
+   rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
+&res->b.b, 0, &box);
+   /* Move the new buffer storage to the old 
pipe_resource. */
+   r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
+   pipe_resource_reference(&newb, NULL);
+
+   assert(res->b.b.bind & PIPE_BIND_SHARED);
+   assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+   }
+
/* Buffers */
offset = 0;
stride = 0;
slice_size = 0;
}
 
if (res->b.is_shared) {
/* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
 * doesn't set it.
 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/10] gallivm: inline gallivm_init_llvm_targets

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

There is only one user.
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 23 ---
 src/gallium/auxiliary/gallivm/lp_bld_misc.h   |  3 ---
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 0e4a531..342cb38 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -117,47 +117,40 @@ static void init_native_targets()
 {
// If we have a native target, initialize it to ensure it is linked in and
// usable by the JIT.
llvm::InitializeNativeTarget();
 
llvm::InitializeNativeTargetAsmPrinter();
 
llvm::InitializeNativeTargetDisassembler();
 }
 
-/**
- * The llvm target registry is not thread-safe, so drivers and state-trackers
- * that want to initialize targets should use the gallivm_init_llvm_targets()
- * function to safely initialize targets.
- *
- * LLVM targets should be initialized before the driver or state-tracker tries
- * to access the registry.
- */
-extern "C" void
-gallivm_init_llvm_targets(void)
-{
-   call_once(&init_native_targets_once_flag, init_native_targets);
-}
-
 extern "C" void
 lp_set_target_options(void)
 {
 #if HAVE_LLVM < 0x0304
/*
 * By default LLVM adds a signal handler to output a pretty stack trace.
 * This signal handler is never removed, causing problems when unloading the
 * shared object where the gallium driver resides.
 */
llvm::DisablePrettyStackTrace = true;
 #endif
 
-   gallivm_init_llvm_targets();
+   /* The llvm target registry is not thread-safe, so drivers and 
state-trackers
+* that want to initialize targets should use the lp_set_target_options()
+* function to safely initialize targets.
+*
+* LLVM targets should be initialized before the driver or state-tracker 
tries
+* to access the registry.
+*/
+   call_once(&init_native_targets_once_flag, init_native_targets);
 }
 
 extern "C"
 LLVMTargetLibraryInfoRef
 gallivm_create_target_library_info(const char *triple)
 {
return reinterpret_cast<LLVMTargetLibraryInfoRef>(
 #if HAVE_LLVM < 0x0307
new llvm::TargetLibraryInfo(
 #else
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
index dafb4cf..1b725d1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -35,23 +35,20 @@
 #include 
 
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
 struct lp_generated_code;
 
-extern void
-gallivm_init_llvm_targets(void);
-
 extern LLVMTargetLibraryInfoRef
 gallivm_create_target_library_info(const char *triple);
 
 extern void
 gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
 
 extern void
 lp_set_target_options(void);
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/10] radeonsi: don't use info.num_inputs when it's unused

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

For clarity. It's only used by color interpolation.
---
 src/gallium/drivers/radeonsi/si_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index ad1fb7b..157cb51 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1348,21 +1348,21 @@ static void declare_input_fs(
 
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
 decl->Interp.Location);
if (interp_param_idx == -1)
return;
else if (interp_param_idx) {
interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
}
 
interp_fs_input(ctx, input_index, decl->Semantic.Name,
-   decl->Semantic.Index, shader->selector->info.num_inputs,
+   decl->Semantic.Index, 0, /* this param is unused */
shader->selector->info.colors_read, interp_param,
LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
[0]);
 }
 
 static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
 {
return unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4);
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/10] radeonsi: move building parameter exports into a separate function

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

Both loops now look simple.
---
 src/gallium/drivers/radeonsi/si_shader.c | 162 +++
 1 file changed, 78 insertions(+), 84 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 157cb51..71e84ce 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2235,134 +2235,128 @@ static void si_llvm_emit_streamout(struct 
si_shader_context *ctx,
if (stream != so->output[i].stream)
continue;
 
emit_streamout_output(ctx, so_buffers, so_write_offset,
  &so->output[i], &outputs[reg]);
}
}
lp_build_endif(&if_ctx);
 }
 
+static void si_export_param(struct si_shader_context *ctx, unsigned index,
+   LLVMValueRef *values)
+{
+   struct ac_export_args args;
+
+   si_llvm_init_export_args(&ctx->bld_base, values,
+V_008DFC_SQ_EXP_PARAM + index, &args);
+   ac_build_export(&ctx->ac, &args);
+}
+
+static void si_build_param_exports(struct si_shader_context *ctx,
+  struct si_shader_output_values *outputs,
+  unsigned noutput)
+{
+   struct si_shader *shader = ctx->shader;
+   unsigned param_count = 0;
+
+   for (unsigned i = 0; i < noutput; i++) {
+   unsigned semantic_name = outputs[i].semantic_name;
+   unsigned semantic_index = outputs[i].semantic_index;
+
+   if (outputs[i].vertex_stream[0] != 0 &&
+   outputs[i].vertex_stream[1] != 0 &&
+   outputs[i].vertex_stream[2] != 0 &&
+   outputs[i].vertex_stream[3] != 0)
+   continue;
+
+   switch (semantic_name) {
+   case TGSI_SEMANTIC_LAYER:
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+   case TGSI_SEMANTIC_CLIPDIST:
+   case TGSI_SEMANTIC_COLOR:
+   case TGSI_SEMANTIC_BCOLOR:
+   case TGSI_SEMANTIC_PRIMID:
+   case TGSI_SEMANTIC_FOG:
+   case TGSI_SEMANTIC_TEXCOORD:
+   case TGSI_SEMANTIC_GENERIC:
+   break;
+   default:
+   continue;
+   }
+
+   if ((semantic_name != TGSI_SEMANTIC_GENERIC ||
+semantic_index < SI_MAX_IO_GENERIC) &&
+   shader->key.opt.kill_outputs &
+   (1ull << si_shader_io_get_unique_index(semantic_name, 
semantic_index)))
+   continue;
+
+   si_export_param(ctx, param_count, outputs[i].values);
+
+   assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+   shader->info.vs_output_param_offset[i] = param_count++;
+   }
+
+   shader->info.nr_param_exports = param_count;
+}
 
 /* Generate export instructions for hardware VS shader stage */
 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
  struct si_shader_output_values *outputs,
  unsigned noutput)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
struct lp_build_context *base = &bld_base->base;
-   struct ac_export_args args, pos_args[4] = {};
+   struct ac_export_args pos_args[4] = {};
LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = 
NULL, viewport_index_value = NULL;
-   unsigned semantic_name, semantic_index;
-   unsigned target;
-   unsigned param_count = 0;
unsigned pos_idx;
int i;
 
+   /* Build position exports. */
for (i = 0; i < noutput; i++) {
-   semantic_name = outputs[i].semantic_name;
-   semantic_index = outputs[i].semantic_index;
-   bool export_param = true;
-
-   switch (semantic_name) {
-   case TGSI_SEMANTIC_POSITION: /* ignore these */
-   case TGSI_SEMANTIC_PSIZE:
-   case TGSI_SEMANTIC_CLIPVERTEX:
-   case TGSI_SEMANTIC_EDGEFLAG:
+   switch (outputs[i].semantic_name) {
+   case TGSI_SEMANTIC_POSITION:
+   si_llvm_init_export_args(bld_base, outputs[i].values,
+V_008DFC_SQ_EXP_POS, 
&pos_args[0]);
break;
-   case TGSI_SEMANTIC_GENERIC:
-   /* don't process indices the function can't handle */
-   if (semantic_index >= SI_MAX_IO_GENERIC)
-   break;
-   /* fall through */
-   default:
-   if (shader->key.opt.kill_outputs &
-   (1ull << 

[Mesa-dev] [PATCH 01/10] gallium/radeon: flush the context after in-place texture realloc before export

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2deb56a..f5f7d10 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -563,20 +563,21 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
 * interop in the future.
 */
if (resource->nr_samples > 1 || rtex->is_depth)
return false;
 
/* Move a suballocated texture into a non-suballocated 
allocation. */
if (rscreen->ws->buffer_is_suballocated(res->buf)) {
assert(!res->b.is_shared);
r600_reallocate_texture_inplace(rctx, rtex,
PIPE_BIND_SHARED, 
false);
+   rctx->b.flush(&rctx->b, NULL, 0);
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
}
 
/* Since shader image stores don't support DCC on VI,
 * disable it for external clients that want write
 * access.
 */
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
if (r600_texture_disable_dcc(rctx, rtex))
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: flag _NEW_TEXTURE_OBJECT for GL_TEXTURE_LOD_BIAS_EXT

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

Only the compatibility profile can set it.
It was done incorrectly when we split _NEW_TEXTURE.
---
 src/mesa/main/texenv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c
index ee5171c..2fcaf7c 100644
--- a/src/mesa/main/texenv.c
+++ b/src/mesa/main/texenv.c
@@ -437,21 +437,21 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const 
GLfloat *param )
 break;
   default:
 _mesa_error( ctx, GL_INVALID_ENUM, "glTexEnv(pname)" );
 return;
   }
}
else if (target == GL_TEXTURE_FILTER_CONTROL_EXT) {
   if (pname == GL_TEXTURE_LOD_BIAS_EXT) {
 if (texUnit->LodBias == param[0])
return;
-FLUSH_VERTICES(ctx, _NEW_TEXTURE_STATE);
+FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT);
  texUnit->LodBias = param[0];
   }
   else {
  TE_ERROR(GL_INVALID_ENUM, "glTexEnv(pname=%s)", pname);
 return;
   }
}
else if (target == GL_POINT_SPRITE_NV) {
   /* GL_ARB_point_sprite / GL_NV_point_sprite */
   if (!ctx->Extensions.NV_point_sprite
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium: use "ull" number suffix to keep the QtCreator parser happy

2017-07-07 Thread Marek Olšák
From: Marek Olšák 

It can't parse "llu".
---
 src/gallium/drivers/r600/r600_state_common.c|  4 +--
 src/gallium/drivers/radeon/r600_pipe_common.h   | 44 -
 src/gallium/drivers/radeonsi/si_debug.c |  4 +--
 src/gallium/drivers/radeonsi/si_shader.c|  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 10 +++---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c   |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c   |  2 +-
 src/mesa/state_tracker/st_atom.h|  4 +--
 8 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 8ace779..4c97efa 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -840,25 +840,25 @@ static void *r600_create_shader_state(struct pipe_context 
*ctx,
 
for (i = 0; i < sel->info.num_outputs; i++) {
unsigned name = sel->info.output_semantic_name[i];
unsigned index = sel->info.output_semantic_index[i];
 
switch (name) {
case TGSI_SEMANTIC_TESSINNER:
case TGSI_SEMANTIC_TESSOUTER:
case TGSI_SEMANTIC_PATCH:
sel->lds_patch_outputs_written_mask |=
-   1llu << r600_get_lds_unique_index(name, 
index);
+   1ull << r600_get_lds_unique_index(name, 
index);
break;
default:
sel->lds_outputs_written_mask |=
-   1llu << r600_get_lds_unique_index(name, 
index);
+   1ull << r600_get_lds_unique_index(name, 
index);
}
}
break;
default:
break;
}
 
return sel;
 }
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b22a3a7..5c761f3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -83,42 +83,42 @@
 #define DBG_NO_TGSI(1 << 13)
 #define DBG_NO_ASM (1 << 14)
 #define DBG_PREOPT_IR  (1 << 15)
 #define DBG_CHECK_IR   (1 << 16)
 #define DBG_NO_OPT_VARIANT (1 << 17)
 #define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
 /* gaps */
 #define DBG_TEST_DMA   (1 << 20)
 /* Bits 21-31 are reserved for the r600g driver. */
 /* features */
-#define DBG_NO_ASYNC_DMA   (1llu << 32)
-#define DBG_NO_HYPERZ  (1llu << 33)
-#define DBG_NO_DISCARD_RANGE   (1llu << 34)
-#define DBG_NO_2D_TILING   (1llu << 35)
-#define DBG_NO_TILING  (1llu << 36)
-#define DBG_SWITCH_ON_EOP  (1llu << 37)
-#define DBG_FORCE_DMA  (1llu << 38)
-#define DBG_PRECOMPILE (1llu << 39)
-#define DBG_INFO   (1llu << 40)
-#define DBG_NO_WC  (1llu << 41)
-#define DBG_CHECK_VM   (1llu << 42)
-#define DBG_NO_DCC (1llu << 43)
-#define DBG_NO_DCC_CLEAR   (1llu << 44)
-#define DBG_NO_RB_PLUS (1llu << 45)
-#define DBG_SI_SCHED   (1llu << 46)
-#define DBG_MONOLITHIC_SHADERS (1llu << 47)
-#define DBG_NO_CE  (1llu << 48)
-#define DBG_UNSAFE_MATH(1llu << 49)
-#define DBG_NO_DCC_FB  (1llu << 50)
-#define DBG_TEST_VMFAULT_CP(1llu << 51)
-#define DBG_TEST_VMFAULT_SDMA  (1llu << 52)
-#define DBG_TEST_VMFAULT_SHADER(1llu << 53)
+#define DBG_NO_ASYNC_DMA   (1ull << 32)
+#define DBG_NO_HYPERZ  (1ull << 33)
+#define DBG_NO_DISCARD_RANGE   (1ull << 34)
+#define DBG_NO_2D_TILING   (1ull << 35)
+#define DBG_NO_TILING  (1ull << 36)
+#define DBG_SWITCH_ON_EOP  (1ull << 37)
+#define DBG_FORCE_DMA  (1ull << 38)
+#define DBG_PRECOMPILE (1ull << 39)
+#define DBG_INFO   (1ull << 40)
+#define DBG_NO_WC  (1ull << 41)
+#define DBG_CHECK_VM   (1ull << 42)
+#define DBG_NO_DCC (1ull << 43)
+#define DBG_NO_DCC_CLEAR   (1ull << 44)
+#define DBG_NO_RB_PLUS (1ull << 45)
+#define DBG_SI_SCHED   (1ull << 46)
+#define DBG_MONOLITHIC_SHADERS (1ull << 47)
+#define DBG_NO_CE  (1ull << 48)
+#define DBG_UNSAFE_MATH(1ull << 49)
+#define DBG_NO_DCC_FB  (1ull << 50)
+#define DBG_TEST_VMFAULT_CP(1ull << 51)
+#define DBG_TEST_VMFAULT_SDMA  (1ull << 52)
+#define DBG_TEST_VMFAULT_SHADER(1ull << 53)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 #define R600_MAX_VIEWPORTS16
 
 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
 
 enum r600_coherency {
R600_COHERENCY_NONE, /* no cache flushes needed */
R600_COHERENCY_SHADER,
R600_COHERENCY_CB_META,
diff 

[Mesa-dev] [PATCH] nir: copy front interpolation when creating fake back color input

2017-07-07 Thread Ilia Mirkin
Fixes a bunch of gl_BackColor interpolation tests that had explicit
interpolation specified on the fragment shader gl_Color.

Signed-off-by: Ilia Mirkin 
---
 src/compiler/nir/nir_lower_two_sided_color.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_lower_two_sided_color.c 
b/src/compiler/nir/nir_lower_two_sided_color.c
index 7d1a3bd236d..90da1013ec8 100644
--- a/src/compiler/nir/nir_lower_two_sided_color.c
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -46,7 +46,8 @@ typedef struct {
  */
 
 static nir_variable *
-create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
+create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot,
+ enum glsl_interp_mode interpolation)
 {
nir_variable *var = rzalloc(shader, nir_variable);
 
@@ -56,6 +57,7 @@ create_input(nir_shader *shader, unsigned drvloc, 
gl_varying_slot slot)
var->name = ralloc_asprintf(var, "in_%d", drvloc);
var->data.index = 0;
var->data.location = slot;
+   var->data.interpolation = interpolation;
 
exec_list_push_tail(&shader->inputs, &var->node);
 
@@ -116,7 +118,9 @@ setup_inputs(lower_2side_state *state)
   else
  slot = VARYING_SLOT_BFC1;
 
-  state->colors[i].back = create_input(state->shader, ++maxloc, slot);
+  state->colors[i].back = create_input(
+state->shader, ++maxloc, slot,
+state->colors[i].front->data.interpolation);
}
 
return 0;
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glthread: get rid of unmarshal dispatch enum/table

2017-07-07 Thread Grigori Goronzy

On 2017-07-01 18:46, Marek Olšák wrote:

Instead of passing the function pointer through the queue, passing
just a call ID (uint16_t) is preferable.

If the switch statement is an issue, doing a function pointer lookup
from a static array should be OK.



OK, then let's drop this patch. gcc turns the switch/case block into an 
efficient jump table with the ID method, so an array for function lookup 
instead of that doesn't improve anything.
I didn't see any measurable benefit of the function pointer method 
either.


Best regards
Grigori



On Fri, Jun 30, 2017 at 7:14 PM, Grigori Goronzy  
wrote:

On 2017-06-30 15:27, Nicolai Hähnle wrote:


On 30.06.2017 02:29, Grigori Goronzy wrote:


Use function pointers to identify the unmarshalling function, which
is simpler and gets rid of a lot of generated code.

This removes an indirection and possibly results in a slight speedup
as well.



The fact that it blows up cmd_base from 4 bytes to 16 bytes might
result in a slowdown. Marek's recent changes clearly indicated that
looking at memory behavior matters quite a bit for glthread. So I'm
inclined to say No on this unless you can demonstrate a consistent
speedup.



That's indeed a notable difference. I suspect it isn't so much the byte size
of the marshalled commands that affects throughput, but the number of
commands per batch and their associated costs when unmarshalling, so the
larger size of cmd_base might not matter much (perhaps with adjusted max
batch size). In any case, I'll try to get hold of some numbers.

Best regards
Grigori



Cheers,
Nicolai



---
  src/mapi/glapi/gen/Makefile.am |  4 --
  src/mapi/glapi/gen/gl_marshal.py   | 36 ++--
  src/mapi/glapi/gen/gl_marshal_h.py | 86
--
  src/mesa/Android.gen.mk|  7 
  src/mesa/Makefile.sources  |  1 -
  src/mesa/SConscript|  8 
  src/mesa/main/.gitignore   |  1 -
  src/mesa/main/glthread.c   |  9 +++-
  src/mesa/main/glthread.h   |  2 -
  src/mesa/main/marshal.c| 19 -
  src/mesa/main/marshal.h| 14 +++
  11 files changed, 26 insertions(+), 161 deletions(-)
  delete mode 100644 src/mapi/glapi/gen/gl_marshal_h.py

diff --git a/src/mapi/glapi/gen/Makefile.am
b/src/mapi/glapi/gen/Makefile.am
index bd04519..62007a4 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -76,7 +76,6 @@ EXTRA_DIST= \
gl_genexec.py \
gl_gentable.py \
gl_marshal.py \
-   gl_marshal_h.py \
gl_procs.py \
gl_SPARC_asm.py \
gl_table.py \
@@ -297,9 +296,6 @@ $(MESA_DIR)/main/api_exec.c: gl_genexec.py 
apiexec.py

$(COMMON)
  $(MESA_DIR)/main/marshal_generated.c: gl_marshal.py marshal_XML.py
$(COMMON)
$(PYTHON_GEN) $(srcdir)/gl_marshal.py -f
$(srcdir)/gl_and_es_API.xml > $@
  -$(MESA_DIR)/main/marshal_generated.h: gl_marshal_h.py 
marshal_XML.py

$(COMMON)
-   $(PYTHON_GEN) $(srcdir)/gl_marshal_h.py -f
$(srcdir)/gl_and_es_API.xml > $@
-
  $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
$(PYTHON_GEN) $(srcdir)/gl_table.py -f
$(srcdir)/gl_and_es_API.xml -m remap_table > $@
  diff --git a/src/mapi/glapi/gen/gl_marshal.py
b/src/mapi/glapi/gen/gl_marshal.py
index efa4d9e..e71ede3 100644
--- a/src/mapi/glapi/gen/gl_marshal.py
+++ b/src/mapi/glapi/gen/gl_marshal.py
@@ -34,7 +34,6 @@ header = """
  #include "dispatch.h"
  #include "glthread.h"
  #include "marshal.h"
-#include "marshal_generated.h"
  """
@@ -106,7 +105,7 @@ class PrintCode(gl_XML.gl_print_base):
def print_async_dispatch(self, func):
  out('cmd = _mesa_glthread_allocate_command(ctx, '
-'DISPATCH_CMD_{0}, cmd_size);'.format(func.name))
+'(unmarshal_func)_mesa_unmarshal_{0},
cmd_size);'.format(func.name))
  for p in func.fixed_params:
  if p.count:
  out('memcpy(cmd->{0}, {0}, {1});'.format(
@@ -166,7 +165,7 @@ class PrintCode(gl_XML.gl_print_base):
  out('};')
def print_async_unmarshal(self, func):
-out('static inline void')
+out('static void')
  out(('_mesa_unmarshal_{0}(struct gl_context *ctx, '
   'const struct marshal_cmd_{0} 
*cmd)').format(func.name))

  out('{')
@@ -205,6 +204,7 @@ class PrintCode(gl_XML.gl_print_base):
  else:
  out('variable_data +=
{0};'.format(p.size_string(False)))
  +
out('debug_print_unmarshal("{0}");'.format(func.name))

  self.print_sync_call(func)
  out('}')
  @@ -276,35 +276,6 @@ class PrintCode(gl_XML.gl_print_base):
  out('')
  out('')
  -def print_unmarshal_dispatch_cmd(self, api):
-out('size_t')
-out('_mesa_unmarshal_dispatch_cmd(struct gl_context *ctx, '
-'const void *cmd)')
-out('{')
-with indent():
-out('const struct 

Re: [Mesa-dev] [PATCH] draw: handle more TGSI_SEMANTIC_COLOR indices

2017-07-07 Thread Brian Paul


Reviewed-by: Brian Paul 


On 07/07/2017 04:15 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

It could only handle indices 0/1, otherwise what happened was bad (accessing
array out of bounds, no crash but kind of random). This is enough for the gl
state tracker (primary/secondary color) but not enough for some other state
trackers (d3d9 has no limits on the number of color interpolants).
The complexity with color semantics is all due to the front/back mapping (2
outputs in the vs map to one input in the fs) so this isn't extended to
indices > 1 - d3d9 has no use for back colors, therefore this isn't needed and
still only 2 back colors can be handled correctly.
---
  src/gallium/auxiliary/draw/draw_pipe_clip.c  | 17 ++---
  src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 11 +++
  src/gallium/auxiliary/draw/draw_pipe_twoside.c   |  9 ++---
  3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index cf2b417..4cfa54b 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -771,8 +771,9 @@ find_interp(const struct draw_fragment_shader *fs, int 
*indexed_interp,
 int interp;
 /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
  * from the array we've filled before. */
-   if (semantic_name == TGSI_SEMANTIC_COLOR ||
-   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+   if ((semantic_name == TGSI_SEMANTIC_COLOR ||
+semantic_name == TGSI_SEMANTIC_BCOLOR) &&
+   semantic_index < 2) {
interp = indexed_interp[semantic_index];
 } else if (semantic_name == TGSI_SEMANTIC_POSITION ||
semantic_name == TGSI_SEMANTIC_CLIPVERTEX) {
@@ -851,7 +852,8 @@ clip_init_state(struct draw_stage *stage)

 if (fs) {
for (i = 0; i < fs->info.num_inputs; i++) {
- if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fs->info.input_semantic_index[i] < 2) {
  if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR)
 indexed_interp[fs->info.input_semantic_index[i]] = 
fs->info.input_interpolate[i];
   }
@@ -881,6 +883,15 @@ clip_init_state(struct draw_stage *stage)
   clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
   clipper->num_perspect_attribs++;
   break;
+  case TGSI_INTERPOLATE_COLOR:
+ if (draw->rasterizer->flatshade) {
+clipper->const_attribs[clipper->num_const_attribs] = i;
+clipper->num_const_attribs++;
+ } else {
+clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
+clipper->num_perspect_attribs++;
+ }
+ break;
default:
   assert(interp == -1);
   break;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c 
b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index cd285e6..2830435 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -170,8 +170,9 @@ find_interp(const struct draw_fragment_shader *fs, int 
*indexed_interp,
 int interp;
 /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
  * from the array we've filled before. */
-   if (semantic_name == TGSI_SEMANTIC_COLOR ||
-   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+   if ((semantic_name == TGSI_SEMANTIC_COLOR ||
+semantic_name == TGSI_SEMANTIC_BCOLOR) &&
+   semantic_index < 2) {
interp = indexed_interp[semantic_index];
 } else {
/* Otherwise, search in the FS inputs, with a decent default
@@ -216,7 +217,8 @@ static void flatshade_init_state( struct draw_stage *stage )

 if (fs) {
for (i = 0; i < fs->info.num_inputs; i++) {
- if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fs->info.input_semantic_index[i] < 2) {
  if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR)
 indexed_interp[fs->info.input_semantic_index[i]] = 
fs->info.input_interpolate[i];
   }
@@ -236,7 +238,8 @@ static void flatshade_init_state( struct draw_stage *stage )
 info->output_semantic_index[i]);
/* If it's flat, add it to the flat vector. */

-  if (interp == TGSI_INTERPOLATE_CONSTANT) {
+  if (interp == TGSI_INTERPOLATE_CONSTANT ||
+  (interp == TGSI_INTERPOLATE_COLOR && draw->rasterizer->flatshade)) {
   flat->flat_attribs[flat->num_flat_attribs] = i;
   flat->num_flat_attribs++;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c 
b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 52d87c6..7e76835 100644
--- 

Re: [Mesa-dev] [PATCH 1/3] mesa: GL_TEXTURE_BORDER_COLOR exists in OpenGL 1.0, so don't depend on GL_ARB_texture_border_clamp

2017-07-07 Thread Andres Gomez
Ian, it looks like we could want this patch (and the others from the
series when they land) in -stable (?)

On Tue, 2017-06-27 at 10:09 -0700, Ian Romanick wrote:
> From: Ian Romanick 
> 
> On NV20 (and probably also on earlier NV GPUs that lack
> GL_ARB_texture_border_clamp) fixes the following piglit tests:
> 
> gl-1.0-beginend-coverage gltexparameter[if]{v,}
> push-pop-texture-state
> texwrap 1d
> texwrap 1d proj
> texwrap 2d proj
> texwrap formats
> 
> All told, 49 more tests pass on NV20 (10de:0201).
> 
> No changes on Intel CI run or RV250 (1002:4c66).
> 
> Signed-off-by: Ian Romanick 
> ---
>  src/mesa/main/texparam.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
> index 3c110de..857faf6 100644
> --- a/src/mesa/main/texparam.c
> +++ b/src/mesa/main/texparam.c
> @@ -736,8 +736,16 @@ set_tex_parameterf(struct gl_context *ctx,
>break;
>  
> case GL_TEXTURE_BORDER_COLOR:
> +  /* Border color exists in desktop OpenGL since 1.0 for GL_CLAMP.  In
> +   * OpenGL ES 2.0+, it only exists when GL_OES_texture_border_clamp 
> is
> +   * enabled.  It is never available in OpenGL ES 1.x.
> +   *
> +   * FIXME: Every driver that supports GLES2 has this extension.  Elide
> +   * the check?
> +   */
>if (ctx->API == API_OPENGLES ||
> -  !ctx->Extensions.ARB_texture_border_clamp)
> +  (ctx->API == API_OPENGLES2 &&
> +   !ctx->Extensions.ARB_texture_border_clamp))
>   goto invalid_pname;
>  
>if (!_mesa_target_allows_setting_sampler_parameters(texObj->Target))
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount.

2017-07-07 Thread Marek Olšák
On Fri, Jul 7, 2017 at 9:37 PM, Dave Airlie  wrote:
> On 8 July 2017 at 04:07, Christian König  wrote:
>> Am 07.07.2017 um 18:51 schrieb Marek Olšák:
>>>
>>> On Fri, Jul 7, 2017 at 11:18 AM, Christian König
>>>  wrote:

 What tiling format do the destination textures have?

 Sounds like the offset is just added so that we distribute memory
 accesses
 more equally over memory channels.
>>>
>>> You can't set an offset that is not aligned. The hardware ignores the
>>> low unaligned bits, so they have a different meaning. They specify
>>> pipe and bank rotation for macro tiling. It's like a state. It
>>> basically rotates the tile pattern.
>>
>>
>> Yeah, I know. That's what I meant with distributing memory accesses more
>> equally over all channels. The lower bits select a memory bank swizzle IIRC.
>>
>> I've tried years ago with R600 if shuffling them randomly could improve
>> performance, but MRT wasn't widely used and/or supported at that time.
>
> I'd known this and forgotten, the public CIK docs say bits 0..7 must be zero,
> but I have older docs which had more info. It would be nice if we could get
> proper docs released for the bottom bits considering AMD are using them in 
> their
> drivers.

The low 8 bits of the address are unused and can't be set, because
CB_COLOR0_BASE is shifted by 8 bits. We are really talking about bits
starting from 8 going higher. E.g. 8K alignment gives you 5 bits that
can be used to express the rotation.

>
> It would be good to know what registers have the bits that matter (i.e. BASE,
> FMASK, CMASK, DCC, and resource descriptors.)
>
> Then I suppose we'd need to know the algorithm for programming them, and
> if we need to make any allocations bigger in order to do so.
>
> I expect this only starts to matter when we hit memory bandwidth limits,
> the deferred demo does 3 MRT, one depth at 2kx2k then samples from those
> down to 1280x720 displayed. This combined with a 3 instanced 57k vertex
> draw seemed to be enough to see the pain. (Maybe a GL example doing something
> similar might show the problem for radeonsi).

Addrlib contains the encoding code for the base address pipe/bank bits.

>
> The other open question I have, is does this just matter for MRT or does 
> texture
> sampling also get some boost from it, my hack patch does it for only
> surfaces which
> will end up attached to the CB.

Yes, it should be done for read-only textures too.

>
> I'll update the patch to not call it an offset but name them the tile
> rotation bits.

The proper name is "tile swizzle" or "pipe/bank swizzle". On gfx9,
it's called "pipe/bank xor".

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] draw: handle more TGSI_SEMANTIC_COLOR indices

2017-07-07 Thread sroland
From: Roland Scheidegger 

It could only handle indices 0/1, otherwise what happened was bad (accessing
array out of bounds, no crash but kind of random). This is enough for the gl
state tracker (primary/secondary color) but not enough for some other state
trackers (d3d9 has no limits on the number of color interpolants).
The complexity with color semantics is all due to the front/back mapping (2
outputs in the vs map to one input in the fs) so this isn't extended to
indices > 1 - d3d9 has no use for back colors, therefore this isn't needed and
still only 2 back colors can be handled correctly.
---
 src/gallium/auxiliary/draw/draw_pipe_clip.c  | 17 ++---
 src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 11 +++
 src/gallium/auxiliary/draw/draw_pipe_twoside.c   |  9 ++---
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c 
b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index cf2b417..4cfa54b 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -771,8 +771,9 @@ find_interp(const struct draw_fragment_shader *fs, int 
*indexed_interp,
int interp;
/* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
 * from the array we've filled before. */
-   if (semantic_name == TGSI_SEMANTIC_COLOR ||
-   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+   if ((semantic_name == TGSI_SEMANTIC_COLOR ||
+semantic_name == TGSI_SEMANTIC_BCOLOR) &&
+   semantic_index < 2) {
   interp = indexed_interp[semantic_index];
} else if (semantic_name == TGSI_SEMANTIC_POSITION ||
   semantic_name == TGSI_SEMANTIC_CLIPVERTEX) {
@@ -851,7 +852,8 @@ clip_init_state(struct draw_stage *stage)
 
if (fs) {
   for (i = 0; i < fs->info.num_inputs; i++) {
- if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fs->info.input_semantic_index[i] < 2) {
 if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR)
indexed_interp[fs->info.input_semantic_index[i]] = 
fs->info.input_interpolate[i];
  }
@@ -881,6 +883,15 @@ clip_init_state(struct draw_stage *stage)
  clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
  clipper->num_perspect_attribs++;
  break;
+  case TGSI_INTERPOLATE_COLOR:
+ if (draw->rasterizer->flatshade) {
+clipper->const_attribs[clipper->num_const_attribs] = i;
+clipper->num_const_attribs++;
+ } else {
+clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
+clipper->num_perspect_attribs++;
+ }
+ break;
   default:
  assert(interp == -1);
  break;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c 
b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index cd285e6..2830435 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -170,8 +170,9 @@ find_interp(const struct draw_fragment_shader *fs, int 
*indexed_interp,
int interp;
/* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode
 * from the array we've filled before. */
-   if (semantic_name == TGSI_SEMANTIC_COLOR ||
-   semantic_name == TGSI_SEMANTIC_BCOLOR) {
+   if ((semantic_name == TGSI_SEMANTIC_COLOR ||
+semantic_name == TGSI_SEMANTIC_BCOLOR) &&
+   semantic_index < 2) {
   interp = indexed_interp[semantic_index];
} else {
   /* Otherwise, search in the FS inputs, with a decent default
@@ -216,7 +217,8 @@ static void flatshade_init_state( struct draw_stage *stage )
 
if (fs) {
   for (i = 0; i < fs->info.num_inputs; i++) {
- if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ fs->info.input_semantic_index[i] < 2) {
 if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR)
indexed_interp[fs->info.input_semantic_index[i]] = 
fs->info.input_interpolate[i];
  }
@@ -236,7 +238,8 @@ static void flatshade_init_state( struct draw_stage *stage )
info->output_semantic_index[i]);
   /* If it's flat, add it to the flat vector. */
 
-  if (interp == TGSI_INTERPOLATE_CONSTANT) {
+  if (interp == TGSI_INTERPOLATE_CONSTANT ||
+  (interp == TGSI_INTERPOLATE_COLOR && draw->rasterizer->flatshade)) {
  flat->flat_attribs[flat->num_flat_attribs] = i;
  flat->num_flat_attribs++;
   }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c 
b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 52d87c6..7e76835 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -111,18 +111,21 @@ static void 

[Mesa-dev] [PATCH] [rfc] radv: set cb base tile swizzles for MRT speedups

2017-07-07 Thread Dave Airlie
From: Dave Airlie 

This patch uses addrlib to work out the tile swizzles according
to the surface index. It seems to produce the same values as
amdgpu-pro for the deferred test.

This gets the deferred demo from 730->950fps on my rx480.
(dcc cmask elim predication patches get it further)

open question: should we do this for all textures or just
color surfaces?
---
 src/amd/common/ac_surface.c   | 14 ++
 src/amd/common/ac_surface.h   |  2 ++
 src/amd/vulkan/radv_device.c  |  9 -
 src/amd/vulkan/radv_image.c   | 12 +++-
 src/amd/vulkan/radv_private.h |  2 ++
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 877d592..c936426 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -684,6 +684,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
surf->htile_size *= 2;
 
surf->is_linear = surf->u.legacy.level[0].mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+   /* workout base swizzle */
+   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+   ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+   ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+   AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
+   AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
+   AddrBaseSwizzleIn.macroModeIndex = 
AddrSurfInfoOut.macroModeIndex;
+   AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
+   AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
+   AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 
&AddrBaseSwizzleOut);
+   surf->u.legacy.combined_swizzle = 
AddrBaseSwizzleOut.tileSwizzle;
+   }
return 0;
 }
 
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 582a671..ecba4c8 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -97,6 +97,7 @@ struct legacy_surf_layout {
 unsigneddepth_adjusted:1;
 unsignedstencil_adjusted:1;
 
+uint8_t combined_swizzle;
 struct legacy_surf_levellevel[RADEON_SURF_MAX_LEVELS];
 struct legacy_surf_levelstencil_level[RADEON_SURF_MAX_LEVELS];
 uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
@@ -194,6 +195,7 @@ struct ac_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
+   uint32_t surf_index;
uint8_t samples;
uint8_t levels;
uint16_t array_size;
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 59efccf..0cf0d57 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2757,15 +2757,20 @@ radv_initialise_color_surface(struct radv_device 
*device,
}
 
cb->cb_color_base = va >> 8;
-
+   if (device->physical_device->rad_info.chip_class < GFX9)
+   cb->cb_color_base |= 
iview->image->surface.u.legacy.combined_swizzle;
/* CMASK variables */
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->cmask.offset;
cb->cb_color_cmask = va >> 8;
+   if (device->physical_device->rad_info.chip_class < GFX9)
+   cb->cb_color_cmask |= 
iview->image->surface.u.legacy.combined_swizzle;
 
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
cb->cb_dcc_base = va >> 8;
+   if (device->physical_device->rad_info.chip_class < GFX9)
+   cb->cb_dcc_base |= 
iview->image->surface.u.legacy.combined_swizzle;
 
uint32_t max_slice = radv_surface_layer_count(iview);
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2781,6 +2786,8 @@ radv_initialise_color_surface(struct radv_device *device,
if (iview->image->fmask.size) {
va = device->ws->buffer_get_va(iview->bo) + 
iview->image->offset + iview->image->fmask.offset;
cb->cb_color_fmask = va >> 8;
+   if (device->physical_device->rad_info.chip_class < GFX9)
+   cb->cb_color_fmask |= 
iview->image->surface.u.legacy.combined_swizzle;
} else {
cb->cb_color_fmask = cb->cb_color_base;
}
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 115e5a5..cd0af5e 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -31,6 +31,7 @@
 #include "sid.h"
 #include "gfx9d.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
   const struct radv_image_create_info *create_info)
@@ -209,6 +210,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
va += base_level_info->offset;
 
state[0] = va >> 8;
+   if (chip_class < GFX9)
+

Re: [Mesa-dev] [PATCH] swr: invalidate attachment on transition change

2017-07-07 Thread Kyriazis, George
+Bruce

Andres,

VTK/Kitware has already modified their code to enable dual depth peeling for 
Mesa version >= 17.2.  From that perspective, it doesn’t matter if the change 
goes into the 17.1.x releases or 17.2.

Having said that, we haven’t seen any issues with that check-in, so if it makes 
it to a release branch earlier than 17.2, it’s still a good thing for us.

Thanks!

George

> On Jul 7, 2017, at 4:11 PM, Andres Gomez  wrote:
> 
> George, would we want this patch in -stable, or shouldn't we bother?
> 
> On Tue, 2017-06-20 at 11:42 -0500, George Kyriazis wrote:
>> Consider the following RT attachment order:
>> 1. Attach surfaces attachments 0 & 1, and render with them
>> 2. Detach 0 & 1
>> 3. Re-attach 0 & 1 to different surfaces
>> 4. Render with the new attachment
>> 
>> The definition of a tile being resolved is that local changes have been
>> flushed out to the surface, hence there is no need to reload the tile before
>> it's written to.  For an invalid tile, the tile has to be reloaded from
>> the surface before rendering.
>> 
>> Stage (2) was marking hot tiles for attachments 0 & 1 as RESOLVED,
>> which means that the hot tiles can be written out to memory with no
>> need to read them back in (they are "clean").  They need to be marked as
>> resolved here, because a surface may be destroyed after a detach, and we
>> don't want to have un-resolved tiles that may force a readback from a
>> NULL (destroyed) surface.  (Part of a destroy is detach all attachments 
>> first)
>> 
>> Stage (3), during the no att -> att transition, we  need to realize that the
>> "new" surface tiles need to be fetched fresh from the new surface, instead
>> of using the resolved tiles, that belong to a stale attachment.
>> 
>> This is done by marking the hot tiles as invalid in stage (3), when we 
>> realize
>> that a new attachment is being made, so that they are re-fetched during
>> rendering in stage (4).
>> 
>> Also note that hot tiles are indexed by attachment.
>> 
>> - Fixes VTK dual depth-peeling tests.
>> - No piglit changes
>> ---
>> src/gallium/drivers/swr/swr_draw.cpp   | 19 +++
>> src/gallium/drivers/swr/swr_resource.h |  4 
>> src/gallium/drivers/swr/swr_state.cpp  |  5 +
>> 3 files changed, 28 insertions(+)
>> 
>> diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
>> b/src/gallium/drivers/swr/swr_draw.cpp
>> index 03c82a7..ac300e2 100644
>> --- a/src/gallium/drivers/swr/swr_draw.cpp
>> +++ b/src/gallium/drivers/swr/swr_draw.cpp
>> @@ -215,6 +215,25 @@ swr_finish(struct pipe_context *pipe)
>>swr_fence_reference(pipe->screen, , NULL);
>> }
>> 
>> +/*
>> + * Invalidate tiles so they can be reloaded back when needed
>> + */
>> +void
>> +swr_invalidate_render_target(struct pipe_context *pipe,
>> + uint32_t attachment,
>> + uint16_t width, uint16_t height)
>> +{
>> +   struct swr_context *ctx = swr_context(pipe);
>> +
>> +   /* grab the rect from the passed in arguments */
>> +   swr_update_draw_context(ctx);
>> +   SWR_RECT full_rect =
>> +  {0, 0, (int32_t)width, (int32_t)height};
>> +   SwrInvalidateTiles(ctx->swrContext,
>> +  1 << attachment,
>> +  full_rect);
>> +}
>> +
>> 
>> /*
>>  * Store SWR HotTiles back to renderTarget surface.
>> diff --git a/src/gallium/drivers/swr/swr_resource.h 
>> b/src/gallium/drivers/swr/swr_resource.h
>> index ae9954c..4effd46 100644
>> --- a/src/gallium/drivers/swr/swr_resource.h
>> +++ b/src/gallium/drivers/swr/swr_resource.h
>> @@ -96,6 +96,10 @@ swr_resource_data(struct pipe_resource *resource)
>> }
>> 
>> 
>> +void swr_invalidate_render_target(struct pipe_context *pipe,
>> +  uint32_t attachment,
>> +  uint16_t width, uint16_t height);
>> +
>> void swr_store_render_target(struct pipe_context *pipe,
>>  uint32_t attachment,
>>  enum SWR_TILE_STATE post_tile_state);
>> diff --git a/src/gallium/drivers/swr/swr_state.cpp 
>> b/src/gallium/drivers/swr/swr_state.cpp
>> index 08549e5..deae4e6 100644
>> --- a/src/gallium/drivers/swr/swr_state.cpp
>> +++ b/src/gallium/drivers/swr/swr_state.cpp
>> @@ -933,6 +933,11 @@ swr_change_rt(struct swr_context *ctx,
>>* INVALID so they are reloaded from surface. */
>>   swr_store_render_target(>pipe, attachment, SWR_TILE_INVALID);
>>   need_fence = true;
>> +   } else {
>> +  /* if no previous attachment, invalidate tiles that may be marked
>> +   * RESOLVED because of an old attachment */
>> +  swr_invalidate_render_target(>pipe, attachment, sf->width, 
>> sf->height);
>> +  /* no need to set fence here */
>>}
>> 
>>/* Make new attachment */
> -- 
> Br,
> 
> Andres

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] Revert "intel/isl: Only create a CCS buffer if the image supports rendering"

2017-07-07 Thread Nanley Chery
On Thu, Jul 06, 2017 at 01:11:27PM -0700, Nanley Chery wrote:
> This reverts commit 8aaa13467dc289d35dc7900ab9fab9a7689c4178, which was
> based on an incorrect assumption. Unlike the restriction placed on image
> views in the Vulkan API, OpenGL allows you to render to texture views
> whose formats differ from the originals.
> 
> Bugzilla: https://bugzilla.freedesktop.org/show_bug.cgi?id=101677
> Signed-off-by: Nanley Chery 
> ---
>  src/intel/isl/isl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 

Pushed.

> diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> index bbbdb19df2..ba56d86c17 100644
> --- a/src/intel/isl/isl.c
> +++ b/src/intel/isl/isl.c
> @@ -1699,7 +1699,7 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
> if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
>return false;
>  
> -   if (!isl_format_supports_rendering(dev->info, surf->format))
> +   if (isl_format_is_compressed(surf->format))
>return false;
>  
> /* TODO: More conditions where it can fail. */
> -- 
> 2.13.2
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] swr: build driver proper separate from rasterizer

2017-07-07 Thread Tim Rowley
swr used to build and link the rasterizer to the driver, and to support
multiple architectures we needed to have multiple versions of the
driver/rasterizer combination, which needed to link in much of mesa.

Changing to having one instance of the driver and just building
architecture specific versions of the rasterizer gives a large reduction
in disk space.

libGL.so        6464 Kb ->  7000 Kb
libswrAVX.so   10068 Kb ->  5432 Kb
libswrAVX2.so   9828 Kb ->  5200 Kb

Total          26360 Kb -> 17632 Kb
---
 src/gallium/drivers/swr/Makefile.am | 24 +---
 src/gallium/drivers/swr/swr_context.cpp |  2 +-
 src/gallium/drivers/swr/swr_loader.cpp  | 14 ++
 src/gallium/drivers/swr/swr_screen.h|  2 ++
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 4b4bd37..e764e0d 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -26,7 +26,13 @@ AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX11_CXXFLAGS)
 
 noinst_LTLIBRARIES = libmesaswr.la
 
-libmesaswr_la_SOURCES = $(LOADER_SOURCES)
+libmesaswr_la_SOURCES = \
+   $(CXX_SOURCES) \
+   $(COMMON_CXX_SOURCES) \
+   $(JITTER_CXX_SOURCES) \
+   rasterizer/codegen/gen_knobs.cpp \
+   rasterizer/codegen/gen_knobs.h \
+   $(LOADER_SOURCES)
 
 COMMON_CXXFLAGS = \
-fno-strict-aliasing \
@@ -43,12 +49,15 @@ COMMON_CXXFLAGS = \
-I$(srcdir)/rasterizer/jitter \
-I$(srcdir)/rasterizer/archrast
 
+libmesaswr_la_CXXFLAGS = \
+   $(SWR_AVX_CXXFLAGS) \
+   -DKNOB_ARCH=KNOB_ARCH_AVX \
+   $(COMMON_CXXFLAGS)
+
 COMMON_SOURCES = \
-   $(CXX_SOURCES) \
$(ARCHRAST_CXX_SOURCES) \
$(COMMON_CXX_SOURCES) \
$(CORE_CXX_SOURCES) \
-   $(JITTER_CXX_SOURCES) \
$(MEMORY_CXX_SOURCES) \
$(BUILT_SOURCES)
 
@@ -207,19 +216,12 @@ rasterizer.intermediate: 
rasterizer/codegen/gen_backends.py rasterizer/codegen/t
--cpp \
--hpp
 
-COMMON_LIBADD = \
-   $(top_builddir)/src/gallium/auxiliary/libgallium.la \
-   $(top_builddir)/src/mesa/libmesagallium.la \
-   $(LLVM_LIBS)
-
 COMMON_LDFLAGS = \
-shared \
-module \
-no-undefined \
$(GC_SECTIONS) \
-   $(NO_UNDEFINED) \
-   $(LLVM_LDFLAGS)
-
+   $(NO_UNDEFINED)
 
 lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
 
diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index 9648278..c058870 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -467,7 +467,7 @@ swr_create_context(struct pipe_screen *p_screen, void 
*priv, unsigned flags)
   AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);
memset(ctx, 0, sizeof(struct swr_context));
 
-   SwrGetInterface(ctx->api);
+   swr_screen(p_screen)->pfnSwrGetInterface(ctx->api);
ctx->swrDC.pAPI = >api;
 
ctx->blendJIT =
diff --git a/src/gallium/drivers/swr/swr_loader.cpp 
b/src/gallium/drivers/swr/swr_loader.cpp
index 4d71a67..d56fb0e 100644
--- a/src/gallium/drivers/swr/swr_loader.cpp
+++ b/src/gallium/drivers/swr/swr_loader.cpp
@@ -24,13 +24,10 @@
 #include "util/u_cpu_detect.h"
 #include "util/u_dl.h"
 #include "swr_public.h"
-
-#include "pipe/p_screen.h"
+#include "swr_screen.h"
 
 #include 
 
-typedef pipe_screen *(*screen_create_proc)(struct sw_winsys *winsys);
-
 struct pipe_screen *
 swr_create_screen(struct sw_winsys *winsys)
 {
@@ -57,16 +54,17 @@ swr_create_screen(struct sw_winsys *winsys)
   exit(-1);
}
 
-   util_dl_proc pScreenProc = util_dl_get_proc_address(pLibrary, 
"swr_create_screen_internal");
+   util_dl_proc pApiProc = util_dl_get_proc_address(pLibrary, 
"SwrGetInterface");
 
-   if (!pScreenProc) {
+   if (!pApiProc) {
   fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
   exit(-1);
}
 
-   screen_create_proc pScreenCreate = (screen_create_proc)pScreenProc;
+   struct pipe_screen *screen = swr_create_screen_internal(winsys);
+   swr_screen(screen)->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
 
-   return pScreenCreate(winsys);
+   return screen;
 }
 
 
diff --git a/src/gallium/drivers/swr/swr_screen.h 
b/src/gallium/drivers/swr/swr_screen.h
index dc1bb47..a10f426 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -47,6 +47,8 @@ struct swr_screen {
uint8_t msaa_max_count;
 
HANDLE hJitMgr;
+
+   PFNSwrGetInterface pfnSwrGetInterface;
 };
 
 static INLINE struct swr_screen *
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] swr: switch to using SwrGetInterface api table

2017-07-07 Thread Tim Rowley
Use the SWR rasterizer API through the table returned from
SwrGetInterface rather than referencing the functions directly.
This will allow us to move to a model of having the driver dynamically
load the appropriate swr architecture library.
---
 src/gallium/drivers/swr/swr_clear.cpp   |  6 ++---
 src/gallium/drivers/swr/swr_context.cpp | 19 --
 src/gallium/drivers/swr/swr_context.h   |  5 +++-
 src/gallium/drivers/swr/swr_draw.cpp| 46 -
 src/gallium/drivers/swr/swr_fence.cpp   |  2 +-
 src/gallium/drivers/swr/swr_memory.h|  6 ++---
 src/gallium/drivers/swr/swr_query.cpp   |  8 +++---
 src/gallium/drivers/swr/swr_scratch.cpp |  2 +-
 src/gallium/drivers/swr/swr_screen.cpp  |  3 ++-
 src/gallium/drivers/swr/swr_state.cpp   | 40 ++--
 10 files changed, 72 insertions(+), 65 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_clear.cpp 
b/src/gallium/drivers/swr/swr_clear.cpp
index 3a35805..233432e 100644
--- a/src/gallium/drivers/swr/swr_clear.cpp
+++ b/src/gallium/drivers/swr/swr_clear.cpp
@@ -78,9 +78,9 @@ swr_clear(struct pipe_context *pipe,
 
for (unsigned i = 0; i < layers; ++i) {
   swr_update_draw_context(ctx);
-  SwrClearRenderTarget(ctx->swrContext, clearMask, i,
-   color->f, depth, stencil,
-   clear_rect);
+  ctx->api.pfnSwrClearRenderTarget(ctx->swrContext, clearMask, i,
+   color->f, depth, stencil,
+   clear_rect);
 
   // Mask out the attachments that are out of layers.
   if (fb->zsbuf &&
diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index f2d971a..9648278 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -311,8 +311,8 @@ swr_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *blit_info)
}
 
if (ctx->active_queries) {
-  SwrEnableStatsFE(ctx->swrContext, FALSE);
-  SwrEnableStatsBE(ctx->swrContext, FALSE);
+  ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, FALSE);
+  ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, FALSE);
}
 
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
@@ -349,8 +349,8 @@ swr_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *blit_info)
util_blitter_blit(ctx->blitter, );
 
if (ctx->active_queries) {
-  SwrEnableStatsFE(ctx->swrContext, TRUE);
-  SwrEnableStatsBE(ctx->swrContext, TRUE);
+  ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, TRUE);
+  ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, TRUE);
}
 }
 
@@ -383,10 +383,10 @@ swr_destroy(struct pipe_context *pipe)
 
/* Idle core after destroying buffer resources, but before deleting
 * context.  Destroying resources has potentially called StoreTiles.*/
-   SwrWaitForIdle(ctx->swrContext);
+   ctx->api.pfnSwrWaitForIdle(ctx->swrContext);
 
if (ctx->swrContext)
-  SwrDestroyContext(ctx->swrContext);
+  ctx->api.pfnSwrDestroyContext(ctx->swrContext);
 
delete ctx->blendJIT;
 
@@ -467,6 +467,9 @@ swr_create_context(struct pipe_screen *p_screen, void 
*priv, unsigned flags)
   AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);
memset(ctx, 0, sizeof(struct swr_context));
 
+   SwrGetInterface(ctx->api);
+   ctx->swrDC.pAPI = >api;
+
ctx->blendJIT =
   new std::unordered_map;
 
@@ -478,9 +481,9 @@ swr_create_context(struct pipe_screen *p_screen, void 
*priv, unsigned flags)
createInfo.pfnClearTile = swr_StoreHotTileClear;
createInfo.pfnUpdateStats = swr_UpdateStats;
createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
-   ctx->swrContext = SwrCreateContext();
+   ctx->swrContext = ctx->api.pfnSwrCreateContext();
 
-   SwrInit();
+   ctx->api.pfnSwrInit();
 
if (ctx->swrContext == NULL)
   goto fail;
diff --git a/src/gallium/drivers/swr/swr_context.h 
b/src/gallium/drivers/swr/swr_context.h
index 3ff4bf3..753cbf3 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -102,6 +102,7 @@ struct swr_draw_context {
 
SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
struct swr_query_result *pStats; // @llvm_struct
+   SWR_INTERFACE *pAPI; // @llvm_struct - Needed for the swr_memory callbacks
 };
 
 /* gen_llvm_types FINI */
@@ -169,6 +170,8 @@ struct swr_context {
struct swr_draw_context swrDC;
 
unsigned dirty; /**< Mask of SWR_NEW_x flags */
+
+   SWR_INTERFACE api;
 };
 
 static INLINE struct swr_context *
@@ -182,7 +185,7 @@ swr_update_draw_context(struct swr_context *ctx,
   struct swr_query_result *pqr = nullptr)
 {
swr_draw_context *pDC =
-  (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
+  (swr_draw_context 
*)ctx->api.pfnSwrGetPrivateContextState(ctx->swrContext);
if (pqr)
   ctx->swrDC.pStats = pqr;

[Mesa-dev] [PATCH 0/2] swr: drastically reduce compiled size

2017-07-07 Thread Tim Rowley
These two patches allow us to change how we build and link the swr
driver; details are in the second patch commit message.

Change in disk space:

libGL.so        6464 Kb ->  7000 Kb
libswrAVX.so   10068 Kb ->  5432 Kb
libswrAVX2.so   9828 Kb ->  5200 Kb

Total          26360 Kb -> 17632 Kb

Tim Rowley (2):
  swr: switch to using SwrGetInterface api table
  swr: build driver proper separate from rasterizer

 src/gallium/drivers/swr/Makefile.am | 24 +
 src/gallium/drivers/swr/swr_clear.cpp   |  6 ++---
 src/gallium/drivers/swr/swr_context.cpp | 19 --
 src/gallium/drivers/swr/swr_context.h   |  5 +++-
 src/gallium/drivers/swr/swr_draw.cpp| 46 -
 src/gallium/drivers/swr/swr_fence.cpp   |  2 +-
 src/gallium/drivers/swr/swr_loader.cpp  | 14 +-
 src/gallium/drivers/swr/swr_memory.h|  6 ++---
 src/gallium/drivers/swr/swr_query.cpp   |  8 +++---
 src/gallium/drivers/swr/swr_scratch.cpp |  2 +-
 src/gallium/drivers/swr/swr_screen.cpp  |  3 ++-
 src/gallium/drivers/swr/swr_screen.h|  2 ++
 src/gallium/drivers/swr/swr_state.cpp   | 40 ++--
 13 files changed, 93 insertions(+), 84 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: invalidate attachment on transition change

2017-07-07 Thread Andres Gomez
George, would we want this patch in -stable, or shouldn't we bother?

On Tue, 2017-06-20 at 11:42 -0500, George Kyriazis wrote:
> Consider the following RT attachment order:
> 1. Attach surfaces attachments 0 & 1, and render with them
> 2. Detach 0 & 1
> 3. Re-attach 0 & 1 to different surfaces
> 4. Render with the new attachment
> 
> The definition of a tile being resolved is that local changes have been
> flushed out to the surface, hence there is no need to reload the tile before
> it's written to.  For an invalid tile, the tile has to be reloaded from
> the surface before rendering.
> 
> Stage (2) was marking hot tiles for attachments 0 & 1 as RESOLVED,
> which means that the hot tiles can be written out to memory with no
> need to read them back in (they are "clean").  They need to be marked as
> resolved here, because a surface may be destroyed after a detach, and we
> don't want to have un-resolved tiles that may force a readback from a
> NULL (destroyed) surface.  (Part of a destroy is detach all attachments first)
> 
> Stage (3), during the no att -> att transition, we  need to realize that the
> "new" surface tiles need to be fetched fresh from the new surface, instead
> of using the resolved tiles, that belong to a stale attachment.
> 
> This is done by marking the hot tiles as invalid in stage (3), when we realize
> that a new attachment is being made, so that they are re-fetched during
> rendering in stage (4).
> 
> Also note that hot tiles are indexed by attachment.
> 
> - Fixes VTK dual depth-peeling tests.
> - No piglit changes
> ---
>  src/gallium/drivers/swr/swr_draw.cpp   | 19 +++
>  src/gallium/drivers/swr/swr_resource.h |  4 
>  src/gallium/drivers/swr/swr_state.cpp  |  5 +
>  3 files changed, 28 insertions(+)
> 
> diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
> b/src/gallium/drivers/swr/swr_draw.cpp
> index 03c82a7..ac300e2 100644
> --- a/src/gallium/drivers/swr/swr_draw.cpp
> +++ b/src/gallium/drivers/swr/swr_draw.cpp
> @@ -215,6 +215,25 @@ swr_finish(struct pipe_context *pipe)
> swr_fence_reference(pipe->screen, , NULL);
>  }
>  
> +/*
> + * Invalidate tiles so they can be reloaded back when needed
> + */
> +void
> +swr_invalidate_render_target(struct pipe_context *pipe,
> + uint32_t attachment,
> + uint16_t width, uint16_t height)
> +{
> +   struct swr_context *ctx = swr_context(pipe);
> +
> +   /* grab the rect from the passed in arguments */
> +   swr_update_draw_context(ctx);
> +   SWR_RECT full_rect =
> +  {0, 0, (int32_t)width, (int32_t)height};
> +   SwrInvalidateTiles(ctx->swrContext,
> +  1 << attachment,
> +  full_rect);
> +}
> +
>  
>  /*
>   * Store SWR HotTiles back to renderTarget surface.
> diff --git a/src/gallium/drivers/swr/swr_resource.h 
> b/src/gallium/drivers/swr/swr_resource.h
> index ae9954c..4effd46 100644
> --- a/src/gallium/drivers/swr/swr_resource.h
> +++ b/src/gallium/drivers/swr/swr_resource.h
> @@ -96,6 +96,10 @@ swr_resource_data(struct pipe_resource *resource)
>  }
>  
>  
> +void swr_invalidate_render_target(struct pipe_context *pipe,
> +  uint32_t attachment,
> +  uint16_t width, uint16_t height);
> +
>  void swr_store_render_target(struct pipe_context *pipe,
>   uint32_t attachment,
>   enum SWR_TILE_STATE post_tile_state);
> diff --git a/src/gallium/drivers/swr/swr_state.cpp 
> b/src/gallium/drivers/swr/swr_state.cpp
> index 08549e5..deae4e6 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -933,6 +933,11 @@ swr_change_rt(struct swr_context *ctx,
> * INVALID so they are reloaded from surface. */
>swr_store_render_target(>pipe, attachment, SWR_TILE_INVALID);
>need_fence = true;
> +   } else {
> +  /* if no previous attachment, invalidate tiles that may be marked
> +   * RESOLVED because of an old attachment */
> +  swr_invalidate_render_target(>pipe, attachment, sf->width, 
> sf->height);
> +  /* no need to set fence here */
> }
>  
> /* Make new attachment */
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Stop setting domains to RENDER on EXEC_OBJECT_WRITE

2017-07-07 Thread Chris Wilson
Quoting Jason Ekstrand (2017-07-07 21:37:29)
> The reason we were doing this was to ensure that the kernel did the
> appropriate cross-ring synchronization and flushing.  However, the
> kernel only looks at EXEC_OBJECT_WRITE to determine whether or not to
> insert a fence.  It only cares about the domain for determining whether
> or not it needs to clflush the BO before using it for scanout but the
> domain automatically gets set to RENDER internally by the kernel if
> EXEC_OBJECT_WRITE is set.

Once upon a time we also depended upon EXEC_OBJECT_WRITE for correct
swapout. That was until I saw what you were planning to do for anv. Hmm,
that puts the oldest kernel that might support anv as

commit 51bc140431e233284660b1d22c47dec9ecdb521e [v4.3]
Author: Chris Wilson 
Date:   Mon Aug 31 15:10:39 2015 +0100

drm/i915: Always mark the object as dirty when used by the GPU

> Cc: Chris Wilson 
> ---
>  src/intel/vulkan/anv_batch_chain.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_batch_chain.c 
> b/src/intel/vulkan/anv_batch_chain.c
> index 9def174..9776a45 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -148,9 +148,6 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> struct drm_i915_gem_relocation_entry *entry;
> int index;
>  
> -   const uint32_t domain =
> -  (target_bo->flags & EXEC_OBJECT_WRITE) ? I915_GEM_DOMAIN_RENDER : 0;
> -
> VkResult result = anv_reloc_list_grow(list, alloc, 1);
> if (result != VK_SUCCESS)
>return result;
> @@ -163,8 +160,8 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> entry->delta = delta;
> entry->offset = offset;
> entry->presumed_offset = target_bo->offset;
> -   entry->read_domains = domain;
> -   entry->write_domain = domain;
> +   entry->read_domains = 0;
> +   entry->write_domain = 0;

The first time I saw this I was amazed we let 0 through. It is true that
the kernel only cares about EXEC_OBJECT_WRITE, and doesn't care whether
that is from an execobject.flag or from accumulation of
reloc[].write_domain. (That has been true for all kernels since the
introduction of NORELOC and the EXEC_OBJECT_WRITE flag)  We don't even
use the reloc.write_domain information during reloc itself, so

Reviewed-by: Chris Wilson 
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Stop setting domains to RENDER on EXEC_OBJECT_WRITE

2017-07-07 Thread Jason Ekstrand
The reason we were doing this was to ensure that the kernel did the
appropriate cross-ring synchronization and flushing.  However, the
kernel only looks at EXEC_OBJECT_WRITE to determine whether or not to
insert a fence.  It only cares about the domain for determining whether
or not it needs to clflush the BO before using it for scanout but the
domain automatically gets set to RENDER internally by the kernel if
EXEC_OBJECT_WRITE is set.

Cc: Chris Wilson 
---
 src/intel/vulkan/anv_batch_chain.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index 9def174..9776a45 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -148,9 +148,6 @@ anv_reloc_list_add(struct anv_reloc_list *list,
struct drm_i915_gem_relocation_entry *entry;
int index;
 
-   const uint32_t domain =
-  (target_bo->flags & EXEC_OBJECT_WRITE) ? I915_GEM_DOMAIN_RENDER : 0;
-
VkResult result = anv_reloc_list_grow(list, alloc, 1);
if (result != VK_SUCCESS)
   return result;
@@ -163,8 +160,8 @@ anv_reloc_list_add(struct anv_reloc_list *list,
entry->delta = delta;
entry->offset = offset;
entry->presumed_offset = target_bo->offset;
-   entry->read_domains = domain;
-   entry->write_domain = domain;
+   entry->read_domains = 0;
+   entry->write_domain = 0;
VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
 
return VK_SUCCESS;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

2017-07-07 Thread Matt Arsenault

> On Jul 6, 2017, at 19:02, Connor Abbott  wrote:
> 
> On Thu, Jul 6, 2017 at 6:36 PM, Matt Arsenault  wrote:
>> 
>> On Jul 6, 2017, at 18:31, Connor Abbott  wrote:
>> 
>> After looking into it some more, I think LLVM won't promote allocas to
>> registers at all when there are non-constant indices in the mix, and
>> fixing it seems kinda involved. I guess a better solution for now
>> 
>> 
>> AMDGPUPromoteAlloca does this, but it doesn’t happen very often
> 
> Could we just bump the heuristic on maximum size there to match
> radeonsi? That way we wouldn't need to have these heuristics in
> radeonsi and radv. As-is, we're second-guessing the backend.

There’s also a vague plan to replace this with a machine level pass later that 
has more knowledge of register pressure. The current pass is pretty stupid and 
doesn’t even attempt to decide if it’s a good idea for a specific alloca.

-Matt
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount. (v2)

2017-07-07 Thread Dave Airlie
From: Dave Airlie 

(This patch doesn't seem to work fully; hopefully AMD can tell us
more about the rules and how to calculate the magic value.)

It appears that to get full access to memory bandwidth with MRT
rendering the pro vulkan driver seems to offset each image by 0x3800.
I'm not sure how that value is calculated.

Glenn came up with the idea (probably what -pro does also) of just
offsetting every image in round-robin order, in the hope that apps
would create MRT images in sequence anyway.

This attempts to do that using an atomic counter in the device.

This gets the deferred demo from 800fps->1150fps on my rx480.

(I've tested that dota2 and talos at least still run after this.)

v2: acknowledge it isn't an offset but a tile rotation pattern.
add a quote from evergreen docs
---
 src/amd/vulkan/radv_device.c  |  8 
 src/amd/vulkan/radv_image.c   | 22 ++
 src/amd/vulkan/radv_private.h |  3 +++
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 59efccf..fb15ed6 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2756,16 +2756,16 @@ radv_initialise_color_surface(struct radv_device 
*device,
}
}
 
-   cb->cb_color_base = va >> 8;
+   cb->cb_color_base = (va >> 8) | iview->image->tile_rotate_bits;
 
/* CMASK variables */
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->cmask.offset;
-   cb->cb_color_cmask = va >> 8;
+   cb->cb_color_cmask = (va >> 8) | iview->image->tile_rotate_bits;
 
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
-   cb->cb_dcc_base = va >> 8;
+   cb->cb_dcc_base = (va >> 8) | iview->image->tile_rotate_bits;
 
uint32_t max_slice = radv_surface_layer_count(iview);
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2780,7 +2780,7 @@ radv_initialise_color_surface(struct radv_device *device,
 
if (iview->image->fmask.size) {
va = device->ws->buffer_get_va(iview->bo) + 
iview->image->offset + iview->image->fmask.offset;
-   cb->cb_color_fmask = va >> 8;
+   cb->cb_color_fmask = (va >> 8) | iview->image->tile_rotate_bits;
} else {
cb->cb_color_fmask = cb->cb_color_base;
}
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index b3a223b..b57a7d1 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -31,6 +31,7 @@
 #include "sid.h"
 #include "gfx9d.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
   const struct radv_image_create_info *create_info)
@@ -208,7 +209,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
} else
va += base_level_info->offset;
 
-   state[0] = va >> 8;
+   state[0] = (va >> 8) | image->tile_rotate_bits;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@@ -223,8 +224,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
if (chip_class <= VI)
meta_va += base_level_info->dcc_offset;
state[6] |= S_008F28_COMPRESSION_EN(1);
-   state[7] = meta_va >> 8;
-
+   state[7] = (meta_va >> 8) | image->tile_rotate_bits;
}
}
 
@@ -471,7 +471,7 @@ si_make_texture_descriptor(struct radv_device *device,
num_format = V_008F14_IMG_NUM_FORMAT_UINT;
}
 
-   fmask_state[0] = va >> 8;
+   fmask_state[0] = (va >> 8) | image->tile_rotate_bits;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
S_008F14_DATA_FORMAT_GFX6(fmask_format) |
S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -801,6 +801,20 @@ radv_image_create(VkDevice _device,
image->size = image->surface.surf_size;
image->alignment = image->surface.surf_alignment;
 
+   if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && 
!create_info->scanout) {
+   /*
+* from the evergreen docs -
+* Bits [p-1:0] of this field, where p =
+* log2(numPipes), specify the pipe swizzle. Bits [p+b-
+* 1:p], where b = log2(numBanks) specify the bank
+* swizzle.
+* this may not be correct for GCN gpus.
+   */
+   uint32_t mrt_idx = 
p_atomic_inc_return(>image_mrt_offset_counter) - 1;
+   mrt_idx %= 4;
+   image->tile_rotate_bits = 0x38 * mrt_idx;
+   }
+

Re: [Mesa-dev] [Intel-gfx] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-07 Thread Chris Wilson
Quoting Ben Widawsky (2017-07-07 19:42:25)
> On 17-07-07 11:34:48, Chris Wilson wrote:
> >Quoting Ben Widawsky (2017-07-07 00:27:01)
> >>  drivers/gpu/drm/i915/i915_drv.c |  3 +++
> >>  drivers/gpu/drm/i915/i915_drv.h |  2 ++
> >>  drivers/gpu/drm/i915/i915_pci.c | 13 +
> >>  include/uapi/drm/i915_drm.h |  8 
> >>  4 files changed, 22 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
> >> b/drivers/gpu/drm/i915/i915_drv.c
> >> index 9167a73f3c69..26c27b6ae814 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.c
> >> +++ b/drivers/gpu/drm/i915/i915_drv.c
> >> @@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev, void 
> >> *data,
> >> if (!value)
> >> return -ENODEV;
> >> break;
> >> +   case I915_PARAM_MOCS_TABLE_VERSION:
> >> +   value = INTEL_INFO(dev_priv)->mocs_version;
> >
> >If we use intel_mocs_get_table_version() we can put this magic number
> >in intel_mocs.c next to the tables, where we can keep its history and
> >hopefully be able to remember to update it.
> >
> 
> Yeah, that seems like an improvement to me as well.
> 
> >> +/* What version of the MOCS table we have. For GEN9 GPUs, the PRM defined
> >> + * non-optimal settings for the MOCS table. As a result, we were required 
> >> to use a
> >> + * small subset, and later add new settings. This param allows userspace 
> >> to
> >> + * determine which settings are there.
> >> + */
> >> +#define MOCS_TABLE_VERSION   1 /* Build time MOCS table 
> >> version */
> >
> >How are you planning to share this? When we update we bump this number,
> >and then mesa copies it across and uses it after verifying it as 0,1 on
> >an old kernel.
> >
> >I don't think you want to expose the updated constant here, but symbolic
> >names for each version? (What would be the point?)
> >
> 
> At least one thing wrong here is we would need per GEN constants, which is 
> maybe
> what you meant and I misunderstood. Assuming you had per GEN constants, which 
> I
> don't like, I believe everything works out fine. So, I'll remove this compile
> time MOCS versioning.

I figured you were going towards per-gen versioning, which is kind of
why I liked the idea of table size -- but that only makes sense if
somehow the index has the same meaning across gen (which it won't).

> >Next question, why a version number and not just the number of entries
> >defined? Each index is defined by ABI once assigned, so the number of
> >entries still operates as a version number and allows easy checking.
> >
> >   if (advanced_cacheing_idx < kernel_max_mocs)
> >   return advanced_cacheing_idx;
> >   if (default_cacheing_idx < kernel_max_mocs)
> >   return default_cacheing_idx;
> >
> >   return follow_pte_idx;
> >
> >give or take the smarts to choose the preferred indices for any
> >particular scenario.
> >
> >In the future, if we finally get user defined mocs, the table_size will
> >then give the start of the user modifiable indices (presuming they want
> >to keep the predefined entries for compatibility?))
> >-Chris
> 
> Yes, I considered this as well. I see no difference really as to one versus 
> the
> other. In fact, if you're to support multiple table versions, I think it's
> actually easier with a pure version:
> 
> switch (kernel_mocs_version) {
> case 3:
> return new_best_cacheing_index;
> case 2:
> return old_best_cacheing_index;
> case 1:
> return naive_best_index;
> }

Indeed 6 of one, half a dozen of the other. Whichever you pick, 3 years
down the line you wish you picked the other. The big advantage of using
an absolute version is that you can just stuff these into tables. Ok, I
like that more, a version parameter (that may be per-gen) worksforme.
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: Add compute htile clear for combined depth+stencil surfaces.

2017-07-07 Thread Dave Airlie
On 8 July 2017 at 04:53, Bas Nieuwenhuizen  wrote:
> Figured out the clear value when we have a combined depth stencil
> surface.

That corresponds to what I've read,

When you have depth, it's two 14-bit values and 8-bits,
When you have depth/stencil it's 14-bit value + 6-bit delta + 8-bit stencil.

Reviewed-by: Dave Airlie 

>
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/amd/vulkan/radv_meta_clear.c | 16 +++-
>  1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index bd979973e71..7f3cfdccc86 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -708,17 +708,15 @@ emit_fast_htile_clear(struct radv_cmd_buffer 
> *cmd_buffer,
> if (clear_rect->layerCount != iview->image->info.array_size)
> goto fail;
>
> -   /* Don't do stencil clears till we have figured out if the clear 
> words are
> -* correct. */
> -   if (vk_format_aspects(iview->image->vk_format) & 
> VK_IMAGE_ASPECT_STENCIL_BIT)
> +   if ((clear_value.depth != 0.0 && clear_value.depth != 1.0) || 
> !(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
> goto fail;
>
> -   if (clear_value.depth == 1.0)
> -   clear_word = 0xfff0;
> -   else if (clear_value.depth == 0.0)
> -   clear_word = 0;
> -   else
> -   goto fail;
> +   if (vk_format_aspects(iview->image->vk_format) & 
> VK_IMAGE_ASPECT_STENCIL_BIT) {
> +   if (clear_value.stencil != 0 || !(aspects & 
> VK_IMAGE_ASPECT_STENCIL_BIT))
> +   goto fail;
> +   clear_word = clear_value.depth ? 0xfffc : 0;
> +   } else
> +   clear_word = clear_value.depth ? 0xfff0 : 0;
>
> if (pre_flush) {
> cmd_buffer->state.flush_bits |= 
> (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
> --
> 2.13.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount.

2017-07-07 Thread Dave Airlie
On 8 July 2017 at 04:07, Christian König  wrote:
> Am 07.07.2017 um 18:51 schrieb Marek Olšák:
>>
>> On Fri, Jul 7, 2017 at 11:18 AM, Christian König
>>  wrote:
>>>
>>> What tiling format do the destination textures have?
>>>
>>> Sounds like the offset is just added so that we distribute memory
>>> accesses
>>> more equally over memory channels.
>>
>> You can't set an offset that is not aligned. The hardware ignores the
>> low unaligned bits, so they have a different meaning. They specify
>> pipe and bank rotation for macro tiling. It's like a state. It
>> basically rotates the tile pattern.
>
>
> Yeah, I know. That's what I meant with distributing memory accesses more
> equally over all channels. The lower bits select a memory bank swizzle IIRC.
>
> I've tried years ago with R600 if shuffling them randomly could improve
> performance, but MRT wasn't widely used and/or supported at that time.

I'd known this and forgotten, the public CIK docs say bits 0..7 must be zero,
but I have older docs which had more info. It would be nice if we could get
proper docs released for the bottom bits considering AMD are using them in their
drivers.

It would be good to know what registers have the bits that matter (i.e. BASE,
FMASK, CMASK, DCC, and resource descriptors.)

Then I suppose we'd need to know the algorithm for programming them, and
if we need to make any allocations bigger in order to do so.

I expect this only starts to matter when we hit memory bandwidth limits,
the deferred demo does 3 MRT, one depth at 2kx2k then samples from those
down to 1280x720 displayed. This combined with a 3 instanced 57k vertex
draw seemed to be enough to see the pain. (Maybe a GL example doing something
similar might show the problem for radeonsi).

The other open question I have is whether this only matters for MRT, or
whether texture sampling also gets some boost from it; my hack patch does
it only for surfaces which will end up attached to the CB.

I'll update the patch to not call it an offset but name them the tile
rotation bits.

Thanks,
Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] st/dri: list __DRI2_FENCE extension only where needed

2017-07-07 Thread Alex Deucher
On Fri, Jul 7, 2017 at 2:47 PM, Emil Velikov  wrote:
> From: Emil Velikov 
>
> The extension should be present (if applicable) in the list returned by
> getExtensions(). AFAICT no loader has ever looked for it in
> __driDriverExtensions/__driDriverGetExtensions.
>
> Signed-off-by: Emil Velikov 

Not really an expert, but the series seems sane to me:
Reviewed-by: Alex Deucher 

> ---
>  src/gallium/state_trackers/dri/dri2.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/dri/dri2.c 
> b/src/gallium/state_trackers/dri/dri2.c
> index 6a977636ea8..3bde9b35c02 100644
> --- a/src/gallium/state_trackers/dri/dri2.c
> +++ b/src/gallium/state_trackers/dri/dri2.c
> @@ -2241,7 +2241,6 @@ const __DRIextension *galliumdrm_driver_extensions[] = {
>  ,
>  ,
>  _config_options.base,
> -,
>  NULL
>  };
>
> --
> 2.13.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101464] PrimitiveRestartNV inside a render list causes a crash

2017-07-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101464

Brian Paul  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Brian Paul  ---
I've committed a patch based on your suggestion (commit
f5c8bb1e00f358e05ed21f8ed69c9fc3803bf95f) plus a patch for another failure
case (commit 9ac55e8219e1f6abeab3c779c8fe710c2bc25f2b).
Closing this bug.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Add compute htile clear for combined depth+stencil surfaces.

2017-07-07 Thread Bas Nieuwenhuizen
Figured out the clear value when we have a combined depth stencil
surface.

Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/vulkan/radv_meta_clear.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index bd979973e71..7f3cfdccc86 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -708,17 +708,15 @@ emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
if (clear_rect->layerCount != iview->image->info.array_size)
goto fail;
 
-   /* Don't do stencil clears till we have figured out if the clear words 
are
-* correct. */
-   if (vk_format_aspects(iview->image->vk_format) & 
VK_IMAGE_ASPECT_STENCIL_BIT)
+   if ((clear_value.depth != 0.0 && clear_value.depth != 1.0) || !(aspects 
& VK_IMAGE_ASPECT_DEPTH_BIT))
goto fail;
 
-   if (clear_value.depth == 1.0)
-   clear_word = 0xfff0;
-   else if (clear_value.depth == 0.0)
-   clear_word = 0;
-   else
-   goto fail;
+   if (vk_format_aspects(iview->image->vk_format) & 
VK_IMAGE_ASPECT_STENCIL_BIT) {
+   if (clear_value.stencil != 0 || !(aspects & 
VK_IMAGE_ASPECT_STENCIL_BIT))
+   goto fail;
+   clear_word = clear_value.depth ? 0xfffc : 0;
+   } else
+   clear_word = clear_value.depth ? 0xfff0 : 0;
 
if (pre_flush) {
cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB 
|
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] i915: remove local vblank_mode option

2017-07-07 Thread Emil Velikov
From: Emil Velikov 

Analogous to previous commit.

Signed-off-by: Emil Velikov 
---
 src/mesa/drivers/dri/i915/intel_screen.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i915/intel_screen.c 
b/src/mesa/drivers/dri/i915/intel_screen.c
index 686cbf5ec12..882b139ff6a 100644
--- a/src/mesa/drivers/dri/i915/intel_screen.c
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -47,7 +47,6 @@ static const __DRIconfigOptionsExtension i915_config_options 
= {
 
 DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
-  DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
   /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
* DRI_CONF_BO_REUSE_ALL
*/
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] swrast: add dri2ConfigQueryExtension to the correct extension list

2017-07-07 Thread Emil Velikov
From: Emil Velikov 

The extension should be in the list as returned by getExtensions().
Seems to have gone unnoticed since close to nobody wants to change the
vblank mode for the software driver.

Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Emil Velikov 
---
 src/mesa/drivers/dri/swrast/swrast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/swrast/swrast.c 
b/src/mesa/drivers/dri/swrast/swrast.c
index e66b2257de8..79d1346938d 100644
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -208,6 +208,7 @@ static const __DRI2rendererQueryExtension 
swrast_query_renderer_extension = {
 static const __DRIextension *dri_screen_extensions[] = {
 ,
 _query_renderer_extension.base,
+,
 NULL
 };
 
@@ -964,7 +965,6 @@ static const __DRIextension *swrast_driver_extensions[] = {
 ,
 ,
 ,
-,
 _vtable.base,
 NULL
 };
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] st/dri: list __DRI2_FENCE extension only where needed

2017-07-07 Thread Emil Velikov
From: Emil Velikov 

The extension should be present (if applicable) in the list returned by
getExtensions(). AFAICT no loader has ever looked for it in
__driDriverExtensions/__driDriverGetExtensions.

Signed-off-by: Emil Velikov 
---
 src/gallium/state_trackers/dri/dri2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index 6a977636ea8..3bde9b35c02 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -2241,7 +2241,6 @@ const __DRIextension *galliumdrm_driver_extensions[] = {
 ,
 ,
 _config_options.base,
-,
 NULL
 };
 
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] i965: remove local vblank_mode option

2017-07-07 Thread Emil Velikov
From: Emil Velikov 

The option is only queried from the loader, which has access to the
dri "common" one in src/mesa/drivers/dri/common/.

One could grant the loader access to brw_config_options but even
then, having the same option in both places is not a good idea.

Signed-off-by: Emil Velikov 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index c75f2125d4b..4cc328b6f5e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -55,7 +55,6 @@ static const __DRIconfigOptionsExtension brw_config_options = 
{
.xml =
 DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
-  DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
   /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
* DRI_CONF_BO_REUSE_ALL
*/
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] radeon: remove local vblank_mode option

2017-07-07 Thread Emil Velikov
From: Emil Velikov 

Analogous to previous commits.

Signed-off-by: Emil Velikov 
---
 src/mesa/drivers/dri/radeon/radeon_screen.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 79e388988a0..b097aef33f1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -78,7 +78,6 @@ DRI_CONF_BEGIN
 DRI_CONF_SECTION_PERFORMANCE
 DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
 DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
-DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
 DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
 DRI_CONF_HYPERZ("false")
 DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
@@ -106,7 +105,6 @@ DRI_CONF_BEGIN
 DRI_CONF_SECTION_PERFORMANCE
 DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
 DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
-DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
 DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
 DRI_CONF_HYPERZ("false")
 DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] st/dri: use correct __DRI2_CONFIG_QUERY extension

2017-07-07 Thread Emil Velikov
From: Emil Velikov 

If device supports robustness (AKA PIPE_CAP_DEVICE_RESET_STATUS_QUERY)
then we're using dri_robust_screen_extensions.
Hence for such cases the DRI loader was not able to query the driver
options.

This went unnoticed since only r600/radeonsi drivers have the feature.
At the same time neither of them has a local option that the loader
cares about.

Fixes: ff2978b4494 ("st/dri: Allow dri users to query also driver
options")
Cc: Thomas Hellstrom 
Cc: Marek Olšák 
Signed-off-by: Emil Velikov 
---
Perhaps we might want to use it in swrast? If so we'll need to move the
extension to dri_extensions.[ch] first.
---
 src/gallium/state_trackers/dri/dri2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index 60ec38d8e44..6a977636ea8 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1995,7 +1995,7 @@ static const __DRIextension 
*dri_robust_screen_extensions[] = {
,
,
,
-   ,
+   ,
,
,
,
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-07 Thread Ben Widawsky

On 17-07-07 09:23:26, Jason Ekstrand wrote:

On Fri, Jul 7, 2017 at 3:34 AM, Chris Wilson 
wrote:


Quoting Ben Widawsky (2017-07-07 00:27:01)
>  drivers/gpu/drm/i915/i915_drv.c |  3 +++
>  drivers/gpu/drm/i915/i915_drv.h |  2 ++
  drivers/gpu/drm/i915/i915_pci.c | 13 +
>  include/uapi/drm/i915_drm.h |  8 
>  4 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
b/drivers/gpu/drm/i915/i915_drv.c
> index 9167a73f3c69..26c27b6ae814 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev,
void *data,
> if (!value)
> return -ENODEV;
> break;
> +   case I915_PARAM_MOCS_TABLE_VERSION:
> +   value = INTEL_INFO(dev_priv)->mocs_version;

If we use intel_mocs_get_table_version() we can put this magic number
in intel_mocs.c next to the tables, where we can keep its history and
hopefully be able to remember to update it.

> +/* What version of the MOCS table we have. For GEN9 GPUs, the PRM
defined
> + * non-optimal settings for the MOCS table. As a result, we were
required to use a
> + * small subset, and later add new settings. This param allows
userspace to
> + * determine which settings are there.
> + */
> +#define MOCS_TABLE_VERSION   1 /* Build time MOCS table
version */

How are you planning to share this? When we update we bump this number,
and then mesa copies it across and uses it after verifying it as 0,1 on
an old kernel.



Agreed.  I don't see how having a #define for compile-time mocs version is
useful.  The compile-time version doesn't really matter and we wouldn't
want to use that in i965/anv anyway (more on that in the other patch).




I think we're all agreed here.


I don't think you want to expose the updated constant here, but symbolic
names for each version? (What would be the point?)

Next question, why a version number and not just the number of entries
defined? Each index is defined by ABI once assigned, so the number of
entries still operates as a version number and allows easy checking.

if (advanced_cacheing_idx < kernel_max_mocs)
return advanced_cacheing_idx;
if (default_cacheing_idx < kernel_max_mocs)
return default_cacheing_idx;

return follow_pte_idx;

give or take the smarts to choose the preferred indices for any
particular scenario.



I'll have to think about it a bit more but this sounds like a fairly good
idea.  I see two major benefits:

1. The kernel can return ARRAY_SIZE(mocs_table_for_your_gen) and we will
never forget to update it.
2. It makes the "does this MOCS value exist" check much easier.  I imagine
future userspace code which chooses mocs values having some sort of "try
and fall back" approach to making MOCS choices and this would be convenient.

That said, having it be a version may have its advantages, I just don't
know what they are yet.

--Jason


Please direct comments to my response to Chris if you have more. To me it's 6
one way, half dozen the other - and I believe version has a more direct meaning
(and the ability to potentially, albeit a terrible idea, rewrite entries).

If people are going to block a review based on this, I will change it, but I'd
rather not.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-07 Thread Ben Widawsky

On 17-07-07 11:34:48, Chris Wilson wrote:

Quoting Ben Widawsky (2017-07-07 00:27:01)

 drivers/gpu/drm/i915/i915_drv.c |  3 +++
 drivers/gpu/drm/i915/i915_drv.h |  2 ++
 drivers/gpu/drm/i915/i915_pci.c | 13 +
 include/uapi/drm/i915_drm.h |  8 
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9167a73f3c69..26c27b6ae814 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
if (!value)
return -ENODEV;
break;
+   case I915_PARAM_MOCS_TABLE_VERSION:
+   value = INTEL_INFO(dev_priv)->mocs_version;


If we use intel_mocs_get_table_version() we can put this magic number
in intel_mocs.c next to the tables, where we can keep its history and
hopefully be able to remember to update it.



Yeah, that seems like an improvement to me as well.


+/* What version of the MOCS table we have. For GEN9 GPUs, the PRM defined
+ * non-optimal settings for the MOCS table. As a result, we were required to 
use a
+ * small subset, and later add new settings. This param allows userspace to
+ * determine which settings are there.
+ */
+#define MOCS_TABLE_VERSION   1 /* Build time MOCS table version */


How are you planning to share this? When we update we bump this number,
and then mesa copies it across and uses it after verifying it as 0,1 on
an old kernel.

I don't think you want to expose the updated constant here, but symbolic
names for each version? (What would be the point?)



At least one thing wrong here is we would need per GEN constants, which is maybe
what you meant and I misunderstood. Assuming you had per GEN constants, which I
don't like, I believe everything works out fine. So, I'll remove this compile
time MOCS versioning.


Next question, why a version number and not just the number of entries
defined? Each index is defined by ABI once assigned, so the number of
entries still operates as a version number and allows easy checking.

if (advanced_cacheing_idx < kernel_max_mocs)
return advanced_cacheing_idx;
if (default_cacheing_idx < kernel_max_mocs)
return default_cacheing_idx;

return follow_pte_idx;

give or take the smarts to choose the preferred indices for any
particular scenario.

In the future, if we finally get user defined mocs, the table_size will
then give the start of the user modifiable indices (presuming they want
to keep the predefined entries for compatibility?))
-Chris


Yes, I considered this as well. I see no difference really as to one versus the
other. In fact, if you're to support multiple table versions, I think it's
actually easier with a pure version:

switch (kernel_mocs_version) {
case 3:
return new_best_cacheing_index;
case 2:
return old_best_cacheing_index;
case 1:
return naive_best_index;
}
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: finish implementing glPrimitiveRestartNV() for display lists

2017-07-07 Thread Charmaine Lee

>From: Brian Paul 
>Sent: Friday, July 7, 2017 7:10 AM
>To: mesa-dev@lists.freedesktop.org
>Cc: Charmaine Lee; Neha Bhende; Olivier Lauffenburger
>Subject: [PATCH 2/2] mesa: finish implementing glPrimitiveRestartNV() for 
>display lists

>If we try to build a display list with just a glPrimitiveRestartNV()
>call, we'd crash because of a null GLvertexformat::PrimitiveRestartNV
>pointer.  This change fixes that case.

>The previous patch fixed the case of calling glPrimitiveRestartNV()
>inside a glBegin/End pair.
>---
> src/mesa/main/dlist.c | 27 ++-
> 1 file changed, 26 insertions(+), 1 deletion(-)

>diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 9e817be..6e334fd 100644
>--- a/src/mesa/main/dlist.c
>+++ b/src/mesa/main/dlist.c
>@@ -325,7 +325,8 @@ typedef enum
>OPCODE_STENCIL_FUNC_SEPARATE,
>OPCODE_STENCIL_OP_SEPARATE,
>OPCODE_STENCIL_MASK_SEPARATE,
>-
>+   /* GL_NV_primitive_restart */
>+   OPCODE_PRIMITIVE_RESTART_NV,
>/* GL_ARB_shader_objects */
>OPCODE_USE_PROGRAM,
>OPCODE_UNIFORM_1F,
>@@ -6095,6 +6096,24 @@ save_VertexAttrib4fvARB(GLuint index, const GLfloat * v)
> }

> static void GLAPIENTRY
>+save_PrimitiveRestartNV(void)
>+{
>+   /* Note: this is used when outside a glBegin/End pair in a display list */
>+   GET_CURRENT_CONTEXT(ctx);
>+   Node *n;
>+   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
>+   n = alloc_instruction(ctx, OPCODE_PRIMITIVE_RESTART_NV, 0);
>+   if (n) {
>+  /* nothing */
>+   }

Can you remove this if clause since it is not doing anything?

>+   if (ctx->ExecuteFlag) {
>+  CALL_PrimitiveRestartNV(ctx->Exec, ());
>+   }
>+
>+}
>+
>+
>+static void GLAPIENTRY
> save_BlitFramebufferEXT(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
>GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
> GLbitfield mask, GLenum filter)
>@@ -8670,6 +8689,10 @@ execute_list(struct gl_context *ctx, GLuint list)
> n[5].i, n[6].i, n[7].i, 
> n[8].i,
> n[9].i, n[10].e));
> break;
>+ case OPCODE_PRIMITIVE_RESTART_NV:
>+CALL_PrimitiveRestartNV(ctx->Exec, ());
>+break;
>+
>  case OPCODE_USE_PROGRAM:
> CALL_UseProgram(ctx->Exec, (n[1].ui));
> break;
>@@ -10460,6 +10483,8 @@ save_vtxfmt_init(GLvertexformat * vfmt)
>vfmt->VertexAttrib3fvARB = save_VertexAttrib3fvARB;
>vfmt->VertexAttrib4fARB = save_VertexAttrib4fARB;
>vfmt->VertexAttrib4fvARB = save_VertexAttrib4fvARB;
>+
>+   vfmt->PrimitiveRestartNV = save_PrimitiveRestartNV;
> }


For this series,  Reviewed-by: Charmaine Lee 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium: introduce PIPE_CAP_MEMOBJ v2

2017-07-07 Thread Andres Rodriguez
This can be used to guard support for EXT_memory_object and related
extensions.

v2: update gallium docs

Signed-off-by: Andres Rodriguez 
---
 src/gallium/docs/source/screen.rst   | 1 +
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 16 files changed, 16 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 32da228..30bce0c 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -396,6 +396,7 @@ The integer capabilities:
   ``TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE`` is supported.
 * ``PIPE_CAP_BINDLESS_TEXTURE``: Whether bindless texture operations are
   supported.
+* ``PIPE_CAP_MEMOBJ``: Whether operations on memory objects are supported.
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index eefb51c..718f0ac 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -258,6 +258,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index a915d65..6c9c011 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -321,6 +321,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 4ad98e2..3919978 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -310,6 +310,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index e98e30d..7959015 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -356,6 +356,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index a352ff5..14d1b1a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -220,6 +220,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 8bbe403..571df1c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -301,6 +301,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index 5cdb248..37d102b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ 

Re: [Mesa-dev] [PATCH 13/25] gallium: introduce PIPE_CAP_MEMOBJ

2017-07-07 Thread Andres Rodriguez

Thanks Ilia, I'll get that fixed up.

Regards,
Andres

On 2017-07-07 09:53 AM, Ilia Mirkin wrote:

This is missing the docs addition for what this cap does
(gallium/docs/source/screen.rst).

On Fri, Jul 7, 2017 at 12:24 AM, Andres Rodriguez  wrote:

This can be used to guard support for EXT_memory_object and related
extensions.

Signed-off-by: Andres Rodriguez 
---
  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
  src/gallium/drivers/i915/i915_screen.c   | 1 +
  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
  src/gallium/drivers/r300/r300_screen.c   | 1 +
  src/gallium/drivers/r600/r600_pipe.c | 1 +
  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
  src/gallium/drivers/softpipe/sp_screen.c | 1 +
  src/gallium/drivers/svga/svga_screen.c   | 1 +
  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
  src/gallium/drivers/vc4/vc4_screen.c | 1 +
  src/gallium/drivers/virgl/virgl_screen.c | 1 +
  src/gallium/include/pipe/p_defines.h | 1 +
  15 files changed, 15 insertions(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index eefb51c..718f0ac 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -258,6 +258,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 case PIPE_CAP_POST_DEPTH_COVERAGE:
 case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;

 /* Stream output. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index a915d65..6c9c011 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -321,6 +321,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 case PIPE_CAP_POST_DEPTH_COVERAGE:
 case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
 return 0;

 case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 4ad98e2..3919978 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -310,6 +310,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
 case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;

 case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index e98e30d..7959015 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -356,6 +356,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 case PIPE_CAP_POST_DEPTH_COVERAGE:
 case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;
 }
 /* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index a352ff5..14d1b1a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -220,6 +220,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
 case PIPE_CAP_POST_DEPTH_COVERAGE:
 case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;

 case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 8bbe403..571df1c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -301,6 +301,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_INT64_DIVMOD:
 case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
 case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;

 case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index 5cdb248..37d102b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -242,6 +242,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
  case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
  case PIPE_CAP_POST_DEPTH_COVERAGE:
  case 

[Mesa-dev] Mesa 17.2.0 release plan

2017-07-07 Thread Emil Velikov
Hi all,

As you may have noticed, for a little while now we've had the release
plan on the mesa3d.org website [1].

Here is the current tentative schedule.

 Jul 21 2017 - Feature freeze/Release candidate 1
 Jul 28 2017 - Release candidate 2
 Aug 04 2017 - Release candidate 3
 Aug 11 2017 - Release candidate 4/final release

This gives us approximately 2 weeks to get new features in.

As always, please let me know of any must-have features that you'd like
to merge before the branch point.

Thanks
Emil

[1] https://www.mesa3d.org/release-calendar.html
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount.

2017-07-07 Thread Christian König

Am 07.07.2017 um 18:51 schrieb Marek Olšák:

On Fri, Jul 7, 2017 at 11:18 AM, Christian König
 wrote:

What tiling format have the destination textures?

Sounds like the offset is just added so that we distribute memory accesses
more equally over memory channels.

You can't set an offset that is not aligned. The hardware ignores the
low unaligned bits, so they have a different meaning. They specify
pipe and bank rotation for macro tiling. It's like a state. It
basically rotates the tile pattern.


Yeah, I know. That's what I meant with distributing memory accesses more 
equally over all channels. The lower bits select a memory bank swizzle IIRC.


I've tried years ago with R600 if shuffling them randomly could improve 
performance, but MRT wasn't widely used and/or supported at that time.


Christian.



Marek



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] svga: adjust line subpixel position for HWv8

2017-07-07 Thread Charmaine Lee

Reviewed-by: Charmaine Lee 

From: Brian Paul 
Sent: Wednesday, July 5, 2017 7:56 PM
To: mesa-dev@lists.freedesktop.org
Cc: Charmaine Lee; Neha Bhende
Subject: [PATCH] svga: adjust line subpixel position for HWv8

This fixes two regressions on HWv8:
  Piglit gl-1.0-ortho-pos
  Piglit/glean fbo
This was caused by commit c2b92dada076a "svga: clamp device line width
to at least 1 to fix HWv8 line stippling"

This also fixes two conform tests: Vertex Order and Polygon Face

No Piglit/conform changes with HWv9 or later.

VMware bug 1905053
---
 src/gallium/drivers/svga/svga_state_framebuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c 
b/src/gallium/drivers/svga/svga_state_framebuffer.c
index bb92c54..c52b7ee 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -561,7 +561,7 @@ emit_viewport( struct svga_context *svga,
 break;
  case PIPE_PRIM_LINES:
 adjust_x = -0.5;
-adjust_y = 0;
+adjust_y = -0.125;
 break;
  case PIPE_PRIM_TRIANGLES:
 adjust_x = -0.5;
--
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] vbo: rename target->index in loopback code

2017-07-07 Thread Charmaine Lee

Series looks good.

Reviewed-by: Charmaine Lee 

From: Brian Paul 
Sent: Friday, July 7, 2017 7:11 AM
To: mesa-dev@lists.freedesktop.org
Cc: Charmaine Lee; Neha Bhende
Subject: [PATCH 3/3] vbo: rename target->index in loopback code

Because it's a vertex attribute index.
---
 src/mesa/vbo/vbo_save_loopback.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c
index 773af93..1dae91b 100644
--- a/src/mesa/vbo/vbo_save_loopback.c
+++ b/src/mesa/vbo/vbo_save_loopback.c
@@ -37,7 +37,7 @@
 #include "vbo_context.h"


-typedef void (*attr_func)(struct gl_context *ctx, GLint target, const GLfloat 
*);
+typedef void (*attr_func)(struct gl_context *ctx, GLint index, const GLfloat 
*);


 /* This file makes heavy use of the aliasing of NV vertex attributes
@@ -45,30 +45,30 @@ typedef void (*attr_func)(struct gl_context *ctx, GLint 
target, const GLfloat *)
  * attributes as currently implemented.
  */
 static void
-VertexAttrib1fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib1fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib1fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib1fvNV(ctx->Exec, (index, v));
 }


 static void
-VertexAttrib2fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib2fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib2fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib2fvNV(ctx->Exec, (index, v));
 }


 static void
-VertexAttrib3fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib3fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib3fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib3fvNV(ctx->Exec, (index, v));
 }


 static void
-VertexAttrib4fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib4fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib4fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib4fvNV(ctx->Exec, (index, v));
 }


@@ -81,7 +81,7 @@ static attr_func vert_attrfunc[4] = {


 struct loopback_attr {
-   GLint target;
+   GLint index;
GLint sz;
attr_func func;
 };
@@ -127,7 +127,7 @@ loopback_prim(struct gl_context *ctx,
   const GLfloat *tmp = data + la[0].sz;

   for (k = 1; k < nr; k++) {
- la[k].func(ctx, la[k].target, tmp);
+ la[k].func(ctx, la[k].index, tmp);
  tmp += la[k].sz;
   }

@@ -184,7 +184,7 @@ vbo_loopback_vertex_list(struct gl_context *ctx,
 */
for (i = 0; i < VBO_ATTRIB_MAX; i++) {
   if (attrsz[i]) {
- la[nr].target = i;
+ la[nr].index = i;
  la[nr].sz = attrsz[i];
  la[nr].func = vert_attrfunc[attrsz[i]-1];
  nr++;
--
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: remove unused st_framebuffer::Private field

2017-07-07 Thread Charmaine Lee

Reviewed-by: Charmaine Lee 

From: Brian Paul 
Sent: Friday, July 7, 2017 7:11 AM
To: mesa-dev@lists.freedesktop.org
Cc: Charmaine Lee; Neha Bhende
Subject: [PATCH] st/mesa: remove unused st_framebuffer::Private field

---
 src/mesa/state_tracker/st_context.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 5c7c58d..af9149e 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -295,7 +295,6 @@ static inline struct st_context *st_context(struct 
gl_context *ctx)
 struct st_framebuffer
 {
struct gl_framebuffer Base;
-   void *Private;

struct st_framebuffer_iface *iface;
enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
--
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/dri: always initialize 'opencl_func_mutex' in struct dri_screen

2017-07-07 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, Jul 6, 2017 at 10:28 PM, Aleksander Morgado
 wrote:
> The 'opencl_func_mutex' is destroyed in dri_destroy_screen_helper()
> unconditionally, so make sure it is always initialized to avoid any
> undefined behaviour.
>
> Signed-off-by: Aleksander Morgado 
> ---
>  src/gallium/state_trackers/dri/dri2.c  | 1 +
>  src/gallium/state_trackers/dri/drisw.c | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/src/gallium/state_trackers/dri/dri2.c 
> b/src/gallium/state_trackers/dri/dri2.c
> index 60ec38d8e4..f089dfbbed 100644
> --- a/src/gallium/state_trackers/dri/dri2.c
> +++ b/src/gallium/state_trackers/dri/dri2.c
> @@ -2121,6 +2121,7 @@ dri_kms_init_screen(__DRIscreen * sPriv)
>
> screen->sPriv = sPriv;
> screen->fd = sPriv->fd;
> +   (void) mtx_init(&screen->opencl_func_mutex, mtx_plain);
>
> sPriv->driverPrivate = (void *)screen;
>
> diff --git a/src/gallium/state_trackers/dri/drisw.c 
> b/src/gallium/state_trackers/dri/drisw.c
> index 189d61c4cb..83de388b31 100644
> --- a/src/gallium/state_trackers/dri/drisw.c
> +++ b/src/gallium/state_trackers/dri/drisw.c
> @@ -393,6 +393,7 @@ drisw_init_screen(__DRIscreen * sPriv)
>
> screen->sPriv = sPriv;
> screen->fd = -1;
> +   (void) mtx_init(&screen->opencl_func_mutex, mtx_plain);
>
> swrast_no_present = debug_get_option_swrast_no_present();
>
> --
> 2.13.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: remove unused st_framebuffer::Private field

2017-07-07 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jul 7, 2017 at 4:11 PM, Brian Paul  wrote:
> ---
>  src/mesa/state_tracker/st_context.h | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_context.h 
> b/src/mesa/state_tracker/st_context.h
> index 5c7c58d..af9149e 100644
> --- a/src/mesa/state_tracker/st_context.h
> +++ b/src/mesa/state_tracker/st_context.h
> @@ -295,7 +295,6 @@ static inline struct st_context *st_context(struct 
> gl_context *ctx)
>  struct st_framebuffer
>  {
> struct gl_framebuffer Base;
> -   void *Private;
>
> struct st_framebuffer_iface *iface;
> enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] [rfc] radv: offset images by a differing amount.

2017-07-07 Thread Marek Olšák
On Fri, Jul 7, 2017 at 11:18 AM, Christian König
 wrote:
> What tiling format have the destination textures?
>
> Sounds like the offset is just added so that we distribute memory accesses
> more equally over memory channels.

You can't set an offset that is not aligned. The hardware ignores the
low unaligned bits, so they have a different meaning. They specify
pipe and bank rotation for macro tiling. It's like a state. It
basically rotates the tile pattern.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH 3/3] intel: Make driver aware of MOCS table version

2017-07-07 Thread Jason Ekstrand
On Thu, Jul 6, 2017 at 4:27 PM, Ben Widawsky  wrote:

> We don't yet have optimal MOCS settings, but we have enough to know how
> to at least determine when we might have non-optimal settings within our
> driver.
>
> Signed-off-by: Ben Widawsky 
> ---
>  src/intel/vulkan/anv_device.c | 12 
>  src/intel/vulkan/anv_private.h|  2 ++
>  src/mesa/drivers/dri/i915/intel_context.c |  7 ++-
>  src/mesa/drivers/dri/i965/intel_screen.c  | 14 ++
>  src/mesa/drivers/dri/i965/intel_screen.h  |  2 ++
>  5 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 3dc55dbb8d..8e180dbf18 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -368,6 +368,18 @@ anv_physical_device_init(struct anv_physical_device
> *device,
>   device->info.max_cs_threads = max_cs_threads;
> }
>
> +   if (device->info.gen >= 9) {
> +  device->mocs_version = anv_gem_get_param(fd,
> +
>  I915_PARAM_MOCS_TABLE_VERSION);
> +  switch (device->mocs_version) {
> +  default:
> + anv_perf_warn("Kernel exposes newer MOCS table\n");
>

A perf_warn here seems reasonable though it makes more sense to me to make
it

if (device->mocs_version > ANV_MAX_KNOWN_MOCS_VERSION)
   anv_perf_warn("...");


> +  case 1:
> +  case 0:
> + device->mocs_version = MOCS_TABLE_VERSION;
>

Why are we stomping device->mocs_version to MOCS_TABLE_VERSION?  Are you
just trying to avoid the version 0?  If so, why not just have

/* If the MOCS_TABLE_VERSION query fails, assume version 1 */
if (device->mocs_version == 0)
   device->mocs_version = 1;

I don't think we want to have it dependent on a #define in an external
header file.  What if someone updates it for i965 and doesn't update anv or
vice-versa?


> +  }
> +   }
> +
> brw_process_intel_debug_variable();
>
> device->compiler = brw_compiler_create(NULL, &device->info);
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index 573778dad5..b8241a9b22 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -684,6 +684,8 @@ struct anv_physical_device {
>  uint32_teu_total;
>  uint32_tsubslice_total;
>
> +uint8_t mocs_version;
> +
>  struct {
>uint32_t  type_count;
>struct anv_memory_type
> types[VK_MAX_MEMORY_TYPES];
> diff --git a/src/mesa/drivers/dri/i915/intel_context.c
> b/src/mesa/drivers/dri/i915/intel_context.c
> index e0766a0e3f..9169ea650e 100644
> --- a/src/mesa/drivers/dri/i915/intel_context.c
> +++ b/src/mesa/drivers/dri/i915/intel_context.c
> @@ -521,8 +521,13 @@ intelInitContext(struct intel_context *intel,
> INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"),
> debug_control);
> if (INTEL_DEBUG & DEBUG_BUFMGR)
>dri_bufmgr_set_debug(intel->bufmgr, true);
> -   if (INTEL_DEBUG & DEBUG_PERF)
> +   if (INTEL_DEBUG & DEBUG_PERF) {
>intel->perf_debug = true;
> +  if (screen->mocs_version > MOCS_TABLE_VERSION) {
> + fprintf(stderr, "Kernel exposes newer MOCS table\n");
> + screen->mocs_version = MOCS_TABLE_VERSION;
> +  }
> +   }
>
> if (INTEL_DEBUG & DEBUG_AUB)
>drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true);
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index c75f2125d4..c53f133d49 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -2301,6 +2301,20 @@ __DRIconfig **intelInitScreen2(__DRIscreen
> *dri_screen)
>   (ret != -1 || errno != EINVAL);
> }
>
> +   if (devinfo->gen >= 9) {
> +  screen->mocs_version = intel_get_integer(screen,
> +
>  I915_PARAM_MOCS_TABLE_VERSION);
> +  switch (screen->mocs_version) {
> +  case 1:
> +  case 0:
> + screen->mocs_version = MOCS_TABLE_VERSION;
>

Same comments apply here.


> + break;
> +  default:
> + /* We want to perf debug, but we can't yet */
> + break;
> +  }
> +   }
> +
> dri_screen->extensions = !screen->has_context_reset_notification
>? screenExtensions : intelRobustScreenExtensions;
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.h
> b/src/mesa/drivers/dri/i965/intel_screen.h
> index f78b3e8f74..eb801f8155 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.h
> +++ b/src/mesa/drivers/dri/i965/intel_screen.h
> @@ -112,6 +112,8 @@ struct intel_screen
> bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
> bool mesa_format_supports_render[MESA_FORMAT_COUNT];
> enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];
> +
> +   unsigned mocs_version;
>  };
>
>  extern void intelDestroyContext(__DRIcontext 

Re: [Mesa-dev] [Intel-gfx] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-07 Thread Jason Ekstrand
On Fri, Jul 7, 2017 at 3:34 AM, Chris Wilson 
wrote:

> Quoting Ben Widawsky (2017-07-07 00:27:01)
> >  drivers/gpu/drm/i915/i915_drv.c |  3 +++
> >  drivers/gpu/drm/i915/i915_drv.h |  2 ++
> >  drivers/gpu/drm/i915/i915_pci.c | 13 +
> >  include/uapi/drm/i915_drm.h |  8 
> >  4 files changed, 22 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> > index 9167a73f3c69..26c27b6ae814 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev,
> void *data,
> > if (!value)
> > return -ENODEV;
> > break;
> > +   case I915_PARAM_MOCS_TABLE_VERSION:
> > +   value = INTEL_INFO(dev_priv)->mocs_version;
>
> If we use intel_mocs_get_table_version() we can put this magic number
> in intel_mocs.c next to the tables, where we can keep its history and
> hopefully be able to remember to update it.
>
> > +/* What version of the MOCS table we have. For GEN9 GPUs, the PRM
> defined
> > + * non-optimal settings for the MOCS table. As a result, we were
> required to use a
> > + * small subset, and later add new settings. This param allows
> userspace to
> > + * determine which settings are there.
> > + */
> > +#define MOCS_TABLE_VERSION   1 /* Build time MOCS table
> version */
>
> How are you planning to share this? When we update we bump this number,
> and then mesa copies it across and uses it after verifying it as 0,1 on
> an old kernel.
>

Agreed.  I don't see how having a #define for compile-time mocs version is
useful.  The compile-time version doesn't really matter and we wouldn't
want to use that in i965/anv anyway (more on that in the other patch).


> I don't think you want to expose the updated constant here, but symbolic
> names for each version? (What would be the point?)
>
> Next question, why a version number and not just the number of entries
> defined? Each index is defined by ABI once assigned, so the number of
> entries still operates as a version number and allows easy checking.
>
> if (advanced_cacheing_idx < kernel_max_mocs)
> return advanced_cacheing_idx;
> if (default_cacheing_idx < kernel_max_mocs)
> return default_cacheing_idx;
>
> return follow_pte_idx;
>
> give or take the smarts to choose the preferred indices for any
> particular scenario.
>

I'll have to think about it a bit more but this sounds like a fairly good
idea.  I see two major benefits:

 1. The kernel can return ARRAY_SIZE(mocs_table_for_your_gen) and we will
never forget to update it.
 2. It makes the "does this MOCS value exist" check much easier.  I imagine
future userspace code which chooses mocs values having some sort of "try
and fall back" approach to making MOCS choices and this would be convenient.

That said, having it be a version may have its advantages, I just don't
know what they are yet.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: xfb_stride applies to buffers, not block members

2017-07-07 Thread Juan A. Suarez Romero
When we have an interface block like:

layout (xfb_buffer = 0, xfb_offset = 0) out Block {
 vec4 var1;
layout (xfb_stride = 48) vec4 var2;
 vec4 var3;
};

According to ARB_enhanced_layouts spec:

   "The *xfb_stride* qualifier specifies how many bytes are consumed by
each captured vertex.  It applies to the transform feedback buffer
for that declaration, whether it is inherited or explicitly
declared. It can be applied to variables, blocks, block members, or
just the qualifier out. [ ...] While *xfb_stride* can be declared
multiple times for the same buffer, it is a compile-time or
link-time error to have different values specified for the stride
for the same buffer."

This means xfb_stride actually applies to the buffer, and not to the
individual components.

In the above example, it means that var2 consumes 16 bytes, and var3 is
at offset 32.

This has been confirmed also by John Kessenich, the main contact for the
ARB_enhanced_layouts specs, and also because this commit fixes:

GL45.enhanced_layouts.xfb_block_member_stride

This commit is in practice a revert of 598790e8564 (glsl: apply
xfb_stride to implicit offsets for ifc block members).
---
 src/compiler/glsl/ast_to_hir.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index c338ad7..3968657 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -7372,14 +7372,13 @@ ast_process_struct_or_iface_block_members(exec_list 
*instructions,
qual->offset, &xfb_offset)) {
fields[i].offset = xfb_offset;
block_xfb_offset = fields[i].offset +
-  MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
+  4 * field_type->component_slots();
 }
  } else {
 if (layout && layout->flags.q.explicit_xfb_offset) {
unsigned align = field_type->is_64bit() ? 8 : 4;
fields[i].offset = glsl_align(block_xfb_offset, align);
-   block_xfb_offset +=
-  MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
+   block_xfb_offset += 4 * field_type->component_slots();
 }
  }
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Review request: [PATCH] Fix grabbing the wrong variant if glDrawPixels is called

2017-07-07 Thread Ilia Mirkin
Hi Matias,

You may want to review

https://www.mesa3d.org/submittingpatches.html

Particularly the "mailing patches" bit of it.

Cheers,

  -ilia


On Fri, Jul 7, 2017 at 11:41 AM, Matias N. Goldberg
 wrote:
> Hi!
>
> I just subscribed to this dev list.
>
>
> I wrote this patch (copy at the end of this email)
>
> https://bugs.freedesktop.org/attachment.cgi?id=132462&action=edit
>
> in order to fix bug Bug 101596 - Blender renders black UI elements
> (https://bugs.freedesktop.org/show_bug.cgi?id=101596)
>
> Note that this bug may not only affect Mesa.
>
>
> I am asking for this patch to be reviewed for inclusion in Mesa.
>
>
> Thanks
>
> Matias
>
>
> From 3db888f8645acd5d41b689ee6522d465bcf71044 Mon Sep 17 00:00:00 2001
> Message-Id:
> <3db888f8645acd5d41b689ee6522d465bcf71044.1499274200.git.dark_syl...@yahoo.com.ar>
> From: "Matias N. Goldberg" 
> Date: Wed, 5 Jul 2017 14:02:50 -0300
> Subject: [PATCH] Fix grabbing the wrong variant if glDrawPixels is called
>
> By design pixel shaders can have up to 3 variants:
> * The standard one.
> * glDrawPixels variant.
> * glBitmap variant.
> However "shader_has_one_variant" ignores this fact, and therefore
> st_update_fp would select the wrong variant if glDrawPixels or glBitmap
> was ever called.
>
> This patch fixes the problem. If the standard variant has been created,
> calling glDrawPixels or glBitmap will append the variant to the second
> entry of the linked list, so that st_update_fp still selects the right
> one if shader_has_one_variant is set.
>
> If the standard variant hasn't been created yet and glDrawPixel/Bitmap
> has been called, st_update_fp will see this and take the slow path
> instead. The standard variant will then be added at the front of the
> linked list, so that the next time the fast path is taken.
>
> Blender in particular is hit by this bug.
>
> Fixes https://bugs.freedesktop.org/show_bug.cgi?id=101596
> ---
>  src/mesa/state_tracker/st_atom_shader.c |  4 +++-
>  src/mesa/state_tracker/st_program.c | 23 ---
>  2 files changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_atom_shader.c
> b/src/mesa/state_tracker/st_atom_shader.c
> index c1869d323b..07cf54f555 100644
> --- a/src/mesa/state_tracker/st_atom_shader.c
> +++ b/src/mesa/state_tracker/st_atom_shader.c
> @@ -108,7 +108,9 @@ st_update_fp( struct st_context *st )
> if (st->shader_has_one_variant[MESA_SHADER_FRAGMENT] &&
> !stfp->ati_fs && /* ATI_fragment_shader always has multiple variants
> */
> !stfp->Base.ExternalSamplersUsed && /* external samplers need
> variants */
> -   stfp->variants) {
> +   stfp->variants &&
> +   !stfp->variants->key.drawpixels &&
> +   !stfp->variants->key.bitmap ) {
>shader = stfp->variants->driver_shader;
> } else {
>memset(&key, 0, sizeof(key));
> diff --git a/src/mesa/state_tracker/st_program.c
> b/src/mesa/state_tracker/st_program.c
> index 6de61741dc..86faf5982d 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -1322,9 +1322,26 @@ st_get_fp_variant(struct st_context *st,
>/* create new */
>fpv = st_create_fp_variant(st, stfp, key);
>if (fpv) {
> - /* insert into list */
> - fpv->next = stfp->variants;
> - stfp->variants = fpv;
> + if( key->bitmap || key->drawpixels ) {
> +/* Regular variants should always come before the
> +   bitmap & drawpixels variants, (unless there
> +   are no regular variants) so that
> +   st_update_fp can take a fast path when
> +   shader_has_one_variant is set.
> +*/
> +/* insert into list */
> +if( !stfp->variants ) {
> +   fpv->next = stfp->variants;
> +   stfp->variants = fpv;
> +} else {
> +   fpv->next = stfp->variants->next;
> +   stfp->variants->next = fpv;
> +}
> + } else {
> +/* insert into list */
> +fpv->next = stfp->variants;
> +stfp->variants = fpv;
> + }
>}
> }
>
> --
> 2.11.0
>
>
> IMPORTANT: The information contained in this email may be commercially
> sensitive and/or legally privileged. It is intended solely for the person(s)
> to whom it is addressed. If the reader of this message is not the intended
> recipient, you are on notice of its status and hereby notified that your
> access is unauthorized, and any review, dissemination, distribution,
> disclose or copying of this message including any attachments is strictly
> prohibited. Please notify the sender immediately by reply e-mail and then
> delete this message from your system.
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

[Mesa-dev] Review request: [PATCH] Fix grabbing the wrong variant if glDrawPixels is called

2017-07-07 Thread Matias N. Goldberg
Hi!

I just subscribed to this dev list.

I wrote this patch (copy at the end of this email)

https://bugs.freedesktop.org/attachment.cgi?id=132462&action=edit

in order to fix bug Bug 101596 - Blender renders black UI elements
(https://bugs.freedesktop.org/show_bug.cgi?id=101596)

Note that this bug may not only affect Mesa.
I am asking for this patch to be reviewed for inclusion in Mesa.

Thanks
Matias

From 3db888f8645acd5d41b689ee6522d465bcf71044 Mon Sep 17 00:00:00 2001
Message-Id: 
<3db888f8645acd5d41b689ee6522d465bcf71044.1499274200.git.dark_syl...@yahoo.com.ar>
From: "Matias N. Goldberg" 
Date: Wed, 5 Jul 2017 14:02:50 -0300
Subject: [PATCH] Fix grabbing the wrong variant if glDrawPixels is called

By design pixel shaders can have up to 3 variants:
* The standard one.
* glDrawPixels variant.
* glBitmap variant.
However "shader_has_one_variant" ignores this fact, and therefore
st_update_fp would select the wrong variant if glDrawPixels or glBitmap
was ever called.

This patch fixes the problem. If the standard variant has been created,
calling glDrawPixels or glBitmap will append the variant to the second
entry of the linked list, so that st_update_fp still selects the right
one if shader_has_one_variant is set.

If the standard variant hasn't been created yet and glDrawPixel/Bitmap
has been called, st_update_fp will see this and take the slow path
instead. The standard variant will then be added at the front of the
linked list, so that the next time the fast path is taken.

Blender in particular is hit by this bug.

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=101596
---
 src/mesa/state_tracker/st_atom_shader.c |  4 +++-
 src/mesa/state_tracker/st_program.c | 23 ---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_shader.c 
b/src/mesa/state_tracker/st_atom_shader.c
index c1869d323b..07cf54f555 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -108,7 +108,9 @@ st_update_fp( struct st_context *st )
if (st->shader_has_one_variant[MESA_SHADER_FRAGMENT] &&
!stfp->ati_fs && /* ATI_fragment_shader always has multiple variants */
!stfp->Base.ExternalSamplersUsed && /* external samplers need variants 
*/
-   stfp->variants) {
+   stfp->variants &&
+   !stfp->variants->key.drawpixels &&
+   !stfp->variants->key.bitmap ) {
   shader = stfp->variants->driver_shader;
} else {
   memset(, 0, sizeof(key));
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 6de61741dc..86faf5982d 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -1322,9 +1322,26 @@ st_get_fp_variant(struct st_context *st,
   /* create new */
   fpv = st_create_fp_variant(st, stfp, key);
   if (fpv) {
- /* insert into list */
- fpv->next = stfp->variants;
- stfp->variants = fpv;
+ if( key->bitmap || key->drawpixels ) {
+/* Regular variants should always come before the
+   bitmap & drawpixels variants, (unless there
+   are no regular variants) so that
+   st_update_fp can take a fast path when
+   shader_has_one_variant is set.
+*/
+/* insert into list */
+if( !stfp->variants ) {
+   fpv->next = stfp->variants;
+   stfp->variants = fpv;
+} else {
+   fpv->next = stfp->variants->next;
+   stfp->variants->next = fpv;
+}
+ } else {
+/* insert into list */
+fpv->next = stfp->variants;
+stfp->variants = fpv;
+ }
   }
}
 
-- 
2.11.0

IMPORTANT: The information contained in this email may be commercially
sensitive and/or legally privileged. It is intended solely for the person(s) to
whom it is addressed. If the reader of this message is not the intended
recipient, you are on notice of its status and hereby notified that your access
is unauthorized, and any review, dissemination, distribution, disclosure or
copying of this message including any attachments is strictly prohibited.
Please notify the sender immediately by reply e-mail and then delete this
message from your system.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] i965: Resolve framebuffers before signaling the fence

2017-07-07 Thread Chris Wilson
>From KHR_fence_sync:

  When the condition of the sync object is satisfied by the fence
  command, the sync is signaled by the associated client API context,
  causing any eglClientWaitSyncKHR commands (see below) blocking on
  <sync> to unblock. The only condition currently supported is
  EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
  completion of the fence command corresponding to the sync object,
  and all preceding commands in the associated client API context's
  command stream. The sync object will not be signaled until all
  effects from these commands on the client API's internal and
  framebuffer state are fully realized. No other state is affected by
  execution of the fence command.

If clients are passing the fence fd (from EGL_ANDROID_native_fence_sync)
to a compositor, that fence must only be signaled once the framebuffer
is resolved and not before as is currently the case.

v2: fixup assert to use GL_SYNC_GPU_COMMANDS_COMPLETE (Chad)

Reported-by: Sergi Granell 
Fixes: c636284ee8ee ("i965/sync: Implement DRI2_Fence extension")
Signed-off-by: Chris Wilson 
Cc: Sergi Granell 
Cc: Rob Clark 
Cc: Chad Versace 
Cc: Daniel Stone 
Cc: Kenneth Graunke 
Reviewed-by: Chad Versace 
---
 src/mesa/drivers/dri/i965/brw_sync.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_sync.c 
b/src/mesa/drivers/dri/i965/brw_sync.c
index a8356c304f..c78d670ed0 100644
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ b/src/mesa/drivers/dri/i965/brw_sync.c
@@ -110,6 +110,34 @@ brw_fence_finish(struct brw_fence *fence)
 static bool MUST_CHECK
 brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
 {
+   __DRIcontext *driContext = brw->driContext;
+   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
+
+   /*
+* From KHR_fence_sync:
+*
+*   When the condition of the sync object is satisfied by the fence
+*   command, the sync is signaled by the associated client API context,
+*   causing any eglClientWaitSyncKHR commands (see below) blocking on
+*   <sync> to unblock. The only condition currently supported is
+*   EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
+*   completion of the fence command corresponding to the sync object,
+*   and all preceding commands in the associated client API context's
+*   command stream. The sync object will not be signaled until all
+*   effects from these commands on the client API's internal and
+*   framebuffer state are fully realized. No other state is affected by
+*   execution of the fence command.
+*
+* Note the emphasis there on ensuring that the framebuffer is fully
+* realised before the fence is signaled. We cannot just flush the batch,
+* but must also resolve the drawable first. The importance of this is,
+* for example, in creating a fence for a frame to be passed to a
+* remote compositor. Otherwise, the resolve will be in a following batch
+* (when the client finally calls SwapBuffers, or forces a resolve via
+* some other path) and the compositor may read the framebuffer beforehand.
+*/
+   if (driDrawable)
+  intel_resolve_for_dri2_flush(brw, driDrawable);
brw_emit_mi_flush(brw);
 
switch (fence->type) {
@@ -335,6 +363,8 @@ brw_gl_fence_sync(struct gl_context *ctx, struct 
gl_sync_object *_sync,
struct brw_context *brw = brw_context(ctx);
struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
 
+   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
+
brw_fence_init(brw, >fence, BRW_FENCE_TYPE_BO_WAIT);
 
if (!brw_fence_insert_locked(brw, >fence)) {
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965: perf: use new subslices numbers from device info

2017-07-07 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Reviewed-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 49 ---
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index a394bb7082e..768f37cc589 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1826,26 +1826,25 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100;
brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100;
brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
+   brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices;
+   /* Assuming uniform distribution of subslices per slices. */
+   brw->perfquery.sys_vars.n_eu_sub_slices = devinfo->num_subslices[0];
 
if (devinfo->is_haswell) {
+  brw->perfquery.sys_vars.slice_mask = 0;
+  brw->perfquery.sys_vars.subslice_mask = 0;
+
+  for (int s = 0; s < devinfo->num_slices; s++)
+ brw->perfquery.sys_vars.slice_mask |= 1U << s;
+  for (int ss = 0; ss < devinfo->num_subslices[0]; ss++)
+ brw->perfquery.sys_vars.subslice_mask |= 1U << ss;
+
   if (devinfo->gt == 1) {
  brw->perfquery.sys_vars.n_eus = 10;
- brw->perfquery.sys_vars.n_eu_slices = 1;
- brw->perfquery.sys_vars.n_eu_sub_slices = 1;
- brw->perfquery.sys_vars.slice_mask = 0x1;
- brw->perfquery.sys_vars.subslice_mask = 0x1;
   } else if (devinfo->gt == 2) {
  brw->perfquery.sys_vars.n_eus = 20;
- brw->perfquery.sys_vars.n_eu_slices = 1;
- brw->perfquery.sys_vars.n_eu_sub_slices = 2;
- brw->perfquery.sys_vars.slice_mask = 0x1;
- brw->perfquery.sys_vars.subslice_mask = 0x3;
   } else if (devinfo->gt == 3) {
  brw->perfquery.sys_vars.n_eus = 40;
- brw->perfquery.sys_vars.n_eu_slices = 2;
- brw->perfquery.sys_vars.n_eu_sub_slices = 2;
- brw->perfquery.sys_vars.slice_mask = 0x3;
- brw->perfquery.sys_vars.subslice_mask = 0xf;
   } else
  unreachable("not reached");
} else {
@@ -1854,29 +1853,15 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
   int ret;
   int slice_mask = 0;
   int ss_mask = 0;
-  int s_max = devinfo->num_slices; /* maximum number of slices */
-  int ss_max = 0; /* maximum number of subslices per slice */
+  /* maximum number of slices */
+  int s_max = devinfo->num_slices;
+  /* maximum number of subslices per slice (assuming uniform subslices per
+   * slices)
+   */
+  int ss_max = devinfo->num_subslices[0];
   uint64_t subslice_mask = 0;
   int s;
 
-  if (devinfo->gen == 8) {
- if (devinfo->gt == 1) {
-ss_max = 2;
- } else {
-ss_max = 3;
- }
-  } else if (devinfo->gen == 9) {
- /* XXX: beware that the kernel (as of writing) actually works as if
-  * ss_max == 4 since the HW register that reports the global subslice
-  * mask has 4 bits while in practice the limit is 3. It's also
-  * important that we initialize $SubsliceMask with 3 bits per slice
-  * since that's what the counter availability expressions in XML
-  * expect.
-  */
- ss_max = 3;
-  } else
- return false;
-
   gp.param = I915_PARAM_SLICE_MASK;
   gp.value = _mask;
   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, );
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] intel: add number of subslices to device info

2017-07-07 Thread Lionel Landwerlin
We could have used a single integer to store that value, but
Cannonlake has a different number of subslices per slice depending on
the GT.

v2: Add CFL subslice numbers (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Ben Widawsky 
---
 src/intel/common/gen_device_info.c | 57 --
 src/intel/common/gen_device_info.h |  5 
 2 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/src/intel/common/gen_device_info.c 
b/src/intel/common/gen_device_info.c
index f008b76ea47..c0eb7c3c356 100644
--- a/src/intel/common/gen_device_info.c
+++ b/src/intel/common/gen_device_info.c
@@ -21,15 +21,18 @@
  * IN THE SOFTWARE.
  */
 
+#include 
 #include 
 #include 
 #include "gen_device_info.h"
 #include "compiler/shader_enums.h"
+#include "util/macros.h"
 
 static const struct gen_device_info gen_device_info_i965 = {
.gen = 4,
.has_negative_rhw_bug = true,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 4,
.max_vs_threads = 16,
.max_gs_threads = 2,
@@ -47,6 +50,7 @@ static const struct gen_device_info gen_device_info_g4x = {
.has_surface_tile_offset = true,
.is_g4x = true,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 5,
.max_vs_threads = 32,
.max_gs_threads = 2,
@@ -63,6 +67,7 @@ static const struct gen_device_info gen_device_info_ilk = {
.has_compr4 = true,
.has_surface_tile_offset = true,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 6,
.max_vs_threads = 72,
.max_gs_threads = 32,
@@ -82,6 +87,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 = 
{
.has_surface_tile_offset = true,
.needs_unlit_centroid_workaround = true,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 6, /* Not confirmed */
.max_vs_threads = 24,
.max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
@@ -108,6 +114,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 
= {
.has_surface_tile_offset = true,
.needs_unlit_centroid_workaround = true,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 6, /* Not confirmed */
.max_vs_threads = 60,
.max_gs_threads = 60,
@@ -137,6 +144,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 
= {
 static const struct gen_device_info gen_device_info_ivb_gt1 = {
GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 6,
.l3_banks = 2,
.max_vs_threads = 36,
@@ -163,6 +171,7 @@ static const struct gen_device_info gen_device_info_ivb_gt1 
= {
 static const struct gen_device_info gen_device_info_ivb_gt2 = {
GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
 * @max_wm_threads ... */
.l3_banks = 4,
@@ -190,6 +199,7 @@ static const struct gen_device_info gen_device_info_ivb_gt2 
= {
 static const struct gen_device_info gen_device_info_byt = {
GEN7_FEATURES, .is_baytrail = true, .gt = 1,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 8,
.l3_banks = 1,
.has_llc = false,
@@ -223,6 +233,7 @@ static const struct gen_device_info gen_device_info_byt = {
 static const struct gen_device_info gen_device_info_hsw_gt1 = {
HSW_FEATURES, .gt = 1,
.num_slices = 1,
+   .num_subslices = { 1, },
.num_thread_per_eu = 7,
.l3_banks = 2,
.max_vs_threads = 70,
@@ -249,6 +260,7 @@ static const struct gen_device_info gen_device_info_hsw_gt1 
= {
 static const struct gen_device_info gen_device_info_hsw_gt2 = {
HSW_FEATURES, .gt = 2,
.num_slices = 1,
+   .num_subslices = { 2, },
.num_thread_per_eu = 7,
.l3_banks = 4,
.max_vs_threads = 280,
@@ -275,6 +287,7 @@ static const struct gen_device_info gen_device_info_hsw_gt2 
= {
 static const struct gen_device_info gen_device_info_hsw_gt3 = {
HSW_FEATURES, .gt = 3,
.num_slices = 2,
+   .num_subslices = { 2, },
.num_thread_per_eu = 7,
.l3_banks = 8,
.max_vs_threads = 280,
@@ -318,6 +331,7 @@ static const struct gen_device_info gen_device_info_bdw_gt1 
= {
GEN8_FEATURES, .gt = 1,
.is_broadwell = true,
.num_slices = 1,
+   .num_subslices = { 2, },
.num_thread_per_eu = 7,
.l3_banks = 2,
.max_cs_threads = 42,
@@ -340,6 +354,7 @@ static const struct gen_device_info gen_device_info_bdw_gt2 
= {
GEN8_FEATURES, .gt = 2,
.is_broadwell = true,
.num_slices = 1,
+   .num_subslices = { 3, },
.num_thread_per_eu = 7,
.l3_banks = 4,
.max_cs_threads = 56,
@@ -362,6 +377,7 @@ static const struct gen_device_info gen_device_info_bdw_gt3 
= {
GEN8_FEATURES, .gt = 3,
.is_broadwell = true,
.num_slices = 2,
+   .num_subslices = { 3, 3, },
.num_thread_per_eu = 7,
.l3_banks = 8,

[Mesa-dev] [PATCH 1/3] i965: Use already existing eu_total

2017-07-07 Thread Lionel Landwerlin
From: Ben Widawsky 

Reduces IOCTL calls by 1, and provides a centralized place to override
such configurations if we have a need to do so.

Signed-off-by: Ben Widawsky 
Reviewed-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 81389dbd3e3..a394bb7082e 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1852,7 +1852,6 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
   __DRIscreen *screen = brw->screen->driScrnPriv;
   drm_i915_getparam_t gp;
   int ret;
-  int n_eus = 0;
   int slice_mask = 0;
   int ss_mask = 0;
   int s_max = devinfo->num_slices; /* maximum number of slices */
@@ -1878,12 +1877,6 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
   } else
  return false;
 
-  gp.param = I915_PARAM_EU_TOTAL;
-  gp.value = _eus;
-  ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, );
-  if (ret)
- return false;
-
   gp.param = I915_PARAM_SLICE_MASK;
   gp.value = _mask;
   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, );
@@ -1896,7 +1889,7 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
   if (ret)
  return false;
 
-  brw->perfquery.sys_vars.n_eus = n_eus;
+  brw->perfquery.sys_vars.n_eus = brw->screen->eu_total;
   brw->perfquery.sys_vars.n_eu_slices = __builtin_popcount(slice_mask);
   brw->perfquery.sys_vars.slice_mask = slice_mask;
 
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] Intel: subslices/eus computation in perf query

2017-07-07 Thread Lionel Landwerlin
Hi,

This is an already reviewed series (through IRC), updating what I've
sent earlier this week [1], just to have people comment publicly. If
no comments in a couple of (working) days, I'll push it.

Cheers,

[1] : https://patchwork.freedesktop.org/series/26894/

Ben Widawsky (1):
  i965: Use already existing eu_total

Lionel Landwerlin (2):
  intel: add number of subslices to device info
  i965: perf: use new subslices numbers from device info

 src/intel/common/gen_device_info.c| 57 ++
 src/intel/common/gen_device_info.h|  5 ++
 src/mesa/drivers/dri/i965/brw_performance_query.c | 58 +++
 3 files changed, 72 insertions(+), 48 deletions(-)

--
2.13.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/11] egl/wayland: Remove duplicate wl_buffer creation code

2017-07-07 Thread Daniel Stone
Hi,

On 28 June 2017 at 16:35, Lucas Stach  wrote:
> Am Freitag, den 16.06.2017, 18:14 +0100 schrieb Daniel Stone:
>> if (dri2_dpy->capabilities & WL_DRM_CAPABILITY_PRIME) {
>> +  struct wl_drm *wl_drm =
>> + dri2_surf ? dri2_surf->wl_drm_wrapper : dri2_dpy->wl_drm;
>
> This and...
>
>>int stride, fd;
>>
>>dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, 
>> );
>>dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FD, );
>> -  ret = wl_drm_create_prime_buffer(dri2_surf->wl_drm_wrapper,
>> -   fd, width, height, fourcc, 0, stride,
>> -   0, 0, 0, 0);
>> +  ret = wl_drm_create_prime_buffer(wl_drm, fd, width, height, fourcc, 0,
>> +   stride, 0, 0, 0, 0);
>>close(fd);
>> } else {
>> +  struct wl_drm *wl_drm =
>> + dri2_surf ? dri2_surf->wl_drm_wrapper : dri2_dpy->wl_drm;
>
> .. this can be moved out of the if/else path to get rid of even more
> duplication.

Sure. I deliberately left them lowered though, so it couldn't be in
scope in the dmabuf branch. Happy to change it if you feel strongly
about it though.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Review request: [PATCH] Fix grabbing the wrong variant if glDrawPixels is called

2017-07-07 Thread Matias N. Goldberg
Hi! I just subscribed to this dev list.
I wrote this patch (copy at the end of this email):
https://bugs.freedesktop.org/attachment.cgi?id=132462&action=edit
in order to fix Bug 101596 - Blender renders black UI elements
(https://bugs.freedesktop.org/show_bug.cgi?id=101596). Note that this bug may
not only affect Mesa.
I am asking for this patch to be reviewed for inclusion in Mesa.

Thanks
Matias

>From 3db888f8645acd5d41b689ee6522d465bcf71044 Mon Sep 17 00:00:00 2001
Message-Id: 
<3db888f8645acd5d41b689ee6522d465bcf71044.1499274200.git.dark_syl...@yahoo.com.ar>
From: "Matias N. Goldberg" 
Date: Wed, 5 Jul 2017 14:02:50 -0300
Subject: [PATCH] Fix grabbing the wrong variant if glDrawPixels is called

By design pixel shaders can have up to 3 variants:
* The standard one.
* glDrawPixels variant.
* glBitmap variant.
However "shader_has_one_variant" ignores this fact, and therefore
st_update_fp would select the wrong variant if glDrawPixels or glBitmap
was ever called.

This patch fixes the problem. If the standard variant has been created,
calling glDrawPixels or glBitmap will append the variant to the second
entry of the linked list, so that st_update_fp still selects the right
one if shader_has_one_variant is set.

If the standard variant hasn't been created yet and glDrawPixel/Bitmap
has been called, st_update_fp will see this and take the slow path
instead. The standard variant will then be added at the front of the
linked list, so that the next time the fast path is taken.

Blender in particular is hit by this bug.

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=101596
---
 src/mesa/state_tracker/st_atom_shader.c |  4 +++-
 src/mesa/state_tracker/st_program.c | 23 ---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_shader.c 
b/src/mesa/state_tracker/st_atom_shader.c
index c1869d323b..07cf54f555 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -108,7 +108,9 @@ st_update_fp( struct st_context *st )
if (st->shader_has_one_variant[MESA_SHADER_FRAGMENT] &&
!stfp->ati_fs && /* ATI_fragment_shader always has multiple variants */
!stfp->Base.ExternalSamplersUsed && /* external samplers need variants 
*/
-   stfp->variants) {
+   stfp->variants &&
+   !stfp->variants->key.drawpixels &&
+   !stfp->variants->key.bitmap ) {
   shader = stfp->variants->driver_shader;
} else {
   memset(, 0, sizeof(key));
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 6de61741dc..86faf5982d 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -1322,9 +1322,26 @@ st_get_fp_variant(struct st_context *st,
   /* create new */
   fpv = st_create_fp_variant(st, stfp, key);
   if (fpv) {
- /* insert into list */
- fpv->next = stfp->variants;
- stfp->variants = fpv;
+ if( key->bitmap || key->drawpixels ) {
+/* Regular variants should always come before the
+   bitmap & drawpixels variants, (unless there
+   are no regular variants) so that
+   st_update_fp can take a fast path when
+   shader_has_one_variant is set.
+*/
+/* insert into list */
+if( !stfp->variants ) {
+   fpv->next = stfp->variants;
+   stfp->variants = fpv;
+} else {
+   fpv->next = stfp->variants->next;
+   stfp->variants->next = fpv;
+}
+ } else {
+/* insert into list */
+fpv->next = stfp->variants;
+stfp->variants = fpv;
+ }
   }
}
 
-- 
2.11.0

IMPORTANT: The information contained in this email may be commercially
sensitive and/or legally privileged. It is intended solely for the person(s) to
whom it is addressed. If the reader of this message is not the intended
recipient, you are on notice of its status and hereby notified that your access
is unauthorized, and any review, dissemination, distribution, disclosure or
copying of this message including any attachments is strictly prohibited.
Please notify the sender immediately by reply e-mail and then delete this
message from your system.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/dri: add 32-bit RGBX/RGBA formats

2017-07-07 Thread Rob Herring
On Wed, Jul 5, 2017 at 5:14 PM, Chad Versace  wrote:
> On Fri 30 Jun 2017, Rob Herring wrote:
>> Add support for 32-bit RGBX/RGBA formats which are required for Android.
>>
>> The original patch (commit ccdcf91104a5) was reverted (commit
>> c0c6ca40a25e) in mesa as it broke GLX resulting in swapped colors. Based
>> on further investigation by Chad Versace, moving the RGBX/RGBA configs
>> to the end is enough to prevent breaking GLX.
>>
>> Cc: Marek Olšák 
>> Cc: Eric Anholt 
>> Cc: Chad Versace 
>> Cc: Mauro Rossi 
>> Signed-off-by: Rob Herring 
>> ---
>> I've tested only on Android and could use help testing with KDE which
>> broke last time. This has been done on the Intel driver and *should* be
>> okay, but maybe not.
>
> Should this patch also update the switch statement in
> dri2.c:dri2_drawable_get_buffers()? I think so, but am not certain.

I don't know. At least for Android, I think we'd always take the
dri_image_drawable_get_buffers path which already has the formats.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/dri: add 32-bit RGBX/RGBA formats

2017-07-07 Thread Marek Olšák
On Thu, Jul 6, 2017 at 12:22 AM, Chad Versace  wrote:
> On Sat 01 Jul 2017, Marek Olšák wrote:
>> Hi Rob,
>>
>> It would be better to have a flag passed from libEGL to st/dri saying
>> that it's OK to expose those formats. I wouldn't like to have GLX
>> visuals that are unusable in practice because X doesn't support that
>> component ordering.
>
> At least on Intel, the equivalent patch exposed no new GLX visuals. When I
> investigated with gdb, Mesa's GLX code matches internal visual to X server
> visuals by examining just the bitsize of each channel, and chooses the first
> matching internal visual as the winner, ignoring all other visuals. As long as
> the BGRA and BGRX internal visuals precede the RGBA and RGBX ones, then the
> user never sees them.

OK, you've convinced me:

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/8] mesa: check for allocation failures in _mesa_new_texture_object()

2017-07-07 Thread Andres Gomez
On Fri, 2017-07-07 at 09:14 +0200, Samuel Pitoiset wrote:
> 
> On 07/06/2017 08:06 PM, Andres Gomez wrote:
> > It looks like we could want patches 1 and 3-8 from this series into
> > -stable (?)
> 
> These are not critical fixes, your call.

OK. I will leave them out for now, then.

Thanks for the feedback! ☺

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/4] radeonsi: fix DCC fast clear for luminance and alpha formats

2017-07-07 Thread Andres Gomez
On Thu, 2017-07-06 at 23:24 +0200, Marek Olšák wrote:
> On Thu, Jul 6, 2017 at 8:24 PM, Andres Gomez  wrote:
> > Marek, would we want this series in -stable or we shouldn't bother ?
> 
> Don't bother. Patch 1 isn't that important. Other patches are for
> features not enabled in 17.1.

OK. Thanks for the feedback! ☺

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [Fwd: Re: [PATCH 1/2] intel/isl: Use uint64_t to store total surface size]

2017-07-07 Thread Andres Gomez
On Thu, 2017-07-06 at 18:21 +0100, Emil Velikov wrote:
> On 3 July 2017 at 21:14, Andres Gomez  wrote:
> > Actually, forgot to add -stable into CC.
> > 
> >  Forwarded Message 
> > From: Andres Gomez 
> > To: Anuj Phogat , mesa-dev@lists.freedesktop.org
> > Subject: Re: [Mesa-dev] [PATCH 1/2] intel/isl: Use uint64_t to store
> > total surface size
> > Date: Mon, 03 Jul 2017 23:02:31 +0300
> > 
> > It looks like we could want these 2 into -stable (?)
> > 
> 
> Shouldn't hurt, despite that most of the
> isl_surf_init/isl_surf_get_[a-z]_surf handling is a simple assert(ok).
> I'll leave the call to the experts, but my take is "don't bother".

OK, I'll wait to see if Anuj has anything to say before picking this
one, then.

Thanks for the feedback!

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/3] mesa/main: Move NULL pointer check.

2017-07-07 Thread Andres Gomez
On Thu, 2017-07-06 at 18:02 +0100, Emil Velikov wrote:
> On 6 July 2017 at 15:35, Andres Gomez  wrote:
> > It looks like we could want this into -stable (?)
> > 
> 
> _mesa_update_draw_buffer_bounds is a no-op if the buffer pointer is NULL.
> There's no issue to fix, but making Coverity happy is not a bad idea either 
> ;-)

_mesa_update_draw_buffer_bounds is a no-op but, if I'm not mistaken,
_mesa_update_framebuffer could end in a crash.

In any case, I think I will pick this.

Thanks for the feedback, Emil ☺

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: remove unused st_framebuffer::Private field

2017-07-07 Thread Brian Paul
---
 src/mesa/state_tracker/st_context.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 5c7c58d..af9149e 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -295,7 +295,6 @@ static inline struct st_context *st_context(struct 
gl_context *ctx)
 struct st_framebuffer
 {
struct gl_framebuffer Base;
-   void *Private;
 
struct st_framebuffer_iface *iface;
enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] vbo: whitespace/formatting fixes in vbo_save_loopback.c

2017-07-07 Thread Brian Paul
Trivial.
---
 src/mesa/vbo/vbo_save_loopback.c | 116 +--
 1 file changed, 64 insertions(+), 52 deletions(-)

diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c
index 7410f18..773af93 100644
--- a/src/mesa/vbo/vbo_save_loopback.c
+++ b/src/mesa/vbo/vbo_save_loopback.c
@@ -1,8 +1,8 @@
 /**
- * 
+ *
  * Copyright 2005 VMware, Inc.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **/
 
 #include 
@@ -37,33 +37,41 @@
 #include "vbo_context.h"
 
 
-typedef void (*attr_func)( struct gl_context *ctx, GLint target, const GLfloat 
* );
+typedef void (*attr_func)(struct gl_context *ctx, GLint target, const GLfloat 
*);
 
 
 /* This file makes heavy use of the aliasing of NV vertex attributes
  * with the legacy attributes, and also with ARB and Material
  * attributes as currently implemented.
  */
-static void VertexAttrib1fvNV(struct gl_context *ctx, GLint target, const 
GLfloat *v)
+static void
+VertexAttrib1fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
 {
CALL_VertexAttrib1fvNV(ctx->Exec, (target, v));
 }
 
-static void VertexAttrib2fvNV(struct gl_context *ctx, GLint target, const 
GLfloat *v)
+
+static void
+VertexAttrib2fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
 {
CALL_VertexAttrib2fvNV(ctx->Exec, (target, v));
 }
 
-static void VertexAttrib3fvNV(struct gl_context *ctx, GLint target, const 
GLfloat *v)
+
+static void
+VertexAttrib3fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
 {
CALL_VertexAttrib3fvNV(ctx->Exec, (target, v));
 }
 
-static void VertexAttrib4fvNV(struct gl_context *ctx, GLint target, const 
GLfloat *v)
+
+static void
+VertexAttrib4fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
 {
CALL_VertexAttrib4fvNV(ctx->Exec, (target, v));
 }
 
+
 static attr_func vert_attrfunc[4] = {
VertexAttrib1fvNV,
VertexAttrib2fvNV,
@@ -71,22 +79,26 @@ static attr_func vert_attrfunc[4] = {
VertexAttrib4fvNV
 };
 
+
 struct loopback_attr {
GLint target;
GLint sz;
attr_func func;
 };
 
-/* Don't emit ends and begins on wrapped primitives.  Don't replay
+
+/**
+ * Don't emit ends and begins on wrapped primitives.  Don't replay
  * wrapped vertices.  If we get here, it's probably because the
  * precalculated wrapping is wrong.
  */
-static void loopback_prim( struct gl_context *ctx,
-  const GLfloat *buffer,
-  const struct _mesa_prim *prim,
-  GLuint wrap_count,
-  GLuint vertex_size,
-  const struct loopback_attr *la, GLuint nr )
+static void
+loopback_prim(struct gl_context *ctx,
+  const GLfloat *buffer,
+  const struct _mesa_prim *prim,
+  GLuint wrap_count,
+  GLuint vertex_size,
+  const struct loopback_attr *la, GLuint nr)
 {
GLint start = prim->start;
GLint end = start + prim->count;
@@ -96,14 +108,13 @@ static void loopback_prim( struct gl_context *ctx,
 
if (0)
   printf("loopback prim %s(%s,%s) verts %d..%d\n",
-_mesa_lookup_prim_by_nr(prim->mode),
-prim->begin ? "begin" : "..",
-prim->end ? "end" : "..",
-start, 
-end);
+ _mesa_lookup_prim_by_nr(prim->mode),
+ prim->begin ? "begin" : "..",
+ prim->end ? "end" : "..",
+ start, end);
 
if (prim->begin) {
-  CALL_Begin(GET_DISPATCH(), ( prim->mode ));
+  CALL_Begin(GET_DISPATCH(), (prim->mode));
}
else {
   assert(start == 0);
@@ -112,17 +123,17 @@ static void loopback_prim( struct gl_context *ctx,
 
data = buffer + start * vertex_size;
 
-   for (j = start ; j < end ; j++) {
+   for (j = start; j < end; j++) {
  

[Mesa-dev] [PATCH 1/3] vbo: simplify vbo_save_NotifyBegin()

2017-07-07 Thread Brian Paul
This function always returned GL_TRUE.  Just make it a void function.
Remove unreachable code following the call to vbo_save_NotifyBegin()
in save_Begin() in dlist.c

There were some stale comments that no longer applied since an earlier
code refactoring.

No Piglit regressions.
---
 src/mesa/main/dlist.c   | 18 +-
 src/mesa/vbo/vbo.h  |  2 +-
 src/mesa/vbo/vbo_save_api.c |  7 +--
 3 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 7e44054..9e817be 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -5766,25 +5766,9 @@ save_Begin(GLenum mode)
   _mesa_compile_error(ctx, GL_INVALID_OPERATION, "recursive glBegin");
}
else {
-  Node *n;
-
   ctx->Driver.CurrentSavePrimitive = mode;
 
-  /* Give the driver an opportunity to hook in an optimized
-   * display list compiler.
-   */
-  if (vbo_save_NotifyBegin(ctx, mode))
- return;
-
-  SAVE_FLUSH_VERTICES(ctx);
-  n = alloc_instruction(ctx, OPCODE_BEGIN, 1);
-  if (n) {
- n[1].e = mode;
-  }
-
-  if (ctx->ExecuteFlag) {
- CALL_Begin(ctx->Exec, (mode));
-  }
+  vbo_save_NotifyBegin(ctx, mode);
}
 }
 
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index eec484b..c8e87d3 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -90,7 +90,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
 
 void vbo_exec_FlushVertices(struct gl_context *ctx, GLuint flags);
 void vbo_save_SaveFlushVertices(struct gl_context *ctx);
-GLboolean vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode);
+void vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode);
 void vbo_save_NewList(struct gl_context *ctx, GLuint list, GLenum mode);
 void vbo_save_EndList(struct gl_context *ctx);
 void vbo_save_BeginCallList(struct gl_context *ctx, struct gl_display_list 
*list);
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index a0735f6..a42a3c3 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -1035,7 +1035,7 @@ _save_CallLists(GLsizei n, GLenum type, const GLvoid * v)
  * Called when a glBegin is getting compiled into a display list.
  * Updating of ctx->Driver.CurrentSavePrimitive is already taken care of.
  */
-GLboolean
+void
 vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode)
 {
struct vbo_save_context *save = _context(ctx)->save;
@@ -1064,11 +1064,6 @@ vbo_save_NotifyBegin(struct gl_context *ctx, GLenum mode)
 
/* We need to call vbo_save_SaveFlushVertices() if there's state change */
ctx->Driver.SaveNeedFlush = GL_TRUE;
-
-   /* GL_TRUE means we've handled this glBegin here; don't compile a BEGIN
-* opcode into the display list.
-*/
-   return GL_TRUE;
 }
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] mesa: finish implementing glPrimitiveRestartNV() for display lists

2017-07-07 Thread Brian Paul
If we try to build a display list with just a glPrimitiveRestartNV()
call, we'd crash because of a null GLvertexformat::PrimitiveRestartNV
pointer.  This change fixes that case.

The previous patch fixed the case of calling glPrimitiveRestartNV()
inside a glBegin/End pair.
---
 src/mesa/main/dlist.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 9e817be..6e334fd 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -325,7 +325,8 @@ typedef enum
OPCODE_STENCIL_FUNC_SEPARATE,
OPCODE_STENCIL_OP_SEPARATE,
OPCODE_STENCIL_MASK_SEPARATE,
-
+   /* GL_NV_primitive_restart */
+   OPCODE_PRIMITIVE_RESTART_NV,
/* GL_ARB_shader_objects */
OPCODE_USE_PROGRAM,
OPCODE_UNIFORM_1F,
@@ -6095,6 +6096,24 @@ save_VertexAttrib4fvARB(GLuint index, const GLfloat * v)
 }
 
 static void GLAPIENTRY
+save_PrimitiveRestartNV(void)
+{
+   /* Note: this is used when outside a glBegin/End pair in a display list */
+   GET_CURRENT_CONTEXT(ctx);
+   Node *n;
+   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
+   n = alloc_instruction(ctx, OPCODE_PRIMITIVE_RESTART_NV, 0);
+   if (n) {
+  /* nothing */
+   }
+   if (ctx->ExecuteFlag) {
+  CALL_PrimitiveRestartNV(ctx->Exec, ());
+   }
+
+}
+
+
+static void GLAPIENTRY
 save_BlitFramebufferEXT(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
 GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
 GLbitfield mask, GLenum filter)
@@ -8670,6 +8689,10 @@ execute_list(struct gl_context *ctx, GLuint list)
 n[5].i, n[6].i, n[7].i, n[8].i,
 n[9].i, n[10].e));
 break;
+ case OPCODE_PRIMITIVE_RESTART_NV:
+CALL_PrimitiveRestartNV(ctx->Exec, ());
+break;
+
  case OPCODE_USE_PROGRAM:
 CALL_UseProgram(ctx->Exec, (n[1].ui));
 break;
@@ -10460,6 +10483,8 @@ save_vtxfmt_init(GLvertexformat * vfmt)
vfmt->VertexAttrib3fvARB = save_VertexAttrib3fvARB;
vfmt->VertexAttrib4fARB = save_VertexAttrib4fARB;
vfmt->VertexAttrib4fvARB = save_VertexAttrib4fvARB;
+
+   vfmt->PrimitiveRestartNV = save_PrimitiveRestartNV;
 }
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] vbo: fix glPrimitiveRestartNV crash inside a display list

2017-07-07 Thread Brian Paul
From: Olivier Lauffenburger 

glPrimitiveRestartNV crashes when it is called during the compilation
of a display list.

There are two reasons:
- ctx->Driver.CurrentSavePrimitive is not set to the current primitive
- save_PrimitiveRestartNV() calls _save_Begin() which only sets an
  OpenGL error, instead of calling vbo_save_NotifyBegin().

This patch correctly calls vbo_save_NotifyBegin() but it detects
the current primitive mode by looking at the latest saved primitive.

Additional work by Brian Paul

Signed-off-by: Olivier Lauffenburger 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101464
Reviewed-by: Brian Paul 
---
 src/mesa/vbo/vbo_save_api.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index a42a3c3..aab5f54 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -1108,13 +1108,23 @@ _save_Begin(GLenum mode)
 static void GLAPIENTRY
 _save_PrimitiveRestartNV(void)
 {
-   GLenum curPrim;
GET_CURRENT_CONTEXT(ctx);
+   struct vbo_save_context *save = _context(ctx)->save;
 
-   curPrim = ctx->Driver.CurrentSavePrimitive;
-
-   _save_End();
-   _save_Begin(curPrim);
+   if (save->prim_count == 0) {
+  /* We're not inside a glBegin/End pair, so calling glPrimitiveRestartNV
+   * is an error.
+   */
+  _mesa_compile_error(ctx, GL_INVALID_OPERATION,
+  "glPrimitiveRestartNV called outside glBegin/End");
+   } else {
+  /* get current primitive mode */
+  GLenum curPrim = save->prim[save->prim_count - 1].mode;
+
+  /* restart primitive */
+  CALL_End(GET_DISPATCH(), ());
+  vbo_save_NotifyBegin(ctx, curPrim);
+   }
 }
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] vbo: rename target->index in loopback code

2017-07-07 Thread Brian Paul
Because it's a vertex attribute index.
---
 src/mesa/vbo/vbo_save_loopback.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c
index 773af93..1dae91b 100644
--- a/src/mesa/vbo/vbo_save_loopback.c
+++ b/src/mesa/vbo/vbo_save_loopback.c
@@ -37,7 +37,7 @@
 #include "vbo_context.h"
 
 
-typedef void (*attr_func)(struct gl_context *ctx, GLint target, const GLfloat 
*);
+typedef void (*attr_func)(struct gl_context *ctx, GLint index, const GLfloat 
*);
 
 
 /* This file makes heavy use of the aliasing of NV vertex attributes
@@ -45,30 +45,30 @@ typedef void (*attr_func)(struct gl_context *ctx, GLint 
target, const GLfloat *)
  * attributes as currently implemented.
  */
 static void
-VertexAttrib1fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib1fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib1fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib1fvNV(ctx->Exec, (index, v));
 }
 
 
 static void
-VertexAttrib2fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib2fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib2fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib2fvNV(ctx->Exec, (index, v));
 }
 
 
 static void
-VertexAttrib3fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib3fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib3fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib3fvNV(ctx->Exec, (index, v));
 }
 
 
 static void
-VertexAttrib4fvNV(struct gl_context *ctx, GLint target, const GLfloat *v)
+VertexAttrib4fvNV(struct gl_context *ctx, GLint index, const GLfloat *v)
 {
-   CALL_VertexAttrib4fvNV(ctx->Exec, (target, v));
+   CALL_VertexAttrib4fvNV(ctx->Exec, (index, v));
 }
 
 
@@ -81,7 +81,7 @@ static attr_func vert_attrfunc[4] = {
 
 
 struct loopback_attr {
-   GLint target;
+   GLint index;
GLint sz;
attr_func func;
 };
@@ -127,7 +127,7 @@ loopback_prim(struct gl_context *ctx,
   const GLfloat *tmp = data + la[0].sz;
 
   for (k = 1; k < nr; k++) {
- la[k].func(ctx, la[k].target, tmp);
+ la[k].func(ctx, la[k].index, tmp);
  tmp += la[k].sz;
   }
 
@@ -184,7 +184,7 @@ vbo_loopback_vertex_list(struct gl_context *ctx,
 */
for (i = 0; i < VBO_ATTRIB_MAX; i++) {
   if (attrsz[i]) {
- la[nr].target = i;
+ la[nr].index = i;
  la[nr].sz = attrsz[i];
  la[nr].func = vert_attrfunc[attrsz[i]-1];
  nr++;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/25] gallium: introduce PIPE_CAP_MEMOBJ

2017-07-07 Thread Ilia Mirkin
This is missing the docs addition for what this cap does
(gallium/docs/source/screen.rst).

On Fri, Jul 7, 2017 at 12:24 AM, Andres Rodriguez  wrote:
> This can be used to guard support for EXT_memory_object and related
> extensions.
>
> Signed-off-by: Andres Rodriguez 
> ---
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>  src/gallium/include/pipe/p_defines.h | 1 +
>  15 files changed, 15 insertions(+)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index eefb51c..718f0ac 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -258,6 +258,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_POST_DEPTH_COVERAGE:
> case PIPE_CAP_BINDLESS_TEXTURE:
> +   case PIPE_CAP_MEMOBJ:
>return 0;
>
> /* Stream output. */
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
> b/src/gallium/drivers/freedreno/freedreno_screen.c
> index a915d65..6c9c011 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -321,6 +321,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_POST_DEPTH_COVERAGE:
> case PIPE_CAP_BINDLESS_TEXTURE:
> +   case PIPE_CAP_MEMOBJ:
> return 0;
>
> case PIPE_CAP_MAX_VIEWPORTS:
> diff --git a/src/gallium/drivers/i915/i915_screen.c 
> b/src/gallium/drivers/i915/i915_screen.c
> index 4ad98e2..3919978 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/drivers/i915/i915_screen.c
> @@ -310,6 +310,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
> cap)
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_BINDLESS_TEXTURE:
> +   case PIPE_CAP_MEMOBJ:
>return 0;
>
> case PIPE_CAP_MAX_VIEWPORTS:
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
> b/src/gallium/drivers/llvmpipe/lp_screen.c
> index e98e30d..7959015 100644
> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
> @@ -356,6 +356,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
> pipe_cap param)
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_POST_DEPTH_COVERAGE:
> case PIPE_CAP_BINDLESS_TEXTURE:
> +   case PIPE_CAP_MEMOBJ:
>return 0;
> }
> /* should only get here on unhandled cases */
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
> b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> index a352ff5..14d1b1a 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> @@ -220,6 +220,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_POST_DEPTH_COVERAGE:
> case PIPE_CAP_BINDLESS_TEXTURE:
> +   case PIPE_CAP_MEMOBJ:
>return 0;
>
> case PIPE_CAP_VENDOR_ID:
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 8bbe403..571df1c 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -301,6 +301,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_INT64_DIVMOD:
> case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
> case PIPE_CAP_BINDLESS_TEXTURE:
> +   case PIPE_CAP_MEMOBJ:
>return 0;
>
> case PIPE_CAP_VENDOR_ID:
> diff --git a/src/gallium/drivers/r300/r300_screen.c 
> b/src/gallium/drivers/r300/r300_screen.c
> index 5cdb248..37d102b 100644
> --- a/src/gallium/drivers/r300/r300_screen.c
> +++ b/src/gallium/drivers/r300/r300_screen.c
> @@ -242,6 +242,7 @@ static int r300_get_param(struct pipe_screen* pscreen, 
> enum pipe_cap param)
>  case 

Re: [Mesa-dev] [PATCH 0/2] vulkan/wsi/wayland: Improve surface format support

2017-07-07 Thread Alexandros Frantzis
On Fri, Jun 23, 2017 at 06:50:49PM +0300, alexandros.frant...@collabora.com 
wrote:
> From: Alexandros Frantzis 
> 
> Improve the surface format support in the Wayland Vulkan WSI, by
> automating the matching between wl_drm and Vulkan formats. The same
> mechanism can be used to improve format support in vulkan/wsi/x11
> (in a future patchset).

Hi all,

I would like to discuss the best way to move forward with the
improvements to the format support in the Vulkan WSI.

My understanding is that the main concern about the proposed patchset is
the introduction of another format description mechanism. In light of
that, I would like to get opinions on the following two alternatives for
moving forward, and of course I am open to other suggestions.

1. Now that I am aware of amd/vulkan/vk_format.h (thanks Dave), I think
   it will be a good base to provide the needed functionality. We could
   extract it to vulkan/util and use it as the common representation.
   We will still need to express native formats in this common
   representation (taking into account endianness etc) and we will also
   need code to perform the matching correctly.

2. Instead of making this mapping at runtime, we can use similar logic
   to automatically produce code that provides the same functionality
   and just commit that. This is fine for wayland that uses a
   predefined set of formats, but more complicated for X11 where
   formats are described as bitmasks.

Thanks,
Alexandros
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Use brw_bo_wait() for brw_bo_wait_rendering()

2017-07-07 Thread Chris Wilson
Currently, we use set_domain() to cause a stall on rendering. But the
set-domain ioctl has the side-effect of changing the kernel's cache
domain underneath the struct_mutex, which may perturb state if there was
no rendering to wait upon and in general is much heavier than the
lockless wait-ioctl. Historically libdrm used set-domain as we did not
have an explicit wait-ioctl (and the patches to teach it to use wait if
available were lost in the mists). Since mesa already depends upon a
kernel that supports the wait-ioctl, we do not need to supply a fallback.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 8 +---
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 2 +-
 src/mesa/drivers/dri/i965/brw_context.c   | 2 +-
 src/mesa/drivers/dri/i965/brw_performance_query.c | 2 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++--
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index da12a13152..ee4a5cfa2c 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -831,10 +831,12 @@ brw_bo_get_subdata(struct brw_bo *bo, uint64_t offset,
 
 /** Waits for all GPU rendering with the object to have completed. */
 void
-brw_bo_wait_rendering(struct brw_context *brw, struct brw_bo *bo)
+brw_bo_wait_rendering(struct brw_bo *bo)
 {
-   set_domain(brw, "waiting for",
-  bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+   /* We require a kernel recent enough for WAIT_IOCTL support.
+* See intel_init_bufmgr()
+*/
+   brw_bo_wait(bo, -1);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 4d671b6aae..80c71825e8 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -227,7 +227,7 @@ int brw_bo_get_subdata(struct brw_bo *bo, uint64_t offset,
  * bo_subdata, etc.  It is merely a way for the driver to implement
  * glFinish.
  */
-void brw_bo_wait_rendering(struct brw_context *brw, struct brw_bo *bo);
+void brw_bo_wait_rendering(struct brw_bo *bo);
 
 /**
  * Tears down the buffer manager instance.
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 0b3fdc6842..8a3ffab443 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -256,7 +256,7 @@ intel_finish(struct gl_context * ctx)
intel_glFlush(ctx);
 
if (brw->batch.last_bo)
-  brw_bo_wait_rendering(brw, brw->batch.last_bo);
+  brw_bo_wait_rendering(brw->batch.last_bo);
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 81389dbd3e..e4e1854bf2 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1350,7 +1350,7 @@ brw_wait_perf_query(struct gl_context *ctx, struct 
gl_perf_query_object *o)
if (brw_batch_references(&brw->batch, bo))
   intel_batchbuffer_flush(brw);
 
-   brw_bo_wait_rendering(brw, bo);
+   brw_bo_wait_rendering(bo);
 
/* Due to a race condition between the OA unit signaling report
 * availability and the report actually being written into memory,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 62d2fe8ef3..28c2f474c0 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -497,7 +497,7 @@ throttle(struct brw_context *brw)
 /* Pass NULL rather than brw so we avoid perf_debug warnings;
  * stalling is common and expected here...
  */
-brw_bo_wait_rendering(NULL, brw->throttle_batch[1]);
+brw_bo_wait_rendering(brw->throttle_batch[1]);
  }
  brw_bo_unreference(brw->throttle_batch[1]);
   }
@@ -723,7 +723,7 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
 
if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
   fprintf(stderr, "waiting for idle\n");
-  brw_bo_wait_rendering(brw, brw->batch.bo);
+  brw_bo_wait_rendering(brw->batch.bo);
}
 
/* Start a new batch buffer. */
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] i965: Fix asynchronous mappings on !LLC platforms.

2017-07-07 Thread Chris Wilson
Quoting Kenneth Graunke (2017-07-07 07:08:16)
> On Thursday, July 6, 2017 10:51:49 PM PDT Kenneth Graunke wrote:
> > On Wednesday, July 5, 2017 2:24:55 PM PDT Chris Wilson wrote:
> > > Quoting Kenneth Graunke (2017-07-05 21:56:54)
> > > > ---
> > > >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 15 +--
> > > >  1 file changed, 13 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> > > > b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > > > index 7756e2b5f6c..46696be3577 100644
> > > > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > > > @@ -56,6 +56,7 @@
> > > >  #ifndef ETIME
> > > >  #define ETIME ETIMEDOUT
> > > >  #endif
> > > > +#include "common/gen_clflush.h"
> > > >  #include "common/gen_debug.h"
> > > >  #include "common/gen_device_info.h"
> > > >  #include "libdrm_macros.h"
> > > > @@ -698,12 +699,22 @@ brw_bo_map_cpu(struct brw_context *brw, struct 
> > > > brw_bo *bo, unsigned flags)
> > > >   VG(VALGRIND_FREELIKE_BLOCK(map, 0));
> > > >   drm_munmap(map, bo->size);
> > > >}
> > > > +   } else if (!bo->cache_coherent) {
> > > > +  /* If we're reusing an existing CPU mapping, the CPU caches may
> > > > +   * contain stale data from the last time we read from that 
> > > > mapping.
> > > > +   * (With the BO cache, it might even be data from a previous 
> > > > buffer!)
> > > > +   *
> > > > +   * We need to invalidate those cachelines so that we see the 
> > > > latest
> > > > +   * contents.
> > > > +   */
> > > > +  gen_invalidate_range(bo->map_cpu, bo->size);
> > > > }
> > > 
> > > This leaves us in trouble on the first invocation with MAP_ASYNC, where
> > > > we create the cpu mmapping but don't change any of its domains. (Not that
> > > > GL allows for READ | UNSYNCHRONIZED if my reading of the spec was
> > > correct.) However, if you use
> > > 
> > > if (!(flags & MAP_ASYNC))
> > > wait_rendering(); /* teach me to use gem_wait! */
> > > 
> > > if (!bo->cache_coherent)
> > > gen_invalidate_range();
> > > 
> > > and a set_domain(GTT, 0) on creation that should cover everything.
> > 
> > I think I meant to set_domain(GTT, 0) on creation in this series, and
> > lost that patch somehow.  It seems like a good idea to add that.
> > 
> > You're correct, READ | UNSYNCHRONIZED is not allowed.  Since we disallow
> > CPU maps for writes, we should never see MAP_ASYNC here.  Which means we'll
> > always do gen_invalidate_range() and then set_domain(CPU).
> > 
> > I'm struggling to see how wait_rendering (aka set_domain(GTT)) helps us
> > here...there are no CPU writes to wait for...we disallowed them...
> > 
> > > In the meantime, s/else if (!bo->cache_coherent)/if (!bo->cache_coherent)/
> > > -Chris
> > 
> > Oh?  I can do that.  I figured that when we asked the kernel to create a
> > brand new CPU map for us, it would guarantee that the new virtual address
> > range didn't have any stale data in the CPU caches.  But, if it doesn't,
> > then we definitely need to clflush them out.
> 
> On second thought, that seems pretty scary:
> 
> We never throw away CPU maps...so if we don't have one, this is the first
> time we've CPU mapped the buffer.  We never write via the CPU, either.
> There cannot possibly be valid data in the CPU cache for this buffer.
> 
> So the only data that could possibly be there...is from some other buffer.
> Wouldn't that be an information leak...?

You've forgotten that the kernel wrote to the BO to zero it out (and
previously you may have used pwrite), and you are choosing to bypass the
set_domain that ensured the data was where you expected it. (But that
applies to flushing after write before GTT access, not quite this
situation.)

But in this case, it is not about clflushing the contents of the cache
away, but ensuring that speculative cpu access is invalidated prior to
refreshing the contents from memory. (You cannot prevent bsw+ from
loading stale data into the cpu cache ahead of time and it won't notice
that the memory changes behind the cacheline.) Since you bypass
set_domain, you need to pick up all the pieces ;)
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-07 Thread Emil Velikov
On 7 July 2017 at 11:34, Chris Wilson  wrote:
> Quoting Ben Widawsky (2017-07-07 00:27:01)
>>  drivers/gpu/drm/i915/i915_drv.c |  3 +++
>>  drivers/gpu/drm/i915/i915_drv.h |  2 ++
>>  drivers/gpu/drm/i915/i915_pci.c | 13 +
>>  include/uapi/drm/i915_drm.h |  8 
>>  4 files changed, 22 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 9167a73f3c69..26c27b6ae814 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev, void 
>> *data,
>> if (!value)
>> return -ENODEV;
>> break;
>> +   case I915_PARAM_MOCS_TABLE_VERSION:
>> +   value = INTEL_INFO(dev_priv)->mocs_version;
>
> If we use intel_mocs_get_table_version() we can put this magic number
> in intel_mocs.c next to the tables, where we can keep its history and
> hopefully be able to remember to update it.
>
>> +/* What version of the MOCS table we have. For GEN9 GPUs, the PRM defined
>> + * non-optimal settings for the MOCS table. As a result, we were required 
>> to use a
>> + * small subset, and later add new settings. This param allows userspace to
>> + * determine which settings are there.
>> + */
>> +#define MOCS_TABLE_VERSION   1 /* Build time MOCS table version 
>> */
>
> How are you planning to share this? When we update we bump this number,
> and then mesa copies it across and uses it after verifying it as 0,1 on
> an old kernel.
>
> I don't think you want to expose the updated constant here, but symbolic
> names for each version? (What would be the point?)
>
FWIW I have to agree with Chris here - having the value is of limited
use. Furthermore it mostly confuses people when writing the user space
parts.

For example:
Mesa implements v1 and uses the define. Kernel headers get updated to
v2 and Mesa supporting v1 gets rebuilt against them.
Mesa stores/treats the MOCS version as "v2" when the actual
hardware/kernel supports "v1".

The expected issues vary depending on the implementation, but I
suspect it won't be fun :-)

IMHO it's better if user space is explicit on the versions it supports
and kernel should avoid exposing such defines unless really needed.

HTH
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101657] strtod.c:32:10: fatal error: xlocale.h: No such file or directory

2017-07-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101657

Eric Engestrom  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #3 from Eric Engestrom  ---
Fixed by:

commit c5d0dc7fa5566941a49ede8c83a0cfe0a33a3d7f
Author: Vinson Lee 
Date:   Wed Jul 5 14:16:11 2017 -0700

scons: Check for xlocale.h before defining HAVE_XLOCALE_H.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965: Use I915_EXEC_NO_RELOC

2017-07-07 Thread Chris Wilson
Quoting Daniel Vetter (2017-07-07 11:04:00)
> On Mon, Jun 19, 2017 at 11:06:48AM +0100, Chris Wilson wrote:
> > -   if (target != batch->bo)
> > -  add_exec_bo(batch, target);
> > +   if (target != batch->bo) {
> > +  unsigned int index = add_exec_bo(batch, target);
> > > +  struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
> > +
> > +  if (write_domain) {
> > + exec->flags |= EXEC_OBJECT_WRITE;
> > +
> > + /* PIPECONTROL needs a w/a on gen6 */
> > + if (write_domain == I915_GEM_DOMAIN_INSTRUCTION) {
> > +struct brw_context *brw = container_of(batch, brw, batch);
> > +if (brw->gen == 6)
> > +   exec->flags |= EXEC_OBJECT_NEEDS_GTT;
> > + }
> > +  }
> 
> If we ever do a write to the batch this goes boom I think. Can we move the
> if (write_domain) out of the batch check? Means we need to cache the batch
> exec entry somewhere. Just having a batch->batch_exec_flags would do it I
> think.

You are strictly not allowed to write to the batch; it was overkill in
hindsight, but the kernel will reject such execbuf.

> > +  offset64 = exec->offset;
> > +   } else {
> > +  offset64 = target->offset64;
> 
> You lost the READ_ONCE for the above two, and since that's now at least
> locally defined, we don't even need the comment.
> 
> With the above two issues addressed:

Where's the second issue? There's no need for a READ_ONCE for offset
here as both are local to the context (and a context is not supposed to
be accessed concurrently, hence the lack of locking around here) and not
shared.
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101334] Any vulkan app seems to freeze the system

2017-07-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101334

--- Comment #21 from John  ---
I believe that's a same generation card, so it would make sense to behave
similarly.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-07 Thread Chris Wilson
Quoting Daniel Vetter (2017-07-07 11:31:46)
> On Mon, Jun 19, 2017 at 11:06:50AM +0100, Chris Wilson wrote:
> > Passing the index of the target buffer via the reloc.target_handle is
> > marginally more efficient for the kernel (it can avoid some allocations,
> > and can use a direct lookup rather than a hash or search). It is also
> > useful for ourselves as we can use the index into our exec_bos for other
> > tasks.
> > 
> > v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
> > a post-processing loop to fixup the relocations.
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Kenneth Graunke 
> > Cc: Matt Turner 
> > Cc: Jason Ekstrand 
> > ---
> >  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
> >  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 82 
> > ---
> >  2 files changed, 61 insertions(+), 22 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> > b/src/mesa/drivers/dri/i965/brw_context.h
> > index 2fb2cab918..93ddd0825a 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -450,6 +450,7 @@ struct intel_batchbuffer {
> >  
> > uint32_t state_batch_offset;
> > enum brw_gpu_ring ring;
> > +   bool has_batch_first;
> > bool needs_sol_reset;
> > bool state_base_address_emitted;
> >  
> > diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> > b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> > index 15aaf01e52..5fa849c5a5 100644
> > --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> > +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> > @@ -40,6 +40,10 @@
> >  
> >  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
> >  
> > +#define DBG_NO_BATCH_FIRST 0
> > +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
> > +#define I915_EXEC_BATCH_FIRST (1 << 18)
> 
> Needs an #ifndef I think, to avoid troubles when updating libdrm. Or just
> properly update mesa's copy of i915_drm.h, that would be much better.

Because src/include/drm/i915_drm.h didn't exist at time. In the current
version of the patch, this pair are no longer required as they are
already defined.

> >  static void
> >  intel_batchbuffer_reset(struct intel_batchbuffer *batch,
> >  struct brw_bufmgr *bufmgr,
> > @@ -57,13 +61,33 @@ uint_key_hash(const void *key)
> > return (uintptr_t) key;
> >  }
> >  
> > +static int gem_param(int fd, int name)
> > +{
> > +   drm_i915_getparam_t gp;
> > +   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors 
> > */
> > +
> > > +   memset(&gp, 0, sizeof(gp));
> > > +   gp.param = name;
> > > +   gp.value = &v;
> > > +   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
> > +  return -1;
> > +
> > +   return v;
> > +}
> 
> Afaict this exists as intel_get_param already, why can't we use that?

Because it didn't exist when I wrote the patch. (I'm platforming for
kern_info to be queried once alongside dev_info.)

> > @@ -793,8 +829,9 @@ brw_emit_reloc(struct intel_batchbuffer *batch, 
> > uint32_t batch_offset,
> > assert(batch_offset <= BATCH_SZ - sizeof(uint32_t));
> > assert(_mesa_bitcount(write_domain) <= 1);
> >  
> > +   unsigned int index;
> > if (target != batch->bo) {
> > -  unsigned int index = add_exec_bo(batch, target);
> > +  index = add_exec_bo(batch, target);
> > >struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index];
> >  
> >if (write_domain) {
> > @@ -811,6 +848,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, 
> > uint32_t batch_offset,
> >offset64 = exec->offset;
> > } else {
> >offset64 = target->offset64;
> > +  index = target->index;
> 
> index = 0; Yes the batch isn't ever shared, but I think it's better to
> make this obviously safe.

It's safer using the tracking than adding the presumption, surely?
That way this patch (in the original ordering) was just as happy with
using the batch in the last_slot and doing the fixups.
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] intel: Fix clflushing on modern (Baytrail+) Atom CPUs.

2017-07-07 Thread Lionel Landwerlin
I remember that DS had issues with CMAA (Conservative Morphological 
Anti-Aliasing) on Baytrail or Braswell ChromeOS devices.

Would be worth a try to see whether this series fixes that.

Thanks!

Acked-by: Lionel Landwerlin 

On 05/07/17 21:56, Kenneth Graunke wrote:

Thanks to Chris Wilson for pointing this out.

Cc: Jason Ekstrand 
---
  src/intel/common/gen_clflush.h | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/src/intel/common/gen_clflush.h b/src/intel/common/gen_clflush.h
index 9b971cac37e..84cc2f2f132 100644
--- a/src/intel/common/gen_clflush.h
+++ b/src/intel/common/gen_clflush.h
@@ -50,6 +50,17 @@ static inline void
  gen_invalidate_range(void *start, size_t size)
  {
 gen_clflush_range(start, size);
+
+   /* Modern Atom CPUs (Baytrail+) have issues with clflush serialization,
+* where mfence is not a sufficient synchronization barrier.  We must
+* double clflush the last cacheline.  This guarantees it will be ordered
+* after the preceding clflushes, and then the mfence guards against
+* prefetches crossing the clflush boundary.
+*
+* See kernel commit 396f5d62d1a5fd99421855a08ffdef8edb43c76e and
+* https://bugs.freedesktop.org/show_bug.cgi?id=92845.
+*/
+   __builtin_ia32_clflush(start + size - 1);
 __builtin_ia32_mfence();
  }
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] i965: Track last location of bo used for the batch

2017-07-07 Thread Daniel Vetter
On Fri, Jul 07, 2017 at 11:23:36AM +0100, Chris Wilson wrote:
> Borrow a trick from anv, and use the last known index for the bo to skip
> a search of the batch->exec_bo when adding a new relocation. In defence
> against the bo being used in multiple batches simultaneously, we check
> that this slot exists and points back to us.
> 
> v2: Also update brw_batch_references()
> v3: Reset bo->index on creation (Daniel)
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> Cc: Jason Ekstrand 
> Cc: Daniel Vetter 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c|  1 +
>  src/mesa/drivers/dri/i965/brw_bufmgr.h|  7 +++
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 18 --
>  3 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index da12a13152..4e43a448ae 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -353,6 +353,7 @@ retry:
> p_atomic_set(&bo->refcount, 1);
> bo->reusable = true;
> bo->cache_coherent = bufmgr->has_llc;
> +   bo->index = -1;
>  
> pthread_mutex_unlock(&bufmgr->lock);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 4d671b6aae..27a27ca244 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -76,6 +76,13 @@ struct brw_bo {
> uint64_t offset64;
>  
> /**
> +* Index of this buffer inside the batch, -1 when not in a batch. Note
> +* that a single buffer may be in multiple batches (contexts), the index
> +* only refers to its last use and should not be trusted!
> +*/
> +   unsigned int index;
> +
> +   /**
>  * Boolean of whether the GPU is definitely not accessing the buffer.
>  *
>  * This is only valid when reusable, since non-reusable
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 62d2fe8ef3..f76ece8d71 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -515,12 +515,20 @@ throttle(struct brw_context *brw)
> }
>  }
>  
> +#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> +
>  static void
>  add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  {
> if (bo != batch->bo) {
> -  for (int i = 0; i < batch->exec_count; i++) {
> - if (batch->exec_bos[i] == bo)
> +  unsigned int index = READ_ONCE(bo->index);
> +
> +  if (index < batch->exec_count && batch->exec_bos[index] == bo)
> + return;
> +
> +  /* May have been shared between multiple active batches */
> +  for (index = 0; index < batch->exec_count; index++) {
> + if (batch->exec_bos[index] == bo)
>  return;
>}
>  
> @@ -553,6 +561,7 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
> brw_bo *bo)
> validation_entry->rsvd1 = 0;
> validation_entry->rsvd2 = 0;
>  
> +   bo->index = batch->exec_count;
> batch->exec_bos[batch->exec_count] = bo;
> batch->exec_count++;
> batch->aperture_space += bo->size;
> @@ -597,6 +606,7 @@ execbuffer(int fd,
>struct brw_bo *bo = batch->exec_bos[i];
>  
>bo->idle = false;
> +  bo->index = -1;
>  
>/* Update brw_bo::offset64 */
>if (batch->exec_objects[i].offset != bo->offset64) {
> @@ -742,6 +752,10 @@ brw_batch_has_aperture_space(struct brw_context *brw, 
> unsigned extra_space)
>  bool
>  brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo)
>  {
> +   unsigned int index = READ_ONCE(bo->index);
> +   if (index < batch->exec_count && batch->exec_bos[index] == bo)
> +  return true;

Yeah that's a neat addition.

Reviewed-by: Daniel Vetter 

> +
> for (int i = 0; i < batch->exec_count; i++) {
>if (batch->exec_bos[i] == bo)
>   return true;
> -- 
> 2.13.2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-07 Thread Chris Wilson
Quoting Ben Widawsky (2017-07-07 00:27:01)
>  drivers/gpu/drm/i915/i915_drv.c |  3 +++
>  drivers/gpu/drm/i915/i915_drv.h |  2 ++
>  drivers/gpu/drm/i915/i915_pci.c | 13 +
>  include/uapi/drm/i915_drm.h |  8 
>  4 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 9167a73f3c69..26c27b6ae814 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev, void 
> *data,
> if (!value)
> return -ENODEV;
> break;
> +   case I915_PARAM_MOCS_TABLE_VERSION:
> +   value = INTEL_INFO(dev_priv)->mocs_version;

If we use intel_mocs_get_table_version() we can put this magic number
in intel_mocs.c next to the tables, where we can keep its history and
hopefully be able to remember to update it.

> +/* What version of the MOCS table we have. For GEN9 GPUs, the PRM defined
> + * non-optimal settings for the MOCS table. As a result, we were required to 
> use a
> + * small subset, and later add new settings. This param allows userspace to
> + * determine which settings are there.
> + */
> +#define MOCS_TABLE_VERSION   1 /* Build time MOCS table version 
> */

How are you planning to share this? When we update we bump this number,
and then mesa copies it across and uses it after verifying it as 0,1 on
an old kernel.

I don't think you want to expose the updated constant here, but symbolic
names for each version? (What would be the point?)

Next question, why a version number and not just the number of entries
defined? Each index is defined by ABI once assigned, so the number of
entries still operates as a version number and allows easy checking.

if (advanced_cacheing_idx < kernel_max_mocs)
return advanced_cacheing_idx;
if (default_cacheing_idx < kernel_max_mocs)
return default_cacheing_idx;

return follow_pte_idx;

give or take the smarts to choose the preferred indices for any
particular scenario.

In the future, if we finally get user defined mocs, the table_size will
then give the start of the user modifiable indices (presuming they want
to keep the predefined entries for compatibility?)
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-07 Thread Daniel Vetter
On Mon, Jun 19, 2017 at 11:06:50AM +0100, Chris Wilson wrote:
> Passing the index of the target buffer via the reloc.target_handle is
> marginally more efficient for the kernel (it can avoid some allocations,
> and can use a direct lookup rather than a hash or search). It is also
> useful for ourselves as we can use the index into our exec_bos for other
> tasks.
> 
> v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
> a post-processing loop to fixup the relocations.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> Cc: Jason Ekstrand 

Bikeshed for clarity: s/has_batch_first/exec_lut/ and a comment explaining
that we need both LUT and BATCH_FIRST and why. That would make some of the
if (batch->exec_lut) paths easier to grok on a quick reading.
-Daniel

> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 82 
> ---
>  2 files changed, 61 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 2fb2cab918..93ddd0825a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -450,6 +450,7 @@ struct intel_batchbuffer {
>  
> uint32_t state_batch_offset;
> enum brw_gpu_ring ring;
> +   bool has_batch_first;
> bool needs_sol_reset;
> bool state_base_address_emitted;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 15aaf01e52..5fa849c5a5 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -40,6 +40,10 @@
>  
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
>  
> +#define DBG_NO_BATCH_FIRST 0
> +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
> +#define I915_EXEC_BATCH_FIRST (1 << 18)
> +
>  static void
>  intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>  struct brw_bufmgr *bufmgr,
> @@ -57,13 +61,33 @@ uint_key_hash(const void *key)
> return (uintptr_t) key;
>  }
>  
> +static int gem_param(int fd, int name)
> +{
> +   drm_i915_getparam_t gp;
> +   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
> +
> +   memset(&gp, 0, sizeof(gp));
> +   gp.param = name;
> +   gp.value = &v;
> +   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
> +  return -1;
> +
> +   return v;
> +}
> +
> +static bool test_has_batch_first(int fd)
> +{
> +   if (DBG_NO_BATCH_FIRST)
> +  return DBG_NO_BATCH_FIRST < 0;
> +
> +   return gem_param(fd, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0;
> +}
> +
>  void
>  intel_batchbuffer_init(struct intel_batchbuffer *batch,
> struct brw_bufmgr *bufmgr,
> bool has_llc)
>  {
> -   intel_batchbuffer_reset(batch, bufmgr, has_llc);
> -
> if (!has_llc) {
>batch->cpu_map = malloc(BATCH_SZ);
>batch->map = batch->cpu_map;
> @@ -85,6 +109,12 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
>batch->state_batch_sizes =
>   _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
> }
> +
> +   struct brw_context *brw = container_of(batch, brw, batch);
> +   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
> +   batch->has_batch_first = test_has_batch_first(dri_screen->fd);
> +
> +   intel_batchbuffer_reset(batch, bufmgr, has_llc);
>  }
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> @@ -117,21 +147,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
> brw_bo *bo)
>   batch->exec_array_size * sizeof(batch->exec_objects[0]));
> }
>  
> -   struct drm_i915_gem_exec_object2 *validation_entry =
> -  &batch->exec_objects[batch->exec_count];
> -   validation_entry->handle = bo->gem_handle;
> -   if (bo == batch->bo) {
> -  validation_entry->relocation_count = batch->reloc_count;
> -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> -   } else {
> -  validation_entry->relocation_count = 0;
> -  validation_entry->relocs_ptr = 0;
> -   }
> -   validation_entry->alignment = bo->align;
> -   validation_entry->offset = bo->offset64;
> -   validation_entry->flags = bo->kflags;
> -   validation_entry->rsvd1 = 0;
> -   validation_entry->rsvd2 = 0;
> +   struct drm_i915_gem_exec_object2 *exec =
> +  memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec));
> +   exec->handle = bo->gem_handle;
> +   exec->alignment = bo->align;
> +   exec->offset = bo->offset64;
> +   exec->flags = bo->kflags;
>  
> bo->index = batch->exec_count;
> batch->exec_bos[batch->exec_count] = bo;
> @@ -157,6 +178,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
> }
> batch->map_next = batch->map;
>  
> +   if (batch->has_batch_first) {
> +  add_exec_bo(batch, batch->bo);
> +  

Re: [Mesa-dev] [PATCH 4/4] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-07 Thread Daniel Vetter
On Mon, Jun 19, 2017 at 11:06:50AM +0100, Chris Wilson wrote:
> Passing the index of the target buffer via the reloc.target_handle is
> marginally more efficient for the kernel (it can avoid some allocations,
> and can use a direct lookup rather than a hash or search). It is also
> useful for ourselves as we can use the index into our exec_bos for other
> tasks.
> 
> v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
> a post-processing loop to fixup the relocations.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> Cc: Jason Ekstrand 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 82 
> ---
>  2 files changed, 61 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 2fb2cab918..93ddd0825a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -450,6 +450,7 @@ struct intel_batchbuffer {
>  
> uint32_t state_batch_offset;
> enum brw_gpu_ring ring;
> +   bool has_batch_first;
> bool needs_sol_reset;
> bool state_base_address_emitted;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 15aaf01e52..5fa849c5a5 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -40,6 +40,10 @@
>  
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
>  
> +#define DBG_NO_BATCH_FIRST 0
> +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
> +#define I915_EXEC_BATCH_FIRST (1 << 18)

Needs an #ifndef I think, to avoid troubles when updating libdrm. Or just
properly update mesa's copy of i915_drm.h, that would be much better.

> +
>  static void
>  intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>  struct brw_bufmgr *bufmgr,
> @@ -57,13 +61,33 @@ uint_key_hash(const void *key)
> return (uintptr_t) key;
>  }
>  
> +static int gem_param(int fd, int name)
> +{
> +   drm_i915_getparam_t gp;
> +   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
> +
> +   memset(&gp, 0, sizeof(gp));
> +   gp.param = name;
> +   gp.value = &v;
> +   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
> +  return -1;
> +
> +   return v;
> +}

Afaict this exists as intel_get_param already, why can't we use that?

> +
> +static bool test_has_batch_first(int fd)
> +{
> +   if (DBG_NO_BATCH_FIRST)
> +  return DBG_NO_BATCH_FIRST < 0;
> +
> +   return gem_param(fd, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0;
> +}
> +
>  void
>  intel_batchbuffer_init(struct intel_batchbuffer *batch,
> struct brw_bufmgr *bufmgr,
> bool has_llc)
>  {
> -   intel_batchbuffer_reset(batch, bufmgr, has_llc);
> -
> if (!has_llc) {
>batch->cpu_map = malloc(BATCH_SZ);
>batch->map = batch->cpu_map;
> @@ -85,6 +109,12 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
>batch->state_batch_sizes =
>   _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
> }
> +
> +   struct brw_context *brw = container_of(batch, brw, batch);
> +   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
> +   batch->has_batch_first = test_has_batch_first(dri_screen->fd);
> +
> +   intel_batchbuffer_reset(batch, bufmgr, has_llc);
>  }
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> @@ -117,21 +147,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
> brw_bo *bo)
>   batch->exec_array_size * sizeof(batch->exec_objects[0]));
> }
>  
> -   struct drm_i915_gem_exec_object2 *validation_entry =
> -  &batch->exec_objects[batch->exec_count];
> -   validation_entry->handle = bo->gem_handle;
> -   if (bo == batch->bo) {
> -  validation_entry->relocation_count = batch->reloc_count;
> -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> -   } else {
> -  validation_entry->relocation_count = 0;
> -  validation_entry->relocs_ptr = 0;
> -   }
> -   validation_entry->alignment = bo->align;
> -   validation_entry->offset = bo->offset64;
> -   validation_entry->flags = bo->kflags;
> -   validation_entry->rsvd1 = 0;
> -   validation_entry->rsvd2 = 0;
> +   struct drm_i915_gem_exec_object2 *exec =
> +  memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec));
> +   exec->handle = bo->gem_handle;
> +   exec->alignment = bo->align;
> +   exec->offset = bo->offset64;
> +   exec->flags = bo->kflags;
>  
> bo->index = batch->exec_count;
> batch->exec_bos[batch->exec_count] = bo;
> @@ -157,6 +178,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
> }
> batch->map_next = batch->map;
>  
> +   if (batch->has_batch_first) {
> +  add_exec_bo(batch, batch->bo);
> +  

Re: [Mesa-dev] [PATCH 1/7] intel: Move clflush helpers from anv to common/gen_clflush.h.

2017-07-07 Thread Emil Velikov
On 5 July 2017 at 21:56, Kenneth Graunke  wrote:
> I want to use these in the OpenGL driver as well.
>
> Cc: Jason Ekstrand 
> ---
>  src/intel/common/gen_clflush.h | 56 
> ++
>  src/intel/vulkan/anv_batch_chain.c |  2 +-
>  src/intel/vulkan/anv_device.c  |  2 +-
>  src/intel/vulkan/anv_private.h | 32 ++
>  src/intel/vulkan/anv_queue.c   |  2 +-
>  src/intel/vulkan/genX_blorp_exec.c |  2 +-
>  6 files changed, 62 insertions(+), 34 deletions(-)
>  create mode 100644 src/intel/common/gen_clflush.h
>
> diff --git a/src/intel/common/gen_clflush.h b/src/intel/common/gen_clflush.h
> new file mode 100644
> index 000..9b971cac37e
> --- /dev/null
> +++ b/src/intel/common/gen_clflush.h
Please add this file to the COMMON_FILES list in src/intel/Makefile.sources.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] i965: Track last location of bo used for the batch

2017-07-07 Thread Chris Wilson
Borrow a trick from anv, and use the last known index for the bo to skip
a search of the batch->exec_bo when adding a new relocation. In defence
against the bo being used in multiple batches simultaneously, we check
that this slot exists and points back to us.

v2: Also update brw_batch_references()
v3: Reset bo->index on creation (Daniel)

Signed-off-by: Chris Wilson 
Cc: Kenneth Graunke 
Cc: Matt Turner 
Cc: Jason Ekstrand 
Cc: Daniel Vetter 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c|  1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  7 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 18 --
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index da12a13152..4e43a448ae 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -353,6 +353,7 @@ retry:
p_atomic_set(&bo->refcount, 1);
bo->reusable = true;
bo->cache_coherent = bufmgr->has_llc;
+   bo->index = -1;
 
pthread_mutex_unlock(&bufmgr->lock);
 
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 4d671b6aae..27a27ca244 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -76,6 +76,13 @@ struct brw_bo {
uint64_t offset64;
 
/**
+* Index of this buffer inside the batch, -1 when not in a batch. Note
+* that a single buffer may be in multiple batches (contexts), the index
+* only refers to its last use and should not be trusted!
+*/
+   unsigned int index;
+
+   /**
 * Boolean of whether the GPU is definitely not accessing the buffer.
 *
 * This is only valid when reusable, since non-reusable
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 62d2fe8ef3..f76ece8d71 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -515,12 +515,20 @@ throttle(struct brw_context *brw)
}
 }
 
+#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
+
 static void
 add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
 {
if (bo != batch->bo) {
-  for (int i = 0; i < batch->exec_count; i++) {
- if (batch->exec_bos[i] == bo)
+  unsigned int index = READ_ONCE(bo->index);
+
+  if (index < batch->exec_count && batch->exec_bos[index] == bo)
+ return;
+
+  /* May have been shared between multiple active batches */
+  for (index = 0; index < batch->exec_count; index++) {
+ if (batch->exec_bos[index] == bo)
 return;
   }
 
@@ -553,6 +561,7 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo 
*bo)
validation_entry->rsvd1 = 0;
validation_entry->rsvd2 = 0;
 
+   bo->index = batch->exec_count;
batch->exec_bos[batch->exec_count] = bo;
batch->exec_count++;
batch->aperture_space += bo->size;
@@ -597,6 +606,7 @@ execbuffer(int fd,
   struct brw_bo *bo = batch->exec_bos[i];
 
   bo->idle = false;
+  bo->index = -1;
 
   /* Update brw_bo::offset64 */
   if (batch->exec_objects[i].offset != bo->offset64) {
@@ -742,6 +752,10 @@ brw_batch_has_aperture_space(struct brw_context *brw, 
unsigned extra_space)
 bool
 brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo)
 {
+   unsigned int index = READ_ONCE(bo->index);
+   if (index < batch->exec_count && batch->exec_bos[index] == bo)
+  return true;
+
for (int i = 0; i < batch->exec_count; i++) {
   if (batch->exec_bos[i] == bo)
  return true;
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] i965: Move add_exec_bo()

2017-07-07 Thread Daniel Vetter
On Mon, Jun 19, 2017 at 11:06:49AM +0100, Chris Wilson wrote:
> To avoid a forward declaration in the next patch, move the definition of
> add_exec_bo() earlier.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> Cc: Jason Ekstrand 

Needs to be rebased when patch 2 is updated, but ack.
-Daniel

> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 106 
> +-
>  1 file changed, 53 insertions(+), 53 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 7129209c26..15aaf01e52 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -87,6 +87,59 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
> }
>  }
>  
> +#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> +
> +static unsigned int
> +add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
> +{
> +   if (bo != batch->bo) {
> +  unsigned int index = READ_ONCE(bo->index);
> +
> +  if (index < batch->exec_count && batch->exec_bos[index] == bo)
> + return index;
> +
> +  /* May have been shared between multiple active batches */
> +  for (index = 0; index < batch->exec_count; index++) {
> + if (batch->exec_bos[index] == bo)
> +return index;
> +  }
> +
> +  brw_bo_reference(bo);
> +   }
> +
> +   if (batch->exec_count == batch->exec_array_size) {
> +  batch->exec_array_size *= 2;
> +  batch->exec_bos =
> + realloc(batch->exec_bos,
> + batch->exec_array_size * sizeof(batch->exec_bos[0]));
> +  batch->exec_objects =
> + realloc(batch->exec_objects,
> + batch->exec_array_size * sizeof(batch->exec_objects[0]));
> +   }
> +
> +   struct drm_i915_gem_exec_object2 *validation_entry =
> > +  &batch->exec_objects[batch->exec_count];
> +   validation_entry->handle = bo->gem_handle;
> +   if (bo == batch->bo) {
> +  validation_entry->relocation_count = batch->reloc_count;
> +  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> +   } else {
> +  validation_entry->relocation_count = 0;
> +  validation_entry->relocs_ptr = 0;
> +   }
> +   validation_entry->alignment = bo->align;
> +   validation_entry->offset = bo->offset64;
> +   validation_entry->flags = bo->kflags;
> +   validation_entry->rsvd1 = 0;
> +   validation_entry->rsvd2 = 0;
> +
> +   bo->index = batch->exec_count;
> +   batch->exec_bos[batch->exec_count] = bo;
> +   batch->aperture_space += bo->size;
> +
> +   return batch->exec_count++;
> +}
> +
>  static void
>  intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>  struct brw_bufmgr *bufmgr,
> @@ -515,59 +568,6 @@ throttle(struct brw_context *brw)
> }
>  }
>  
> -#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> -
> -static unsigned int
> -add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo)
> -{
> -   if (bo != batch->bo) {
> -  unsigned int index = READ_ONCE(bo->index);
> -
> -  if (index < batch->exec_count && batch->exec_bos[index] == bo)
> - return index;
> -
> -  /* May have been shared between multiple active batches */
> -  for (index = 0; index < batch->exec_count; index++) {
> - if (batch->exec_bos[index] == bo)
> -return index;
> -  }
> -
> -  brw_bo_reference(bo);
> -   }
> -
> -   if (batch->exec_count == batch->exec_array_size) {
> -  batch->exec_array_size *= 2;
> -  batch->exec_bos =
> - realloc(batch->exec_bos,
> - batch->exec_array_size * sizeof(batch->exec_bos[0]));
> -  batch->exec_objects =
> - realloc(batch->exec_objects,
> - batch->exec_array_size * sizeof(batch->exec_objects[0]));
> -   }
> -
> -   struct drm_i915_gem_exec_object2 *validation_entry =
> > -  &batch->exec_objects[batch->exec_count];
> -   validation_entry->handle = bo->gem_handle;
> -   if (bo == batch->bo) {
> -  validation_entry->relocation_count = batch->reloc_count;
> -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> -   } else {
> -  validation_entry->relocation_count = 0;
> -  validation_entry->relocs_ptr = 0;
> -   }
> -   validation_entry->alignment = bo->align;
> -   validation_entry->offset = bo->offset64;
> -   validation_entry->flags = bo->kflags;
> -   validation_entry->rsvd1 = 0;
> -   validation_entry->rsvd2 = 0;
> -
> -   bo->index = batch->exec_count;
> -   batch->exec_bos[batch->exec_count] = bo;
> -   batch->aperture_space += bo->size;
> -
> -   return batch->exec_count++;
> -}
> -
>  static int
>  execbuffer(int fd,
> struct intel_batchbuffer *batch,
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> 

Re: [Mesa-dev] [PATCH] etnaviv: fix refcnt initialization in etna_screen

2017-07-07 Thread Christian Gmeiner
2017-07-06 23:18 GMT+02:00 Aleksander Morgado :
> Despite being a member of the etna_screen struct, 'refcnt' is used by
> the winsys-specific logic to track the reference count of the object
> managed in a hash table. When the count reaches zero, the pipe screen
> is removed from the table and destroyed.
>
> Fix the logic by initializing the refcnt to 1 when screen created.
> This initialization is done in etna_screen_create(), to follow the
> same logic as in freedreno and virgl.
>
> Signed-off-by: Aleksander Morgado 

Reviewed-by: Christian Gmeiner 

> ---
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index eefb51c5da..fa0cbd9076 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -772,6 +772,7 @@ etna_screen_create(struct etna_device *dev, struct 
> etna_gpu *gpu,
> screen->dev = dev;
> screen->gpu = gpu;
> screen->ro = renderonly_dup(ro);
> +   screen->refcnt = 1;
>
> if (!screen->ro) {
>DBG("could not create renderonly object");
> --
> 2.13.1
>

greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHv2] etnaviv: Add support for ETC2 texture compression

2017-07-07 Thread Christian Gmeiner
Hi Wladimir


2017-07-07 11:12 GMT+02:00 Wladimir :
> On Thu, Jul 6, 2017 at 4:01 PM, Wladimir  wrote:
>
>> That the SRGB formats fail is expected (they're simply using RGB, this
>> is mentioned in the patch message), the discrepancy between
>> gc2000/gc3000 on RGB(A) is interesting, though, I'll look into it.
>
> I've not been able to figure this out. I've tried comparing blob
> command streams GC2000 versus GC3000 - could find no significant
> differences, most notably the format IDs are the same.
>
> Then I looked at the visual output of this test (pngs attached) on both GPUs.
>
> GC3000 is 100% correct. The overall image on GC2000 looks ok, however
> some tiles show serious artifacts. My suspicion is starting to be that
> there is a hardware bug on GC2000 with the ETC1 decoder.

Let's hope there is a way to work around that issue.

> To know this for sure we'd have to run piglit with the blob drivers on
> GC2000. Did you (or anyone else) ever manage to do this?
>

I have a piglit on my RE box. From my experience piglit works
every now and then
depending on the used test. Will give it a try during the next 1-2
days and will provide you
the captured cmd stream.

greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] etnaviv: fix refcnt initialization in etna_screen

2017-07-07 Thread Emil Velikov
On 7 July 2017 at 09:09, Lucas Stach  wrote:
> Am Donnerstag, den 06.07.2017, 23:18 +0200 schrieb Aleksander Morgado:
>> Despite being a member of the etna_screen struct, 'refcnt' is used by
>> the winsys-specific logic to track the reference count of the object
>> managed in a hash table. When the count reaches zero, the pipe screen
>> is removed from the table and destroyed.
>>
>> Fix the logic by initializing the refcnt to 1 when screen created.
>> This initialization is done in etna_screen_create(), to follow the
>> same logic as in freedreno and virgl.
>
> Urgh, nice (for whatever value of nice) find. I'll push this with stable
> tags added in a moment.
>
It should land in stable indeed. Thanks Lucas.

Very quick skim shows that other drivers don't have this bug, yet I
wonder if we cannot get a volunteer to re-spin RobH refcount
series[1].
There were some comments but nothing serious IMHO.

Also, welcome to Mesa Aleksander!
Emil

[1] https://patchwork.freedesktop.org/series/8855/
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/30] i965/miptree: Add an explicit format parameter to create_for_dri_image

2017-07-07 Thread Daniel Stone
Hi,

On 28 June 2017 at 19:09, Jason Ekstrand  wrote:
> On Wed, Jun 28, 2017 at 10:59 AM, Daniel Stone  wrote:
>> i965 tries pretty hard to allocate sRGB images in the pre-DRIImage,
>> DRI2 (as in the X11 protocol named 'DRI2') codepath, but this isn't
>> used by Wayland, GBM, or DRI3.
>
> Except that whether you get an sRGB renderbuffer or not is governed by GLX
> and EGL and not Wayland/DRI2/DRI3.  In one of them (I think it's ES), the
> default is to get an sRGB renderbuffer but either is possible with both
> independent of how the image comes in.  We *do* see it on DRI3 and Wayland
> which is why this patch exists in the first place.

Well, that's fairly depressing. So I guess SARGB8 is only used for
GLX_ARB_framebuffer_sRGB, and the rest is just magically transforming
(ostensibly) _UNORM Mesa formats into _SRGB?

intel_gles3_srgb_workaround() is ... quite a thing.

>> So no, not for pretty much any externally-visible images AFAICT. Even
>> if it were true for scanout, the client would need to tell KMS, so KMS
>> could send a HDMI infoframe telling the display.
>
> But scanout always does sRGB.  If you want real UNORM, then you'll have to
> add kernel API.

I'm kinda confused on this point; the colour transform matrix set up
by default is an identity mapping, rather than a degamma-to-linear
(ignoring the 16-235 vs. limited dance ...). In theory, if we're
sending sRGB, we should inform the sink via an AVI infoframe, but I
can't see anywhere we actually do that.

Anyway, I don't see this patch making the historical mistake any
better or worse, so let's just mentally file it away to bottom out one
day and move on.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >