Mesa (main): radv: fix the CS regalloc hang workaround on GFX6 and few GFX7 chips

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: d532da6e964e60811cbc03998cd4e2ee5f6991f5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d532da6e964e60811cbc03998cd4e2ee5f6991f5

Author: Samuel Pitoiset 
Date:   Fri Feb 25 08:44:07 2022 +0100

radv: fix the CS regalloc hang workaround on GFX6 and few GFX7 chips

RadeonSI uses a different terminology and info->blocks is actually the
number of threads, not the number of blocks (i.e. info->grid).

Found by inspection.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c1795b5c662..8acfaf27e64 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -7378,8 +7378,10 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const 
struct radv_dispatch_inf
 {
bool has_prefetch = 
cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
bool pipeline_is_dirty = pipeline != 
cmd_buffer->state.emitted_compute_pipeline;
+   struct radv_shader *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+   unsigned *cs_block_size = compute_shader->info.cs.block_size;
bool cs_regalloc_hang = 
cmd_buffer->device->physical_device->rad_info.has_cs_regalloc_hang_bug &&
-   info->blocks[0] * info->blocks[1] * info->blocks[2] 
> 256;
+   cs_block_size[0] * cs_block_size[1] * 
cs_block_size[2] > 256;
 
if (cs_regalloc_hang)
   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |



Mesa (main): radv: rework the CS regalloc hang workaround

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 7ad1eb4e8c3963f41a38b3ce826a1e33b552b94f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ad1eb4e8c3963f41a38b3ce826a1e33b552b94f

Author: Samuel Pitoiset 
Date:   Fri Feb 25 08:54:27 2022 +0100

radv: rework the CS regalloc hang workaround

Move it to the pipeline creation to reduce computations in the hot path.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 8 ++--
 src/amd/vulkan/radv_pipeline.c   | 8 
 src/amd/vulkan/radv_private.h| 1 +
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8acfaf27e64..e9eeb3cc3c4 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -7378,12 +7378,8 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const 
struct radv_dispatch_inf
 {
bool has_prefetch = 
cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
bool pipeline_is_dirty = pipeline != 
cmd_buffer->state.emitted_compute_pipeline;
-   struct radv_shader *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
-   unsigned *cs_block_size = compute_shader->info.cs.block_size;
-   bool cs_regalloc_hang = 
cmd_buffer->device->physical_device->rad_info.has_cs_regalloc_hang_bug &&
-   cs_block_size[0] * cs_block_size[1] * 
cs_block_size[2] > 256;
 
-   if (cs_regalloc_hang)
+   if (pipeline->compute.cs_regalloc_hang_bug)
   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
 
@@ -7442,7 +7438,7 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const 
struct radv_dispatch_inf
  : 
VK_PIPELINE_BIND_POINT_COMPUTE);
}
 
-   if (cs_regalloc_hang)
+   if (pipeline->compute.cs_regalloc_hang_bug)
   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
 
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 8b59160276f..6cc5157f340 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -6504,6 +6504,14 @@ radv_compute_pipeline_create(VkDevice _device, 
VkPipelineCache _cache,
pipeline->push_constant_size = pipeline_layout->push_constant_size;
pipeline->dynamic_offset_count = pipeline_layout->dynamic_offset_count;
 
+   if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) {
+  struct radv_shader *compute_shader = 
pipeline->shaders[MESA_SHADER_COMPUTE];
+  unsigned *cs_block_size = compute_shader->info.cs.block_size;
+
+  pipeline->compute.cs_regalloc_hang_bug =
+ cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256;
+   }
+
radv_compute_generate_pm4(pipeline);
 
*pPipeline = radv_pipeline_to_handle(pipeline);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index f950a82f815..877c99b8005 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1909,6 +1909,7 @@ struct radv_pipeline {
  struct radv_pipeline_shader_stack_size *rt_stack_sizes;
  bool dynamic_stack_size;
  uint32_t group_count;
+ bool cs_regalloc_hang_bug;
   } compute;
   struct {
  unsigned stage_count;



Mesa (main): aco/ra: count constant moves in get_reg_create_vector()

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 9181e8ceba3c9a1a6a4d060fa30ef7825cf9d404
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9181e8ceba3c9a1a6a4d060fa30ef7825cf9d404

Author: Daniel Schürmann 
Date:   Fri Jun 18 10:59:58 2021 +0200

aco/ra: count constant moves in get_reg_create_vector()

Also implements a correct_pos_mask to keep track of which
operands are already at the right target position.
To mitigate some regressions, call get_reg_impl() less often.

Totals from 1229 (0.91% of 134913) affected shaders: (GFX10.3)
VGPRs: 60216 -> 59848 (-0.61%)
CodeSize: 3716496 -> 3711268 (-0.14%); split: -0.19%, +0.05%
MaxWaves: 27952 -> 28004 (+0.19%)
Instrs: 685983 -> 685035 (-0.14%); split: -0.20%, +0.06%
Latency: 6727587 -> 6725340 (-0.03%); split: -0.06%, +0.02%
InvThroughput: 9289043 -> 9289866 (+0.01%); split: -0.02%, +0.03%
VClause: 17730 -> 17740 (+0.06%); split: -0.25%, +0.30%
Copies: 54352 -> 53420 (-1.71%); split: -2.46%, +0.75%
Branches: 12122 -> 12121 (-0.01%)

Reviewed-by: Rhys Perry 
Part-of: 

---

 src/amd/compiler/aco_register_allocation.cpp | 39 +---
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp 
b/src/amd/compiler/aco_register_allocation.cpp
index 82af2951ade..3b723d9e485 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1680,6 +1680,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp,
PhysReg best_pos{0xFFF};
unsigned num_moves = 0xFF;
bool best_avoid = true;
+   uint32_t correct_pos_mask = 0;
 
/* test for each operand which definition placement causes the least 
shuffle instructions */
for (unsigned i = 0, offset = 0; i < instr->operands.size();
@@ -1744,13 +1745,14 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp,
  continue;
 
   /* count operands in wrong positions */
+  uint32_t correct_pos_mask_new = 0;
   for (unsigned j = 0, offset2 = 0; j < instr->operands.size();
offset2 += instr->operands[j].bytes(), j++) {
- if (j == i || !instr->operands[j].isTemp() ||
- instr->operands[j].getTemp().type() != rc.type())
-continue;
- if (instr->operands[j].physReg().reg_b != reg_win.lo() * 4 + offset2)
-k += instr->operands[j].bytes();
+ Operand& op = instr->operands[j];
+ if (op.isTemp() && op.physReg().reg_b == reg_win.lo() * 4 + offset2)
+correct_pos_mask_new |= 1 << j;
+ else
+k += op.bytes();
   }
   bool aligned = rc == RegClass::v4 && reg_win.lo() % 4 == 0;
   if (k > num_moves || (!aligned && k == num_moves))
@@ -1759,18 +1761,28 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp,
   best_pos = reg_win.lo();
   num_moves = k;
   best_avoid = avoid;
+  correct_pos_mask = correct_pos_mask_new;
}
 
-   if (num_moves >= bytes)
+   /* too many moves: try the generic get_reg() function */
+   if (num_moves >= 2 * bytes) {
   return get_reg(ctx, reg_file, temp, parallelcopies, instr);
+   } else if (num_moves > bytes) {
+  DefInfo info(ctx, instr, rc, -1);
+  std::pair res = get_reg_simple(ctx, reg_file, info);
+  if (res.second)
+ return res.first;
+   }
 
/* re-enable killed operands which are in the wrong position */
RegisterFile tmp_file(reg_file);
-   for (unsigned i = 0, offset = 0; i < instr->operands.size();
-offset += instr->operands[i].bytes(), i++) {
-  if (instr->operands[i].isTemp() && 
instr->operands[i].isFirstKillBeforeDef() &&
-  instr->operands[i].physReg().reg_b != best_pos.reg_b + offset)
- tmp_file.fill(instr->operands[i]);
+   for (Operand& op : instr->operands) {
+  if (op.isTemp() && op.isFirstKillBeforeDef())
+ tmp_file.fill(op);
+   }
+   for (unsigned i = 0; i < instr->operands.size(); i++) {
+  if ((correct_pos_mask >> i) & 1u && instr->operands[i].isKill())
+ tmp_file.clear(instr->operands[i]);
}
 
/* collect variables to be moved */
@@ -1782,7 +1794,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp,
   if (!instr->operands[i].isTemp() || 
!instr->operands[i].isFirstKillBeforeDef() ||
   instr->operands[i].getTemp().type() != rc.type())
  continue;
-  bool correct_pos = instr->operands[i].physReg().reg_b == best_pos.reg_b 
+ offset;
+  bool correct_pos = !tmp_file.test(instr->operands[i].physReg(), 
instr->operands[i].bytes());
   /* GFX9+: move killed operands which aren't yet at the correct position
* Moving all killed operands generally leads to more register swaps.
* This is only done on GFX9+ because of the cheap v_swap instruction.
@@ -1790,9 +1802,6 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp,
   if (ct

Mesa (main): aco/ra: refactor collect_vars() to return a sorted vector

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 70aea6b41aeea6e9ff3341b07d27c5e4ede9ae02
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=70aea6b41aeea6e9ff3341b07d27c5e4ede9ae02

Author: Daniel Schürmann 
Date:   Fri Jun 18 17:51:15 2021 +0200

aco/ra: refactor collect_vars() to return a sorted vector

The vector of IDs is sorted with decreasing sizes,
and by increasing assigned registers.
This decouples register assignment from SSA IDs.

Totals from 12694 (9.41% of 134913) affected shaders: (GFX10.3)
VGPRs: 757864 -> 757848 (-0.00%); split: -0.00%, +0.00%
CodeSize: 72350540 -> 72348688 (-0.00%); split: -0.02%, +0.02%
MaxWaves: 237018 -> 237020 (+0.00%); split: +0.00%, -0.00%
Instrs: 13545494 -> 13544699 (-0.01%); split: -0.03%, +0.02%
Latency: 148539203 -> 148533292 (-0.00%); split: -0.01%, +0.00%
InvThroughput: 30319086 -> 30320382 (+0.00%); split: -0.01%, +0.01%
VClause: 326875 -> 327028 (+0.05%); split: -0.05%, +0.09%
SClause: 479833 -> 479837 (+0.00%); split: -0.00%, +0.00%
Copies: 862152 -> 860914 (-0.14%); split: -0.43%, +0.28%
Branches: 317775 -> 31 (+0.00%)

Reviewed-by: Rhys Perry 
Part-of: 

---

 src/amd/compiler/aco_register_allocation.cpp | 46 ++--
 src/amd/compiler/tests/test_regalloc.cpp |  4 +--
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp 
b/src/amd/compiler/aco_register_allocation.cpp
index 7b8855966cb..a41c61f1ff4 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1006,19 +1006,28 @@ find_vars(ra_ctx& ctx, RegisterFile& reg_file, const 
PhysRegInterval reg_interva
return vars;
 }
 
-/* collect variables from a register area and clear reg_file */
-std::set>
+/* collect variables from a register area and clear reg_file
+ * variables are sorted in decreasing size and
+ * increasing assigned register
+ */
+std::vector
 collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval 
reg_interval)
 {
std::vector ids = find_vars(ctx, reg_file, reg_interval);
-   std::set> vars;
+   std::sort(ids.begin(), ids.end(),
+ [&](unsigned a, unsigned b)
+ {
+assignment& var_a = ctx.assignments[a];
+assignment& var_b = ctx.assignments[b];
+return var_a.rc.bytes() > var_b.rc.bytes() ||
+   (var_a.rc.bytes() == var_b.rc.bytes() && var_a.reg < 
var_b.reg);
+ });
 
for (unsigned id : ids) {
   assignment& var = ctx.assignments[id];
   reg_file.clear(var.reg, var.rc);
-  vars.emplace(var.rc.bytes(), id);
}
-   return vars;
+   return ids;
 }
 
 std::pair
@@ -1073,17 +1082,11 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, 
RegisterFile& reg_file,
 bool
 get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
 std::vector>& 
parallelcopies,
-const std::set>& vars,
-const PhysRegInterval bounds, aco_ptr& instr,
-const PhysRegInterval def_reg)
+const std::vector& vars, const PhysRegInterval 
bounds,
+aco_ptr& instr, const PhysRegInterval def_reg)
 {
-   /* variables are sorted from small sized to large */
-   /* NOTE: variables are also sorted by ID. this only affects a very small 
number of shaders
-* slightly though. */
-   // TODO: sort by register instead of id
-   for (std::set>::const_reverse_iterator it = 
vars.rbegin();
-it != vars.rend(); ++it) {
-  unsigned id = it->second;
+   /* Variables are sorted from large to small and with increasing assigned 
register */
+   for (unsigned id : vars) {
   assignment& var = ctx.assignments[id];
   DefInfo info = DefInfo(ctx, ctx.pseudo_dummy, var.rc, -1);
   uint32_t size = info.size;
@@ -1203,7 +1206,7 @@ get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
   PhysRegInterval reg_win{best_pos, size};
 
   /* collect variables and block reg file */
-  std::set> new_vars = collect_vars(ctx, 
reg_file, reg_win);
+  std::vector new_vars = collect_vars(ctx, reg_file, reg_win);
 
   /* mark the area as blocked */
   reg_file.block(reg_win.lo(), var.rc);
@@ -1351,7 +1354,7 @@ get_reg_impl(ra_ctx& ctx, RegisterFile& reg_file,
   }
}
 
-   std::set> vars = collect_vars(ctx, tmp_file, 
best_win);
+   std::vector vars = collect_vars(ctx, tmp_file, best_win);
 
/* re-enable killed operands */
if (!is_phi(instr) && instr->opcode != aco_opcode::p_create_vector) {
@@ -1820,8 +1823,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp,
}
 
/* collect variables to be moved */
-   std::set> vars =
-  collect_vars(ctx, tmp_file, PhysRegInterval{best_pos, size});
+   std::vector vars = collect_vars(ctx, tmp_file, 
PhysRegInterval{best_pos, size});
 
bool success = false;
std::vector> pc;
@@ -1952,

Mesa (main): aco/ra: refactor find_vars() to return a vector

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 61c36b6dc0a275057df160a20e709acbae9c0450
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=61c36b6dc0a275057df160a20e709acbae9c0450

Author: Daniel Schürmann 
Date:   Fri Jun 18 17:45:16 2021 +0200

aco/ra: refactor find_vars() to return a vector

instead of std::set<>

No fossil-db changes.

Reviewed-by: Rhys Perry 
Part-of: 

---

 src/amd/compiler/aco_register_allocation.cpp | 42 +++-
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp 
b/src/amd/compiler/aco_register_allocation.cpp
index 69bcf09a2ce..7b8855966cb 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -366,8 +366,8 @@ private:
}
 };
 
-std::set> find_vars(ra_ctx& ctx, RegisterFile& 
reg_file,
-  const PhysRegInterval 
reg_interval);
+std::vector find_vars(ra_ctx& ctx, RegisterFile& reg_file,
+const PhysRegInterval reg_interval);
 
 /* helper function for debugging */
 UNUSED void
@@ -453,9 +453,10 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
/* print assignments ordered by registers */
std::map>
   regs_to_vars; /* maps to byte size and temp id */
-   for (const auto& size_id : find_vars(ctx, reg_file, regs)) {
-  auto reg = ctx.assignments[size_id.second].reg;
-  ASSERTED auto inserted = regs_to_vars.emplace(reg, size_id);
+   for (unsigned id : find_vars(ctx, reg_file, regs)) {
+  const assignment& var = ctx.assignments[id];
+  PhysReg reg = var.reg;
+  ASSERTED auto inserted = regs_to_vars.emplace(reg, 
std::make_pair(var.rc.bytes(), id));
   assert(inserted.second);
}
 
@@ -982,26 +983,24 @@ get_reg_simple(ra_ctx& ctx, RegisterFile& reg_file, 
DefInfo info)
return {{}, false};
 }
 
-/* collect variables from a register area and clear reg_file */
-std::set>
+/* collect variables from a register area */
+std::vector
 find_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval 
reg_interval)
 {
-   std::set> vars;
+   std::vector vars;
for (PhysReg j : reg_interval) {
   if (reg_file.is_blocked(j))
  continue;
   if (reg_file[j] == 0xF000) {
  for (unsigned k = 0; k < 4; k++) {
 unsigned id = reg_file.subdword_regs[j][k];
-if (id) {
-   assignment& var = ctx.assignments[id];
-   vars.emplace(var.rc.bytes(), id);
-}
+if (id && (vars.empty() || id != vars.back()))
+   vars.emplace_back(id);
  }
-  } else if (reg_file[j] != 0) {
+  } else {
  unsigned id = reg_file[j];
- assignment& var = ctx.assignments[id];
- vars.emplace(var.rc.bytes(), id);
+ if (id && (vars.empty() || id != vars.back()))
+vars.emplace_back(id);
   }
}
return vars;
@@ -1011,10 +1010,13 @@ find_vars(ra_ctx& ctx, RegisterFile& reg_file, const 
PhysRegInterval reg_interva
 std::set>
 collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval 
reg_interval)
 {
-   std::set> vars = find_vars(ctx, reg_file, 
reg_interval);
-   for (std::pair size_id : vars) {
-  assignment& var = ctx.assignments[size_id.second];
+   std::vector ids = find_vars(ctx, reg_file, reg_interval);
+   std::set> vars;
+
+   for (unsigned id : ids) {
+  assignment& var = ctx.assignments[id];
   reg_file.clear(var.reg, var.rc);
+  vars.emplace(var.rc.bytes(), id);
}
return vars;
 }
@@ -1668,8 +1670,8 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
 
   /* reallocate passthrough variables and non-killed operands */
   std::vector vars;
-  for (const std::pair& var : find_vars(ctx, reg_file, 
regs))
- vars.emplace_back(var.second, ctx.assignments[var.second].rc);
+  for (unsigned id : find_vars(ctx, reg_file, regs))
+ vars.emplace_back(id, ctx.assignments[id].rc);
   vars.emplace_back(0x, RegClass(info.rc.type(), MAX2(def_size, 
killed_op_size)));
 
   PhysReg space = compact_relocate_vars(ctx, vars, parallelcopies, 
regs.lo());



Mesa (main): aco/ra: special-case get_reg_for_create_vector_copy()

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 9476986e6f6fe49ef8bc511f4ebeca4232263888
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9476986e6f6fe49ef8bc511f4ebeca4232263888

Author: Daniel Schürmann 
Date:   Fri Jun 18 16:43:18 2021 +0200

aco/ra: special-case get_reg_for_create_vector_copy()

This function implements separate handling for
p_create_vector during get_regs_for_copies().
This simplifies some code and allows swap instructions to be
selected more precisely where possible.

Totals from 876 (0.65% of 134913) affected shaders: (GFX10.3)
VGPRs: 53312 -> 53336 (+0.05%)
CodeSize: 3792936 -> 3788160 (-0.13%); split: -0.15%, +0.03%
MaxWaves: 16084 -> 16078 (-0.04%)
Instrs: 707449 -> 706385 (-0.15%); split: -0.19%, +0.04%
Latency: 6288293 -> 6286677 (-0.03%); split: -0.03%, +0.01%
InvThroughput: 4264450 -> 4263671 (-0.02%); split: -0.02%, +0.00%
VClause: 18655 -> 18679 (+0.13%); split: -0.20%, +0.33%
Copies: 55397 -> 54353 (-1.88%); split: -2.45%, +0.57%
Branches: 12426 -> 12415 (-0.09%)

Reviewed-by: Rhys Perry 
Part-of: 

---

 src/amd/compiler/aco_register_allocation.cpp | 131 +++
 1 file changed, 74 insertions(+), 57 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp 
b/src/amd/compiler/aco_register_allocation.cpp
index 3b723d9e485..69bcf09a2ce 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1019,6 +1019,55 @@ collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const 
PhysRegInterval reg_inte
return vars;
 }
 
+std::pair
+get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
+   std::vector>& 
parallelcopies,
+   aco_ptr& instr, const 
PhysRegInterval def_reg,
+   DefInfo info, unsigned id)
+{
+   PhysReg reg = def_reg.lo();
+   /* dead operand: return position in vector */
+   for (unsigned i = 0; i < instr->operands.size(); i++) {
+  if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id &&
+  instr->operands[i].isKillBeforeDef()) {
+ assert(!reg_file.test(reg, info.rc.bytes()));
+ return {reg, !info.rc.is_subdword() || (reg.byte() % info.stride == 
0)};
+  }
+  reg.reg_b += instr->operands[i].bytes();
+   }
+
+   if (ctx.program->chip_class <= GFX8)
+  return {PhysReg(), false};
+
+   /* check if the previous position was in vector */
+   assignment& var = ctx.assignments[id];
+   if (def_reg.contains(PhysRegInterval{var.reg, info.size})) {
+  reg = def_reg.lo();
+  /* try to use the previous register of the operand */
+  for (unsigned i = 0; i < instr->operands.size(); i++) {
+ if (reg != var.reg) {
+reg.reg_b += instr->operands[i].bytes();
+continue;
+ }
+
+ /* check if we can swap positions */
+ if (instr->operands[i].isTemp() && instr->operands[i].isFirstKill() &&
+ instr->operands[i].regClass() == info.rc) {
+assignment& op = ctx.assignments[instr->operands[i].tempId()];
+/* if everything matches, create parallelcopy for the killed 
operand */
+if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) &&
+reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
+   Definition pc_def = Definition(reg, info.rc);
+   parallelcopies.emplace_back(instr->operands[i], pc_def);
+   return {op.reg, true};
+}
+ }
+ return {PhysReg(), false};
+  }
+   }
+   return {PhysReg(), false};
+}
+
 bool
 get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
 std::vector>& 
parallelcopies,
@@ -1029,6 +1078,7 @@ get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
/* variables are sorted from small sized to large */
/* NOTE: variables are also sorted by ID. this only affects a very small 
number of shaders
 * slightly though. */
+   // TODO: sort by register instead of id
for (std::set>::const_reverse_iterator it = 
vars.rbegin();
 it != vars.rend(); ++it) {
   unsigned id = it->second;
@@ -1039,34 +1089,24 @@ get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
   /* check if this is a dead operand, then we can re-use the space from 
the definition
* also use the correct stride for sub-dword operands */
   bool is_dead_operand = false;
-  for (unsigned i = 0; !is_phi(instr) && i < instr->operands.size(); i++) {
- if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id) 
{
-if (instr->operands[i].isKillBeforeDef())
-   is_dead_operand = true;
-info = DefInfo(ctx, instr, var.rc, i);
-break;
- }
-  }
-
-  std::pair res;
-  if (is_dead_operand) {
- if (instr->opcode == aco_opcode::p_create_vector) {
-PhysReg reg(def_r

Mesa (main): amd: add PKT3_LOAD_SH_REG_INDEX

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 53ccfbb99672a822440fc5d987310d4c56db2090
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=53ccfbb99672a822440fc5d987310d4c56db2090

Author: Samuel Pitoiset 
Date:   Wed Feb 16 19:02:51 2022 +0100

amd: add PKT3_LOAD_SH_REG_INDEX

It seems only available on GFX8+.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/common/sid.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 9924a2b2aa0..919e3c7b6f1 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -220,6 +220,7 @@
 #define PKT3_LOAD_UCONFIG_REG   0x5E /* GFX7+ */
 #define PKT3_LOAD_SH_REG0x5F
 #define PKT3_LOAD_CONTEXT_REG   0x61
+#define PKT3_LOAD_SH_REG_INDEX  0x63 /* GFX8+ */
 #define PKT3_SET_CONFIG_REG 0x68
 #define PKT3_SET_CONTEXT_REG0x69
 #define PKT3_SET_SH_REG 0x76



Mesa (main): radv: fix indirect dispatches on the compute queue on GFX10.3+

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 5f3d3be24a7d9fa79020877c259c0740cfba12d4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5f3d3be24a7d9fa79020877c259c0740cfba12d4

Author: Samuel Pitoiset 
Date:   Wed Feb 16 19:03:48 2022 +0100

radv: fix indirect dispatches on the compute queue on GFX10.3+

For weird reasons, the COPY_DATA packet doesn't seem to copy anything
while on the compute queue. Instead, use PKT3_LOAD_SH_REG_INDEX which
seems to work as expected.

Note that LOAD_SH_REG_INDEX on the compute queue is only supported by
the CP on GFX10.3, so we need to implement a different solution (load
from the indirect BO in the shader) for older generations.

This should fix the Control RT GPU hang.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index e9eeb3cc3c4..534f8077600 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -7266,14 +7266,24 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer 
*cmd_buffer, struct radv_pipel
   radv_cs_add_buffer(ws, cs, info->indirect);
 
   if (loc->sgpr_idx != -1) {
- for (unsigned i = 0; i < 3; ++i) {
-radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-radeon_emit(cs,
-COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 
COPY_DATA_DST_SEL(COPY_DATA_REG));
-radeon_emit(cs, (info->va + 4 * i));
-radeon_emit(cs, (info->va + 4 * i) >> 32);
-radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 
4) >> 2) + i);
-radeon_emit(cs, 0);
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= 
GFX10_3) {
+unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4;
+
+radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0));
+radeon_emit(cs, info->va);
+radeon_emit(cs, info->va >> 32);
+radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
+radeon_emit(cs, 3);
+ } else {
+for (unsigned i = 0; i < 3; ++i) {
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cs,
+   COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 
COPY_DATA_DST_SEL(COPY_DATA_REG));
+   radeon_emit(cs, (info->va + 4 * i));
+   radeon_emit(cs, (info->va + 4 * i) >> 32);
+   radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx 
* 4) >> 2) + i);
+   radeon_emit(cs, 0);
+}
  }
   }
 



Mesa (main): Revert "llvmpipe: allow vertex processing and fragment processing in parallel"

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 1b354ab9137d254f653873611931fddc832984b1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b354ab9137d254f653873611931fddc832984b1

Author: Gert Wollny 
Date:   Thu Mar 10 14:17:55 2022 +0100

Revert "llvmpipe: allow vertex processing and fragment processing in parallel"

This reverts commit ec8104c6b227421b3a21e9c0652e3050066bb169.
  llvmpipe: allow vertex processing and fragment processing in parallel

The commit breaks the virglrenderer vtest environment used in the
virglrenderer CI and also breaks running Wayland in virtualized environments.

Related: #6130
Related: #6110

Signed-off-by: Gert Wollny 
Acked-by: Dave Airlie 
Acked-by: Tomeu Vizoso 
Part-of: 

---

 src/gallium/drivers/llvmpipe/lp_rast.c  |  2 ++
 src/gallium/drivers/llvmpipe/lp_scene.c |  2 +-
 src/gallium/drivers/llvmpipe/lp_setup.c | 18 ++
 src/gallium/drivers/llvmpipe/lp_setup_context.h |  1 +
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c
index e27d78a3432..f67fbda6b01 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -81,6 +81,8 @@ lp_rast_begin( struct lp_rasterizer *rast,
 static void
 lp_rast_end( struct lp_rasterizer *rast )
 {
+   lp_scene_end_rasterization( rast->curr_scene );
+
rast->curr_scene = NULL;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c 
b/src/gallium/drivers/llvmpipe/lp_scene.c
index dbe0cc3873c..3d109ff52b6 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -100,7 +100,7 @@ lp_scene_create( struct lp_setup_context *setup )
 void
 lp_scene_destroy(struct lp_scene *scene)
 {
-   lp_scene_end_rasterization(scene);
+   lp_fence_reference(&scene->fence, NULL);
mtx_destroy(&scene->mutex);
assert(scene->data.head == &scene->data.first);
slab_free_st(&scene->setup->scene_slab, scene);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c
index 4c9f6da7821..614a058f968 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -73,7 +73,6 @@ lp_setup_wait_empty_scene(struct lp_setup_context *setup)
   debug_printf("%s: wait for scene %d\n",
__FUNCTION__, setup->scenes[0]->fence->id);
   lp_fence_wait(setup->scenes[0]->fence);
-  lp_scene_end_rasterization(setup->scenes[0]);
}
return 0;
 }
@@ -87,10 +86,8 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup)
/* try and find a scene that isn't being used */
for (i = 0; i < setup->num_active_scenes; i++) {
   if (setup->scenes[i]->fence) {
- if (lp_fence_signalled(setup->scenes[i]->fence)) {
-lp_scene_end_rasterization(setup->scenes[i]);
+ if (lp_fence_signalled(setup->scene->fence))
 break;
- }
   } else
  break;
}
@@ -213,9 +210,22 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup )
   setup->last_fence->issued = TRUE;
 
mtx_lock(&screen->rast_mutex);
+
+   /* FIXME: We enqueue the scene then wait on the rasterizer to finish.
+* This means we never actually run any vertex stuff in parallel to
+* rasterization (not in the same context at least) which is what the
+* multiple scenes per setup is about - when we get a new empty scene
+* any old one is already empty again because we waited here for
+* raster tasks to be finished. Ideally, we shouldn't need to wait here
+* and rely on fences elsewhere when waiting is necessary.
+* Certainly, lp_scene_end_rasterization() would need to be deferred too
+* and there's probably other bits why this doesn't actually work.
+*/
lp_rast_queue_scene(screen->rast, scene);
+   lp_rast_finish(screen->rast);
mtx_unlock(&screen->rast_mutex);
 
+   lp_scene_end_rasterization(setup->scene);
lp_setup_reset( setup );
 
LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h 
b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 420b78e2f52..92dfeb890d5 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -56,6 +56,7 @@ struct lp_setup_variant;
 
 
 /** Max number of scenes */
+/* XXX: make multiple scenes per context work, see lp_setup_rasterize_scene */
 #define INITIAL_SCENES 4
 #define MAX_SCENES 64
 



Mesa (main): radv: stop waiting for DMA to be idle for all transfer operations

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: d7514c5f0408a3173c5ed10452dd33f7b6220eda
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7514c5f0408a3173c5ed10452dd33f7b6220eda

Author: Samuel Pitoiset 
Date:   Mon Feb 28 15:50:34 2022 +0100

radv: stop waiting for DMA to be idle for all transfer operations

Only copy operations actually use CP DMA.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 534f8077600..3fdd75ea14e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -8446,18 +8446,11 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, const 
VkDependencyInfoKHR *dep_
  &dep_info->pImageMemoryBarriers[i].subresourceRange, sample_locs_info 
? &sample_locations : NULL);
}
 
-   /* Make sure CP DMA is idle because the driver might have performed a
-* DMA operation for copying or filling buffers/images.
+   /* Make sure CP DMA is idle because the driver might have performed a DMA 
operation for copying a
+* buffer (or a MSAA image using FMASK) or updated a buffer which is a 
transfer operation.
 */
if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT_KHR |
- VK_PIPELINE_STAGE_2_RESOLVE_BIT_KHR |
- VK_PIPELINE_STAGE_2_BLIT_BIT_KHR |
- VK_PIPELINE_STAGE_2_CLEAR_BIT_KHR)) {
-  /* Be conservative for now. */
-  src_stage_mask |= VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR;
-   }
-
-   if (src_stage_mask & (VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
+ VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
  VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR))
   si_cp_dma_wait_for_idle(cmd_buffer);
 



Mesa (main): intel/compiler: Fix non-trivial designated initializer

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: e5f3689cfff9e4eb7166e7f354d754354c11e7be
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e5f3689cfff9e4eb7166e7f354d754354c11e7be

Author: Ernst Sjöstrand 
Date:   Sat Mar 12 17:24:11 2022 +0100

intel/compiler: Fix non-trivial designated initializer

Not supported by GCC 7.

src/compiler/nir/nir_builder_opcodes.h:14156:118:
sorry, unimplemented: non-trivial designated initializers not supported
src/intel/compiler/brw_mesh.cpp:515:7:
note: in expansion of macro ‘nir_store_per_primitive_output’

Reviewed-by: Marcin Ślusarz 
Fixes: bc4f8c073a2 ("intel/compiler: inject MUE initialization")
Signed-off-by: Ernst Sjöstrand 
Part-of: 

---

 src/intel/compiler/brw_mesh.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp
index f31c83c0ce1..190c60e01e6 100644
--- a/src/intel/compiler/brw_mesh.cpp
+++ b/src/intel/compiler/brw_mesh.cpp
@@ -515,6 +515,7 @@ brw_nir_initialize_mue(nir_shader *nir,
   nir_store_per_primitive_output(&b, zerovec, prim, dw_off,
  .base = (int)map->per_primitive_start_dw,
  .write_mask = WRITEMASK_XYZW,
+ .component = 0,
  .src_type = nir_type_uint32);
}
 
@@ -535,6 +536,7 @@ brw_nir_initialize_mue(nir_shader *nir,
  nir_store_per_primitive_output(&b, zerovec, prim, dw_off,
 .base = 
(int)map->per_primitive_start_dw,
 .write_mask = WRITEMASK_XYZW,
+.component = 0,
 .src_type = nir_type_uint32);
   }
   nir_pop_if(&b, if_stmt);



Mesa (main): radv: update inputs_read when lowering the view index

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 42f84a588633a11e991bb2856468145df30771ac
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=42f84a588633a11e991bb2856468145df30771ac

Author: Samuel Pitoiset 
Date:   Fri Mar 11 09:10:41 2022 +0100

radv: update inputs_read when lowering the view index

Otherwise inputs_read doesn't contain the information. This shouldn't
fix anything in practice because radv_shader_info gathers this from
the variable.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_shader.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 7611c3acbe3..66929492461 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -935,6 +935,9 @@ lower_view_index(nir_shader *nir)
  nir_ssa_def *def = nir_load_var(&b, layer);
  nir_ssa_def_rewrite_uses(&load->dest.ssa, def);
 
+ /* Update inputs_read to reflect that the pass added a new input. */
+ nir->info.inputs_read |= VARYING_BIT_LAYER;
+
  nir_instr_remove(instr);
  progress = true;
   }



Mesa (main): radv: fix compatibility with VK_IMAGE_CREATE_EXTENDED_USAGE_BIT

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 0eaf9dbce302fe76b0ea2f0d22520526512eae5c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0eaf9dbce302fe76b0ea2f0d22520526512eae5c

Author: Samuel Pitoiset 
Date:   Thu Feb 17 16:21:47 2022 +0100

radv: fix compatibility with VK_IMAGE_CREATE_EXTENDED_USAGE_BIT

Some formats can be accepted if a compatible format is also supported
and VK_IMAGE_CREATE_EXTENDED_USAGE_BIT is used.

Fixes new CTS dEQP-VK.image.extended_usage_bit_compatibility.*.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6046
Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_formats.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index be62ef1d001..84a07f2e7d2 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -1563,43 +1563,53 @@ radv_get_image_format_properties(struct 
radv_physical_device *physical_device,
   goto unsupported;
}
 
-   if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
+   /* From the Vulkan 1.3.206 spec:
+*
+* "VK_IMAGE_CREATE_EXTENDED_USAGE_BIT specifies that the image can be 
created with usage flags
+* that are not supported for the format the image is created with but are 
supported for at least
+* one format a VkImageView created from the image can have."
+*/
+   VkImageUsageFlags image_usage = info->usage;
+   if (info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)
+  image_usage = 0;
+
+   if (image_usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
   if (!(format_feature_flags & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT_KHR)) 
{
  goto unsupported;
   }
}
 
-   if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+   if (image_usage & VK_IMAGE_USAGE_STORAGE_BIT) {
   if (!(format_feature_flags & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT_KHR)) 
{
  goto unsupported;
   }
}
 
-   if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+   if (image_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
   if (!(format_feature_flags & 
VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT_KHR)) {
  goto unsupported;
   }
}
 
-   if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+   if (image_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
   if (!(format_feature_flags & 
VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT_KHR)) {
  goto unsupported;
   }
}
 
-   if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
+   if (image_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
   if (!(format_feature_flags & VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT_KHR)) {
  goto unsupported;
   }
}
 
-   if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
+   if (image_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
   if (!(format_feature_flags & VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT_KHR)) {
  goto unsupported;
   }
}
 
-   if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+   if (image_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
   if (!(format_feature_flags & 
(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT_KHR |
 
VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT_KHR))) {
  goto unsupported;



Mesa (main): radv: stop zeroing radv_sample_locations_state in barriers

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: b003a101ee36edf72c41341dd09bea3d6aa40ea8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b003a101ee36edf72c41341dd09bea3d6aa40ea8

Author: Samuel Pitoiset 
Date:   Mon Feb 28 16:00:43 2022 +0100

radv: stop zeroing radv_sample_locations_state in barriers

This is useless because all fields should be correctly filled if the
pNext struct is found.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8ead01980b4..f970ae81ca9 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -8403,7 +8403,7 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, const 
VkDependencyInfoKHR *dep_
 
   const struct VkSampleLocationsInfoEXT *sample_locs_info =
  vk_find_struct_const(dep_info->pImageMemoryBarriers[i].pNext, 
SAMPLE_LOCATIONS_INFO_EXT);
-  struct radv_sample_locations_state sample_locations = {0};
+  struct radv_sample_locations_state sample_locations;
 
   if (sample_locs_info) {
  assert(image->flags & 
VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);



Mesa (main): radv: remove unnecessary check in FreeCommandBuffers()

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: c6d776f092cd0bd6189796bb61eb6c41824e0ce0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6d776f092cd0bd6189796bb61eb6c41824e0ce0

Author: Samuel Pitoiset 
Date:   Wed Feb 23 19:49:54 2022 +0100

radv: remove unnecessary check in FreeCommandBuffers()

cmd_buffer->pool should never be NULL. Even if AllocateCommandBuffers()
fails, the successfully created cmdbuffers would have it set correctly.

From the Vulkan spec:

"VUID-vkFreeCommandBuffers-pCommandBuffers-parent
 Each element of pCommandBuffers that is a valid handle must have
 been created, allocated, or retrieved from commandPool."

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b7f9a00d667..1ba594c9192 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4435,16 +4435,17 @@ VKAPI_ATTR void VKAPI_CALL
 radv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t 
commandBufferCount,
 const VkCommandBuffer *pCommandBuffers)
 {
+   RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+
for (uint32_t i = 0; i < commandBufferCount; i++) {
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
 
-  if (cmd_buffer) {
- if (cmd_buffer->pool) {
-list_del(&cmd_buffer->pool_link);
-list_addtail(&cmd_buffer->pool_link, 
&cmd_buffer->pool->free_cmd_buffers);
- } else
-radv_destroy_cmd_buffer(cmd_buffer);
-  }
+  if (!cmd_buffer)
+ continue;
+  assert(cmd_buffer->pool == pool);
+
+  list_del(&cmd_buffer->pool_link);
+  list_addtail(&cmd_buffer->pool_link, &pool->free_cmd_buffers);
}
 }
 



Mesa (main): radv: move waiting for events to CmdWaitEvents2KHR()

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 612a12a42c21dde1a9d93a977e82b501cd4cefce
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=612a12a42c21dde1a9d93a977e82b501cd4cefce

Author: Samuel Pitoiset 
Date:   Mon Feb 28 15:38:19 2022 +0100

radv: move waiting for events to CmdWaitEvents2KHR()

CmdPipelineBarrier doesn't have events.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 48 ++--
 1 file changed, 16 insertions(+), 32 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 1ba594c9192..8ead01980b4 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -8340,17 +8340,10 @@ radv_handle_image_transition(struct radv_cmd_buffer 
*cmd_buffer, struct radv_ima
}
 }
 
-struct radv_barrier_info {
-   enum rgp_barrier_reason reason;
-   uint32_t eventCount;
-   const VkEvent *pEvents;
-};
-
 static void
 radv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfoKHR 
*dep_info,
- const struct radv_barrier_info *info)
+ enum rgp_barrier_reason reason)
 {
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
enum radv_cmd_flush_bits src_flush_bits = 0;
enum radv_cmd_flush_bits dst_flush_bits = 0;
VkPipelineStageFlags2KHR src_stage_mask = 0;
@@ -8359,19 +8352,7 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer, const 
VkDependencyInfoKHR *dep_
if (cmd_buffer->state.subpass)
   radv_mark_noncoherent_rb(cmd_buffer);
 
-   radv_describe_barrier_start(cmd_buffer, info->reason);
-
-   for (unsigned i = 0; i < info->eventCount; ++i) {
-  RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]);
-  uint64_t va = radv_buffer_get_va(event->bo);
-
-  radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
-
-  ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 
cs, 7);
-
-  radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
-  assert(cmd_buffer->cs->cdw <= cdw_max);
-   }
+   radv_describe_barrier_start(cmd_buffer, reason);
 
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
   src_stage_mask |= dep_info->pMemoryBarriers[i].srcStageMask;
@@ -8461,13 +8442,8 @@ radv_CmdPipelineBarrier2KHR(VkCommandBuffer 
commandBuffer,
 const VkDependencyInfoKHR *pDependencyInfo)
 {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct radv_barrier_info info;
-
-   info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
-   info.eventCount = 0;
-   info.pEvents = NULL;
 
-   radv_barrier(cmd_buffer, pDependencyInfo, &info);
+   radv_barrier(cmd_buffer, pDependencyInfo, 
RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER);
 }
 
 static void
@@ -8588,13 +8564,21 @@ radv_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer, 
uint32_t eventCount, const
const VkDependencyInfoKHR* pDependencyInfos)
 {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct radv_barrier_info info;
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+   for (unsigned i = 0; i < eventCount; ++i) {
+  RADV_FROM_HANDLE(radv_event, event, pEvents[i]);
+  uint64_t va = radv_buffer_get_va(event->bo);
+
+  radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
+
+  ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, 
cs, 7);
 
-   info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS;
-   info.eventCount = eventCount;
-   info.pEvents = pEvents;
+  radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
+  assert(cmd_buffer->cs->cdw <= cdw_max);
+   }
 
-   radv_barrier(cmd_buffer, pDependencyInfos, &info);
+   radv_barrier(cmd_buffer, pDependencyInfos, 
RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS);
 }
 
 VKAPI_ATTR void VKAPI_CALL



Mesa (main): radv: remove useless check in radv_cmd_buffer_upload_data()

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 269b1232eeeade303181ac906229f9b1c15d96bc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=269b1232eeeade303181ac906229f9b1c15d96bc

Author: Samuel Pitoiset 
Date:   Fri Feb 25 08:14:25 2022 +0100

radv: remove useless check in radv_cmd_buffer_upload_data()

ptr shouldn't be NULL.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3fdd75ea14e..fbc0fc320b5 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -647,10 +647,9 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer 
*cmd_buffer, unsigned size, c
 
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void 
**)&ptr))
   return false;
+   assert(ptr);
 
-   if (ptr)
-  memcpy(ptr, data, size);
-
+   memcpy(ptr, data, size);
return true;
 }
 



Mesa (main): radv: remove unnecessary NULL check in TrimCommandPool()

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 01ec899083a6d58d139492e48f48fe55fb31aa65
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=01ec899083a6d58d139492e48f48fe55fb31aa65

Author: Samuel Pitoiset 
Date:   Wed Feb 23 19:46:15 2022 +0100

radv: remove unnecessary NULL check in TrimCommandPool()

This function seems rarely used or maybe never but I noticed this.

From the Vulkan spec:

"VUID-vkTrimCommandPool-commandPool-parameter
 commandPool must be a valid VkCommandPool handle".

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_cmd_buffer.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fbc0fc320b5..b7f9a00d667 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5811,9 +5811,6 @@ radv_TrimCommandPool(VkDevice device, VkCommandPool 
commandPool, VkCommandPoolTr
 {
RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
 
-   if (!pool)
-  return;
-
list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, 
&pool->free_cmd_buffers, pool_link)
{
   radv_destroy_cmd_buffer(cmd_buffer);



Mesa (main): panfrost: Optimise recalculation of max sampler view

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: d5870c45ae82c091e72f1258920aa54974e19a36
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5870c45ae82c091e72f1258920aa54974e19a36

Author: Icecream95 
Date:   Thu Oct 14 16:39:59 2021 +1300

panfrost: Optimise recalculation of max sampler view

Previously we always searched through 128 sampler views for set
sampler views; now we never look above the maximum updated view.

Fixes: 304851422a4 ("panfrost: Fix set_sampler_views for big GL")
Part-of: 

---

 src/gallium/drivers/panfrost/pan_context.c | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index 074d40e43d3..7d3c050306b 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -627,12 +627,16 @@ panfrost_set_sampler_views(
 struct panfrost_context *ctx = pan_context(pctx);
 ctx->dirty_shader[shader] |= PAN_DIRTY_STAGE_TEXTURE;
 
+unsigned new_nr = 0;
 unsigned i;
 
 for (i = 0; i < num_views; ++i) {
 struct pipe_sampler_view *view = views ? views[i] : NULL;
 unsigned p = i + start_slot;
 
+if (view)
+new_nr = p + 1;
+
 if (take_ownership) {
 pipe_sampler_view_reference((struct pipe_sampler_view 
**)&ctx->sampler_views[shader][p],
 NULL);
@@ -649,13 +653,21 @@ panfrost_set_sampler_views(
NULL);
 }
 
-/* Recalculate sampler view count */
-ctx->sampler_view_count[shader] = 0;
+/* If the sampler view count is higher than the greatest sampler view
+ * we touch, it can't change */
+if (ctx->sampler_view_count[shader] > start_slot + num_views + 
unbind_num_trailing_slots)
+return;
 
-for (i = 0; i < ARRAY_SIZE(ctx->sampler_views[shader]); ++i) {
-if (ctx->sampler_views[shader][i])
-ctx->sampler_view_count[shader] = i + 1;
+/* If we haven't set any sampler views here, search lower numbers for
+ * set sampler views */
+if (new_nr == 0) {
+for (i = 0; i < start_slot; ++i) {
+if (ctx->sampler_views[shader][i])
+new_nr = i + 1;
+}
 }
+
+ctx->sampler_view_count[shader] = new_nr;
 }
 
 static void



Mesa (main): panfrost: Don't initialise the trampolines array

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 3e405afeb9c1cb1182f83e2a1fd6f0beb199df64
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e405afeb9c1cb1182f83e2a1fd6f0beb199df64

Author: Icecream95 
Date:   Thu Oct 14 16:38:38 2021 +1300

panfrost: Don't initialise the trampolines array

PIPE_MAX_SHADER_SAMPLER_VIEWS is 128, so we just end up initialising a
kilobyte of memory for no reason, when usually only a couple of
sampler views are used.

Fixes: 53ef20f08d4 ("panfrost: Handle NULL sampler views")
Part-of: 

---

 src/gallium/drivers/panfrost/pan_cmdstream.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c 
b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 6471bd04a5b..80400404cda 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -1401,13 +1401,15 @@ panfrost_emit_texture_descriptors(struct panfrost_batch 
*batch,
 
 return T.gpu;
 #else
-uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { 0 };
+uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
 struct panfrost_sampler_view *view = 
ctx->sampler_views[stage][i];
 
-if (!view)
+if (!view) {
+trampolines[i] = 0;
 continue;
+}
 
 panfrost_update_sampler_view(view, &ctx->base);
 



Mesa (staging/22.0): .pick_status.json: Update to d5870c45ae82c091e72f1258920aa54974e19a36

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: bf12c7cde48f650be8d5cbcdf1741b9b5c88cc84
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=bf12c7cde48f650be8d5cbcdf1741b9b5c88cc84

Author: Dylan Baker 
Date:   Mon Mar 14 10:37:11 2022 -0700

.pick_status.json: Update to d5870c45ae82c091e72f1258920aa54974e19a36

---

 .pick_status.json | 918 ++
 1 file changed, 918 insertions(+)

diff --git a/.pick_status.json b/.pick_status.json
index 155f0628a33..8fa352419ed 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1,4 +1,922 @@
 [
+{
+"sha": "d5870c45ae82c091e72f1258920aa54974e19a36",
+"description": "panfrost: Optimise recalculation of max sampler view",
+"nominated": true,
+"nomination_type": 1,
+"resolution": 0,
+"main_sha": null,
+"because_sha": "304851422a4610170e870a5315fefaa5ec42917f"
+},
+{
+"sha": "3e405afeb9c1cb1182f83e2a1fd6f0beb199df64",
+"description": "panfrost: Don't initialise the trampolines array",
+"nominated": true,
+"nomination_type": 1,
+"resolution": 0,
+"main_sha": null,
+"because_sha": "53ef20f08d4340c1bad0b45a2501f4daba7fb479"
+},
+{
+"sha": "b003a101ee36edf72c41341dd09bea3d6aa40ea8",
+"description": "radv: stop zeroing radv_sample_locations_state in 
barriers",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "612a12a42c21dde1a9d93a977e82b501cd4cefce",
+"description": "radv: move waiting for events to CmdWaitEvents2KHR()",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "c6d776f092cd0bd6189796bb61eb6c41824e0ce0",
+"description": "radv: remove unnecessary check in 
FreeCommandBuffers()",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "01ec899083a6d58d139492e48f48fe55fb31aa65",
+"description": "radv: remove unnecessary NULL check in 
TrimCommandPool()",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "269b1232eeeade303181ac906229f9b1c15d96bc",
+"description": "radv: remove useless check in 
radv_cmd_buffer_upload_data()",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "0eaf9dbce302fe76b0ea2f0d22520526512eae5c",
+"description": "radv: fix compatibility with 
VK_IMAGE_CREATE_EXTENDED_USAGE_BIT",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "42f84a588633a11e991bb2856468145df30771ac",
+"description": "radv: update inputs_read when lowering the view index",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "e5f3689cfff9e4eb7166e7f354d754354c11e7be",
+"description": "intel/compiler: Fix non-trivial designated 
initializer",
+"nominated": false,
+"nomination_type": 1,
+"resolution": 4,
+"main_sha": null,
+"because_sha": "bc4f8c073a2cf35227df85a1edfce9848b9cdfae"
+},
+{
+"sha": "d7514c5f0408a3173c5ed10452dd33f7b6220eda",
+"description": "radv: stop waiting for DMA to be idle for all transfer 
operations",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "1b354ab9137d254f653873611931fddc832984b1",
+"description": "Revert \"llvmpipe: allow vertex processing and 
fragment processing in parallel\"",
+"nominated": false,
+"nomination_type": 2,
+"resolution": 4,
+"main_sha": null,
+"because_sha": "ec8104c6b227421b3a21e9c0652e3050066bb169"
+},
+{
+"sha": "5f3d3be24a7d9fa79020877c259c0740cfba12d4",
+"description": "radv: fix indirect dispatches on the compute queue on 
GFX10.3+",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},
+{
+"sha": "53ccfbb99672a822440fc5d987310d4c56db2090",
+"description": "amd: add PKT3_LOAD_SH_REG_INDEX",
+"nominated": false,
+"nomination_type": null,
+"resolution": 4,
+"main_sha": null,
+"because_sha": null
+},

Mesa (staging/22.0): ir3/ra: Sanitize parallel copy flags better

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 4656dbee084bed9b19e7e1d11fe4ea346962ed5d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4656dbee084bed9b19e7e1d11fe4ea346962ed5d

Author: Connor Abbott 
Date:   Mon Jan 10 18:16:05 2022 +0100

ir3/ra: Sanitize parallel copy flags better

For pcopies we only care about the register's type, i.e. whether it's a
half-register and whether it's an array (plus its size). Copying over
other flags like IR3_REG_RELATIV just leads to sadness and validator
assertions.

Fixes: 0ffcb19b9d9 ("ir3: Rewrite register allocation")
Part-of: 
(cherry picked from commit ab0ed4ff3f2eed6610597a0f0b42b8ca829790f9)

---

 .pick_status.json  |  2 +-
 src/freedreno/ir3/ir3_ra.c | 14 --
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 6912cbabdea..461c2f6f50a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -868,7 +868,7 @@
 "description": "ir3/ra: Sanitize parallel copy flags better",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "0ffcb19b9d9fbe902224542047c389a661fbf816"
 },
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 2897163b2f9..58ffcc36825 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -1332,7 +1332,8 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct 
ir3_instruction *instr)
   struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
   struct ir3_register *reg =
  ir3_dst_create(pcopy, INVALID_REG,
-entry->interval->interval.reg->flags & ~IR3_REG_SSA);
+entry->interval->interval.reg->flags &
+(IR3_REG_HALF | IR3_REG_ARRAY));
   reg->size = entry->interval->interval.reg->size;
   reg->wrmask = entry->interval->interval.reg->wrmask;
   assign_reg(pcopy, reg, ra_interval_get_num(entry->interval));
@@ -1342,7 +1343,8 @@ insert_parallel_copy_instr(struct ra_ctx *ctx, struct 
ir3_instruction *instr)
   struct ra_parallel_copy *entry = &ctx->parallel_copies[i];
   struct ir3_register *reg =
  ir3_src_create(pcopy, INVALID_REG,
-entry->interval->interval.reg->flags & ~IR3_REG_SSA);
+entry->interval->interval.reg->flags &
+(IR3_REG_HALF | IR3_REG_ARRAY));
   reg->size = entry->interval->interval.reg->size;
   reg->wrmask = entry->interval->interval.reg->wrmask;
   assign_reg(pcopy, reg, ra_physreg_to_num(entry->src, reg->flags));
@@ -1778,8 +1780,9 @@ insert_liveout_copy(struct ir3_block *block, physreg_t 
dst, physreg_t src,
   pcopy->dsts[pcopy->dsts_count++] = old_pcopy->dsts[i];
}
 
-   struct ir3_register *dst_reg =
-  ir3_dst_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA);
+   unsigned flags = reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
+
+   struct ir3_register *dst_reg = ir3_dst_create(pcopy, INVALID_REG, flags);
dst_reg->wrmask = reg->wrmask;
dst_reg->size = reg->size;
assign_reg(pcopy, dst_reg, ra_physreg_to_num(dst, reg->flags));
@@ -1788,8 +1791,7 @@ insert_liveout_copy(struct ir3_block *block, physreg_t 
dst, physreg_t src,
   pcopy->srcs[pcopy->srcs_count++] = old_pcopy->srcs[i];
}
 
-   struct ir3_register *src_reg =
-  ir3_src_create(pcopy, INVALID_REG, reg->flags & ~IR3_REG_SSA);
+   struct ir3_register *src_reg = ir3_src_create(pcopy, INVALID_REG, flags);
src_reg->wrmask = reg->wrmask;
src_reg->size = reg->size;
assign_reg(pcopy, src_reg, ra_physreg_to_num(src, reg->flags));



Mesa (staging/22.0): virgl: Fix texture transfers by using a staging resource

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 42bf17f6cc2a2fe523925a3f5f34421ad09966c1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=42bf17f6cc2a2fe523925a3f5f34421ad09966c1

Author: Gert Wollny 
Date:   Wed Jan 12 13:01:30 2022 +0100

virgl: Fix texture transfers by using a staging resource

This commit fixes the following flaws in the implementation:

* when a resource was re-allocated, the guest side storage
  was also allocated
* when a source needs a readback before being written to, then
  the call would go through vws->transfer_get, thereby bypassing the
  staging resource, and this would fail on the host, because
  the allocated IOV was too small (just one byte)
* if the texture write would need neither flush nor readback, the
  old code path would be used expecting that guest side backing storage
  for the texture.

v2: - actually do a readback to the staging resource when it is required
- fix typo (Lepton)

v3: Don't use staging transfers if the host can't read back the data
by rendering to an FBO or calling getTexImage, because in this case
we rely on the IOV to hold the data.

v4: Also don't use staging transfers if the format is not a readback
format. Otherwise we have to deal with the resolve blit, and
this is currently not working correctly.

v5: add a new flag that indicates whether non-renderable textures can
be read back (either via glGetTexImage or GBM)

v6: Restrict the use of staging texture transfers to textures that can
be read back, and on GLES also if they are bound to scanout and
the host uses minigbm to allocate such textures.
For that replace the flag indicating the capability to read back
non-renderable textures with a cap that indicates whether scanout
textures can be read back.

v7: update virglrenderer version in the CI

v8: update use of staging (Chia-I)

v9: remove superfluous check and assignment (Chia-I)

v10: disable staging textures for arrays with stencil format. This is a
 workaround for failures of the CI.

Fixes: cdc480585c9be368ddfdc33e2eb73e3582f25fe7
virgl/drm: New optimization for uploading textures

Signed-off-by: Gert Wollny 
Reviewed-by: Chia-I Wu 
Part-of: 
(cherry picked from commit c9d99b7eec7ec14d6d71d381a424b6280d75a882)

Conflicts:
.gitlab-ci/container/build-crosvm.sh

---

 .gitlab-ci/container/build-crosvm.sh   |   4 +-
 .pick_status.json  |   2 +-
 src/gallium/drivers/virgl/virgl_resource.c | 100 +
 src/gallium/drivers/virgl/virgl_resource.h |   5 +-
 src/gallium/drivers/virgl/virgl_screen.c   |  13 +++-
 src/gallium/drivers/virgl/virgl_screen.h   |   8 ++-
 src/gallium/drivers/virgl/virgl_texture.c  |   6 +-
 src/virtio/virtio-gpu/virgl_hw.h   |   2 +-
 8 files changed, 103 insertions(+), 37 deletions(-)

diff --git a/.gitlab-ci/container/build-crosvm.sh 
b/.gitlab-ci/container/build-crosvm.sh
index bd5fbfbb52d..297650149b5 100644
--- a/.gitlab-ci/container/build-crosvm.sh
+++ b/.gitlab-ci/container/build-crosvm.sh
@@ -8,7 +8,9 @@ pushd /platform/crosvm
 git checkout "$CROSVM_VERSION"
 git submodule update --init
 
-VIRGLRENDERER_VERSION=2a5fb800c6b0ce15ad37c2c698635e3e2d27b37c
+VIRGLRENDERER_VERSION=e420a5aab92de8fb42fad50762f0ac3b5fcb3bfb
+rm -rf third_party/virglrenderer
+git clone --single-branch -b master --no-checkout 
https://gitlab.freedesktop.org/virgl/virglrenderer.git third_party/virglrenderer
 pushd third_party/virglrenderer
 git checkout "$VIRGLRENDERER_VERSION"
 meson build/ $EXTRA_MESON_ARGS
diff --git a/.pick_status.json b/.pick_status.json
index 8fa352419ed..61ab653891f 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1867,7 +1867,7 @@
 "description": "virgl: Fix texture transfers by using a staging 
resource",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "cdc480585c9be368ddfdc33e2eb73e3582f25fe7"
 },
diff --git a/src/gallium/drivers/virgl/virgl_resource.c 
b/src/gallium/drivers/virgl/virgl_resource.c
index e3d82fb0f10..7185c9a90ff 100644
--- a/src/gallium/drivers/virgl/virgl_resource.c
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -51,21 +51,59 @@ enum virgl_transfer_map_type {
/* Map type for read of texture data from host to guest
 * using staging buffer. */
VIRGL_TRANSFER_MAP_READ_FROM_STAGING,
+   /* Map type for write of texture data to host using staging
+* buffer that needs a readback first. */
+   VIRGL_TRANSFER_MAP_WRITE_TO_STAGING_WITH_READBACK,
 };
 
 /* Check if copy transfer from host can be used:
- *  1. if resource is a texture
- *  2. if renderer supports copy transfer from host
+ *  1. if resource is a texture,
+ *  2. if renderer supports copy transfer from host,
+ *  3. the host is not GLES (no fake FP64)
+ *  4. the format can be rendered to and the for

Mesa (staging/22.0): lavapipe: Reset the free_cmd_buffers list in TrimCommandPool

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 17836a6d64fc90230f71947d8cadc4fb45b66f97
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=17836a6d64fc90230f71947d8cadc4fb45b66f97

Author: Jason Ekstrand 
Date:   Wed Mar  9 20:14:44 2022 -0600

lavapipe: Reset the free_cmd_buffers list in TrimCommandPool

We delete all the command buffers but they're still in the list so
future allocations may try to re-use them post-free and another trim
will re-delete them.

Fixes: b38879f8c5f5 ("vallium: initial import of the vulkan frontend")
Reviewed-by: Boris Brezillon 
Part-of: 
(cherry picked from commit 66cb64c8ad339848e7538faa24800755dc6221f8)

---

 .pick_status.json   | 2 +-
 src/gallium/frontends/lavapipe/lvp_cmd_buffer.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 61ab653891f..6912cbabdea 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -940,7 +940,7 @@
 "description": "lavapipe: Reset the free_cmd_buffers list in 
TrimCommandPool",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "b38879f8c5f57b7f1802e433e33181bdf5e72aef"
 },
diff --git a/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c 
b/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
index 35a85a72e75..fc261326c26 100644
--- a/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
+++ b/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
@@ -265,6 +265,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_TrimCommandPool(
 &pool->free_cmd_buffers, pool_link) {
   lvp_cmd_buffer_destroy(cmd_buffer);
}
+   list_inithead(&pool->free_cmd_buffers);
 }
 
 VKAPI_ATTR void VKAPI_CALL lvp_CmdDrawMultiEXT(



Mesa (staging/22.0): util/bitset: Fix off-by-one in __bitset_set_range

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: c2541d65c8d439fa1562bf8f65cdbdf97388f507
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c2541d65c8d439fa1562bf8f65cdbdf97388f507

Author: Connor Abbott 
Date:   Mon Jan 10 14:01:34 2022 +0100

util/bitset: Fix off-by-one in __bitset_set_range

Fixes: b3b03e33c9f ("util/bitset: add BITSET_SET_RANGE(..)")
Part-of: 
(cherry picked from commit 410e746198b498efdb9c37a63a0b8aad67469a4c)

---

 .pick_status.json | 2 +-
 src/util/bitset.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 461c2f6f50a..34dbdb5235f 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -832,7 +832,7 @@
 "description": "util/bitset: Fix off-by-one in __bitset_set_range",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "b3b03e33c9f11adb0c4d84311a651ea6016a0885"
 },
diff --git a/src/util/bitset.h b/src/util/bitset.h
index 279ad553e79..d8ec1af67ad 100644
--- a/src/util/bitset.h
+++ b/src/util/bitset.h
@@ -208,7 +208,7 @@ __bitset_shl(BITSET_WORD *x, unsigned amount, unsigned n)
 static inline void
 __bitset_set_range(BITSET_WORD *r, unsigned start, unsigned end)
 {
-   const unsigned size = end - start;
+   const unsigned size = end - start + 1;
const unsigned start_mod = start % BITSET_WORDBITS;
 
if (start_mod + size <= BITSET_WORDBITS) {



Mesa (main): Revert "ci: downgrade sphinx to v3.x"

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: dd9b8881e0ad84ab1e74d972a8233f5c03e7b37a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dd9b8881e0ad84ab1e74d972a8233f5c03e7b37a

Author: Erik Faye-Lund 
Date:   Wed Mar  2 12:58:58 2022 +0100

Revert "ci: downgrade sphinx to v3.x"

The readthedocs theme now supports Sphinx 4.x, so there's no longer any
reason to stick with 3.x.

This reverts commit a545b6eda040ee40b5c36533d7ae378fc24fcf09.

Reviewed-by: Hoe Hao Cheng 
Part-of: 

---

 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 357996ab567..b7a9835b5ec 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -125,7 +125,7 @@ stages:
 - .ci-run-policy
   script:
   - apk --no-cache add graphviz doxygen
-  - pip3 install "sphinx<4.0" breathe mako sphinx_rtd_theme
+  - pip3 install sphinx breathe mako sphinx_rtd_theme
   - docs/doxygen-wrapper.py --out-dir=docs/doxygen_xml
   - sphinx-build -W -b html docs public
 



Mesa (main): panvk: Add support for texel buffers

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: ff8aa15fa0d3f813efb45878d77111af6575a049
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ff8aa15fa0d3f813efb45878d77111af6575a049

Author: Boris Brezillon 
Date:   Fri Sep 24 19:34:12 2021 +0200

panvk: Add support for texel buffers

Signed-off-by: Boris Brezillon 
Reviewed-by: Jason Ekstrand 
Part-of: 

---

 src/panfrost/ci/deqp-panfrost-g52-vk.toml |  8 +--
 src/panfrost/vulkan/panvk_descriptor_set.c| 14 +++--
 src/panfrost/vulkan/panvk_device.c|  2 +-
 src/panfrost/vulkan/panvk_image.c | 19 +++---
 src/panfrost/vulkan/panvk_private.h   | 12 +++-
 src/panfrost/vulkan/panvk_vX_descriptor_set.c | 56 +
 src/panfrost/vulkan/panvk_vX_image.c  | 86 +++
 7 files changed, 161 insertions(+), 36 deletions(-)

diff --git a/src/panfrost/ci/deqp-panfrost-g52-vk.toml 
b/src/panfrost/ci/deqp-panfrost-g52-vk.toml
index 9145930e8b4..897667a5e2a 100644
--- a/src/panfrost/ci/deqp-panfrost-g52-vk.toml
+++ b/src/panfrost/ci/deqp-panfrost-g52-vk.toml
@@ -4,16 +4,12 @@ caselists = ["/deqp/mustpass/vk-master.txt"]
 renderer_check = "Mali-G52"
 include = [
 "dEQP-VK.pipeline.blend.*",
+"dEQP-VK.api.buffer_view.*",
 "dEQP-VK.api.copy_and_blit.core.*",
 "dEQP-VK.compute.builtin_var.*",
 "dEQP-VK.glsl.builtin.function.integer.usubborrow.*",
 "dEQP-VK.glsl.builtin.precision.frexp.*",
 "dEQP-VK.glsl.builtin.precision.ldexp.*",
-"dEQP-VK.image.load_store.with_format.1d.*",
-"dEQP-VK.image.load_store.with_format.1d_array.*",
-"dEQP-VK.image.load_store.with_format.2d.*",
-"dEQP-VK.image.load_store.with_format.2d_array.*",
-"dEQP-VK.image.load_store.with_format.3d.*",
-"dEQP-VK.image.load_store.with_format.cube.*",
+"dEQP-VK.image.load_store.with_format.*",
 "dEQP-VK.ssbo.layout.single_basic_type.*",
 ]
diff --git a/src/panfrost/vulkan/panvk_descriptor_set.c 
b/src/panfrost/vulkan/panvk_descriptor_set.c
index cdf987416bb..5474b9ffa01 100644
--- a/src/panfrost/vulkan/panvk_descriptor_set.c
+++ b/src/panfrost/vulkan/panvk_descriptor_set.c
@@ -120,12 +120,14 @@ panvk_CreateDescriptorSetLayout(VkDevice _device,
  tex_idx += binding_layout->array_size;
  break;
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-  case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-  case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
  binding_layout->tex_idx = tex_idx;
  tex_idx += binding_layout->array_size;
  break;
+  case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ binding_layout->tex_idx = tex_idx;
+ tex_idx += binding_layout->array_size;
+ break;
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
  binding_layout->dyn_ubo_idx = dyn_ubo_idx;
  dyn_ubo_idx += binding_layout->array_size;
@@ -146,6 +148,10 @@ panvk_CreateDescriptorSetLayout(VkDevice _device,
  binding_layout->img_idx = img_idx;
  img_idx += binding_layout->array_size;
  break;
+  case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ binding_layout->img_idx = img_idx;
+ img_idx += binding_layout->array_size;
+ break;
   default:
  unreachable("Invalid descriptor type");
   }
@@ -223,9 +229,8 @@ panvk_GetDescriptorSetLayoutSupport(VkDevice _device,
  tex_idx += binding->descriptorCount;
  break;
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-  case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-  case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+  case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
  tex_idx += binding->descriptorCount;
  break;
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -241,6 +246,7 @@ panvk_GetDescriptorSetLayoutSupport(VkDevice _device,
  ssbo_idx += binding->descriptorCount;
  break;
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+  case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
  img_idx += binding->descriptorCount;
  break;
   default:
diff --git a/src/panfrost/vulkan/panvk_device.c 
b/src/panfrost/vulkan/panvk_device.c
index 362892f74b4..55f119fd782 100644
--- a/src/panfrost/vulkan/panvk_device.c
+++ b/src/panfrost/vulkan/panvk_device.c
@@ -790,7 +790,7 @@ panvk_GetPhysicalDeviceProperties2(VkPhysicalDevice 
physicalDevice,
   .viewportBoundsRange = { INT16_MIN, INT16_MAX },
   .viewportSubPixelBits = 8,
   .minMemoryMapAlignment = 4096, /* A page */
-  .minTexelBufferOffsetAlignment = 1,
+  .minTexelBufferOffsetAlignment = 64,
   .minUniformBufferOffsetAlignment = 4,
   .minStorageBufferOffsetAlignment = 4,
   .minTexelOffset = -32,
diff --git a/src/panfrost/vulkan/panvk_image.c 
b/src/panfrost/vulkan/panvk_image.c
index 3581e35d7b8..a8b322e4c17 100644
--- a/src/pa

Mesa (main): panvk: Add a dummy sampler for NIR tex operations that don't take one

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 9dc8382de8be8f8f899276d0d9f466335483f403
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9dc8382de8be8f8f899276d0d9f466335483f403

Author: Boris Brezillon 
Date:   Thu Sep 23 18:02:41 2021 +0200

panvk: Add a dummy sampler for NIR tex operations that don't take one

In the NIR domain, some texture operations don't require a sampler, but
Bifrost/Midgard always want one. Let's add a dummy sampler to handle
that case.

Signed-off-by: Boris Brezillon 
Reviewed-by: Jason Ekstrand 
Part-of: 

---

 src/panfrost/vulkan/panvk_descriptor_set.c |  9 +
 src/panfrost/vulkan/panvk_vX_cmd_buffer.c  | 12 
 2 files changed, 21 insertions(+)

diff --git a/src/panfrost/vulkan/panvk_descriptor_set.c 
b/src/panfrost/vulkan/panvk_descriptor_set.c
index 7240dd16cc5..cdf987416bb 100644
--- a/src/panfrost/vulkan/panvk_descriptor_set.c
+++ b/src/panfrost/vulkan/panvk_descriptor_set.c
@@ -340,6 +340,15 @@ panvk_CreatePipelineLayout(VkDevice _device,
layout->num_dyn_ssbos = dyn_ssbo_idx;
layout->num_imgs = img_idx;
 
+   /* Some NIR texture operations don't require a sampler, but Bifrost/Midgard
+* ones always expect one. Add a dummy sampler to deal with this limitation.
+*/
+   if (layout->num_textures) {
+  layout->num_samplers++;
+  for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++)
+ layout->sets[set].sampler_offset++;
+   }
+
_mesa_sha1_final(&ctx, layout->sha1);
 
*pPipelineLayout = panvk_pipeline_layout_to_handle(layout);
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c 
b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
index fbfe088d559..869d84adbb3 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
@@ -492,6 +492,18 @@ panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
 
void *sampler = samplers.cpu;
 
+   /* Prepare the dummy sampler */
+   pan_pack(sampler, SAMPLER, cfg) {
+#if PAN_ARCH >= 6
+  cfg.seamless_cube_map = false;
+#endif
+  cfg.magnify_nearest = true;
+  cfg.minify_nearest = true;
+  cfg.normalized_coordinates = false;
+   }
+
+   sampler += pan_size(SAMPLER);
+
for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
   if (!desc_state->sets[i]) continue;
 



Mesa (main): panvk: Stop advertising Vulkan 1.1

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: a35e721162bb7dad087e75fd07fec08bc635dc96
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a35e721162bb7dad087e75fd07fec08bc635dc96

Author: Jason Ekstrand 
Date:   Wed Mar  9 10:12:19 2022 -0600

panvk: Stop advertising Vulkan 1.1

We're nowhere close to even having Vulkan 1.0 working yet, there's no
reason to get too excited about 1.1.  It just means piles more test
crashes for features we're claiming to support but don't.  If we want to
enable more tests, we can turn on the extensions for those features once
we actually have them working.

Reviewed-by: Boris Brezillon 
Part-of: 

---

 src/panfrost/vulkan/panvk_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/panfrost/vulkan/panvk_device.c 
b/src/panfrost/vulkan/panvk_device.c
index 5421991cd3b..362892f74b4 100644
--- a/src/panfrost/vulkan/panvk_device.c
+++ b/src/panfrost/vulkan/panvk_device.c
@@ -123,7 +123,7 @@ static const struct debug_control panvk_debug_options[] = {
 #define PANVK_USE_WSI_PLATFORM
 #endif
 
-#define PANVK_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
+#define PANVK_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
 
 VkResult
 panvk_EnumerateInstanceVersion(uint32_t *pApiVersion)



Mesa (main): v3dv/ci: add test to skip list

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 000b935c50513ab80fae800f6af9fc573faf6fc9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=000b935c50513ab80fae800f6af9fc573faf6fc9

Author: Juan A. Suarez Romero 
Date:   Mon Mar 14 13:32:16 2022 +0100

v3dv/ci: add test to skip list

Add a test that times out in the CI but otherwise passes.

Signed-off-by: Juan A. Suarez Romero 
Reviewed-by: Jose Maria Casanova Crespo 
Part-of: 

---

 src/broadcom/ci/broadcom-rpi4-skips.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/broadcom/ci/broadcom-rpi4-skips.txt 
b/src/broadcom/ci/broadcom-rpi4-skips.txt
index a6ab7614d41..218e25fb8ce 100644
--- a/src/broadcom/ci/broadcom-rpi4-skips.txt
+++ b/src/broadcom/ci/broadcom-rpi4-skips.txt
@@ -45,6 +45,7 @@ dEQP-VK.rasterization.depth_bias.d32_sfloat
 
 # Slow tests (> 1 minute to run)
 dEQP-VK.api.object_management.max_concurrent.query_pool
+dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite
 dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap
 dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap
 dEQP-VK.memory.mapping.suballocation.full.variable.implicit_unmap



Mesa (main): aco: split and recombine unaligned sgpr inputs

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 973967c49d70b9d751f76361db8511a2403e4432
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=973967c49d70b9d751f76361db8511a2403e4432

Author: Rhys Perry 
Date:   Thu Feb 17 16:41:35 2022 +

aco: split and recombine unaligned sgpr inputs

An example is the num_work_groups argument. Fixes invalid assembly with
func.compute.num-workgroups.basic.q0

Signed-off-by: Rhys Perry 
Reviewed-by: Daniel Schürmann 
Part-of: 

---

 src/amd/compiler/aco_instruction_selection.cpp | 40 +++---
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp
index 82178ed38c1..42d0421b292 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11286,8 +11286,20 @@ emit_streamout(isel_context* ctx, unsigned stream)
 Pseudo_instruction*
 add_startpgm(struct isel_context* ctx)
 {
-   aco_ptr<Pseudo_instruction> startpgm{
-  create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, ctx->args->ac.arg_count)};
+   unsigned def_count = 0;
+   for (unsigned i = 0; i < ctx->args->ac.arg_count; i++) {
+  if (ctx->args->ac.args[i].skip)
+ continue;
+  unsigned align = MIN2(4, util_next_power_of_two(ctx->args->ac.args[i].size));
+  if (ctx->args->ac.args[i].file == AC_ARG_SGPR && ctx->args->ac.args[i].offset % align)
+ def_count += ctx->args->ac.args[i].size;
+  else
+ def_count++;
+   }
+
+   Pseudo_instruction* startpgm =
+  create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, def_count);
+   ctx->block->instructions.emplace_back(startpgm);
for (unsigned i = 0, arg = 0; i < ctx->args->ac.arg_count; i++) {
   if (ctx->args->ac.args[i].skip)
  continue;
@@ -11296,14 +11308,22 @@ add_startpgm(struct isel_context* ctx)
   unsigned size = ctx->args->ac.args[i].size;
   unsigned reg = ctx->args->ac.args[i].offset;
   RegClass type = RegClass(file == AC_ARG_SGPR ? RegType::sgpr : 
RegType::vgpr, size);
-  Temp dst = ctx->program->allocateTmp(type);
-  ctx->arg_temps[i] = dst;
-  startpgm->definitions[arg] = Definition(dst);
-  startpgm->definitions[arg].setFixed(PhysReg{file == AC_ARG_SGPR ? reg : 
reg + 256});
-  arg++;
+
+  if (file == AC_ARG_SGPR && reg % MIN2(4, util_next_power_of_two(size))) {
+ Temp elems[16];
+ for (unsigned j = 0; j < size; j++) {
+elems[j] = ctx->program->allocateTmp(s1);
+startpgm->definitions[arg++] = Definition(elems[j].id(), 
PhysReg{reg + j}, s1);
+ }
+ ctx->arg_temps[i] = create_vec_from_array(ctx, elems, size, 
RegType::sgpr, 4);
+  } else {
+ Temp dst = ctx->program->allocateTmp(type);
+ ctx->arg_temps[i] = dst;
+ startpgm->definitions[arg] = Definition(dst);
+ startpgm->definitions[arg].setFixed(PhysReg{file == AC_ARG_SGPR ? reg 
: reg + 256});
+ arg++;
+  }
}
-   Pseudo_instruction* instr = startpgm.get();
-   ctx->block->instructions.push_back(std::move(startpgm));
 
/* Stash these in the program so that they can be accessed later when
 * handling spilling.
@@ -11323,7 +11343,7 @@ add_startpgm(struct isel_context* ctx)
   }
}
 
-   return instr;
+   return startpgm;
 }
 
 void



Mesa (main): radv,aco,ac/llvm: fix indirect dispatches on the compute queue on GFX7-10

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: c4cf92cad778e3040ab3bed4706b3f9c5cbea502
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c4cf92cad778e3040ab3bed4706b3f9c5cbea502

Author: Rhys Perry 
Date:   Wed Feb 16 20:01:36 2022 +

radv,aco,ac/llvm: fix indirect dispatches on the compute queue on GFX7-10

Since neither PKT3_LOAD_SH_REG_INDEX nor PKT3_COPY_DATA work with compute
queues on GFX7-10, we have to load the dispatch size from memory in the
shader.

Signed-off-by: Rhys Perry 
Reviewed-by: Samuel Pitoiset 
Part-of: 

---

 src/amd/compiler/aco_instruction_selection.cpp | 10 ++-
 src/amd/llvm/ac_nir_to_llvm.c  |  9 +-
 src/amd/llvm/ac_shader_abi.h   |  3 ++
 src/amd/vulkan/radv_acceleration_structure.c   |  6 
 src/amd/vulkan/radv_cmd_buffer.c   | 39 +++---
 src/amd/vulkan/radv_device.c   |  3 ++
 src/amd/vulkan/radv_nir_to_llvm.c  |  1 +
 src/amd/vulkan/radv_private.h  |  3 ++
 src/amd/vulkan/radv_shader.c   |  1 +
 src/amd/vulkan/radv_shader.h   |  1 +
 src/amd/vulkan/radv_shader_args.c  | 10 +--
 src/gallium/drivers/radeonsi/si_shader_llvm.c  |  1 +
 12 files changed, 66 insertions(+), 21 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp
index 42d0421b292..ea4518c2943 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8143,7 +8143,15 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* 
instr)
case nir_intrinsic_scoped_barrier: emit_scoped_barrier(ctx, instr); break;
case nir_intrinsic_load_num_workgroups: {
   Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
-  bld.copy(Definition(dst), Operand(get_arg(ctx, 
ctx->args->ac.num_work_groups)));
+  if (ctx->options->load_grid_size_from_user_sgpr) {
+ bld.copy(Definition(dst), get_arg(ctx, 
ctx->args->ac.num_work_groups));
+  } else {
+ Temp addr = get_arg(ctx, ctx->args->ac.num_work_groups);
+ assert(addr.regClass() == s2);
+ bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
+bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, 
Operand::zero()),
+bld.smem(aco_opcode::s_load_dword, bld.def(s1), addr, 
Operand::c32(8)));
+  }
   emit_split_vector(ctx, dst, 3);
   break;
}
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 67d0ebfa55f..f8d1deb2fcd 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3721,7 +3721,14 @@ static void visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
   result = ctx->instance_id_replaced ? ctx->instance_id_replaced : 
ctx->abi->instance_id;
   break;
case nir_intrinsic_load_num_workgroups:
-  result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
+  if (ctx->abi->load_grid_size_from_user_sgpr) {
+ result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
+  } else {
+ LLVMTypeRef ptr_type = ac_array_in_const_addr_space(ctx->ac.v3i32);
+ LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
+ result = ac_build_load_invariant(&ctx->ac, ptr, ctx->ac.i32_0);
+  }
   break;
case nir_intrinsic_load_local_invocation_index:
   result = visit_load_local_invocation_index(ctx);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index c1269f83a02..dca3d4a7c6e 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -175,6 +175,9 @@ struct ac_shader_abi {
 * images.
 */
bool disable_aniso_single_level;
+
+   /* Whether to inline the compute dispatch size in user sgprs. */
+   bool load_grid_size_from_user_sgpr;
 };
 
 #endif /* AC_SHADER_ABI_H */
diff --git a/src/amd/vulkan/radv_acceleration_structure.c 
b/src/amd/vulkan/radv_acceleration_structure.c
index 3950028bf51..e7cb430b7cf 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -1906,6 +1906,9 @@ radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer 
commandBuffer,
  
cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), 
&consts);
 
+   cmd_buffer->state.flush_bits |=
+  radv_dst_access_flush(cmd_buffer, 
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR, NULL);
+
radv_indirect_dispatch(cmd_buffer, src->bo,
   src_addr + offsetof(struct radv_accel_struct_header, 
copy_dispatch_size));
radv_meta_restore(&saved_state, cmd_buffer);
@@ -2052,6 +2055,9 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(
   

Mesa (main): radv: export the pipeline hash via VK_KHR_pipeline_executable_properties

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 1dfee91fdf1002ac5c2ae555b374d501d4437f8f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1dfee91fdf1002ac5c2ae555b374d501d4437f8f

Author: Samuel Pitoiset 
Date:   Tue Mar  8 14:05:52 2022 +0100

radv: export the pipeline hash via VK_KHR_pipeline_executable_properties

This will help to match RGP<->Fossilize pipelines.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
Part-of: 

---

 src/amd/vulkan/radv_pipeline.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 6cc5157f340..958395b1233 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -6709,6 +6709,14 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount 
: 0);
VkResult result = VK_SUCCESS;
 
+   if (s < end) {
+  desc_copy(s->name, "Driver pipeline hash");
+  desc_copy(s->description, "Driver pipeline hash used by RGP");
+  s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+  s->value.u64 = pipeline->pipeline_hash;
+   }
+   ++s;
+
if (s < end) {
   desc_copy(s->name, "SGPRs");
   desc_copy(s->description, "Number of SGPR registers allocated per 
subgroup");



Mesa (staging/22.0): zink: fix 64bit float shader ops

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: eb77d38b4562aac0363042b1fc7792693278facd
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb77d38b4562aac0363042b1fc7792693278facd

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 09:25:43 2022 -0500

zink: fix 64bit float shader ops

this was being set from back before zink actually supported 64bit
natively and only 32bit was functional, but it breaks 64bit support

cc: mesa-stable

fixes (lavapipe):
KHR-GL46.gpu_shader_fp64.builtin.mod_dvec2
KHR-GL46.gpu_shader_fp64.builtin.mod_dvec3
KHR-GL46.gpu_shader_fp64.builtin.mod_dvec4

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit 5fae35fb17d6d89c4fe1d9d5a19d827caf25b9fc)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/zink/zink_compiler.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 34dbdb5235f..f0fcabbe296 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1912,7 +1912,7 @@
 "description": "zink: fix 64bit float shader ops",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/zink/zink_compiler.c 
b/src/gallium/drivers/zink/zink_compiler.c
index 7caeded9008..28af75c35e1 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -408,7 +408,7 @@ zink_screen_init_compiler(struct zink_screen *screen)
   .lower_unpack_32_2x16_split = true,
   .lower_vector_cmp = true,
   .lower_int64_options = 0,
-  .lower_doubles_options = ~nir_lower_fp64_full_software,
+  .lower_doubles_options = 0,
   .lower_uniforms_to_ubo = true,
   .has_fsub = true,
   .has_isub = true,



Mesa (main): Venus: Adjust VN_CMD_ENQUEUE to set VN_COMMAND_BUFFER_STATE_INVALID

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: a65d2ef1c1399da8ea6f04b361913bfe6abfa98a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a65d2ef1c1399da8ea6f04b361913bfe6abfa98a

Author: Igor Torrente 
Date:   Mon Mar 14 07:55:20 2022 -0300

Venus: Adjust VN_CMD_ENQUEUE to set VN_COMMAND_BUFFER_STATE_INVALID

This addresses the problem of having a return statement hidden inside the macro.

Signed-off-by: Igor Torrente 
Part-of: 

---

 src/virtio/vulkan/vn_command_buffer.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/virtio/vulkan/vn_command_buffer.c 
b/src/virtio/vulkan/vn_command_buffer.c
index 735b8625db5..6740bd1a22c 100644
--- a/src/virtio/vulkan/vn_command_buffer.c
+++ b/src/virtio/vulkan/vn_command_buffer.c
@@ -23,10 +23,10 @@
  vn_command_buffer_from_handle(commandBuffer);   \
   size_t _cmd_size = vn_sizeof_##cmd_name(commandBuffer, ##__VA_ARGS__); \
  \
-  if (!vn_cs_encoder_reserve(&_cmd->cs, _cmd_size))  \
- return; \
- \
-  vn_encode_##cmd_name(&_cmd->cs, 0, commandBuffer, ##__VA_ARGS__);  \
+  if (vn_cs_encoder_reserve(&_cmd->cs, _cmd_size))   \
+ vn_encode_##cmd_name(&_cmd->cs, 0, commandBuffer, ##__VA_ARGS__);   \
+  else   \
+ _cmd->state = VN_COMMAND_BUFFER_STATE_INVALID;  \
} while (0)
 
 static bool



Mesa (main): Venus: Add VN_CMD_ENQUEUE macro with vkCmd* common code

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 6cdbc0299a4b97031e06787f8aafa2d20d00bfcc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cdbc0299a4b97031e06787f8aafa2d20d00bfcc

Author: Igor Torrente 
Date:   Fri Mar 11 09:31:59 2022 -0300

Venus: Add VN_CMD_ENQUEUE macro with vkCmd* common code

Several `vn_Cmd` share the same code to enqueue the command to the
command stream.

This adds a macro with this common code.

Signed-off-by: Igor Torrente 
Part-of: 

---

 src/virtio/vulkan/vn_command_buffer.c | 921 ++
 1 file changed, 149 insertions(+), 772 deletions(-)

Diff:   
http://cgit.freedesktop.org/mesa/mesa/diff/?id=6cdbc0299a4b97031e06787f8aafa2d20d00bfcc


Mesa (main): Venus: add VN_CMD_ENQUEUE to vn_cmd_encode_memory_barriers

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: f30334b6c4605ca7eeda35670ef9b96d46db0bce
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f30334b6c4605ca7eeda35670ef9b96d46db0bce

Author: Igor Torrente 
Date:   Mon Mar 14 08:39:01 2022 -0300

Venus: add VN_CMD_ENQUEUE to vn_cmd_encode_memory_barriers

Signed-off-by: Igor Torrente 
Part-of: 

---

 src/virtio/vulkan/vn_command_buffer.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/virtio/vulkan/vn_command_buffer.c 
b/src/virtio/vulkan/vn_command_buffer.c
index 6740bd1a22c..2c78b15e00d 100644
--- a/src/virtio/vulkan/vn_command_buffer.c
+++ b/src/virtio/vulkan/vn_command_buffer.c
@@ -295,17 +295,9 @@ vn_cmd_encode_memory_barriers(struct vn_command_buffer 
*cmd,
 {
const VkCommandBuffer cmd_handle = vn_command_buffer_to_handle(cmd);
 
-   const size_t cmd_size = vn_sizeof_vkCmdPipelineBarrier(
-  cmd_handle, src_stage_mask, dst_stage_mask, 0, 0, NULL,
-  buf_barrier_count, buf_barriers, img_barrier_count, img_barriers);
-   if (!vn_cs_encoder_reserve(&cmd->cs, cmd_size)) {
-  cmd->state = VN_COMMAND_BUFFER_STATE_INVALID;
-  return;
-   }
-
-   vn_encode_vkCmdPipelineBarrier(
-  &cmd->cs, 0, cmd_handle, src_stage_mask, dst_stage_mask, 0, 0, NULL,
-  buf_barrier_count, buf_barriers, img_barrier_count, img_barriers);
+   VN_CMD_ENQUEUE(vkCmdPipelineBarrier, cmd_handle, src_stage_mask,
+  dst_stage_mask, 0, 0, NULL, buf_barrier_count, buf_barriers,
+  img_barrier_count, img_barriers);
 }
 
 static void



Mesa (staging/22.0): zink: use 64bit mask for xfb analysis

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: a46dbbdc125c964a82d380a3ca6bb0b49b4fd6ca
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a46dbbdc125c964a82d380a3ca6bb0b49b4fd6ca

Author: Mike Blumenkrantz 
Date:   Wed Feb 16 15:14:52 2022 -0500

zink: use 64bit mask for xfb analysis

I don't know how this worked before since all the values are oob?

cc: mesa-stable

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit a5c7d34fdf8403b0115d5eead7ca67027e93efc7)

---

 .pick_status.json| 4 ++--
 src/gallium/drivers/zink/zink_compiler.c | 8 
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 34dbdb5235f..0a68b1b0169 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1759,7 +1759,7 @@
 "description": "zink: use 64bit mask for xfb analysis",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
@@ -1912,7 +1912,7 @@
 "description": "zink: fix 64bit float shader ops",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/zink/zink_compiler.c 
b/src/gallium/drivers/zink/zink_compiler.c
index 7caeded9008..3134a19d251 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -597,7 +597,7 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
   var->data.explicit_xfb_buffer = 0;
 
bool inlined[VARYING_SLOT_MAX][4] = {0};
-   uint32_t packed = 0;
+   uint64_t packed = 0;
uint8_t packed_components[VARYING_SLOT_MAX] = {0};
uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
@@ -626,7 +626,7 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
inlined[slot][output->start_component + j] = true;
  } else {
 /* otherwise store some metadata for later */
-packed |= BITFIELD_BIT(slot);
+packed |= BITFIELD64_BIT(slot);
 packed_components[slot]++;
 packed_streams[slot] |= BITFIELD_BIT(output->stream);
 packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
@@ -650,7 +650,7 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
   * being output with the same stream on the same buffer, this entire 
variable
   * can be consolidated into a single output to conserve locations
   */
- if (packed & BITFIELD_BIT(slot) &&
+ if (packed & BITFIELD64_BIT(slot) &&
  glsl_get_components(var->type) == packed_components[slot] &&
  util_bitcount(packed_streams[slot]) == 1 &&
  util_bitcount(packed_buffers[slot]) == 1) {
@@ -672,7 +672,7 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
var->data.stream = output->stream;
for (unsigned j = 0; j < packed_components[slot]; j++)
   inlined[slot][j] = true;
-   packed &= ~BITFIELD_BIT(slot);
+   packed &= ~BITFIELD64_BIT(slot);
continue;
 }
  }



Mesa (staging/22.0): zink: store the correct number of components for xfb packing outputs

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 63e9e3398760c1766ed1d28f6a93b65cc0e04abc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=63e9e3398760c1766ed1d28f6a93b65cc0e04abc

Author: Mike Blumenkrantz 
Date:   Wed Feb 16 15:15:35 2022 -0500

zink: store the correct number of components for xfb packing outputs

cc: mesa-stable

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit 432700fc61a33e0c040d47d9b7bd8cfe970d35cc)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/zink/zink_compiler.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 0a68b1b0169..4f5bbdd33ee 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1750,7 +1750,7 @@
 "description": "zink: store the correct number of components for xfb 
packing outputs",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/zink/zink_compiler.c 
b/src/gallium/drivers/zink/zink_compiler.c
index 3134a19d251..5dc008867aa 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -627,7 +627,7 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
  } else {
 /* otherwise store some metadata for later */
 packed |= BITFIELD64_BIT(slot);
-packed_components[slot]++;
+packed_components[slot] += output->num_components;
 packed_streams[slot] |= BITFIELD_BIT(output->stream);
 packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
 for (unsigned j = 0; j < output->num_components; j++)



Mesa (staging/22.0): zink: correctly set xfb packed output offsets

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 8bd9c64fefc11440226ca3a6a8f6adb867a2ec85
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bd9c64fefc11440226ca3a6a8f6adb867a2ec85

Author: Mike Blumenkrantz 
Date:   Wed Feb 16 15:15:55 2022 -0500

zink: correctly set xfb packed output offsets

cc: mesa-stable

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit 4ed7329236a576b6b6f615787bb722b960f32c6b)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/zink/zink_compiler.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 4f5bbdd33ee..78c7ac5d414 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1741,7 +1741,7 @@
 "description": "zink: correctly set xfb packed output offsets",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/zink/zink_compiler.c 
b/src/gallium/drivers/zink/zink_compiler.c
index 5dc008867aa..099434a698d 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -631,7 +631,7 @@ update_so_info(struct zink_shader *zs, const struct 
pipe_stream_output_info *so_
 packed_streams[slot] |= BITFIELD_BIT(output->stream);
 packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
 for (unsigned j = 0; j < output->num_components; j++)
-   packed_offsets[output->register_index][j + 
output->start_component] = output->dst_offset;
+   packed_offsets[output->register_index][j + 
output->start_component] = output->dst_offset + j;
  }
   }
}



Mesa (main): Revert "glx: Fix build errors with --enable-mangling (v2)"

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 5dbbc0f0a824e4addffeb115631a9c5bcb426c29
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5dbbc0f0a824e4addffeb115631a9c5bcb426c29

Author: Eric Engestrom 
Date:   Thu Oct 10 18:13:53 2019 +0100

Revert "glx: Fix build errors with --enable-mangling (v2)"

This reverts commit a27f2d991b1723c3349623401ce3c8f26dcdb28b.

As of a0829cf23b307ca44ab8 ("GL: drop symbols mangling support"), this
extra complexity isn't needed anymore.

Signed-off-by: Eric Engestrom 
Part-of: 

---

 src/glx/glxextensions.h | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 999db335717..0af2db04e17 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -283,17 +283,11 @@ __glXEnableDirectExtension(struct glx_screen *psc, const 
char *name);
 /* GLX_ALIAS should be used for functions with a non-void return type.
GLX_ALIAS_VOID is for functions with a void return type. */
 # ifdef HAVE_FUNC_ATTRIBUTE_ALIAS
-/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
- * extra expansion means that the name mangling macros in glx_mangle.h will
- * apply before stringification, so the alias attribute will have a string like
- * "mglXFoo" instead of "glXFoo". */
-#  define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
+#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
return_type  real_func  proto_args   \
__attribute__ ((alias( # aliased_func ) ));
-#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
-   GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
 #  define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
-   GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
+   GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
 # else
 #  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
return_type  real_func  proto_args   \



Mesa (staging/22.0): zink: workaround depth texture mode alpha.

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: deb0225609d9558601712deb31a04446009c5790
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=deb0225609d9558601712deb31a04446009c5790

Author: Dave Airlie 
Date:   Thu Mar 10 13:07:37 2022 +1000

zink: workaround depth texture mode alpha.

Since spir-v only has single channel depth sampling, it breaks
with the old school GL_ALPHA depth mode swizzle, so just detect
that case and smash all the channels.

Cc: mesa-stable
Reviewed-By: Mike Blumenkrantz 
Part-of: 
(cherry picked from commit 30cb63bead12ad0d7ff66a6a267400c3a07c4f86)

---

 .pick_status.json   |  2 +-
 src/gallium/drivers/zink/zink_context.c | 22 ++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 78c7ac5d414..8b2cdab99b5 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -796,7 +796,7 @@
 "description": "zink: workaround depth texture mode alpha.",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/zink/zink_context.c 
b/src/gallium/drivers/zink/zink_context.c
index c645a6d13ed..636729563eb 100644
--- a/src/gallium/drivers/zink/zink_context.c
+++ b/src/gallium/drivers/zink/zink_context.c
@@ -759,10 +759,24 @@ zink_create_sampler_view(struct pipe_context *pctx, 
struct pipe_resource *pres,
   ivci.subresourceRange.aspectMask = 
sampler_aspect_from_format(state->format);
   /* samplers for stencil aspects of packed formats need to always use 
stencil swizzle */
   if (ivci.subresourceRange.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | 
VK_IMAGE_ASPECT_STENCIL_BIT)) {
- ivci.components.r = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r));
- ivci.components.g = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g));
- ivci.components.b = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b));
- ivci.components.a = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a));
+ if (sampler_view->base.swizzle_r == PIPE_SWIZZLE_0 &&
+ sampler_view->base.swizzle_g == PIPE_SWIZZLE_0 &&
+ sampler_view->base.swizzle_b == PIPE_SWIZZLE_0 &&
+ sampler_view->base.swizzle_a == PIPE_SWIZZLE_X) {
+/*
+ * When the state tracker asks for 000x swizzles, this is depth 
mode GL_ALPHA,
+ * however with the single dref fetch this will fail, so just spam 
all the channels.
+ */
+ivci.components.r = VK_COMPONENT_SWIZZLE_R;
+ivci.components.g = VK_COMPONENT_SWIZZLE_R;
+ivci.components.b = VK_COMPONENT_SWIZZLE_R;
+ivci.components.a = VK_COMPONENT_SWIZZLE_R;
+ } else {
+ivci.components.r = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_r));
+ivci.components.g = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_g));
+ivci.components.b = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_b));
+ivci.components.a = 
zink_component_mapping(clamp_zs_swizzle(sampler_view->base.swizzle_a));
+ }
   } else {
  /* if we have e.g., R8G8B8X8, then we have to ignore alpha since 
we're just emulating
   * these formats



Mesa (staging/22.0): panfrost: Emulate GL_CLAMP on Bifrost

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 9558f7674baf4e30324895e88dcc0866b719b9ca
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9558f7674baf4e30324895e88dcc0866b719b9ca

Author: Alyssa Rosenzweig 
Date:   Sat Mar  5 15:10:10 2022 -0500

panfrost: Emulate GL_CLAMP on Bifrost

Hardware support was removed after Midgard. Use mesa/st to emulate GL_CLAMP with
nir_lower_tex automatically (the Zink lowering), and disable GL_MIRROR_CLAMP
which isn't lowered correctly.

Fixes *texwrap* Piglit tests on G52.

Fixes: f9ceab7b237 ("panfrost: Fix CLAMP wrap mode")
Signed-off-by: Alyssa Rosenzweig 
Part-of: 
(cherry picked from commit 1f97819fbe56a388878a23f49c0cf47016e4dde0)

---

 .pick_status.json|   2 +-
 src/gallium/drivers/panfrost/pan_cmdstream.c |  28 ++-
 src/gallium/drivers/panfrost/pan_screen.c|   3 +-
 src/panfrost/ci/panfrost-g52-fails.txt   | 244 ---
 4 files changed, 15 insertions(+), 262 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 842ea36cf4e..a2659aa857e 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -328,7 +328,7 @@
 "description": "panfrost: Emulate GL_CLAMP on Bifrost",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "f9ceab7b237d40ded7728457114e59ca6707dd71"
 },
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c 
b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 3bc030eda65..88a80805790 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -121,31 +121,27 @@ panfrost_sample_pattern(unsigned samples)
 static unsigned
 translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest)
 {
-/* Bifrost doesn't support the GL_CLAMP wrap mode, so instead use
- * CLAMP_TO_EDGE and CLAMP_TO_BORDER. On Midgard, CLAMP is broken for
- * nearest filtering, so use CLAMP_TO_EDGE in that case. */
+/* CLAMP is only supported on Midgard, where it is broken for nearest
+ * filtering. Use CLAMP_TO_EDGE in that case.
+ */
 
 switch (w) {
 case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
-case PIPE_TEX_WRAP_CLAMP:
-return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
-#if PAN_ARCH <= 5
- MALI_WRAP_MODE_CLAMP;
-#else
- MALI_WRAP_MODE_CLAMP_TO_BORDER;
-#endif
 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return 
MALI_WRAP_MODE_CLAMP_TO_BORDER;
 case PIPE_TEX_WRAP_MIRROR_REPEAT: return 
MALI_WRAP_MODE_MIRRORED_REPEAT;
+case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
+case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+
+#if PAN_ARCH <= 5
+case PIPE_TEX_WRAP_CLAMP:
+return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
+   MALI_WRAP_MODE_CLAMP;
 case PIPE_TEX_WRAP_MIRROR_CLAMP:
 return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE :
-#if PAN_ARCH <= 5
- MALI_WRAP_MODE_MIRRORED_CLAMP;
-#else
- MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+   MALI_WRAP_MODE_MIRRORED_CLAMP;
 #endif
-case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
-case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+
 default: unreachable("Invalid wrap");
 }
 }
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index 9b11cc10de4..69f0bd74716 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -153,7 +153,6 @@ panfrost_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
 
 case PIPE_CAP_SAMPLER_VIEW_TARGET:
 case PIPE_CAP_TEXTURE_SWIZZLE:
-case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
 case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -284,6 +283,8 @@ panfrost_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
 return MAX_VARYING;
 
 /* Removed in v6 (Bifrost) */
+case PIPE_CAP_GL_CLAMP:
+case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
 case PIPE_CAP_ALPHA_TEST:
 return dev->arch <= 5;
 
diff --git a/src/panfrost/ci/panfrost-g52-fails.txt 
b/src/panfrost/ci/panfrost-g52-fails.txt
index efa83ad1ad9..aa8408239fa 100644
--- a/src/panfrost/ci/panfrost-g52-fails.txt
+++ b/src/panfrost/ci/panfrost-g52-fails.txt
@@ -60,30 +60,12 @@ 
spec@a

Mesa (staging/22.0): anv: fix CmdSetColorWriteEnableEXT for maximum rts

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: d76c60884694316e5155932daed9a9066750576a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d76c60884694316e5155932daed9a9066750576a

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 20:15:50 2022 -0500

anv: fix CmdSetColorWriteEnableEXT for maximum rts

Fixes: b15bfe92f7f ("anv: implement VK_EXT_color_write_enable")

Reviewed-by: Lionel Landwerlin 
Reviewed-by: Jason Ekstrand 
Part-of: 
(cherry picked from commit 1e3e7b3a4da6c7cd04879e0e4aab5f0d6f7b8d5f)

---

 .pick_status.json | 2 +-
 src/intel/vulkan/anv_cmd_buffer.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 882bd03404c..9855d544d8b 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -661,7 +661,7 @@
 "description": "anv: fix CmdSetColorWriteEnableEXT for maximum rts",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "b15bfe92f7f87e270160a9221a1394fc7b5de5d2"
 },
diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 3bd1e0cdc03..7949fbdaefa 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1647,7 +1647,7 @@ void anv_CmdSetColorWriteEnableEXT(
 {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   assert(attachmentCount < MAX_RTS);
+   assert(attachmentCount <= MAX_RTS);
 
uint8_t color_writes = 0;
for (uint32_t i = 0; i < attachmentCount; i++)



Mesa (staging/22.0): radeonsi: don't clear framebuffer.state before dcc decomp

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 2edbdea1eef47cd93785487a3a01ad6a8ea60a74
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2edbdea1eef47cd93785487a3a01ad6a8ea60a74

Author: Pierre-Eric Pelloux-Prayer 
Date:   Mon Mar  7 11:02:45 2022 +0100

radeonsi: don't clear framebuffer.state before dcc decomp

This causes inconsistencies between sctx->framebuffer.state and other
sctx->framebuffer properties (like compressed_cb_mask).

The point of this code was to fix an issue with vi_separate_dcc_stop_query,
which was removed by 804e2924406, so we can safely drop it.

Reviewed-by: Marek Olšák 
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6099
Cc: mesa-stable
Part-of: 
(cherry picked from commit 968d68125c234bc347fde8fad543f0fc7d80d8b0)

---

 .pick_status.json   |  2 +-
 src/gallium/drivers/radeonsi/si_state.c | 11 ---
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 84871467975..f8c45140358 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -589,7 +589,7 @@
 "description": "radeonsi: don't clear framebuffer.state before dcc 
decomp",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 293b9b81f8e..0d7d23f41a7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2788,7 +2788,6 @@ static void si_set_framebuffer_state(struct pipe_context 
*ctx,
bool old_has_stencil =
   old_has_zsbuf &&
   ((struct si_texture 
*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
-   bool unbound = false;
int i;
 
/* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs
@@ -2813,16 +2812,6 @@ static void si_set_framebuffer_state(struct pipe_context 
*ctx,
   if (!surf->dcc_incompatible)
  continue;
 
-  /* Since the DCC decompression calls back into set_framebuffer-
-   * _state, we need to unbind the framebuffer, so that
-   * vi_separate_dcc_stop_query isn't called twice with the same
-   * color buffer.
-   */
-  if (!unbound) {
- util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);
- unbound = true;
-  }
-
   if (vi_dcc_enabled(tex, surf->base.u.tex.level))
  if (!si_texture_disable_dcc(sctx, tex))
 si_decompress_dcc(sctx, tex);



Mesa (staging/22.0): anv: fix some dynamic rasterization discard cases in pipeline construction

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: f14e788b6da1738a86fb80c7e75a8da364bb466d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f14e788b6da1738a86fb80c7e75a8da364bb466d

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 20:20:11 2022 -0500

anv: fix some dynamic rasterization discard cases in pipeline construction

cc: mesa-stable

Reviewed-by: Lionel Landwerlin 
Reviewed-by: Jason Ekstrand 
Part-of: 
(cherry picked from commit 5ab0e3f0bb09bde3d06b0ea448a6e183a813e286)

---

 .pick_status.json   | 2 +-
 src/intel/vulkan/anv_pipeline.c | 6 ++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 9855d544d8b..84871467975 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -652,7 +652,7 @@
 "description": "anv: fix some dynamic rasterization discard cases in 
pipeline construction",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 0c4abb8339f..94774e82cdd 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -2421,8 +2421,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline 
*pipeline,
}
 
const VkPipelineMultisampleStateCreateInfo *ms_info =
-  pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
-  pCreateInfo->pMultisampleState;
+  raster_discard ? NULL : pCreateInfo->pMultisampleState;
if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
   const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
  vk_find_struct_const(ms_info, 
PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
@@ -2452,8 +2451,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline 
*pipeline,
}
 
if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
-  if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
-  uses_color_att) {
+  if (!raster_discard && uses_color_att) {
  assert(pCreateInfo->pColorBlendState);
  const VkPipelineColorWriteCreateInfoEXT *color_write_info =
 vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,



Mesa (staging/22.0): lavapipe: skip format checks for EXTENDED_USAGE

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 3c040493457fb7201df7790b87773e0f7e75f430
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c040493457fb7201df7790b87773e0f7e75f430

Author: Mike Blumenkrantz 
Date:   Wed Mar  9 14:26:19 2022 -0500

lavapipe: skip format checks for EXTENDED_USAGE

we can effectively skip any kind of checks here and just assume that one
of two scenarios is in effect:
* the user is about to attempt some incredibly illegal behavior that VVL will 
catch
* the user is about to attempt a pro gamer move and we'll be fine

in either case, it's EXTENDED_USAGE, so hopefully we're about to make a texture
view from a compatible and supported format

cc: mesa-stable

fixes:
dEQP-VK.image.extended_usage_bit_compatibility.image_format_properties*

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit 6a4c7ef7286f50b5396ffc68bcf6ad33eb67de07)

---

 .pick_status.json| 2 +-
 src/gallium/frontends/lavapipe/lvp_formats.c | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 43f1e4774f1..0976440f1a3 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -769,7 +769,7 @@
 "description": "lavapipe: skip format checks for EXTENDED_USAGE",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/frontends/lavapipe/lvp_formats.c 
b/src/gallium/frontends/lavapipe/lvp_formats.c
index b30853a9df1..6831a6587aa 100644
--- a/src/gallium/frontends/lavapipe/lvp_formats.c
+++ b/src/gallium/frontends/lavapipe/lvp_formats.c
@@ -230,6 +230,9 @@ static VkResult lvp_get_image_format_properties(struct 
lvp_physical_device *phys
   break;
}
 
+   if (info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)
+  goto skip_checks;
+
if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
   if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
  goto unsupported;
@@ -273,6 +276,7 @@ static VkResult lvp_get_image_format_properties(struct 
lvp_physical_device *phys
   }
}
 
+skip_checks:
*pImageFormatProperties = (VkImageFormatProperties) {
   .maxExtent = maxExtent,
   .maxMipLevels = maxMipLevels,



Mesa (staging/22.0): lavapipe: use the correct value for dynamic render resolve attachment indexing

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: b82412510b366c338966e707bc7820aa3191d2a5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b82412510b366c338966e707bc7820aa3191d2a5

Author: Mike Blumenkrantz 
Date:   Thu Mar 10 14:05:32 2022 -0500

lavapipe: use the correct value for dynamic render resolve attachment indexing

subpass->color_count is (obviously) not set yet, so this would just clobber
the color attachments any time resolves were used

Fixes: 8a6160a3542 ("lavapipe: VK_KHR_dynamic_rendering")

fixes:
dEQP-VK.draw.dynamic_rendering.multiple_interpolation.structured.with_sample_decoration.4_samples

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit c40dc39b5a87911cd6467b3ec647f382653a9b97)

---

 .pick_status.json| 2 +-
 src/gallium/frontends/lavapipe/lvp_execute.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 68b8efad5df..43f1e4774f1 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -778,7 +778,7 @@
 "description": "lavapipe: use the correct value for dynamic render 
resolve attachment indexing",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "8a6160a35428264fa3b4722d8c72b9c8aa4b8256"
 },
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c 
b/src/gallium/frontends/lavapipe/lvp_execute.c
index bcb82bb26a8..14c26d9cb7f 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -1865,8 +1865,8 @@ static void handle_begin_rendering(struct 
vk_cmd_queue_entry *cmd,
   return;
}
bool has_ds = !!info->pDepthAttachment + !!info->pStencilAttachment;
-   struct lvp_render_pass_attachment *resolve_attachments = num_resolves ? 
&attachments[subpass->color_count + has_ds] : NULL;
-   struct lvp_render_pass_attachment **resolve_attachment_refs = num_resolves 
? &attachment_refs[subpass->color_count + has_ds] : NULL;
+   struct lvp_render_pass_attachment *resolve_attachments = num_resolves ? 
&attachments[info->colorAttachmentCount + has_ds] : NULL;
+   struct lvp_render_pass_attachment **resolve_attachment_refs = num_resolves 
? &attachment_refs[info->colorAttachmentCount + has_ds] : NULL;
subpass->color_count = info->colorAttachmentCount;
 
subpass->view_mask = info->viewMask;



Mesa (staging/22.0): anv: Allow MSAA resolve with different numbers of planes

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: d8b54ea4ffe7f4147c50f26d394022bb814dbcd9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8b54ea4ffe7f4147c50f26d394022bb814dbcd9

Author: Jason Ekstrand 
Date:   Fri Feb 11 23:41:03 2022 -0600

anv: Allow MSAA resolve with different numbers of planes

The Vulkan spec for VK_KHR_depth_stencil_resolve allows a format
mismatch between the primary attachment and the resolve attachment
within certain limits.  In particular,


VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03181

If pDepthStencilResolveAttachment is not NULL and does not have the
value VK_ATTACHMENT_UNUSED and VkFormat of
pDepthStencilResolveAttachment has a depth component, then the
VkFormat of pDepthStencilAttachment must have a depth component with
the same number of bits and numerical type


VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03182

If pDepthStencilResolveAttachment is not NULL and does not have the
value VK_ATTACHMENT_UNUSED, and VkFormat of
pDepthStencilResolveAttachment has a stencil component, then the
VkFormat of pDepthStencilAttachment must have a stencil component
with the same number of bits and numerical type

So you can resolve from a depth/stencil format to a depth-only or
stencil-only format so long as the number of bits matches.
Unfortunately, this has never been tested because the CTS tests which
purport to test this are broken and actually test with a destination
combined depth/stencil format.

Fixes: 5e4f9ea363a6 ("anv: Implement VK_KHR_depth_stencil_resolve")
Reviewed-by: Ivan Briano 
Part-of: 
(cherry picked from commit d65dbe8018923e3a405b362c92a21caa30e66e83)

---

 .pick_status.json  | 2 +-
 src/intel/vulkan/anv_blorp.c   | 1 -
 src/intel/vulkan/genX_cmd_buffer.c | 2 ++
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index f8c45140358..0b6c5f0229c 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -445,7 +445,7 @@
 "description": "anv: Allow MSAA resolve with different numbers of 
planes",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "5e4f9ea363a638645670abeffce08ed58c37c369"
 },
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 9f0fe902d7c..572e480a469 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1403,7 +1403,6 @@ anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
assert(src_image->vk.samples > 1);
assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
assert(dst_image->vk.samples == 1);
-   assert(src_image->n_planes == dst_image->n_planes);
 
struct blorp_surf src_surf, dst_surf;
get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 55c38d73bf9..9577ca69aa7 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -6952,6 +6952,7 @@ cmd_buffer_resolve_attachments(struct anv_cmd_buffer 
*cmd_buffer,
  &cmd_state->attachments[dst_att];
 
   if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+  (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
   subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
 
  /* MSAA resolves sample from the source attachment.  Transition the
@@ -7018,6 +7019,7 @@ cmd_buffer_resolve_attachments(struct anv_cmd_buffer 
*cmd_buffer,
   }
 
   if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+  (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
   subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
 
  src_state->current_stencil_layout = 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;



Mesa (staging/22.0): llvmpipe: fix occlusion queries with early depth test

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 5eb3f5eccdb394a5c25b9352ffc49286a966a9e5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5eb3f5eccdb394a5c25b9352ffc49286a966a9e5

Author: Mike Blumenkrantz 
Date:   Wed Mar  9 11:48:51 2022 -0500

llvmpipe: fix occlusion queries with early depth test

for genuine early depth tests, the samplecount must be updated after depth
test but before samplemask is applied

for inferred-early or regular depth tests, the samplemask can be applied
before the depth test

Fixes: d9276ae965a ("llvmpipe: handle gl_SampleMask writing.")

fixes:
dEQP-VK.fragment_operations.early_fragment.sample_count_early_fragment_tests_depth_samples_4

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit 42e78ba12573323be35cf4a6e10e5d434b2a56c5)

---

 .pick_status.json  |  2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 28 
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index d7a3baad239..6f82958d0a2 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -688,7 +688,7 @@
 "description": "llvmpipe: fix occlusion queries with early depth test",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "d9276ae965aadf967ee8f2ca85dab1dd31881919"
 },
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index e775047221a..36386efbf49 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -301,6 +301,7 @@ generate_quad_mask(struct gallivm_state *gallivm,
 #define LATE_DEPTH_TEST   0x2
 #define EARLY_DEPTH_WRITE 0x4
 #define LATE_DEPTH_WRITE  0x8
+#define EARLY_DEPTH_TEST_INFERRED  0x10 //only with EARLY_DEPTH_TEST
 
 static int
 find_output_by_semantic( const struct tgsi_shader_info *info,
@@ -637,10 +638,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
  key->stencil[1].writemask)))
depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
 else
-   depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+   depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE | 
EARLY_DEPTH_TEST_INFERRED;
  }
  else
-depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE | 
EARLY_DEPTH_TEST_INFERRED;
   }
   else {
  depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
@@ -1146,8 +1147,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
   if (key->min_samples == 1)
  s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), 
"");
 
-  /* if the shader writes sample mask use that */
-  if (shader->info.base.writes_samplemask) {
+  /* if the shader writes sample mask use that,
+   * but only if this isn't genuine early-depth to avoid breaking 
occlusion query */
+  if (shader->info.base.writes_samplemask &&
+  (!(depth_mode & EARLY_DEPTH_TEST) || (depth_mode & 
(EARLY_DEPTH_TEST_INFERRED)))) {
  LLVMValueRef out_smask_idx = LLVMBuildShl(builder, 
lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
  out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, 
out_smask_idx);
  LLVMValueRef output_smask = LLVMBuildLoad(builder, 
out_sample_mask_storage, "");
@@ -1263,6 +1266,23 @@ generate_fs_loop(struct gallivm_state *gallivm,
key->multisample ? s_mask : 
lp_build_mask_value(&mask), counter);
}
 
+   /* if this is genuine early-depth in the shader, write samplemask now
+* after occlusion count has been updated
+*/
+   if (key->multisample && shader->info.base.writes_samplemask &&
+   (depth_mode & (EARLY_DEPTH_TEST_INFERRED | EARLY_DEPTH_TEST)) == 
EARLY_DEPTH_TEST) {
+  /* if the shader writes sample mask use that */
+ LLVMValueRef out_smask_idx = LLVMBuildShl(builder, 
lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+ out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, 
out_smask_idx);
+ LLVMValueRef output_smask = LLVMBuildLoad(builder, 
out_sample_mask_storage, "");
+ LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, 
out_smask_idx, "");
+ LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, 
lp_build_const_int_vec(gallivm, int_type, 0), "");
+ smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, "");
+
+ s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+   }
+
+
if (key->multisample) {
   /* store the sample mask for this loop */
   LLVMBuildStore(builder, s_mask, s_mask_ptr);



Mesa (staging/22.0): panfrost: Don't initialise the trampolines array

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 73782b26e0c24fb96c6ee50936aa064b9b9e21d2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=73782b26e0c24fb96c6ee50936aa064b9b9e21d2

Author: Icecream95 
Date:   Thu Oct 14 16:38:38 2021 +1300

panfrost: Don't initialise the trampolines array

PIPE_MAX_SHADER_SAMPLER_VIEWS is 128, so we just end up initialising a
kilobyte of memory for no reason, when usually only a couple of
sampler views are used.

Fixes: 53ef20f08d4 ("panfrost: Handle NULL sampler views")
Part-of: 
(cherry picked from commit 3e405afeb9c1cb1182f83e2a1fd6f0beb199df64)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/panfrost/pan_cmdstream.c | 6 --
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 809323a8ce1..3194abcbcf8 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -13,7 +13,7 @@
 "description": "panfrost: Don't initialise the trampolines array",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "53ef20f08d4340c1bad0b45a2501f4daba7fb479"
 },
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c 
b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 88a80805790..bc3f01ddc0a 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -1387,13 +1387,15 @@ panfrost_emit_texture_descriptors(struct panfrost_batch 
*batch,
 
 return T.gpu;
 #else
-uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { 0 };
+uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
 struct panfrost_sampler_view *view = 
ctx->sampler_views[stage][i];
 
-if (!view)
+if (!view) {
+trampolines[i] = 0;
 continue;
+}
 
 panfrost_update_sampler_view(view, &ctx->base);
 



Mesa (staging/22.0): freedreno/ir3: Don't try re-swapping cat3 srcs

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 32fbe5e82cde245b709539797fa848a5223258d4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=32fbe5e82cde245b709539797fa848a5223258d4

Author: Rob Clark 
Date:   Thu Mar 10 20:46:23 2022 -0800

freedreno/ir3: Don't try re-swapping cat3 srcs

This can lead us to endless loops of "progress". Note that the commit named
in the Fixes: tag really just exposed an existing problem.

Fixes: 9c9e8c33498 ("nir: Reorder ffma and fsub combining")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6133
Signed-off-by: Rob Clark 
Part-of: 
(cherry picked from commit 05d687723530ed3c5c9f7d0addb3b047138613a3)

---

 .pick_status.json  |  2 +-
 src/freedreno/ir3/ir3.h|  1 +
 src/freedreno/ir3/ir3_cp.c | 22 +++---
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index f625a9f5965..842ea36cf4e 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -337,7 +337,7 @@
 "description": "freedreno/ir3: Don't try re-swapping cat3 srcs",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "9c9e8c334981b1af7a709fa42cd5ef9dcf4d9791"
 },
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 5ef1eaeeeaf..7416a768e75 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -338,6 +338,7 @@ struct ir3_instruction {
 IR3_SRC_PACKED_LOW = 0,
 IR3_SRC_PACKED_HIGH = 1,
  } packed;
+ bool swapped;
   } cat3;
   struct {
  unsigned samp, tex;
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index c667f0fd03e..66694310a8d 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -276,10 +276,13 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, 
unsigned new_flags)
if (!is_mad(instr->opc))
   return false;
 
-   /* NOTE: pre-swap first two src's before valid_flags(),
-* which might try to dereference the n'th src:
+   /* If we've already tried, nothing more to gain.. we will only
+* have previously swapped if the original 2nd src was const or
+* immed.  So swapping back won't improve anything and could
+* result in an infinite "progress" loop.
 */
-   swap(instr->srcs[0], instr->srcs[1]);
+   if (instr->cat3.swapped)
+  return false;
 
/* cat3 doesn't encode immediate, but we can lower immediate
 * to const if that helps:
@@ -289,6 +292,19 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, 
unsigned new_flags)
   new_flags |= IR3_REG_CONST;
}
 
+   /* If the reason we couldn't fold without swapping is something
+* other than const source, then swapping won't help:
+*/
+   if (!(new_flags & IR3_REG_CONST))
+  return false;
+
+   instr->cat3.swapped = true;
+
+   /* NOTE: pre-swap first two src's before valid_flags(),
+* which might try to dereference the n'th src:
+*/
+   swap(instr->srcs[0], instr->srcs[1]);
+
bool valid_swap =
   /* can we propagate mov if we move 2nd src to first? */
   ir3_valid_flags(instr, 0, new_flags) &&



Mesa (staging/22.0): lavapipe: run nir_opt_copy_prop_vars during optimization loop

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 41a8a3b0a077bf3da896fe501328acac17a63de5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=41a8a3b0a077bf3da896fe501328acac17a63de5

Author: Mike Blumenkrantz 
Date:   Thu Mar 10 11:16:03 2022 -0500

lavapipe: run nir_opt_copy_prop_vars during optimization loop

this enables better elimination of operations

fixes:
dEQP-VK.graphicsfuzz.spv-stable-mergesort-flatten-selection-dead-continues

fixes #5458

cc: mesa-stable

Reviewed-by: Dave Airlie 
Part-of: 
(cherry picked from commit cf5c32a4b2d839305ef2104ab0acf973e63b940a)

---

 .pick_status.json | 2 +-
 src/gallium/frontends/lavapipe/lvp_pipeline.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 0976440f1a3..d7a3baad239 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -751,7 +751,7 @@
 "description": "lavapipe: run nir_opt_copy_prop_vars during 
optimization loop",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c 
b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index f1bb2578671..c79d568188e 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -606,6 +606,8 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
   NIR_PASS(progress, nir, nir_opt_deref);
   NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
 
+  NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+
   NIR_PASS(progress, nir, nir_copy_prop);
   NIR_PASS(progress, nir, nir_opt_dce);
   NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);



Mesa (staging/22.0): lavapipe: remove broken workaround for zink depth texturing.

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: f00548eeacddc1d90cc97f7ffd750fca85b21029
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f00548eeacddc1d90cc97f7ffd750fca85b21029

Author: Dave Airlie 
Date:   Wed Mar  9 16:31:46 2022 +1000

lavapipe: remove broken workaround for zink depth texturing.

Cc: mesa-stable
Reviewed-By: Mike Blumenkrantz 
Part-of: 
(cherry picked from commit 938488f43986e21a22be5c8b8c7be4fcc60af989)

---

 .pick_status.json| 2 +-
 src/gallium/frontends/lavapipe/lvp_execute.c | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 8b2cdab99b5..68b8efad5df 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -787,7 +787,7 @@
 "description": "lavapipe: remove broken workaround for zink depth 
texturing.",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c 
b/src/gallium/frontends/lavapipe/lvp_execute.c
index f5f8f3b7151..bcb82bb26a8 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -1012,8 +1012,6 @@ static void fill_sampler_view_stage(struct 
rendering_state *state,
*/
if (iv->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
iv->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
-  if (templ.swizzle_a == PIPE_SWIZZLE_X)
- templ.swizzle_r = PIPE_SWIZZLE_X;
   fix_depth_swizzle(templ.swizzle_r);
   fix_depth_swizzle(templ.swizzle_g);
   fix_depth_swizzle(templ.swizzle_b);



Mesa (staging/22.0): anv: fix xfb usage with rasterizer discard

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 032528671b7ba7f711a3f2fd3ad80ee4424234a5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=032528671b7ba7f711a3f2fd3ad80ee4424234a5

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 16:35:04 2022 -0500

anv: fix xfb usage with rasterizer discard

in the initial implementation, a stream like:

* CmdBeginTransformFeedbackEXT
* CmdSetRasterizerDiscardEnableEXT
* CmdDraw
* CmdEndTransformFeedbackEXT
* CmdBeginTransformFeedbackEXT
* CmdDraw
* CmdEndTransformFeedbackEXT

would never enable transform feedback, as it only checked for the change
in rasterizer_discard state

Fixes: 4d531c67dfd ("anv: support rasterizer discard dynamic state")

Reviewed-by: Jason Ekstrand 
Part-of: 
(cherry picked from commit 52f6978484fb738a06b45dd09c5dc6a1bd594fb7)

---

 .pick_status.json  | 2 +-
 src/intel/vulkan/genX_cmd_buffer.c | 6 --
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 1bd4f69cfbe..882bd03404c 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -670,7 +670,7 @@
 "description": "anv: fix xfb usage with rasterizer discard",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "4d531c67dfd93ff6b1dc4a9626074a74dfb14a2d"
 },
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index e293f54ca01..55c38d73bf9 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3922,8 +3922,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer 
*cmd_buffer)
 
cmd_buffer_emit_clip(cmd_buffer);
 
-   if (cmd_buffer->state.gfx.dirty & 
ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE)
-  cmd_buffer_emit_streamout(cmd_buffer);
+   if (pipeline->dynamic_states & 
ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
+  if (cmd_buffer->state.gfx.dirty & 
(ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | ANV_CMD_DIRTY_XFB_ENABLE))
+ cmd_buffer_emit_streamout(cmd_buffer);
+   }
 
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
   gfx8_cmd_buffer_emit_viewport(cmd_buffer);



Mesa (staging/22.0): util: fix msvc build

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 6a14e289929a093dee3ce532712ac40b6b3e967b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a14e289929a093dee3ce532712ac40b6b3e967b

Author: Jonathan Gray 
Date:   Sat Mar 12 11:08:56 2022 +1100

util: fix msvc build

Fix msvc build regression after 0536b691338f2759b850f9ec94634033a5d1f9e1
reported by Prodea Alexandru-Liviu.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6137
Fixes: 0536b691338 ("util: fix build with clang 10 on mips64")
Reviewed-by: Ilia Mirkin 
Part-of: 
(cherry picked from commit e50eb1ce7ab371a4cb9016a2e41d0a30e08f)

---

 .pick_status.json| 2 +-
 src/util/u_debug_stack.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 0b6c5f0229c..f625a9f5965 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -436,7 +436,7 @@
 "description": "util: fix msvc build",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "0536b691338f2759b850f9ec94634033a5d1f9e1"
 },
diff --git a/src/util/u_debug_stack.c b/src/util/u_debug_stack.c
index 86bfb2fb64b..9042effc364 100644
--- a/src/util/u_debug_stack.c
+++ b/src/util/u_debug_stack.c
@@ -256,10 +256,11 @@ debug_backtrace_capture(struct debug_stack_frame 
*backtrace,
const void **frame_pointer = ((const void **)__builtin_frame_address(1));
 #pragma GCC diagnostic pop
 #elif defined(PIPE_CC_MSVC)
+   const void **frame_pointer;
__asm {
   mov frame_pointer, ebp
}
-   const void **frame_pointer = (const void **)frame_pointer[0];
+   frame_pointer = (const void **)frame_pointer[0];
 #else
const void **frame_pointer = NULL;
 #endif



Mesa (staging/22.0): pan/bi: Handle non-2D arrays

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 663dbf34ac578c72a429ba1c94b2f01be61062da
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=663dbf34ac578c72a429ba1c94b2f01be61062da

Author: Alyssa Rosenzweig 
Date:   Sat Mar  5 15:49:43 2022 -0500

pan/bi: Handle non-2D arrays

Handle arrays generically by using the last component of the coordinate source
as the array index. That works for both 2D arrays and cube arrays, fixing cube
arrays. Cube arrays were already handled correctly in core Panfrost code.

This code path is not tested in dEQP-GLES31 without exposing OES_cube_map_array,
which depends on OES_geometry_shader, which we don't have. Yet we do expose
PIPE_CAP_CUBE_ARRAY, so ARB_cube_map_array is exposed.

Disabling PIPE_CAP_CUBE_ARRAY would be an easy band-aid fix, but it's easy
enough to handle correctly.

dEQP-GLES31 passes with a hack enabling OES_cube_map_array [without geometry
shaders].

Also fixes 1D arrays on Bifrost for the same reasons.

Fixes: 70d6c5675d6 ("pan/bi: Emit TEXC with builder")
Signed-off-by: Alyssa Rosenzweig 
Part-of: 
(cherry picked from commit 53f1e57ee7a7a6f7906c34cf7b78a39873fd00c2)

---

 .pick_status.json  |   2 +-
 src/panfrost/bifrost/bifrost_compile.c |  19 ++--
 src/panfrost/ci/panfrost-g52-fails.txt | 169 -
 3 files changed, 10 insertions(+), 180 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index a2659aa857e..809323a8ce1 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -319,7 +319,7 @@
 "description": "pan/bi: Handle non-2D arrays",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "70d6c5675d68103aeb72983a100418da15dd2bc9"
 },
diff --git a/src/panfrost/bifrost/bifrost_compile.c 
b/src/panfrost/bifrost/bifrost_compile.c
index 903ea1b82de..5f0cf9bff76 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -2670,6 +2670,7 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
 for (unsigned i = 0; i < instr->num_srcs; ++i) {
 bi_index index = bi_src_index(&instr->src[i].src);
 unsigned sz = nir_src_bit_size(instr->src[i].src);
+unsigned components = 
nir_src_num_components(instr->src[i].src);
 ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
 nir_alu_type T = base | sz;
 
@@ -2678,27 +2679,25 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
 cx = bi_emit_texc_cube_coord(b, index, &cy);
} else {
-unsigned components = 
nir_src_num_components(instr->src[i].src);
-
 /* Copy XY (for 2D+) or XX (for 1D) */
 cx = index;
 cy = bi_word(index, MIN2(1, components - 1));
 
 assert(components >= 1 && components <= 3);
 
-if (components < 3) {
-/* nothing to do */
-} else if (desc.array) {
-/* 2D array */
-dregs[BIFROST_TEX_DREG_ARRAY] =
-bi_emit_texc_array_index(b,
-bi_word(index, 
2), T);
-} else {
+if (components == 3 && !desc.array) {
 /* 3D */
 dregs[BIFROST_TEX_DREG_Z_COORD] =
 bi_word(index, 2);
 }
 }
+
+if (desc.array) {
+dregs[BIFROST_TEX_DREG_ARRAY] =
+bi_emit_texc_array_index(b,
+bi_word(index, 
components - 1), T);
+}
+
 break;
 
 case nir_tex_src_lod:
diff --git a/src/panfrost/ci/panfrost-g52-fails.txt 
b/src/panfrost/ci/panfrost-g52-fails.txt
index aa8408239fa..60df76e709f 100644
--- a/src/panfrost/ci/panfrost-g52-fails.txt
+++ b/src/panfrost/ci/panfrost-g52-fails.txt
@@ -75,8 +75,6 @@ spec@arb_get_program_binary@restore-sso-program,Fail
 spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail
 
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_DEPTH_STENCIL,Fail
 
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_STENCIL_INDEX,Fail
-spec@arb_pi

Mesa (staging/22.0): crocus: don't map scanout buffers as write-back

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: d778e00eb0244a7b1a8cebe178b761d03c54
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d778e00eb0244a7b1a8cebe178b761d03c54

Author: Dave Airlie 
Date:   Thu Mar 10 15:01:20 2022 +1000

crocus: don't map scanout buffers as write-back

This essentially ports 64405230774210488dedbc54d73ba394ec6ae802
Author: Keith Packard 
Date:   Fri Aug 6 16:11:18 2021 -0700

iris: Map scanout buffers WC instead of WB [v2]

to crocus.

Fixes: f3630548f1da ("crocus: initial gallium driver for Intel gfx 4-7")

Reviewed-by: Zoltán Böszörményi 
Part-of: 
(cherry picked from commit e8c3be0eb84bbefe1486f2543125c35b278ac1cf)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/crocus/crocus_bufmgr.c   | 6 ++
 src/gallium/drivers/crocus/crocus_bufmgr.h   | 6 ++
 src/gallium/drivers/crocus/crocus_resource.c | 4 
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index 6f82958d0a2..1bd4f69cfbe 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -679,7 +679,7 @@
 "description": "crocus: don't map scanout buffers as write-back",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "f3630548f1da904ec6c63b43ece7e68afdb8867e"
 },
diff --git a/src/gallium/drivers/crocus/crocus_bufmgr.c 
b/src/gallium/drivers/crocus/crocus_bufmgr.c
index aa70271ffee..d3fe5ee42f5 100644
--- a/src/gallium/drivers/crocus/crocus_bufmgr.c
+++ b/src/gallium/drivers/crocus/crocus_bufmgr.c
@@ -429,6 +429,9 @@ bo_alloc_internal(struct crocus_bufmgr *bufmgr,
bo->index = -1;
bo->kflags = 0;
 
+   if (flags & BO_ALLOC_SCANOUT)
+  bo->scanout = 1;
+
if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) {
   struct drm_i915_gem_caching arg = {
  .handle = bo->gem_handle,
@@ -1010,6 +1013,9 @@ crocus_bo_map_gtt(struct pipe_debug_callback *dbg,
 static bool
 can_map_cpu(struct crocus_bo *bo, unsigned flags)
 {
+   if (bo->scanout)
+  return false;
+
if (bo->cache_coherent)
   return true;
 
diff --git a/src/gallium/drivers/crocus/crocus_bufmgr.h 
b/src/gallium/drivers/crocus/crocus_bufmgr.h
index e4310448440..de550d99fa7 100644
--- a/src/gallium/drivers/crocus/crocus_bufmgr.h
+++ b/src/gallium/drivers/crocus/crocus_bufmgr.h
@@ -141,12 +141,18 @@ struct crocus_bo {
 */
bool userptr;
 
+   /**
+* Boolean of if this is used for scanout.
+*/
+   bool scanout;
+
/** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
uint32_t hash;
 };
 
 #define BO_ALLOC_ZEROED   (1 << 0)
 #define BO_ALLOC_COHERENT (1 << 1)
+#define BO_ALLOC_SCANOUT  (1 << 2)
 
 /**
  * Allocate a buffer object.
diff --git a/src/gallium/drivers/crocus/crocus_resource.c 
b/src/gallium/drivers/crocus/crocus_resource.c
index 3ee44e7713a..436a776360b 100644
--- a/src/gallium/drivers/crocus/crocus_resource.c
+++ b/src/gallium/drivers/crocus/crocus_resource.c
@@ -708,6 +708,10 @@ crocus_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
if (templ->usage == PIPE_USAGE_STAGING)
   flags |= BO_ALLOC_COHERENT;
 
+   /* Scanout buffers need to be WC. */
+   if (templ->bind & PIPE_BIND_SCANOUT)
+  flags |= BO_ALLOC_SCANOUT;
+
uint64_t aux_size = 0;
uint32_t aux_preferred_alloc_flags;
 



Mesa (main): panvk: Make panvk_image derive from vk_image

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 1865b7a93e60f4e97cea536d7124ca8af0fb83af
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1865b7a93e60f4e97cea536d7124ca8af0fb83af

Author: Jason Ekstrand 
Date:   Fri Mar 11 14:01:15 2022 -0600

panvk: Make panvk_image derive from vk_image

Reviewed-by: Boris Brezillon 
Part-of: 

---

 src/panfrost/vulkan/panvk_image.c| 45 +++-
 src/panfrost/vulkan/panvk_private.h  | 21 -
 src/panfrost/vulkan/panvk_vX_image.c |  4 ++--
 3 files changed, 14 insertions(+), 56 deletions(-)

diff --git a/src/panfrost/vulkan/panvk_image.c 
b/src/panfrost/vulkan/panvk_image.c
index a8b322e4c17..3f7ae6ee7b8 100644
--- a/src/panfrost/vulkan/panvk_image.c
+++ b/src/panfrost/vulkan/panvk_image.c
@@ -71,48 +71,19 @@ panvk_image_create(VkDevice _device,
VK_FROM_HANDLE(panvk_device, device, _device);
const struct panfrost_device *pdev = &device->physical_device->pdev;
struct panvk_image *image = NULL;
-   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
 
-   assert(pCreateInfo->mipLevels > 0);
-   assert(pCreateInfo->arrayLayers > 0);
-   assert(pCreateInfo->samples > 0);
-   assert(pCreateInfo->extent.width > 0);
-   assert(pCreateInfo->extent.height > 0);
-   assert(pCreateInfo->extent.depth > 0);
-
-   image = vk_object_zalloc(&device->vk, alloc, sizeof(*image),
-VK_OBJECT_TYPE_IMAGE);
+   image = vk_image_create(&device->vk, pCreateInfo, alloc, sizeof(*image));
if (!image)
   return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   image->type = pCreateInfo->imageType;
-
-   image->vk_format = pCreateInfo->format;
-   image->tiling = pCreateInfo->tiling;
-   image->usage = pCreateInfo->usage;
-   image->flags = pCreateInfo->flags;
-   image->extent = pCreateInfo->extent;
pan_image_layout_init(pdev, &image->pimage.layout, modifier,
- vk_format_to_pipe_format(pCreateInfo->format),
- 
panvk_image_type_to_mali_tex_dim(pCreateInfo->imageType),
- pCreateInfo->extent.width, pCreateInfo->extent.height,
- pCreateInfo->extent.depth, pCreateInfo->arrayLayers,
- pCreateInfo->samples, pCreateInfo->mipLevels,
+ vk_format_to_pipe_format(image->vk.format),
+ 
panvk_image_type_to_mali_tex_dim(image->vk.image_type),
+ image->vk.extent.width, image->vk.extent.height,
+ image->vk.extent.depth, image->vk.array_layers,
+ image->vk.samples, image->vk.mip_levels,
  PAN_IMAGE_CRC_NONE, NULL);
 
-   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
-   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
-  for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) {
- if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL)
-image->queue_family_mask |= (1u << PANVK_MAX_QUEUE_FAMILIES) - 1u;
- else
-image->queue_family_mask |= 1u << 
pCreateInfo->pQueueFamilyIndices[i];
-   }
-   }
-
-   if (vk_find_struct_const(pCreateInfo->pNext, 
EXTERNAL_MEMORY_IMAGE_CREATE_INFO))
-  image->shareable = true;
-
*pImage = panvk_image_to_handle(image);
return VK_SUCCESS;
 }
@@ -234,7 +205,7 @@ panvk_DestroyImage(VkDevice _device,
if (!image)
   return;
 
-   vk_object_free(&device->vk, pAllocator, image);
+   vk_image_destroy(&device->vk, pAllocator, &image->vk);
 }
 
 static unsigned
@@ -260,7 +231,7 @@ panvk_GetImageSubresourceLayout(VkDevice _device,
 {
VK_FROM_HANDLE(panvk_image, image, _image);
 
-   unsigned plane = panvk_plane_index(image->vk_format, 
pSubresource->aspectMask);
+   unsigned plane = panvk_plane_index(image->vk.format, 
pSubresource->aspectMask);
assert(plane < PANVK_MAX_PLANES);
 
const struct pan_image_slice_layout *slice_layout =
diff --git a/src/panfrost/vulkan/panvk_private.h 
b/src/panfrost/vulkan/panvk_private.h
index 8e3150c853a..1b1d9e826fa 100644
--- a/src/panfrost/vulkan/panvk_private.h
+++ b/src/panfrost/vulkan/panvk_private.h
@@ -52,6 +52,7 @@
 #include "vk_command_buffer.h"
 #include "vk_command_pool.h"
 #include "vk_device.h"
+#include "vk_image.h"
 #include "vk_instance.h"
 #include "vk_log.h"
 #include "vk_object.h"
@@ -933,23 +934,9 @@ struct panvk_plane_memory {
 #define PANVK_MAX_PLANES 1
 
 struct panvk_image {
-   struct vk_object_base base;
-   struct pan_image pimage;
-   VkImageType type;
+   struct vk_image vk;
 
-   /* The original VkFormat provided by the client.  This may not match any
-* of the actual surface formats.
-*/
-   VkFormat vk_format;
-   VkImageAspectFlags aspects;
-   VkImageUsageFlags usage;  /**< Superset of VkImageCreateInfo::usage. */
-   VkImageTiling tiling; /** VkImageCr

Mesa (main): panvk: Use the correct integer border colors

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 2170c3ac63079a99864a29e2c23207e61a30278f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2170c3ac63079a99864a29e2c23207e61a30278f

Author: Jason Ekstrand 
Date:   Sat Mar 12 11:01:12 2022 -0600

panvk: Use the correct integer border colors

Reviewed-by: Boris Brezillon 
Part-of: 

---

 src/panfrost/ci/deqp-panfrost-g52-vk.toml | 1 +
 src/panfrost/vulkan/panvk_vX_cs.c | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/panfrost/ci/deqp-panfrost-g52-vk.toml 
b/src/panfrost/ci/deqp-panfrost-g52-vk.toml
index 897667a5e2a..fbafaf8daef 100644
--- a/src/panfrost/ci/deqp-panfrost-g52-vk.toml
+++ b/src/panfrost/ci/deqp-panfrost-g52-vk.toml
@@ -11,5 +11,6 @@ include = [
 "dEQP-VK.glsl.builtin.precision.frexp.*",
 "dEQP-VK.glsl.builtin.precision.ldexp.*",
 "dEQP-VK.image.load_store.with_format.*",
+
"dEQP-VK.pipeline.sampler.view_type.*.format.r*.address_modes.all_mode_clamp_to_border*",
 "dEQP-VK.ssbo.layout.single_basic_type.*",
 ]
diff --git a/src/panfrost/vulkan/panvk_vX_cs.c 
b/src/panfrost/vulkan/panvk_vX_cs.c
index caabb217fc8..d537510cdeb 100644
--- a/src/panfrost/vulkan/panvk_vX_cs.c
+++ b/src/panfrost/vulkan/panvk_vX_cs.c
@@ -80,13 +80,13 @@ panvk_translate_sampler_border_color(const 
VkSamplerCreateInfo *pCreateInfo,
   border_color[0] = border_color[1] = border_color[2] = 0;
   border_color[3] =
  pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
- UINT_MAX : 0;
+ 1 : 0;
   break;
case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
   border_color[0] = border_color[1] = border_color[2] = border_color[3] = 
fui(1.0);
   break;
case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
-  border_color[0] = border_color[1] = border_color[2] = border_color[3] = 
UINT_MAX;
+  border_color[0] = border_color[1] = border_color[2] = border_color[3] = 
1;
   break;
case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
case VK_BORDER_COLOR_INT_CUSTOM_EXT:



Mesa (main): panvk: Skip ZS setup if there is no depth/stencil attachment

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 17e79b044e47656ae82c87e6f1e336d10833c326
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=17e79b044e47656ae82c87e6f1e336d10833c326

Author: Jason Ekstrand 
Date:   Fri Mar 11 14:38:08 2022 -0600

panvk: Skip ZS setup if there is no depth/stencil attachment

Reviewed-by: Boris Brezillon 
Part-of: 

---

 src/panfrost/vulkan/panvk_vX_pipeline.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/panfrost/vulkan/panvk_vX_pipeline.c 
b/src/panfrost/vulkan/panvk_vX_pipeline.c
index de8e7787be3..3c8cd0fe9d6 100644
--- a/src/panfrost/vulkan/panvk_vX_pipeline.c
+++ b/src/panfrost/vulkan/panvk_vX_pipeline.c
@@ -685,6 +685,9 @@ static void
 panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
 struct panvk_pipeline *pipeline)
 {
+   if (!builder->use_depth_stencil_attachment)
+  return;
+
pipeline->zs.z_test = 
builder->create_info.gfx->pDepthStencilState->depthTestEnable;
pipeline->zs.z_write = 
builder->create_info.gfx->pDepthStencilState->depthWriteEnable;
pipeline->zs.z_compare_func =



Mesa (main): panvk: Rework texture, sampler, and image binding index calculation

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 8dd917b9f0b6d4e65a8cd47d030abd59fc32d92d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8dd917b9f0b6d4e65a8cd47d030abd59fc32d92d

Author: Jason Ekstrand 
Date:   Fri Mar 11 21:42:21 2022 -0600

panvk: Rework texture, sampler, and image binding index calculation

This adds a new get_resource_deref_binding helper which decodes a
resource deref into set, binding, and index.  To make texture
instructions nicer, the index can optionally be split into immediate
and SSA parts.

Reviewed-by: Boris Brezillon 
Part-of: 

---

 src/panfrost/vulkan/panvk_vX_shader.c | 106 +-
 1 file changed, 65 insertions(+), 41 deletions(-)

diff --git a/src/panfrost/vulkan/panvk_vX_shader.c 
b/src/panfrost/vulkan/panvk_vX_shader.c
index 82b8cc6223e..47357c4d9d7 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -85,32 +85,32 @@ struct panvk_lower_misc_ctx {
bool has_img_access;
 };
 
-static unsigned
-get_fixed_sampler_index(nir_deref_instr *deref,
-const struct panvk_lower_misc_ctx *ctx)
+static void
+get_resource_deref_binding(nir_deref_instr *deref,
+   uint32_t *set, uint32_t *binding,
+   uint32_t *index_imm, nir_ssa_def **index_ssa)
 {
-   nir_variable *var = nir_deref_instr_get_variable(deref);
-   unsigned set = var->data.descriptor_set;
-   unsigned binding = var->data.binding;
-   const struct panvk_descriptor_set_binding_layout *bind_layout =
-  &ctx->layout->sets[set].layout->bindings[binding];
+   *index_imm = 0;
+   *index_ssa = NULL;
 
-   return bind_layout->sampler_idx + ctx->layout->sets[set].sampler_offset;
-}
+   if (deref->deref_type == nir_deref_type_array) {
+  assert(deref->arr.index.is_ssa);
+  if (index_imm != NULL && nir_src_is_const(deref->arr.index))
+ *index_imm = nir_src_as_uint(deref->arr.index);
+  else
+ *index_ssa = deref->arr.index.ssa;
 
-static unsigned
-get_fixed_texture_index(nir_deref_instr *deref,
-const struct panvk_lower_misc_ctx *ctx)
-{
-   nir_variable *var = nir_deref_instr_get_variable(deref);
-   unsigned set = var->data.descriptor_set;
-   unsigned binding = var->data.binding;
-   const struct panvk_descriptor_set_binding_layout *bind_layout =
-  &ctx->layout->sets[set].layout->bindings[binding];
+  deref = nir_deref_instr_parent(deref);
+   }
 
-   return bind_layout->tex_idx + ctx->layout->sets[set].tex_offset;
+   assert(deref->deref_type == nir_deref_type_var);
+   nir_variable *var = deref->var;
+
+   *set = var->data.descriptor_set;
+   *binding = var->data.binding;
 }
 
+
 static bool
 lower_tex(nir_builder *b, nir_tex_instr *tex,
   const struct panvk_lower_misc_ctx *ctx)
@@ -122,16 +122,46 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
 
if (sampler_src_idx >= 0) {
   nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
-  tex->sampler_index = get_fixed_sampler_index(deref, ctx);
   nir_tex_instr_remove_src(tex, sampler_src_idx);
+
+  uint32_t set, binding, index_imm;
+  nir_ssa_def *index_ssa;
+  get_resource_deref_binding(deref, &set, &binding,
+ &index_imm, &index_ssa);
+
+  const struct panvk_descriptor_set_binding_layout *bind_layout =
+ &ctx->layout->sets[set].layout->bindings[binding];
+
+  tex->sampler_index = ctx->layout->sets[set].sampler_offset +
+   bind_layout->sampler_idx + index_imm;
+
+  if (index_ssa != NULL) {
+ nir_tex_instr_add_src(tex, nir_tex_src_sampler_offset,
+   nir_src_for_ssa(index_ssa));
+  }
   progress = true;
}
 
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
if (tex_src_idx >= 0) {
   nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
-  tex->texture_index = get_fixed_texture_index(deref, ctx);
   nir_tex_instr_remove_src(tex, tex_src_idx);
+
+  uint32_t set, binding, index_imm;
+  nir_ssa_def *index_ssa;
+  get_resource_deref_binding(deref, &set, &binding,
+ &index_imm, &index_ssa);
+
+  const struct panvk_descriptor_set_binding_layout *bind_layout =
+ &ctx->layout->sets[set].layout->bindings[binding];
+
+  tex->texture_index = ctx->layout->sets[set].tex_offset +
+   bind_layout->tex_idx + index_imm;
+
+  if (index_ssa != NULL) {
+ nir_tex_instr_add_src(tex, nir_tex_src_texture_offset,
+   nir_src_for_ssa(index_ssa));
+  }
   progress = true;
}
 
@@ -189,35 +219,29 @@ lower_load_vulkan_descriptor(nir_builder *b, 
nir_intrinsic_instr *intrin)
nir_instr_remove(&intrin->instr);
 }
 
-static void
-type_size_align_1(const struct glsl_type *

Mesa (main): panvk: Make panvk_image_view derive from vk_image_view

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 4f843db0a14c30f66e7a18c9e1246e04d5ebd605
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4f843db0a14c30f66e7a18c9e1246e04d5ebd605

Author: Jason Ekstrand 
Date:   Fri Mar 11 14:13:28 2022 -0600

panvk: Make panvk_image_view derive from vk_image_view

Reviewed-by: Boris Brezillon 
Part-of: 

---

 src/panfrost/vulkan/panvk_image.c|  2 +-
 src/panfrost/vulkan/panvk_private.h  |  6 ++---
 src/panfrost/vulkan/panvk_vX_image.c | 49 +---
 3 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/src/panfrost/vulkan/panvk_image.c 
b/src/panfrost/vulkan/panvk_image.c
index 3f7ae6ee7b8..38bc774aa77 100644
--- a/src/panfrost/vulkan/panvk_image.c
+++ b/src/panfrost/vulkan/panvk_image.c
@@ -258,7 +258,7 @@ panvk_DestroyImageView(VkDevice _device,
   return;
 
panfrost_bo_unreference(view->bo);
-   vk_object_free(&device->vk, pAllocator, view);
+   vk_image_view_destroy(&device->vk, pAllocator, &view->vk);
 }
 
 void
diff --git a/src/panfrost/vulkan/panvk_private.h 
b/src/panfrost/vulkan/panvk_private.h
index 1b1d9e826fa..be25e4cc8d7 100644
--- a/src/panfrost/vulkan/panvk_private.h
+++ b/src/panfrost/vulkan/panvk_private.h
@@ -949,10 +949,10 @@ panvk_image_get_total_size(const struct panvk_image 
*image);
 #define ATTRIB_BUF_DESC_WORDS 4
 
 struct panvk_image_view {
-   struct vk_object_base base;
+   struct vk_image_view vk;
+
struct pan_image_view pview;
 
-   VkFormat vk_format;
struct panfrost_bo *bo;
struct {
   uint32_t tex[TEXTURE_DESC_WORDS];
@@ -1063,7 +1063,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_fence, base, 
VkFence, VK_OBJECT_TYPE_FENCE)
 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_event, base, VkEvent, 
VK_OBJECT_TYPE_EVENT)
 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_framebuffer, base, VkFramebuffer, 
VK_OBJECT_TYPE_FRAMEBUFFER)
 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_image, vk.base, VkImage, 
VK_OBJECT_TYPE_IMAGE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_image_view, base, VkImageView, 
VK_OBJECT_TYPE_IMAGE_VIEW);
+VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_image_view, vk.base, VkImageView, 
VK_OBJECT_TYPE_IMAGE_VIEW);
 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline_cache, base, VkPipelineCache, 
VK_OBJECT_TYPE_PIPELINE_CACHE)
 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline, base, VkPipeline, 
VK_OBJECT_TYPE_PIPELINE)
 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline_layout, base, VkPipelineLayout, 
VK_OBJECT_TYPE_PIPELINE_LAYOUT)
diff --git a/src/panfrost/vulkan/panvk_vX_image.c 
b/src/panfrost/vulkan/panvk_vX_image.c
index 9f2f9eef90c..4692ee089a4 100644
--- a/src/panfrost/vulkan/panvk_vX_image.c
+++ b/src/panfrost/vulkan/panvk_vX_image.c
@@ -63,9 +63,6 @@ panvk_convert_swizzle(const VkComponentMapping *in,
const VkComponentSwizzle *comp = &in->r;
for (unsigned i = 0; i < 4; i++) {
   switch (comp[i]) {
-  case VK_COMPONENT_SWIZZLE_IDENTITY:
- out[i] = PIPE_SWIZZLE_X + i;
- break;
   case VK_COMPONENT_SWIZZLE_ZERO:
  out[i] = PIPE_SWIZZLE_0;
  break;
@@ -100,40 +97,28 @@ panvk_per_arch(CreateImageView)(VkDevice _device,
VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image);
struct panvk_image_view *view;
 
-   view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view),
-  VK_OBJECT_TYPE_IMAGE_VIEW);
+   view = vk_image_view_create(&device->vk, pCreateInfo,
+   pAllocator, sizeof(*view));
if (view == NULL)
   return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   view->pview.format = vk_format_to_pipe_format(pCreateInfo->format);
-
-   if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
-  view->pview.format = util_format_get_depth_only(view->pview.format);
-   else if (pCreateInfo->subresourceRange.aspectMask == 
VK_IMAGE_ASPECT_STENCIL_BIT)
-  view->pview.format = util_format_stencil_only(view->pview.format);
-
-   unsigned level_count =
-  pCreateInfo->subresourceRange.levelCount == VK_REMAINING_MIP_LEVELS ?
-  image->pimage.layout.nr_slices - 
pCreateInfo->subresourceRange.baseMipLevel :
-  pCreateInfo->subresourceRange.levelCount;
-   unsigned layer_count =
-  pCreateInfo->subresourceRange.layerCount == VK_REMAINING_ARRAY_LAYERS ?
-  image->pimage.layout.array_size - 
pCreateInfo->subresourceRange.baseArrayLayer :
-  pCreateInfo->subresourceRange.layerCount;
-
-   view->pview.dim = panvk_view_type_to_mali_tex_dim(pCreateInfo->viewType);
-   view->pview.first_level = pCreateInfo->subresourceRange.baseMipLevel;
-   view->pview.last_level = pCreateInfo->subresourceRange.baseMipLevel + 
level_count - 1;
-   view->pview.first_layer = pCreateInfo->subresourceRange.baseArrayLayer;
-   view->pview.last_layer = pCreateInfo->subresourceRange.baseArrayLayer + 
layer_count - 1;
-   panvk_convert_swizzle(&pCreateInfo->components, view->pview.swizzle);
-   view->pview.image = &image->

Mesa (main): lavapipe: always clone shader nir for shader states

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: main
Commit: 2f9976debc549450d37595bdbe34c9115c3605fb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f9976debc549450d37595bdbe34c9115c3605fb

Author: Mike Blumenkrantz 
Date:   Wed Mar  9 09:03:20 2022 -0500

lavapipe: always clone shader nir for shader states

these become owned and freed by llvmpipe, so ensure that freeing
them there won't cause crashes

cc: mesa-stable

Reviewed-by: Dave Airlie 
Part-of: 

---

 src/gallium/frontends/lavapipe/lvp_pipeline.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c 
b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index 0ac79f8a02c..c7d7c1b3468 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -64,6 +64,9 @@ VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
   device->queue.ctx->delete_compute_state(device->queue.ctx, 
pipeline->shader_cso[PIPE_SHADER_COMPUTE]);
 
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
+  ralloc_free(pipeline->pipeline_nir[i]);
+
ralloc_free(pipeline->mem_ctx);
vk_object_base_finish(&pipeline->base);
vk_free2(&device->vk.alloc, pAllocator, pipeline);
@@ -765,7 +768,7 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
 static void fill_shader_prog(struct pipe_shader_state *state, gl_shader_stage 
stage, struct lvp_pipeline *pipeline)
 {
state->type = PIPE_SHADER_IR_NIR;
-   state->ir.nir = pipeline->pipeline_nir[stage];
+   state->ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[stage]);
 }
 
 static void
@@ -837,7 +840,7 @@ lvp_pipeline_compile(struct lvp_pipeline *pipeline,

device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen,
 pipeline->pipeline_nir[stage]);
if (stage == MESA_SHADER_COMPUTE) {
   struct pipe_compute_state shstate = {0};
-  shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];
+  shstate.prog = (void *)nir_shader_clone(NULL, 
pipeline->pipeline_nir[MESA_SHADER_COMPUTE]);
   shstate.ir_type = PIPE_SHADER_IR_NIR;
   shstate.req_local_mem = 
pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;
   pipeline->shader_cso[PIPE_SHADER_COMPUTE] = 
device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
@@ -1023,7 +1026,7 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
   pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
   struct pipe_shader_state shstate = {0};
   shstate.type = PIPE_SHADER_IR_NIR;
-  shstate.ir.nir = pipeline->pipeline_nir[MESA_SHADER_FRAGMENT];
+  shstate.ir.nir = nir_shader_clone(NULL, 
pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]);
   pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = 
device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
}
return VK_SUCCESS;



Mesa (staging/22.0): anv: fix CmdSetColorWriteEnableEXT for maximum rts

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 0185719f0a528fcb07f3f2653dd906cc447e8189
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0185719f0a528fcb07f3f2653dd906cc447e8189

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 20:15:50 2022 -0500

anv: fix CmdSetColorWriteEnableEXT for maximum rts

Fixes: b15bfe92f7f ("anv: implement VK_EXT_color_write_enable")

Reviewed-by: Lionel Landwerlin 
Reviewed-by: Jason Ekstrand 
Part-of: 
(cherry picked from commit 1e3e7b3a4da6c7cd04879e0e4aab5f0d6f7b8d5f)

---

 .pick_status.json | 2 +-
 src/intel/vulkan/anv_cmd_buffer.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index f1f296c9e2d..263a2f2f020 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -661,7 +661,7 @@
 "description": "anv: fix CmdSetColorWriteEnableEXT for maximum rts",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "b15bfe92f7f87e270160a9221a1394fc7b5de5d2"
 },
diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 3bd1e0cdc03..7949fbdaefa 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1647,7 +1647,7 @@ void anv_CmdSetColorWriteEnableEXT(
 {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   assert(attachmentCount < MAX_RTS);
+   assert(attachmentCount <= MAX_RTS);
 
uint8_t color_writes = 0;
for (uint32_t i = 0; i < attachmentCount; i++)



Mesa (staging/22.0): anv: Allow MSAA resolve with different numbers of planes

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: c457e5b7938ba93b658a8ad6e79566645f2814ca
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c457e5b7938ba93b658a8ad6e79566645f2814ca

Author: Jason Ekstrand 
Date:   Fri Feb 11 23:41:03 2022 -0600

anv: Allow MSAA resolve with different numbers of planes

The Vulkan spec for VK_KHR_depth_stencil_resolve allows a format
mismatch between the primary attachment and the resolve attachment
within certain limits.  In particular,


VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03181

If pDepthStencilResolveAttachment is not NULL and does not have the
value VK_ATTACHMENT_UNUSED and VkFormat of
pDepthStencilResolveAttachment has a depth component, then the
VkFormat of pDepthStencilAttachment must have a depth component with
the same number of bits and numerical type


VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03182

If pDepthStencilResolveAttachment is not NULL and does not have the
value VK_ATTACHMENT_UNUSED, and VkFormat of
pDepthStencilResolveAttachment has a stencil component, then the
VkFormat of pDepthStencilAttachment must have a stencil component
with the same number of bits and numerical type

So you can resolve from a depth/stencil format to a depth-only or
stencil-only format so long as the number of bits matches.
Unfortunately, this has never been tested because the CTS tests which
purport to test this are broken and actually test with a destination
combined depth/stencil format.

Fixes: 5e4f9ea363a6 ("anv: Implement VK_KHR_depth_stencil_resolve")
Reviewed-by: Ivan Briano 
Part-of: 
(cherry picked from commit d65dbe8018923e3a405b362c92a21caa30e66e83)

---

 .pick_status.json  | 2 +-
 src/intel/vulkan/anv_blorp.c   | 1 -
 src/intel/vulkan/genX_cmd_buffer.c | 2 ++
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index c3a6274bf12..8dc2792d163 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -445,7 +445,7 @@
 "description": "anv: Allow MSAA resolve with different numbers of 
planes",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "5e4f9ea363a638645670abeffce08ed58c37c369"
 },
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 9f0fe902d7c..572e480a469 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1403,7 +1403,6 @@ anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
assert(src_image->vk.samples > 1);
assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
assert(dst_image->vk.samples == 1);
-   assert(src_image->n_planes == dst_image->n_planes);
 
struct blorp_surf src_surf, dst_surf;
get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 55c38d73bf9..9577ca69aa7 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -6952,6 +6952,7 @@ cmd_buffer_resolve_attachments(struct anv_cmd_buffer 
*cmd_buffer,
  &cmd_state->attachments[dst_att];
 
   if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+  (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
   subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
 
  /* MSAA resolves sample from the source attachment.  Transition the
@@ -7018,6 +7019,7 @@ cmd_buffer_resolve_attachments(struct anv_cmd_buffer 
*cmd_buffer,
   }
 
   if ((src_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+  (dst_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
   subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
 
  src_state->current_stencil_layout = 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;



Mesa (staging/22.0): crocus: don't map scanout buffers as write-back

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 6822ea6304d213a3d343dac13d9561a61dda548e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6822ea6304d213a3d343dac13d9561a61dda548e

Author: Dave Airlie 
Date:   Thu Mar 10 15:01:20 2022 +1000

crocus: don't map scanout buffers as write-back

This essentially ports 64405230774210488dedbc54d73ba394ec6ae802
Author: Keith Packard 
Date:   Fri Aug 6 16:11:18 2021 -0700

iris: Map scanout buffers WC instead of WB [v2]

to crocus.

Fixes: f3630548f1da ("crocus: initial gallium driver for Intel gfx 4-7")

Reviewed-by: Zoltán Böszörményi 
Part-of: 
(cherry picked from commit e8c3be0eb84bbefe1486f2543125c35b278ac1cf)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/crocus/crocus_bufmgr.c   | 6 ++
 src/gallium/drivers/crocus/crocus_bufmgr.h   | 6 ++
 src/gallium/drivers/crocus/crocus_resource.c | 4 
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index d7a3baad239..ac5ab8e45d2 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -679,7 +679,7 @@
 "description": "crocus: don't map scanout buffers as write-back",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "f3630548f1da904ec6c63b43ece7e68afdb8867e"
 },
diff --git a/src/gallium/drivers/crocus/crocus_bufmgr.c 
b/src/gallium/drivers/crocus/crocus_bufmgr.c
index aa70271ffee..d3fe5ee42f5 100644
--- a/src/gallium/drivers/crocus/crocus_bufmgr.c
+++ b/src/gallium/drivers/crocus/crocus_bufmgr.c
@@ -429,6 +429,9 @@ bo_alloc_internal(struct crocus_bufmgr *bufmgr,
bo->index = -1;
bo->kflags = 0;
 
+   if (flags & BO_ALLOC_SCANOUT)
+  bo->scanout = 1;
+
if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) {
   struct drm_i915_gem_caching arg = {
  .handle = bo->gem_handle,
@@ -1010,6 +1013,9 @@ crocus_bo_map_gtt(struct pipe_debug_callback *dbg,
 static bool
 can_map_cpu(struct crocus_bo *bo, unsigned flags)
 {
+   if (bo->scanout)
+  return false;
+
if (bo->cache_coherent)
   return true;
 
diff --git a/src/gallium/drivers/crocus/crocus_bufmgr.h 
b/src/gallium/drivers/crocus/crocus_bufmgr.h
index e4310448440..de550d99fa7 100644
--- a/src/gallium/drivers/crocus/crocus_bufmgr.h
+++ b/src/gallium/drivers/crocus/crocus_bufmgr.h
@@ -141,12 +141,18 @@ struct crocus_bo {
 */
bool userptr;
 
+   /**
+* Boolean of if this is used for scanout.
+*/
+   bool scanout;
+
/** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
uint32_t hash;
 };
 
 #define BO_ALLOC_ZEROED   (1 << 0)
 #define BO_ALLOC_COHERENT (1 << 1)
+#define BO_ALLOC_SCANOUT  (1 << 2)
 
 /**
  * Allocate a buffer object.
diff --git a/src/gallium/drivers/crocus/crocus_resource.c 
b/src/gallium/drivers/crocus/crocus_resource.c
index 3ee44e7713a..436a776360b 100644
--- a/src/gallium/drivers/crocus/crocus_resource.c
+++ b/src/gallium/drivers/crocus/crocus_resource.c
@@ -708,6 +708,10 @@ crocus_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
if (templ->usage == PIPE_USAGE_STAGING)
   flags |= BO_ALLOC_COHERENT;
 
+   /* Scanout buffers need to be WC. */
+   if (templ->bind & PIPE_BIND_SCANOUT)
+  flags |= BO_ALLOC_SCANOUT;
+
uint64_t aux_size = 0;
uint32_t aux_preferred_alloc_flags;
 



Mesa (staging/22.0): anv: fix xfb usage with rasterizer discard

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 3aca53b5dd339954c192ab81c9542b6083d5be27
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3aca53b5dd339954c192ab81c9542b6083d5be27

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 16:35:04 2022 -0500

anv: fix xfb usage with rasterizer discard

in the initial implementation, a stream like:

* CmdBeginTransformFeedbackEXT
* CmdSetRasterizerDiscardEnableEXT
* CmdDraw
* CmdEndTransformFeedbackEXT
* CmdBeginTransformFeedbackEXT
* CmdDraw
* CmdEndTransformFeedbackEXT

would never enable transform feedback, as it only checked for the change
in rasterizer_discard state

Fixes: 4d531c67dfd ("anv: support rasterizer discard dynamic state")

Reviewed-by: Jason Ekstrand 
Part-of: 
(cherry picked from commit 52f6978484fb738a06b45dd09c5dc6a1bd594fb7)

---

 .pick_status.json  | 2 +-
 src/intel/vulkan/genX_cmd_buffer.c | 6 --
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index ac5ab8e45d2..f1f296c9e2d 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -670,7 +670,7 @@
 "description": "anv: fix xfb usage with rasterizer discard",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "4d531c67dfd93ff6b1dc4a9626074a74dfb14a2d"
 },
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index e293f54ca01..55c38d73bf9 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3922,8 +3922,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer 
*cmd_buffer)
 
cmd_buffer_emit_clip(cmd_buffer);
 
-   if (cmd_buffer->state.gfx.dirty & 
ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE)
-  cmd_buffer_emit_streamout(cmd_buffer);
+   if (pipeline->dynamic_states & 
ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
+  if (cmd_buffer->state.gfx.dirty & 
(ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | ANV_CMD_DIRTY_XFB_ENABLE))
+ cmd_buffer_emit_streamout(cmd_buffer);
+   }
 
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
   gfx8_cmd_buffer_emit_viewport(cmd_buffer);



Mesa (staging/22.0): radeonsi: don't clear framebuffer.state before dcc decomp

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 76b8a28d5a21c22daa9295726b39c15fd8b72d72
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=76b8a28d5a21c22daa9295726b39c15fd8b72d72

Author: Pierre-Eric Pelloux-Prayer 
Date:   Mon Mar  7 11:02:45 2022 +0100

radeonsi: don't clear framebuffer.state before dcc decomp

This causes inconsistencies between sctx->framebuffer.state and other
sctx->framebuffer properties (like compressed_cb_mask).

The point of this code was to fix an issue with vi_separate_dcc_stop_query,
which was removed by 804e2924406, so we can safely drop it.

Reviewed-by: Marek Olšák 
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6099
Cc: mesa-stable
Part-of: 
(cherry picked from commit 968d68125c234bc347fde8fad543f0fc7d80d8b0)

---

 .pick_status.json   |  2 +-
 src/gallium/drivers/radeonsi/si_state.c | 11 ---
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index be0fb75d64c..c3a6274bf12 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -589,7 +589,7 @@
 "description": "radeonsi: don't clear framebuffer.state before dcc 
decomp",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 293b9b81f8e..0d7d23f41a7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2788,7 +2788,6 @@ static void si_set_framebuffer_state(struct pipe_context 
*ctx,
bool old_has_stencil =
   old_has_zsbuf &&
   ((struct si_texture 
*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
-   bool unbound = false;
int i;
 
/* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs
@@ -2813,16 +2812,6 @@ static void si_set_framebuffer_state(struct pipe_context 
*ctx,
   if (!surf->dcc_incompatible)
  continue;
 
-  /* Since the DCC decompression calls back into set_framebuffer-
-   * _state, we need to unbind the framebuffer, so that
-   * vi_separate_dcc_stop_query isn't called twice with the same
-   * color buffer.
-   */
-  if (!unbound) {
- util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);
- unbound = true;
-  }
-
   if (vi_dcc_enabled(tex, surf->base.u.tex.level))
  if (!si_texture_disable_dcc(sctx, tex))
 si_decompress_dcc(sctx, tex);



Mesa (staging/22.0): anv: fix some dynamic rasterization discard cases in pipeline construction

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: bbdcc0d1e47f5f6f9ec801ea7236aefe6ccdb783
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=bbdcc0d1e47f5f6f9ec801ea7236aefe6ccdb783

Author: Mike Blumenkrantz 
Date:   Mon Mar  7 20:20:11 2022 -0500

anv: fix some dynamic rasterization discard cases in pipeline construction

cc: mesa-stable

Reviewed-by: Lionel Landwerlin 
Reviewed-by: Jason Ekstrand 
Part-of: 
(cherry picked from commit 5ab0e3f0bb09bde3d06b0ea448a6e183a813e286)

---

 .pick_status.json   | 2 +-
 src/intel/vulkan/anv_pipeline.c | 6 ++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 263a2f2f020..be0fb75d64c 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -652,7 +652,7 @@
 "description": "anv: fix some dynamic rasterization discard cases in 
pipeline construction",
 "nominated": true,
 "nomination_type": 0,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": null
 },
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 0c4abb8339f..94774e82cdd 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -2421,8 +2421,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline 
*pipeline,
}
 
const VkPipelineMultisampleStateCreateInfo *ms_info =
-  pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
-  pCreateInfo->pMultisampleState;
+  raster_discard ? NULL : pCreateInfo->pMultisampleState;
if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
   const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
  vk_find_struct_const(ms_info, 
PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
@@ -2452,8 +2451,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline 
*pipeline,
}
 
if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
-  if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
-  uses_color_att) {
+  if (!raster_discard && uses_color_att) {
  assert(pCreateInfo->pColorBlendState);
  const VkPipelineColorWriteCreateInfoEXT *color_write_info =
 vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,



Mesa (staging/22.0): pan/bi: Handle non-2D arrays

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 9c93c285ff107fcf4343b1cebad40a3060c51bb4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c93c285ff107fcf4343b1cebad40a3060c51bb4

Author: Alyssa Rosenzweig 
Date:   Sat Mar  5 15:49:43 2022 -0500

pan/bi: Handle non-2D arrays

Handle arrays generically by using the last component of the coordinate source
as the array index. That works for both 2D arrays and cube arrays, fixing cube
arrays. Cube arrays were already handled correctly in core Panfrost code.

This code path is not tested in dEQP-GLES31 without exposing OES_cube_map_array,
which depends on OES_geometry_shader, which we don't have. Yet we do expose
PIPE_CAP_CUBE_ARRAY, so ARB_cube_map_array is exposed.

Disabling PIPE_CAP_CUBE_ARRAY would be an easy band-aid fix, but it's easy
enough to handle correctly.

dEQP-GLES31 passes with a hack enabling OES_cube_map_array [without geometry
shaders].

Also fixes 1D arrays on Bifrost for the same reasons.

Fixes: 70d6c5675d6 ("pan/bi: Emit TEXC with builder")
Signed-off-by: Alyssa Rosenzweig 
Part-of: 
(cherry picked from commit 53f1e57ee7a7a6f7906c34cf7b78a39873fd00c2)

---

 .pick_status.json  |   2 +-
 src/panfrost/bifrost/bifrost_compile.c |  19 ++--
 src/panfrost/ci/panfrost-g52-fails.txt | 169 -
 3 files changed, 10 insertions(+), 180 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 4fb3f9c902f..d5a3f77d906 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -319,7 +319,7 @@
 "description": "pan/bi: Handle non-2D arrays",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "70d6c5675d68103aeb72983a100418da15dd2bc9"
 },
diff --git a/src/panfrost/bifrost/bifrost_compile.c 
b/src/panfrost/bifrost/bifrost_compile.c
index 903ea1b82de..5f0cf9bff76 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -2670,6 +2670,7 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
 for (unsigned i = 0; i < instr->num_srcs; ++i) {
 bi_index index = bi_src_index(&instr->src[i].src);
 unsigned sz = nir_src_bit_size(instr->src[i].src);
+unsigned components = 
nir_src_num_components(instr->src[i].src);
 ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
 nir_alu_type T = base | sz;
 
@@ -2678,27 +2679,25 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
 cx = bi_emit_texc_cube_coord(b, index, &cy);
} else {
-unsigned components = 
nir_src_num_components(instr->src[i].src);
-
 /* Copy XY (for 2D+) or XX (for 1D) */
 cx = index;
 cy = bi_word(index, MIN2(1, components - 1));
 
 assert(components >= 1 && components <= 3);
 
-if (components < 3) {
-/* nothing to do */
-} else if (desc.array) {
-/* 2D array */
-dregs[BIFROST_TEX_DREG_ARRAY] =
-bi_emit_texc_array_index(b,
-bi_word(index, 
2), T);
-} else {
+if (components == 3 && !desc.array) {
 /* 3D */
 dregs[BIFROST_TEX_DREG_Z_COORD] =
 bi_word(index, 2);
 }
 }
+
+if (desc.array) {
+dregs[BIFROST_TEX_DREG_ARRAY] =
+bi_emit_texc_array_index(b,
+bi_word(index, 
components - 1), T);
+}
+
 break;
 
 case nir_tex_src_lod:
diff --git a/src/panfrost/ci/panfrost-g52-fails.txt 
b/src/panfrost/ci/panfrost-g52-fails.txt
index aa8408239fa..60df76e709f 100644
--- a/src/panfrost/ci/panfrost-g52-fails.txt
+++ b/src/panfrost/ci/panfrost-g52-fails.txt
@@ -75,8 +75,6 @@ spec@arb_get_program_binary@restore-sso-program,Fail
 spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail
 
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_DEPTH_STENCIL,Fail
 
spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small@GL_DEPTH32F_STENCIL8-GL_STENCIL_INDEX,Fail
-spec@arb_pi

Mesa (staging/22.0): panfrost: Emulate GL_CLAMP on Bifrost

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: f254c2ebf4bf7dbbe99cc0ea570a802a77de9927
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f254c2ebf4bf7dbbe99cc0ea570a802a77de9927

Author: Alyssa Rosenzweig 
Date:   Sat Mar  5 15:10:10 2022 -0500

panfrost: Emulate GL_CLAMP on Bifrost

Hardware support was removed after Midgard. Use mesa/st to emulate GL_CLAMP with
nir_lower_tex automatically (the Zink lowering), and disable GL_MIRROR_CLAMP
which isn't lowered correctly.

Fixes *texwrap* Piglit tests on G52.

Fixes: f9ceab7b237 ("panfrost: Fix CLAMP wrap mode")
Signed-off-by: Alyssa Rosenzweig 
Part-of: 
(cherry picked from commit 1f97819fbe56a388878a23f49c0cf47016e4dde0)

---

 .pick_status.json|   2 +-
 src/gallium/drivers/panfrost/pan_cmdstream.c |  28 ++-
 src/gallium/drivers/panfrost/pan_screen.c|   3 +-
 src/panfrost/ci/panfrost-g52-fails.txt   | 244 ---
 4 files changed, 15 insertions(+), 262 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index e881283688a..4fb3f9c902f 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -328,7 +328,7 @@
 "description": "panfrost: Emulate GL_CLAMP on Bifrost",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "f9ceab7b237d40ded7728457114e59ca6707dd71"
 },
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c 
b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 3bc030eda65..88a80805790 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -121,31 +121,27 @@ panfrost_sample_pattern(unsigned samples)
 static unsigned
 translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest)
 {
-/* Bifrost doesn't support the GL_CLAMP wrap mode, so instead use
- * CLAMP_TO_EDGE and CLAMP_TO_BORDER. On Midgard, CLAMP is broken for
- * nearest filtering, so use CLAMP_TO_EDGE in that case. */
+/* CLAMP is only supported on Midgard, where it is broken for nearest
+ * filtering. Use CLAMP_TO_EDGE in that case.
+ */
 
 switch (w) {
 case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
-case PIPE_TEX_WRAP_CLAMP:
-return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
-#if PAN_ARCH <= 5
- MALI_WRAP_MODE_CLAMP;
-#else
- MALI_WRAP_MODE_CLAMP_TO_BORDER;
-#endif
 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return 
MALI_WRAP_MODE_CLAMP_TO_BORDER;
 case PIPE_TEX_WRAP_MIRROR_REPEAT: return 
MALI_WRAP_MODE_MIRRORED_REPEAT;
+case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
+case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+
+#if PAN_ARCH <= 5
+case PIPE_TEX_WRAP_CLAMP:
+return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
+   MALI_WRAP_MODE_CLAMP;
 case PIPE_TEX_WRAP_MIRROR_CLAMP:
 return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE :
-#if PAN_ARCH <= 5
- MALI_WRAP_MODE_MIRRORED_CLAMP;
-#else
- MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+   MALI_WRAP_MODE_MIRRORED_CLAMP;
 #endif
-case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
-case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return 
MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
+
 default: unreachable("Invalid wrap");
 }
 }
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index 9b11cc10de4..69f0bd74716 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -153,7 +153,6 @@ panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
 
 case PIPE_CAP_SAMPLER_VIEW_TARGET:
 case PIPE_CAP_TEXTURE_SWIZZLE:
-case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
 case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -284,6 +283,8 @@ panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
 return MAX_VARYING;
 
 /* Removed in v6 (Bifrost) */
+case PIPE_CAP_GL_CLAMP:
+case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
 case PIPE_CAP_ALPHA_TEST:
 return dev->arch <= 5;
 
diff --git a/src/panfrost/ci/panfrost-g52-fails.txt b/src/panfrost/ci/panfrost-g52-fails.txt
index efa83ad1ad9..aa8408239fa 100644
--- a/src/panfrost/ci/panfrost-g52-fails.txt
+++ b/src/panfrost/ci/panfrost-g52-fails.txt
@@ -60,30 +60,12 @@ 
spec@a

Mesa (staging/22.0): util: fix msvc build

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: ab468e1e0b9897ecaaabd4cfdd9e1cfc5b898cf2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab468e1e0b9897ecaaabd4cfdd9e1cfc5b898cf2

Author: Jonathan Gray 
Date:   Sat Mar 12 11:08:56 2022 +1100

util: fix msvc build

Fix msvc build regression after 0536b691338f2759b850f9ec94634033a5d1f9e1
reported by Prodea Alexandru-Liviu.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6137
Fixes: 0536b691338 ("util: fix build with clang 10 on mips64")
Reviewed-by: Ilia Mirkin 
Part-of: 
(cherry picked from commit e50eb1ce7ab371a4cb9016a2e41d0a30e08f)

---

 .pick_status.json| 2 +-
 src/util/u_debug_stack.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 8dc2792d163..f05b8bff466 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -436,7 +436,7 @@
 "description": "util: fix msvc build",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "0536b691338f2759b850f9ec94634033a5d1f9e1"
 },
diff --git a/src/util/u_debug_stack.c b/src/util/u_debug_stack.c
index 86bfb2fb64b..9042effc364 100644
--- a/src/util/u_debug_stack.c
+++ b/src/util/u_debug_stack.c
@@ -256,10 +256,11 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
const void **frame_pointer = ((const void **)__builtin_frame_address(1));
 #pragma GCC diagnostic pop
 #elif defined(PIPE_CC_MSVC)
+   const void **frame_pointer;
__asm {
   mov frame_pointer, ebp
}
-   const void **frame_pointer = (const void **)frame_pointer[0];
+   frame_pointer = (const void **)frame_pointer[0];
 #else
const void **frame_pointer = NULL;
 #endif



Mesa (staging/22.0): panfrost: Don't initialise the trampolines array

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 1dc2900731224f687bde4a233f27414a740de689
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1dc2900731224f687bde4a233f27414a740de689

Author: Icecream95 
Date:   Thu Oct 14 16:38:38 2021 +1300

panfrost: Don't initialise the trampolines array

PIPE_MAX_SHADER_SAMPLER_VIEWS is 128, so we just end up initialising a
kilobyte of memory for no reason, when usually only a couple of
sampler views are used.

Fixes: 53ef20f08d4 ("panfrost: Handle NULL sampler views")
Part-of: 
(cherry picked from commit 3e405afeb9c1cb1182f83e2a1fd6f0beb199df64)

---

 .pick_status.json| 2 +-
 src/gallium/drivers/panfrost/pan_cmdstream.c | 6 --
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index d5a3f77d906..e15d68df3f6 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -13,7 +13,7 @@
 "description": "panfrost: Don't initialise the trampolines array",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "53ef20f08d4340c1bad0b45a2501f4daba7fb479"
 },
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 88a80805790..bc3f01ddc0a 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -1387,13 +1387,15 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
 
 return T.gpu;
 #else
-uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { 0 };
+uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
 
-if (!view)
+if (!view) {
+trampolines[i] = 0;
 continue;
+}
 
 panfrost_update_sampler_view(view, &ctx->base);
 



Mesa (staging/22.0): freedreno/ir3: Don't try re-swapping cat3 srcs

2022-03-14 Thread GitLab Mirror
Module: Mesa
Branch: staging/22.0
Commit: 7976ceda5590b05cb453f829d23f57342eb0582f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7976ceda5590b05cb453f829d23f57342eb0582f

Author: Rob Clark 
Date:   Thu Mar 10 20:46:23 2022 -0800

freedreno/ir3: Don't try re-swapping cat3 srcs

This can lead us to endless loops of "progress". Note that the commit named
in the Fixes tag really just exposed an existing problem.

Fixes: 9c9e8c33498 ("nir: Reorder ffma and fsub combining")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6133
Signed-off-by: Rob Clark 
Part-of: 
(cherry picked from commit 05d687723530ed3c5c9f7d0addb3b047138613a3)

---

 .pick_status.json  |  2 +-
 src/freedreno/ir3/ir3.h|  1 +
 src/freedreno/ir3/ir3_cp.c | 22 +++---
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index f05b8bff466..e881283688a 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -337,7 +337,7 @@
 "description": "freedreno/ir3: Don't try re-swapping cat3 srcs",
 "nominated": true,
 "nomination_type": 1,
-"resolution": 0,
+"resolution": 1,
 "main_sha": null,
 "because_sha": "9c9e8c334981b1af7a709fa42cd5ef9dcf4d9791"
 },
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 5ef1eaeeeaf..7416a768e75 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -338,6 +338,7 @@ struct ir3_instruction {
 IR3_SRC_PACKED_LOW = 0,
 IR3_SRC_PACKED_HIGH = 1,
  } packed;
+ bool swapped;
   } cat3;
   struct {
  unsigned samp, tex;
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index c667f0fd03e..66694310a8d 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -276,10 +276,13 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
if (!is_mad(instr->opc))
   return false;
 
-   /* NOTE: pre-swap first two src's before valid_flags(),
-* which might try to dereference the n'th src:
+   /* If we've already tried, nothing more to gain.. we will only
+* have previously swapped if the original 2nd src was const or
+* immed.  So swapping back won't improve anything and could
+* result in an infinite "progress" loop.
 */
-   swap(instr->srcs[0], instr->srcs[1]);
+   if (instr->cat3.swapped)
+  return false;
 
/* cat3 doesn't encode immediate, but we can lower immediate
 * to const if that helps:
@@ -289,6 +292,19 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
   new_flags |= IR3_REG_CONST;
}
 
+   /* If the reason we couldn't fold without swapping is something
+* other than const source, then swapping won't help:
+*/
+   if (!(new_flags & IR3_REG_CONST))
+  return false;
+
+   instr->cat3.swapped = true;
+
+   /* NOTE: pre-swap first two src's before valid_flags(),
+* which might try to dereference the n'th src:
+*/
+   swap(instr->srcs[0], instr->srcs[1]);
+
bool valid_swap =
   /* can we propagate mov if we move 2nd src to first? */
   ir3_valid_flags(instr, 0, new_flags) &&