Re: [Mesa-dev] [PATCH v4 (part2) 29/59] nir: Implement __intrinsic_store_ssbo

2015-08-07 Thread Iago Toral
On Fri, 2015-08-07 at 07:43 +0200, Iago Toral wrote:
> On Thu, 2015-08-06 at 11:06 -0700, Connor Abbott wrote:
> > On Thu, Aug 6, 2015 at 12:30 AM, Iago Toral  wrote:
> > > On Wed, 2015-08-05 at 12:17 -0700, Connor Abbott wrote:
> > >> On Wed, Aug 5, 2015 at 1:30 AM, Iago Toral Quiroga  
> > >> wrote:
> > >> > ---
> > >> >  src/glsl/nir/glsl_to_nir.cpp  | 36 
> > >> > 
> > >> >  src/glsl/nir/nir_intrinsics.h | 12 ++--
> > >> >  2 files changed, 42 insertions(+), 6 deletions(-)
> > >> >
> > >> > diff --git a/src/glsl/nir/glsl_to_nir.cpp 
> > >> > b/src/glsl/nir/glsl_to_nir.cpp
> > >> > index 642affd..cbec2df 100644
> > >> > --- a/src/glsl/nir/glsl_to_nir.cpp
> > >> > +++ b/src/glsl/nir/glsl_to_nir.cpp
> > >> > @@ -641,6 +641,8 @@ nir_visitor::visit(ir_call *ir)
> > >> >   op = nir_intrinsic_image_atomic_comp_swap;
> > >> >} else if (strcmp(ir->callee_name(), 
> > >> > "__intrinsic_memory_barrier") == 0) {
> > >> >   op = nir_intrinsic_memory_barrier;
> > >> > +  } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") 
> > >> > == 0) {
> > >> > + op = nir_intrinsic_store_ssbo;
> > >> >} else {
> > >> >   unreachable("not reached");
> > >> >}
> > >> > @@ -730,6 +732,40 @@ nir_visitor::visit(ir_call *ir)
> > >> >}
> > >> >case nir_intrinsic_memory_barrier:
> > >> >   break;
> > >> > +  case nir_intrinsic_store_ssbo: {
> > >> > + exec_node *param = ir->actual_parameters.get_head();
> > >> > + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
> > >> > +
> > >> > + param = param->get_next();
> > >> > + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
> > >> > +
> > >> > + param = param->get_next();
> > >> > + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
> > >> > +
> > >> > + param = param->get_next();
> > >> > + ir_constant *write_mask = ((ir_instruction 
> > >> > *)param)->as_constant();
> > >> > + assert(write_mask);
> > >> > +
> > >> > + /* Check if we need the indirect version */
> > >> > + ir_constant *const_offset = offset->as_constant();
> > >> > + if (!const_offset) {
> > >> > +op = nir_intrinsic_store_ssbo_indirect;
> > >> > +ralloc_free(instr);
> > >> > +instr = nir_intrinsic_instr_create(shader, op);
> > >> > +instr->src[2] = evaluate_rvalue(offset);
> > >> > +instr->const_index[0] = 0;
> > >> > + } else {
> > >> > +instr->const_index[0] = const_offset->value.u[0];
> > >> > + }
> > >> > +
> > >> > + instr->const_index[1] = write_mask->value.u[0];
> > >> > +
> > >> > + instr->src[0] = evaluate_rvalue(val);
> > >> > + instr->num_components = val->type->vector_elements;
> > >> > +
> > >> > + instr->src[1] = evaluate_rvalue(block);
> > >> > + break;
> > >> > +  }
> > >> >default:
> > >> >   unreachable("not reached");
> > >> >}
> > >> > diff --git a/src/glsl/nir/nir_intrinsics.h 
> > >> > b/src/glsl/nir/nir_intrinsics.h
> > >> > index f264f55..83eeecd 100644
> > >> > --- a/src/glsl/nir/nir_intrinsics.h
> > >> > +++ b/src/glsl/nir/nir_intrinsics.h
> > >> > @@ -176,12 +176,12 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | 
> > >> > NIR_INTRINSIC_CAN_REORDER)
> > >> >   * offset.
> > >> >   */
> > >> >
> > >> > -#define STORE(name, num_indices, flags) \
> > >> > -   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, 
> > >> > flags) \
> > >> > -   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
> > >> > +#define STORE(name, extra_srcs, num_indices, flags) \
> > >> > +   INTRINSIC(store_##name, extra_srcs, ARR(0, 1), false, 0, 0, 
> > >> > num_indices, flags) \
> > >> > +   INTRINSIC(store_##name##_indirect, extra_srcs + 1, ARR(0, 1, 1), 
> > >> > false, 0, 0, \
> > >> >   num_indices, flags) \
> > >> >
> > >> > -STORE(output, 1, 0)
> > >> > -/* STORE(ssbo, 2, 0) */
> > >> > +STORE(output, 1, 2, 0)
> > >> > +STORE(ssbo, 2, 2, 0)
> > >>
> > >> I don't think outputs should have any extra sources, since they only
> > >> take a constant index, plus possibly an indirect source that's already
> > >> covered by the STORE macro. SSBO stores should only have one extra
> > >> source for the block index. Also, we should update the comment above
> > >> to explain this similarly to the paragraph above the loads.
> > >
> > > SSBO stores need an extra source for the block index and an extra index
> > > for a writemask.
> > >
> > > I'll leave the STORE() macro as it was and just define SSBO stores using
> > > INTRINSIC() directly then.
> > 
> > Ok, I see. I don't think you need a separate INTRINSIC(), but right
> > now calling the parameter you added "extra_srcs" is confusing, since
> > you're counting the value to be stored, which isn't really "extra" at
> > all -- every 

[Mesa-dev] [PATCH v4] clover: stub missing CL 1.2 functions

2015-08-07 Thread Serge Martin (EdB)
As suggested by Tom a long time ago
and in order to be able to create Piglit tests

v2:
replace NOT_SUPPORTED_BY_CL_1_1 macro with an inline function
remove extra space in clLinkProgram arg

v3:
use __func__

v4:
back to a macro, it makes more sense to use it with __func__
---
 src/gallium/state_trackers/clover/api/dispatch.cpp | 10 -
 src/gallium/state_trackers/clover/api/kernel.cpp   |  8 +++
 src/gallium/state_trackers/clover/api/memory.cpp   | 25 --
 src/gallium/state_trackers/clover/api/program.cpp  | 10 +
 src/gallium/state_trackers/clover/api/transfer.cpp | 12 +++
 src/gallium/state_trackers/clover/api/util.hpp |  7 ++
 6 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp 
b/src/gallium/state_trackers/clover/api/dispatch.cpp
index b5a4094..f10babe 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.cpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.cpp
@@ -123,12 +123,12 @@ namespace clover {
   clCreateImage,
   clCreateProgramWithBuiltInKernels,
   clCompileProgram,
-  NULL, // clLinkProgram
+  clLinkProgram,
   clUnloadPlatformCompiler,
-  NULL, // clGetKernelArgInfo
-  NULL, // clEnqueueFillBuffer
-  NULL, // clEnqueueFillImage
-  NULL, // clEnqueueMigrateMemObjects
+  clGetKernelArgInfo,
+  clEnqueueFillBuffer,
+  clEnqueueFillImage,
+  clEnqueueMigrateMemObjects,
   clEnqueueMarkerWithWaitList,
   clEnqueueBarrierWithWaitList,
   NULL, // clGetExtensionFunctionAddressForPlatform
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
b/src/gallium/state_trackers/clover/api/kernel.cpp
index 857a152..de3e300 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -189,6 +189,14 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id 
d_dev,
return CL_INVALID_DEVICE;
 }
 
+CLOVER_API cl_int
+clGetKernelArgInfo(cl_kernel d_kern,
+   cl_uint idx, cl_kernel_arg_info param,
+   size_t size, void *r_buf, size_t *r_size) {
+   CLOVER_NOT_SUPPORTED_BY("1.1");
+   return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
+}
+
 namespace {
///
/// Common argument checking shared by kernel invocation commands.
diff --git a/src/gallium/state_trackers/clover/api/memory.cpp 
b/src/gallium/state_trackers/clover/api/memory.cpp
index 3ff6ba0..42e8c30 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -358,8 +358,29 @@ clCreateImage(cl_context d_ctx, cl_mem_flags flags,
   const cl_image_desc *image_desc,
   void *host_ptr, cl_int *r_errcode) {
// This function was added in OpenCL 1.2
-   std::cerr << "CL user error: clCreateImage() not supported by OpenCL 1.1." 
<<
-std::endl;
+   CLOVER_NOT_SUPPORTED_BY("1.1");
ret_error(r_errcode, CL_INVALID_OPERATION);
return NULL;
 }
+
+CLOVER_API cl_int
+clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
+const void *pattern, size_t pattern_size,
+size_t offset, size_t size,
+cl_uint num_events_in_wait_list,
+const cl_event *event_wait_list,
+cl_event *event) {
+   CLOVER_NOT_SUPPORTED_BY("1.1");
+   return CL_INVALID_VALUE;
+}
+
+CLOVER_API cl_int
+clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
+   const void *fill_color,
+   const size_t *origin, const size_t *region,
+   cl_uint num_events_in_wait_list,
+   const cl_event *event_wait_list,
+   cl_event *event) {
+   CLOVER_NOT_SUPPORTED_BY("1.1");
+   return CL_INVALID_VALUE;
+}
diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
b/src/gallium/state_trackers/clover/api/program.cpp
index e9b1f38..0110789 100644
--- a/src/gallium/state_trackers/clover/api/program.cpp
+++ b/src/gallium/state_trackers/clover/api/program.cpp
@@ -231,6 +231,16 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs,
return e.get();
 }
 
+CLOVER_API cl_program
+clLinkProgram(cl_context d_ctx, cl_uint num_devs, const cl_device_id *d_devs,
+  const char *p_opts, cl_uint num_progs, const cl_program *d_progs,
+  void (*pfn_notify)(cl_program, void *), void *user_data,
+  cl_int *r_errcode) {
+   CLOVER_NOT_SUPPORTED_BY("1.1");
+   ret_error(r_errcode, CL_LINKER_NOT_AVAILABLE);
+   return NULL;
+}
+
 CLOVER_API cl_int
 clUnloadCompiler() {
return CL_SUCCESS;
diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
b/src/gallium/state_trackers/clover/api/transfer.cpp
index fdb9405..f5b8e2b 100644
--- a/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -726,3 +726,15 @@ clEnqueueUnmapMemObject(cl_command_qu

Re: [Mesa-dev] [PATCH 10/13] hash: Add _mesa_HashRemoveLocked() function.

2015-08-07 Thread Timothy Arceri
On Thu, 2015-08-06 at 17:11 -0700, Matt Turner wrote:
> ---
>  src/mesa/main/hash.c | 19 +++
>  src/mesa/main/hash.h |  2 ++
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
> index 315b5d6..aa1c6a1 100644
> --- a/src/mesa/main/hash.c
> +++ b/src/mesa/main/hash.c
> @@ -328,8 +328,8 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint 
> key, void *data)
>   * While holding the hash table's lock, searches the entry with the 
> matching
>   * key and unlinks it.
>   */
> -void
> -_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
> +static inline void
> +_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key)

Any reason why this isn't just _mesa_HashRemoveUnlocked to match the naming of
locked?


>  {
> struct hash_entry *entry;
>  
> @@ -343,17 +343,28 @@ _mesa_HashRemove(struct _mesa_HashTable *table, GLuint 
> key)
>return;
> }
>  
> -   mtx_lock(&table->Mutex);
> if (key == DELETED_KEY_VALUE) {
>table->deleted_key_data = NULL;
> } else {
>entry = _mesa_hash_table_search(table->ht, uint_key(key));
>_mesa_hash_table_remove(table->ht, entry);
> }
> -   mtx_unlock(&table->Mutex);
>  }
>  
>  
> +void
> +_mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key)
> +{
> +   _mesa_HashRemove_unlocked(table, key);
> +}
> +
> +void
> +_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
> +{
> +   mtx_lock(&table->Mutex);
> +   _mesa_HashRemove_unlocked(table, key);
> +   mtx_unlock(&table->Mutex);
> +}
>  
>  /**
>   * Delete all entries in a hash table, but don't delete the table itself.
> diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h
> index da3b997..52a6c5d 100644
> --- a/src/mesa/main/hash.h
> +++ b/src/mesa/main/hash.h
> @@ -54,6 +54,8 @@ extern void *_mesa_HashLookupLocked(struct _mesa_HashTable 
> *table, GLuint key);
>  extern void _mesa_HashInsertLocked(struct _mesa_HashTable *table,
> GLuint key, void *data);
>  
> +extern void _mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint 
> key);
> +
>  extern void
>  _mesa_HashDeleteAll(struct _mesa_HashTable *table,
>  void (*callback)(GLuint key, void *data, void 
> *userData),
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/13] mesa: Replace uses of Shared->Mutex with hash-table mutexes

2015-08-07 Thread Timothy Arceri
On Thu, 2015-08-06 at 17:11 -0700, Matt Turner wrote:
> We were locking the Shared->Mutex and then calling functions like
> _mesa_HashInsert that do additional per-hash-table locking internally.
> 
> Instead just lock each hash-table's mutex and use functions like
> _mesa_HashInsertLocked and the new _mesa_HashRemoveLocked.
> 
> In order to do this, we need to remove the locking from
> _mesa_HashFindFreeKeyBlock since it will always be called with the
> per-hash-table lock taken.
> ---
>  src/mesa/main/arbprogram.c|  7 ++-
>  src/mesa/main/atifragshader.c |  6 +-
>  src/mesa/main/bufferobj.c | 26 ++
>  src/mesa/main/dlist.c |  8 
>  src/mesa/main/fbobject.c  | 23 ++-
>  src/mesa/main/hash.c  |  4 
>  src/mesa/main/samplerobj.c| 23 ++-
>  src/mesa/main/shaderapi.c | 10 --
>  src/mesa/main/texobj.c| 14 +-
>  9 files changed, 74 insertions(+), 47 deletions(-)
> 
> diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c
> index f474951..3f7acda 100644
> --- a/src/mesa/main/arbprogram.c
> +++ b/src/mesa/main/arbprogram.c
> @@ -200,13 +200,18 @@ _mesa_GenProgramsARB(GLsizei n, GLuint *ids)
> if (!ids)
>return;
>  
> +   _mesa_HashLockMutex(ctx->Shared->Programs);
> +
> first = _mesa_HashFindFreeKeyBlock(ctx->Shared->Programs, n);
>  
> /* Insert pointer to dummy program as placeholder */
> for (i = 0; i < (GLuint) n; i++) {
> -  _mesa_HashInsert(ctx->Shared->Programs, first + i, 
> &_mesa_DummyProgram);
> +  _mesa_HashInsertLocked(ctx->Shared->Programs, first + i,
> + &_mesa_DummyProgram);
> }
>  
> +   _mesa_HashUnlockMutex(ctx->Shared->Programs);
> +
> /* Return the program names */
> for (i = 0; i < (GLuint) n; i++) {
>ids[i] = first + i;
> diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c
> index 935ba05..9dd4e21 100644
> --- a/src/mesa/main/atifragshader.c
> +++ b/src/mesa/main/atifragshader.c
> @@ -199,11 +199,15 @@ _mesa_GenFragmentShadersATI(GLuint range)
>return 0;
> }
>  
> +   _mesa_HashLockMutex(ctx->Shared->ATIShaders);
> +
> first = _mesa_HashFindFreeKeyBlock(ctx->Shared->ATIShaders, range);
> for (i = 0; i < range; i++) {
> -  _mesa_HashInsert(ctx->Shared->ATIShaders, first + i, &DummyShader);
> +  _mesa_HashInsertLocked(ctx->Shared->ATIShaders, first + i, 
> &DummyShader);
> }
>  
> +   _mesa_HashUnlockMutex(ctx->Shared->ATIShaders);
> +
> return first;
>  }
>  
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index 78af229..6a995e7 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -994,8 +994,11 @@ _mesa_lookup_bufferobj(struct gl_context *ctx, GLuint 
> buffer)
>  struct gl_buffer_object *
>  _mesa_lookup_bufferobj_locked(struct gl_context *ctx, GLuint buffer)
>  {
> -   return (struct gl_buffer_object *)
> -  _mesa_HashLookupLocked(ctx->Shared->BufferObjects, buffer);
> +   if (buffer == 0)
> +  return NULL;
> +   else
> +  return (struct gl_buffer_object *)
> + _mesa_HashLookupLocked(ctx->Shared->BufferObjects, buffer);
>  }
>  
>  /**
> @@ -1179,10 +1182,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
>return;
> }
>  
> -   mtx_lock(&ctx->Shared->Mutex);
> +   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
>  
> for (i = 0; i < n; i++) {
> -  struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, 
> ids[i]);
> +  struct gl_buffer_object *bufObj =
> + _mesa_lookup_bufferobj_locked(ctx, ids[i]);
>if (bufObj) {
>   struct gl_vertex_array_object *vao = ctx->Array.VAO;
>   GLuint j;
> @@ -1276,7 +1280,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
>   }
>  
>   /* The ID is immediately freed for re-use */
> - _mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
> + _mesa_HashRemoveLocked(ctx->Shared->BufferObjects, ids[i]);
>   /* Make sure we do not run into the classic ABA problem on bind.
>* We don't want to allow re-binding a buffer object that's been
>* "deleted" by glDeleteBuffers().
> @@ -1292,7 +1296,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
>}
> }
>  
> -   mtx_unlock(&ctx->Shared->Mutex);
> +   _mesa_HashUnlockMutex(ctx->Shared->BufferObjects);
>  }
>  
>  
> @@ -1326,7 +1330,7 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa)
> /*
>  * This must be atomic (generation and allocation of buffer object IDs)
>  */
> -   mtx_lock(&ctx->Shared->Mutex);
> +   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
>  
> first = _mesa_HashFindFreeKeyBlock(ctx->Shared->BufferObjects, n);
>  
> @@ -1341,17 +1345,17 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa)
>   buf = ctx->Driver.NewBufferObject(ctx, buffers[i]);
>   

Re: [Mesa-dev] [PATCH 00/13] mesa: Locking improvements and optimizations

2015-08-07 Thread Timothy Arceri
On Thu, 2015-08-06 at 17:10 -0700, Matt Turner wrote:
> Patches 1-11 improve performance of SynMark OglBatch7 by 6.29586% +/- 
> 0.277734%
> (n=337) and OglMultithread by 1.12564% +/- 0.424038% (n=209). I haven't
> benchmarked individual patches because I'd like to not waste all that time 
> if I
> get review feedback that requires me to change things. :)
> 
> Patches 12-13 were supposed to improve performance, but seem to make an
> existing thread-safety problem worse, so I'm not proposing them for 
> inclusion.
> 
> 
> [01/13] c11/threads: Assert that mtx is non-NULL and check
> [02/13] mesa: Remove debugging code from _mesa_reference_*.
> 

Small comment on 1 but doesn't really matter.

These two are Reviewed-by: Timothy Arceri 


> [03/13] mesa: Add locking to sampler objects.
> [04/13] mesa: Add locking to programs.
> 
>These two add missing locks to sampler and program objects, which
>I believe are supposed to be thread-safe.

These two look correct but shouldn't you just squash these with 6 and 7?

If you really want to show the code evolution in git history then: 
Reviewed-by: Timothy Arceri 

> 
> [05/13] mesa: Replace buffer object locks with atomic inc/dec.
> [06/13] mesa: Replace sampler object locks with atomic inc/dec.
> [07/13] mesa: Replace program locks with atomic inc/dec.
> [08/13] mesa: Replace renderbuffer object locks with atomic inc/dec.
> [09/13] mesa: Replace texture buffer object locks with atomic inc/dec.
> 
>These five replace locks around RefCount++/-- with atomic increment
>and decrement.

Nice I didn't know about this.

Reviewed-by: Timothy Arceri 

> 
> [10/13] hash: Add _mesa_HashRemoveLocked() function.
> [11/13] mesa: Replace uses of Shared->Mutex with hash-table mutexes
> 
>These two replace uses of ctx->Shared->Mutex with the mutexes in the
>hash tables.

Nice. I was looking in this area early in the year when I noticed some common
Phoronix benchmark games spent a lot of time locking in these areas so maybe
some real-world gains here.

Some minor comments on these.

Reviewed-by: Timothy Arceri 

> 
> [12/13] mesa: Remove unnecessary locking from container
> [13/13] mesa: Remove deleteFlag pattern.
> 
>*I am not proposing these for inclusion*
> 
>These two remove some "unnecessary" locking from so called "container
>objects" that are not shared between threads by the GL. While I expected
>them to improve performance, they actually cause double-free errors in
>SynMark OglMultithread. Valgrind's helgrind tool shows that there are 
> many
>thread-safety issues in the texture code, and removing these locks seems to
>exacerbate the problem.
> 
>Specifically, multiple threads are reading and writing to 
> gl_texture_objects
>without any synchronization from places like 
> intel_finalize_mipmap_tree(),
>gen7_update_texture_surface(), brw_populate_sampler_prog_key_data(),
>update_sampler_state(), and _mesa_BindTexture().
> 
>Suggestions for solving this (apparently quite longstanding) problem are
>welcome.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gles/es3.1: Implement glMemoryBarrierByRegion

2015-08-07 Thread Tapani Pälli
Super, I've verified that this makes 
ES31-CTS.shader_image_load_store.basic-api-barrier-byRegion pass with 
one of Curro's branches.


Reviewed-by: Tapani Pälli 

On 08/04/2015 11:22 AM, Marta Lofstedt wrote:

From: Marta Lofstedt 

Signed-off-by: Marta Lofstedt 
---
  src/mapi/glapi/gen/gl_API.xml   |  4 
  src/mesa/main/shaderimage.c | 40 +
  src/mesa/main/shaderimage.h |  3 +++
  src/mesa/main/tests/dispatch_sanity.cpp |  3 +--
  4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 658efa4..3db4349 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -2966,6 +2966,10 @@
  
  
  
+
+
+
+
  

  
diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index a348cdb..7337f22 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -653,3 +653,43 @@ _mesa_MemoryBarrier(GLbitfield barriers)
 if (ctx->Driver.MemoryBarrier)
ctx->Driver.MemoryBarrier(ctx, barriers);
  }
+
+void GLAPIENTRY
+_mesa_MemoryBarrierByRegion(GLbitfield barriers)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT |
+ GL_FRAMEBUFFER_BARRIER_BIT |
+ GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
+ GL_SHADER_STORAGE_BARRIER_BIT |
+ GL_TEXTURE_FETCH_BARRIER_BIT |
+ GL_UNIFORM_BARRIER_BIT;
+
+   if (ctx->Driver.MemoryBarrier) {
+  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
+   *
+   *"When barriers is ALL_BARRIER_BITS, shader memory accesses will be
+   * synchronized relative to all these barrier bits, but not to other
+   * barrier bits specific to MemoryBarrier."
+   *
+   * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then 
all
+   * barriers allowed by glMemoryBarrierByRegion should be activated."
+   */
+  if (barriers == GL_ALL_BARRIER_BITS)
+ return ctx->Driver.MemoryBarrier(ctx, all_allowed_bits);
+
+  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
+   *
+   *"An INVALID_VALUE error is generated if barriers is not the special
+   * value ALL_BARRIER_BITS, and has any bits set other than those
+   * described above."
+   */
+  if ((barriers & ~all_allowed_bits) != 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glMemoryBarrierByRegion(unsupported barrier bit");
+  }
+
+  ctx->Driver.MemoryBarrier(ctx, barriers);
+   }
+}
diff --git a/src/mesa/main/shaderimage.h b/src/mesa/main/shaderimage.h
index 33d8a1e..d08ece8 100644
--- a/src/mesa/main/shaderimage.h
+++ b/src/mesa/main/shaderimage.h
@@ -68,6 +68,9 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const 
GLuint *textures);
  void GLAPIENTRY
  _mesa_MemoryBarrier(GLbitfield barriers);

+void GLAPIENTRY
+_mesa_MemoryBarrierByRegion(GLbitfield barriers);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index af89d2c..14c9eda 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2461,8 +2461,7 @@ const struct function gles31_functions_possible[] = {
 { "glGetBooleani_v", 31, -1 },
 { "glMemoryBarrier", 31, -1 },

-   // FINISHME: This function has not been implemented yet.
-   // { "glMemoryBarrierByRegion", 31, -1 },
+   { "glMemoryBarrierByRegion", 31, -1 },

 { "glTexStorage2DMultisample", 31, -1 },
 { "glGetMultisamplefv", 31, -1 },


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 (part2) 02/59] i965/vec4: Import helpers to convert vectors into arrays and back.

2015-08-07 Thread Iago Toral
On Wed, 2015-08-05 at 10:29 +0200, Iago Toral Quiroga wrote:
> From: Francisco Jerez 
> 
> These functions handle the conversion of a vec4 into the form expected
> by the dataport unit in message and message return payloads.  The
> conversion is not always trivial because some messages don't support
> SIMD4x2 for some generations, in which case a strided copy may be
> necessary.
> 
> v2: Split from the FS implementation.
> v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip.
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources |  2 +
>  .../drivers/dri/i965/brw_vec4_surface_builder.cpp  | 98 
> ++
>  .../drivers/dri/i965/brw_vec4_surface_builder.h| 30 +++
>  3 files changed, 130 insertions(+)
>  create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
>  create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
> b/src/mesa/drivers/dri/i965/Makefile.sources
> index 88e64fb..049e293 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -129,6 +129,8 @@ i965_FILES = \
>   brw_vec4_nir.cpp \
>   brw_vec4_gs_nir.cpp \
>   brw_vec4_reg_allocate.cpp \
> + brw_vec4_surface_builder.cpp \
> + brw_vec4_surface_builder.h \
>   brw_vec4_visitor.cpp \
>   brw_vec4_vp.cpp \
>   brw_vec4_vs_visitor.cpp \
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> new file mode 100644
> index 000..5ba1c6d
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> @@ -0,0 +1,98 @@
> +/*
> + * Copyright © 2013-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "brw_vec4_surface_builder.h"
> +
> +using namespace brw;
> +
> +namespace {
> +   namespace array_utils {
> +  /**
> +   * Copy one every \p src_stride logical components of the argument into
> +   * one every \p dst_stride logical components of the result.
> +   */
> +  src_reg
> +  emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
> +  unsigned dst_stride, unsigned src_stride)
> +  {
> + if (src_stride == 1 && dst_stride == 1) {
> +return src;
> + } else {
> +const dst_reg dst = bld.vgrf(src.type,
> + DIV_ROUND_UP(size * dst_stride, 4));
> +
> +for (unsigned i = 0; i < size; ++i)
> +   bld.MOV(writemask(offset(dst, i * dst_stride / 4),
> + 1 << (i * dst_stride % 4)),
> +   swizzle(offset(src, i * src_stride / 4),
> +   brw_swizzle_for_mask(1 << (i * src_stride % 
> 4;
> +
> +return src_reg(dst);
> + }
> +  }
> +
> +  /**
> +   * Convert a VEC4 into an array of registers with the layout expected 
> by
> +   * the recipient shared unit.  If \p has_simd4x2 is true the argument 
> is
> +   * left unmodified in SIMD4x2 form, otherwise it will be rearranged 
> into
> +   * a SIMD8 vector.
> +   */
> +  src_reg
> +  emit_insert(const vec4_builder &bld, const src_reg &src,
> +  unsigned n, bool has_simd4x2)
> +  {
> + if (src.file == BAD_FILE || n == 0) {
> +return src_reg();
> +
> + } else {
> +/* Pad unused components with zeroes. */
> +const unsigned mask = (1 << n) - 1;
> +const dst_reg tmp = bld.vgrf(src.type);
> +
> +bld.MOV(writemask(tmp, mask), src);
> +if (n < 4)
> +   bld.MOV(writemask(tmp, ~mask), 0);
> +
> +return emit_stride(bld, src, n, has_simd4x2 ? 1 : 4, 1);

Re: [Mesa-dev] [PATCH v4 (part2) 02/59] i965/vec4: Import helpers to convert vectors into arrays and back.

2015-08-07 Thread Francisco Jerez
Iago Toral  writes:

> On Wed, 2015-08-05 at 10:29 +0200, Iago Toral Quiroga wrote:
>> From: Francisco Jerez 
>> 
>> These functions handle the conversion of a vec4 into the form expected
>> by the dataport unit in message and message return payloads.  The
>> conversion is not always trivial because some messages don't support
>> SIMD4x2 for some generations, in which case a strided copy may be
>> necessary.
>> 
>> v2: Split from the FS implementation.
>> v3: Rewrite to avoid evil array_reg, emit_collect and emit_zip.
>> ---
>>  src/mesa/drivers/dri/i965/Makefile.sources |  2 +
>>  .../drivers/dri/i965/brw_vec4_surface_builder.cpp  | 98 
>> ++
>>  .../drivers/dri/i965/brw_vec4_surface_builder.h| 30 +++
>>  3 files changed, 130 insertions(+)
>>  create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
>>  create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
>> 
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
>> b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 88e64fb..049e293 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -129,6 +129,8 @@ i965_FILES = \
>>  brw_vec4_nir.cpp \
>>  brw_vec4_gs_nir.cpp \
>>  brw_vec4_reg_allocate.cpp \
>> +brw_vec4_surface_builder.cpp \
>> +brw_vec4_surface_builder.h \
>>  brw_vec4_visitor.cpp \
>>  brw_vec4_vp.cpp \
>>  brw_vec4_vs_visitor.cpp \
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
>> new file mode 100644
>> index 000..5ba1c6d
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
>> @@ -0,0 +1,98 @@
>> +/*
>> + * Copyright © 2013-2015 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "brw_vec4_surface_builder.h"
>> +
>> +using namespace brw;
>> +
>> +namespace {
>> +   namespace array_utils {
>> +  /**
>> +   * Copy one every \p src_stride logical components of the argument 
>> into
>> +   * one every \p dst_stride logical components of the result.
>> +   */
>> +  src_reg
>> +  emit_stride(const vec4_builder &bld, const src_reg &src, unsigned 
>> size,
>> +  unsigned dst_stride, unsigned src_stride)
>> +  {
>> + if (src_stride == 1 && dst_stride == 1) {
>> +return src;
>> + } else {
>> +const dst_reg dst = bld.vgrf(src.type,
>> + DIV_ROUND_UP(size * dst_stride, 
>> 4));
>> +
>> +for (unsigned i = 0; i < size; ++i)
>> +   bld.MOV(writemask(offset(dst, i * dst_stride / 4),
>> + 1 << (i * dst_stride % 4)),
>> +   swizzle(offset(src, i * src_stride / 4),
>> +   brw_swizzle_for_mask(1 << (i * src_stride % 
>> 4;
>> +
>> +return src_reg(dst);
>> + }
>> +  }
>> +
>> +  /**
>> +   * Convert a VEC4 into an array of registers with the layout expected 
>> by
>> +   * the recipient shared unit.  If \p has_simd4x2 is true the argument 
>> is
>> +   * left unmodified in SIMD4x2 form, otherwise it will be rearranged 
>> into
>> +   * a SIMD8 vector.
>> +   */
>> +  src_reg
>> +  emit_insert(const vec4_builder &bld, const src_reg &src,
>> +  unsigned n, bool has_simd4x2)
>> +  {
>> + if (src.file == BAD_FILE || n == 0) {
>> +return src_reg();
>> +
>> + } else {
>> +/* Pad unused components with zeroes. */
>> +const unsigned mask = (1 << n) - 1;
>> +const dst_reg tmp = bld.vgrf(src.type);
>> +
>> +bld.MOV(writemask(tmp, mask), src);

Re: [Mesa-dev] driDriverAPI help

2015-08-07 Thread Marek Olšák
Hi,

All Gallium drivers use this directory, because it contains some
common driver loader code:
src/mesa/drivers/dri/common

Marek


On Wed, Aug 5, 2015 at 3:24 PM, Daniel Melo Jorge da Cunha
 wrote:
> Yes, yes... I know this is an expert group but I searched the whole web,
> sent a few
> emails and nothing... this is driving me mad...
>
> The point is driDriverAPI is initialized in
> src/gallium/state_trackers/dri/drm/dri2.c but
> this variable is only used in src/mesa/drivers...
>
> mesa3d web site states that this directory, src/mesa/drivers, is not used
> with
> gallium... so I'm lost.
>
> Any help is very much welcomed to help clarify me what is going on.
> Thanks in advance.
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 28130] vbo: premature flushing breaks GL_LINE_LOOP

2015-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=28130

Marek Olšák  changed:

   What|Removed |Added

 QA Contact|marius.pre...@intel.com |mesa-dev@lists.freedesktop.
   ||org

--- Comment #10 from Marek Olšák  ---
Please keep the QA contact as mesa-dev, so that we can receive all messages.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] driDriverAPI help

2015-08-07 Thread Emil Velikov
On 7 August 2015 at 11:54, Marek Olšák  wrote:
> Hi,
>
> All Gallium drivers use this directory, because it contains some
> common driver loader code:
> src/mesa/drivers/dri/common
>
To prevent any possible confusion - all gallium DRI drivers. Other
drivers such as vdpau/opencl/etc do not use it.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 4/4] i965/vec4: Don't unspill the same register in consecutive instructions

2015-08-07 Thread Francisco Jerez
Iago Toral  writes:

> On Thu, 2015-08-06 at 18:27 +0300, Francisco Jerez wrote:
>> Iago Toral Quiroga  writes:
>> 
>> > If we have spilled/unspilled a register in the current instruction, avoid
>> > emitting unspills for the same register in the same instruction or 
>> > consecutive
>> > instructions following the current one as long as they keep reading the 
>> > spilled
>> > register. This should allow us to avoid emitting costly unspills that come 
>> > with
>> > little benefit to register allocation.
>> >
>> > Also, update evaluate_spill_costs so that we account for the saved 
>> > unspills.
>> > ---
>> >  .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 129 
>> > +++--
>> >  1 file changed, 121 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
>> > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
>> > index 617c988..fed5f4d 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
>> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
>> > @@ -264,6 +264,95 @@ vec4_visitor::reg_allocate()
>> > return true;
>> >  }
>> >  
>> > +/**
>> > + * When we decide to spill a register, instead of blindly spilling every 
>> > use,
>> > + * save unspills when the spill register is used (read) in consecutive
>> > + * instructions. This can potentially save a bunch of unspills that would
>> > + * have very little impact in register allocation anyway.
>> > + *
>> > + * Notice that we need to account for this behavior when spilling a 
>> > register
>> > + * and when evaluating spilling costs. This function is designed so it can
>> > + * be called from both places and avoid repeating the logic.
>> > + *
>> > + *  - When we call this function from spill_reg, we pass in scratch_reg 
>> > the
>> > + *actual unspill/spill register that we want to reuse in the current
>> > + *instruction.
>> > + *
>> > + *  - When we call this from evaluate_spill_costs, we pass the register 
>> > for
>> > + *which we are evaluating spilling costs.
>> > + *
>> > + * In either case, we check if the previous instructions read scratch_reg 
>> > until
>> > + * we find an instruction that writes to it (in which case we can reuse
>> > + * scratch_reg as long as the writemask is compatible with the channels 
>> > we need
>> > + * to read in the current instruction) or we hit an instruction that does 
>> > not
>> > + * read scratch_reg at all. The latter can only happen when we call this 
>> > from
>> > + * evaluate_spill_costs,
>> 
>> Strictly speaking it can also happen when called from spill_reg() for
>> the first time in a given sequence of consecutive instructions (in which
>> case you correctly return false).
>
> True, I'll fix the comment.
>
>> >  and means that this is the point at which we first
>> > + * need to unspill this register for our current instruction. Since all 
>> > our
>> > + * unspills read a full vec4, we know that in this case we will have all
>> > + * the channels available in scratch_reg and we can reuse it.
>> > + *
>> > + * In any other case, we can't reuse scratch_reg in the current 
>> > instruction,
>> > + * meaning that we will need to unspill it.
>> > + */
>> > +static bool
>> > +can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
>> > +   unsigned scratch_reg)
>> > +{
>> > +   assert(inst->src[i].file == GRF);
>> > +
>> > +   /* If the current instruction is already using scratch_reg in src[n] 
>> > with
>> > +* n < i, then we know we can reuse it for src[i] too.
>> > +*/
>> > +   for (unsigned n = 0; n < i; n++) {
>> > +  if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
>> > + return true;
>> > +   }
>> 
>> I don't think this is correct in cases where the previous source reused
>> the temporary of a previously spilled register with incompatible
>> writemask.  You probably want to handle the current instruction
>> consistently with the previous ones, i.e. as part of the loop below.
>
> Yes, you're right.
>
>> I suggest you define a variable (e.g. n as you've called it) initially
>> equal to i that would determine the number of sources to check for the
>> next instruction.  At the end of the loop body it would be re-set to 3,
>> which would also cause the destination registers to be checked in
>> subsequent iterations.
>
> Sounds good to me.
>
>> > +
>> > +   bool prev_inst_read_scratch_reg = false;
>> > +   vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
>> 
>> You can move this declaration into the init statement of the for loop to
>> limit its scope.
>
> Ok.
>
>> > +   for (; !prev_inst->is_head_sentinel();
>> > +prev_inst = (vec4_instruction *) prev_inst->prev) {
>> > +  /* If any previous instruction does not read from or write to 
>> > scratch_reg
>> > +   * unconditionally, we cannot reuse scratch_reg
>> > +   */
>> > +  if (prev_inst->predicate && prev_inst->opcode != BRW_OPCODE_SE

Re: [Mesa-dev] [PATCH v3 4/4] i965/vec4: Don't unspill the same register in consecutive instructions

2015-08-07 Thread Iago Toral
On Fri, 2015-08-07 at 14:14 +0300, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Thu, 2015-08-06 at 18:27 +0300, Francisco Jerez wrote:
> >> Iago Toral Quiroga  writes:
> >> 
> >> > If we have spilled/unspilled a register in the current instruction, avoid
> >> > emitting unspills for the same register in the same instruction or 
> >> > consecutive
> >> > instructions following the current one as long as they keep reading the 
> >> > spilled
> >> > register. This should allow us to avoid emitting costly unspills that 
> >> > come with
> >> > little benefit to register allocation.
> >> >
> >> > Also, update evaluate_spill_costs so that we account for the saved 
> >> > unspills.
> >> > ---
> >> >  .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 129 
> >> > +++--
> >> >  1 file changed, 121 insertions(+), 8 deletions(-)
> >> >
> >> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
> >> > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> > index 617c988..fed5f4d 100644
> >> > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> >> > @@ -264,6 +264,95 @@ vec4_visitor::reg_allocate()
> >> > return true;
> >> >  }
> >> >  
> >> > +/**
> >> > + * When we decide to spill a register, instead of blindly spilling 
> >> > every use,
> >> > + * save unspills when the spill register is used (read) in consecutive
> >> > + * instructions. This can potentially save a bunch of unspills that 
> >> > would
> >> > + * have very little impact in register allocation anyway.
> >> > + *
> >> > + * Notice that we need to account for this behavior when spilling a 
> >> > register
> >> > + * and when evaluating spilling costs. This function is designed so it 
> >> > can
> >> > + * be called from both places and avoid repeating the logic.
> >> > + *
> >> > + *  - When we call this function from spill_reg, we pass in scratch_reg 
> >> > the
> >> > + *actual unspill/spill register that we want to reuse in the current
> >> > + *instruction.
> >> > + *
> >> > + *  - When we call this from evaluate_spill_costs, we pass the register 
> >> > for
> >> > + *which we are evaluating spilling costs.
> >> > + *
> >> > + * In either case, we check if the previous instructions read 
> >> > scratch_reg until
> >> > + * we find an instruction that writes to it (in which case we can reuse
> >> > + * scratch_reg as long as the writemask is compatible with the channels 
> >> > we need
> >> > + * to read in the current instruction) or we hit an instruction that 
> >> > does not
> >> > + * read scratch_reg at all. The latter can only happen when we call 
> >> > this from
> >> > + * evaluate_spill_costs,
> >> 
> >> Strictly speaking it can also happen when called from spill_reg() for
> >> the first time in a given sequence of consecutive instructions (in which
> >> case you correctly return false).
> >
> > True, I'll fix the comment.
> >
> >> >  and means that this is the point at which we first
> >> > + * need to unspill this register for our current instruction. Since 
> >> > all our
> >> > + * unspills read a full vec4, we know that in this case we will have all
> >> > + * the channels available in scratch_reg and we can reuse it.
> >> > + *
> >> > + * In any other case, we can't reuse scratch_reg in the current 
> >> > instruction,
> >> > + * meaning that we will need to unspill it.
> >> > + */
> >> > +static bool
> >> > +can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
> >> > +   unsigned scratch_reg)
> >> > +{
> >> > +   assert(inst->src[i].file == GRF);
> >> > +
> >> > +   /* If the current instruction is already using scratch_reg in src[n] 
> >> > with
> >> > +* n < i, then we know we can reuse it for src[i] too.
> >> > +*/
> >> > +   for (unsigned n = 0; n < i; n++) {
> >> > +  if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
> >> > + return true;
> >> > +   }
> >> 
> >> I don't think this is correct in cases where the previous source reused
> >> the temporary of a previously spilled register with incompatible
> >> writemask.  You probably want to handle the current instruction
> >> consistently with the previous ones, i.e. as part of the loop below.
> >
> > Yes, you're right.
> >
> >> I suggest you define a variable (e.g. n as you've called it) initially
> >> equal to i that would determine the number of sources to check for the
> >> next instruction.  At the end of the loop body it would be re-set to 3,
> >> which would also cause the destination registers to be checked in
> >> subsequent iterations.
> >
> > Sounds good to me.
> >
> >> > +
> >> > +   bool prev_inst_read_scratch_reg = false;
> >> > +   vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
> >> 
> >> You can move this declaration into the init statement of the for loop to
> >> limit its scope.
> >
> > Ok.
> >
> >> > +   for (; !prev_inst->is_head_sentinel

Re: [Mesa-dev] [PATCH v4] clover: stub missing CL 1.2 functions

2015-08-07 Thread Francisco Jerez
"Serge Martin (EdB)"  writes:

> As suggested by Tom a long time ago
> and in order to be able to create Piglit tests
>
> v2:
> replace NOT_SUPPORTED_BY_CL_1_1 macro with an inline function
> remove extra space in clLinkProgram arg
>
> v3:
> use __func__
>
> v4:
> back to a macro, it makes more sense to use it with __func__

Instead of annoying you with another nit-pick I've squashed the attached
change into your patch locally -- A warning like "Not supported by X.Y"
with hard-coded X and Y can easily become stale as soon as we start
supporting multiple CL versions, it seems more future-proof to just
say from what version on it will be supported.

I'll push this with my fix as soon as you confirm you're OK with my
change.

> ---
>  src/gallium/state_trackers/clover/api/dispatch.cpp | 10 -
>  src/gallium/state_trackers/clover/api/kernel.cpp   |  8 +++
>  src/gallium/state_trackers/clover/api/memory.cpp   | 25 
> --
>  src/gallium/state_trackers/clover/api/program.cpp  | 10 +
>  src/gallium/state_trackers/clover/api/transfer.cpp | 12 +++
>  src/gallium/state_trackers/clover/api/util.hpp |  7 ++
>  6 files changed, 65 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp 
> b/src/gallium/state_trackers/clover/api/dispatch.cpp
> index b5a4094..f10babe 100644
> --- a/src/gallium/state_trackers/clover/api/dispatch.cpp
> +++ b/src/gallium/state_trackers/clover/api/dispatch.cpp
> @@ -123,12 +123,12 @@ namespace clover {
>clCreateImage,
>clCreateProgramWithBuiltInKernels,
>clCompileProgram,
> -  NULL, // clLinkProgram
> +  clLinkProgram,
>clUnloadPlatformCompiler,
> -  NULL, // clGetKernelArgInfo
> -  NULL, // clEnqueueFillBuffer
> -  NULL, // clEnqueueFillImage
> -  NULL, // clEnqueueMigrateMemObjects
> +  clGetKernelArgInfo,
> +  clEnqueueFillBuffer,
> +  clEnqueueFillImage,
> +  clEnqueueMigrateMemObjects,
>clEnqueueMarkerWithWaitList,
>clEnqueueBarrierWithWaitList,
>NULL, // clGetExtensionFunctionAddressForPlatform
> diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
> b/src/gallium/state_trackers/clover/api/kernel.cpp
> index 857a152..de3e300 100644
> --- a/src/gallium/state_trackers/clover/api/kernel.cpp
> +++ b/src/gallium/state_trackers/clover/api/kernel.cpp
> @@ -189,6 +189,14 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id 
> d_dev,
> return CL_INVALID_DEVICE;
>  }
>  
> +CLOVER_API cl_int
> +clGetKernelArgInfo(cl_kernel d_kern,
> +   cl_uint idx, cl_kernel_arg_info param,
> +   size_t size, void *r_buf, size_t *r_size) {
> +   CLOVER_NOT_SUPPORTED_BY("1.1");
> +   return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
> +}
> +
>  namespace {
> ///
> /// Common argument checking shared by kernel invocation commands.
> diff --git a/src/gallium/state_trackers/clover/api/memory.cpp 
> b/src/gallium/state_trackers/clover/api/memory.cpp
> index 3ff6ba0..42e8c30 100644
> --- a/src/gallium/state_trackers/clover/api/memory.cpp
> +++ b/src/gallium/state_trackers/clover/api/memory.cpp
> @@ -358,8 +358,29 @@ clCreateImage(cl_context d_ctx, cl_mem_flags flags,
>const cl_image_desc *image_desc,
>void *host_ptr, cl_int *r_errcode) {
> // This function was added in OpenCL 1.2
> -   std::cerr << "CL user error: clCreateImage() not supported by OpenCL 
> 1.1." <<
> -std::endl;
> +   CLOVER_NOT_SUPPORTED_BY("1.1");
> ret_error(r_errcode, CL_INVALID_OPERATION);
> return NULL;
>  }
> +
> +CLOVER_API cl_int
> +clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
> +const void *pattern, size_t pattern_size,
> +size_t offset, size_t size,
> +cl_uint num_events_in_wait_list,
> +const cl_event *event_wait_list,
> +cl_event *event) {
> +   CLOVER_NOT_SUPPORTED_BY("1.1");
> +   return CL_INVALID_VALUE;
> +}
> +
> +CLOVER_API cl_int
> +clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
> +   const void *fill_color,
> +   const size_t *origin, const size_t *region,
> +   cl_uint num_events_in_wait_list,
> +   const cl_event *event_wait_list,
> +   cl_event *event) {
> +   CLOVER_NOT_SUPPORTED_BY("1.1");
> +   return CL_INVALID_VALUE;
> +}
> diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
> b/src/gallium/state_trackers/clover/api/program.cpp
> index e9b1f38..0110789 100644
> --- a/src/gallium/state_trackers/clover/api/program.cpp
> +++ b/src/gallium/state_trackers/clover/api/program.cpp
> @@ -231,6 +231,16 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs,
> return e.get();
>  }
>  
> +CLOVER_API cl_program
> +clLinkProgram(cl_context d_ctx, cl_uint num_devs, const cl_device_id *d_devs,
> +  

Re: [Mesa-dev] [PATCH v4] clover: stub missing CL 1.2 functions

2015-08-07 Thread Serge Martin
On Friday 07 August 2015 14:45:56 Francisco Jerez wrote:
> "Serge Martin (EdB)"  writes:
> > As suggested by Tom a long time ago
> > and in order to be able to create Piglit tests
> > 
> > v2:
> > replace NOT_SUPPORTED_BY_CL_1_1 macro with an inline function
> > remove extra space in clLinkProgram arg
> > 
> > v3:
> > use __func__
> > 
> > v4:
> > back to a macro, it makes more sense to use it with __func__
> 
> Instead of annoying you with another nit-pick I've squashed the attached
> change into your patch locally -- A warning like "Not supported by X.Y"
> with hard-coded X and Y can easily become stale as soon as we start
> supporting multiple CL versions, it seems more future-proof to just
> say from what version on it will be supported.
> 
> I'll push this with my fix as soon as you confirm you're OK with my
> change.

It's ok for me. Thanks

   EdB

> 
> > ---
> > 
> >  src/gallium/state_trackers/clover/api/dispatch.cpp | 10 -
> >  src/gallium/state_trackers/clover/api/kernel.cpp   |  8 +++
> >  src/gallium/state_trackers/clover/api/memory.cpp   | 25
> >  -- src/gallium/state_trackers/clover/api/program.cpp
> >   | 10 +
> >  src/gallium/state_trackers/clover/api/transfer.cpp | 12 +++
> >  src/gallium/state_trackers/clover/api/util.hpp |  7 ++
> >  6 files changed, 65 insertions(+), 7 deletions(-)
> > 
> > diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp
> > b/src/gallium/state_trackers/clover/api/dispatch.cpp index
> > b5a4094..f10babe 100644
> > --- a/src/gallium/state_trackers/clover/api/dispatch.cpp
> > +++ b/src/gallium/state_trackers/clover/api/dispatch.cpp
> > @@ -123,12 +123,12 @@ namespace clover {
> > 
> >clCreateImage,
> >clCreateProgramWithBuiltInKernels,
> >clCompileProgram,
> > 
> > -  NULL, // clLinkProgram
> > +  clLinkProgram,
> > 
> >clUnloadPlatformCompiler,
> > 
> > -  NULL, // clGetKernelArgInfo
> > -  NULL, // clEnqueueFillBuffer
> > -  NULL, // clEnqueueFillImage
> > -  NULL, // clEnqueueMigrateMemObjects
> > +  clGetKernelArgInfo,
> > +  clEnqueueFillBuffer,
> > +  clEnqueueFillImage,
> > +  clEnqueueMigrateMemObjects,
> > 
> >clEnqueueMarkerWithWaitList,
> >clEnqueueBarrierWithWaitList,
> >NULL, // clGetExtensionFunctionAddressForPlatform
> > 
> > diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp
> > b/src/gallium/state_trackers/clover/api/kernel.cpp index 857a152..de3e300
> > 100644
> > --- a/src/gallium/state_trackers/clover/api/kernel.cpp
> > +++ b/src/gallium/state_trackers/clover/api/kernel.cpp
> > @@ -189,6 +189,14 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern,
> > cl_device_id d_dev,> 
> > return CL_INVALID_DEVICE;
> >  
> >  }
> > 
> > +CLOVER_API cl_int
> > +clGetKernelArgInfo(cl_kernel d_kern,
> > +   cl_uint idx, cl_kernel_arg_info param,
> > +   size_t size, void *r_buf, size_t *r_size) {
> > +   CLOVER_NOT_SUPPORTED_BY("1.1");
> > +   return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
> > +}
> > +
> > 
> >  namespace {
> >  
> > ///
> > /// Common argument checking shared by kernel invocation commands.
> > 
> > diff --git a/src/gallium/state_trackers/clover/api/memory.cpp
> > b/src/gallium/state_trackers/clover/api/memory.cpp index 3ff6ba0..42e8c30
> > 100644
> > --- a/src/gallium/state_trackers/clover/api/memory.cpp
> > +++ b/src/gallium/state_trackers/clover/api/memory.cpp
> > @@ -358,8 +358,29 @@ clCreateImage(cl_context d_ctx, cl_mem_flags flags,
> > 
> >const cl_image_desc *image_desc,
> >void *host_ptr, cl_int *r_errcode) {
> > 
> > // This function was added in OpenCL 1.2
> > 
> > -   std::cerr << "CL user error: clCreateImage() not supported by OpenCL
> > 1.1." << -std::endl;
> > +   CLOVER_NOT_SUPPORTED_BY("1.1");
> > 
> > ret_error(r_errcode, CL_INVALID_OPERATION);
> > return NULL;
> >  
> >  }
> > 
> > +
> > +CLOVER_API cl_int
> > +clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
> > +const void *pattern, size_t pattern_size,
> > +size_t offset, size_t size,
> > +cl_uint num_events_in_wait_list,
> > +const cl_event *event_wait_list,
> > +cl_event *event) {
> > +   CLOVER_NOT_SUPPORTED_BY("1.1");
> > +   return CL_INVALID_VALUE;
> > +}
> > +
> > +CLOVER_API cl_int
> > +clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
> > +   const void *fill_color,
> > +   const size_t *origin, const size_t *region,
> > +   cl_uint num_events_in_wait_list,
> > +   const cl_event *event_wait_list,
> > +   cl_event *event) {
> > +   CLOVER_NOT_SUPPORTED_BY("1.1");
> > +   return CL_INVALID_VALUE;
> > +}
> > diff --git a/src/gallium/state_trackers/clover/api/program.cpp
> > b/src/gallium/state_t

[Mesa-dev] clover: image resource management

2015-08-07 Thread Zoltán Gilián
Hi!

I've noticed that clover::kernel::image_wr_argument::unbind destroys
the surface. What kind of behaviour is expected from the driver to
allow later access to the resource (e.g. by clEnqueueReadImage)?

Zoltan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] clover: image resource management

2015-08-07 Thread Francisco Jerez
Zoltán Gilián  writes:

> Hi!
>
> I've noticed that clover::kernel::image_wr_argument::unbind destroys
> the surface. What kind of behaviour is expected from the driver to
> allow later access to the resource (e.g. by clEnqueueReadImage)?

It only destroys the cached view into the image, of course the resource
itself is not destroyed.  clEnqueueReadImage will map the resource
directly so it doesn't need access to the pipe_surface at this point.

>
> Zoltan


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: Rename PURE to ATTRIBUTE_PURE.

2015-08-07 Thread Jose Fonseca
To avoid collision with windows.h's PURE macro.

We could consider eventually renaming to __pure, but that would require
further care, so it's left to the future.
---
 src/util/macros.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/util/macros.h b/src/util/macros.h
index 5c5c92e..84e4f18 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -145,9 +145,9 @@ do {   \
  * return value.  As a result, calls to it can be dead code eliminated.
  */
 #ifdef HAVE_FUNC_ATTRIBUTE_PURE
-#define PURE __attribute__((__pure__))
+#define ATTRIBUTE_PURE __attribute__((__pure__))
 #else
-#define PURE
+#define ATTRIBUTE_PURE
 #endif
 
 #ifdef __cplusplus
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH libdrm v3 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag

2015-08-07 Thread Michel Thierry

On 8/7/2015 11:56 AM, Michał Winiarski wrote:

On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:

Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

The i915 driver has been modified to provide a flag to set when the 4GB
limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
48-bit range will only be used when explicitly requested.

Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
set automatically, while calls to drm_intel_bo_emit_reloc will clear it.

v2: Make set/clear functions nops on pre-gen8 platforms, and use them
 internally in emit_reloc functions (Ben)
 s/48BADDRESS/48B_ADDRESS/ (Dave)
v3: Keep set/clear functions internal, no-one needs to use them directly.

References: 
http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky 
Cc: Dave Gordon 
Signed-off-by: Michel Thierry 
---
  include/drm/i915_drm.h|  3 ++-
  intel/intel_bufmgr.c  | 16 ++
  intel/intel_bufmgr.h  |  6 +-
  intel/intel_bufmgr_gem.c  | 54 +++
  intel/intel_bufmgr_priv.h | 11 ++
  5 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
  #define EXEC_OBJECT_NEEDS_GTT (1<<1)
  #define EXEC_OBJECT_WRITE (1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
__u64 flags;

__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..0bd5191 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
drm_intel_bo *target_bo, uint32_t target_offset,
uint32_t read_domains, uint32_t write_domain)
  {
+   if (bo->bufmgr->bo_clear_supports_48b_address)
+   bo->bufmgr->bo_clear_supports_48b_address(target_bo);


This is always true - you assign func to this func pointer unconditionally.
Also - why not return some meaningful value if the user does not have
enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right
now. Check for gen >= 8 seems rather inadequate, and even then you're not
returning anything useful to the caller.


+
+   return bo->bufmgr->bo_emit_reloc(bo, offset,
+target_bo, target_offset,
+read_domains, write_domain);
+}
+


Using emit_reloc to set a BO flag seems confusing and can be error prone,
emit_reloc can be called many times and caller needs to be careful and call the
right function for each reloc.


+int
+drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+   drm_intel_bo *target_bo, uint32_t target_offset,
+   uint32_t read_domains, uint32_t write_domain)
+{
+   if (bo->bufmgr->bo_set_supports_48b_address)
+   bo->bufmgr->bo_set_supports_48b_address(target_bo);


Same situation as with clear_supports_48b_address.


+
return bo->bufmgr->bo_emit_reloc(bo, offset,
 target_bo, target_offset,
 read_domains, write_domain);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 285919e..8f91ffe 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -87,7 +87,8 @@ struct _drm_intel_bo {
/**
 * Last seen card virtual address (offset from the beginning of the
 * aperture) for the object.  This should be used to fill relocation
-* entries when calling drm_intel_bo_emit_reloc()
+* entries when calling drm_intel_bo_emit_reloc() or
+* drm_intel_bo_emit_reloc_48bit()
 */
uint64_t offset64;
  };
@@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** 
bo_array, int count);
  int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
drm_intel_bo *target_bo, uint32_t target_offset,
uint32_t read_domains, uint32_t write_domain);
+int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+ drm_intel_bo *target_bo, uint32_t 
target_offset,
+ uint32_t read_domains, uint32_t write_domain);
  int drm_intel_bo_e

[Mesa-dev] [PATCH mesa v3] i965/gen8+: bo in state base address must be in 32-bit address range

2015-08-07 Thread Michel Thierry
Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap or Instruction State Heap must be in a 32-bit range
(GSH / ISH), because the General State Offset and Instruction State Offset
are limited to 32-bits.

Use drm_intel_bo_emit_reloc_48bit when the 4GB limit is not necessary, and
the bo can be in the full address space.

This commit introduces a dependency of libdrm 2.4.63, which introduces the
drm_intel_bo_emit_reloc_48bit function.

v2: s/48baddress/48b_address/,
Only use in OUT_RELOC64 cases, OUT_RELOC implies a 32-bit address offset
is needed (Ben)
v3: Added OUT_RELOC64_INSIDE_4G, so it stands out when a 64-bit relocation
needs the 32-bit workaround (Chris)

References: 
http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky 
Cc: Chris Wilson 
Signed-off-by: Michel Thierry 
---
 configure.ac  |  2 +-
 src/mesa/drivers/dri/i965/gen8_misc_state.c   | 19 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 20 
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 10 --
 4 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/configure.ac b/configure.ac
index af61aa2..c92ca44 100644
--- a/configure.ac
+++ b/configure.ac
@@ -68,7 +68,7 @@ AC_SUBST([OSMESA_VERSION])
 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.38
 LIBDRM_RADEON_REQUIRED=2.4.56
-LIBDRM_INTEL_REQUIRED=2.4.60
+LIBDRM_INTEL_REQUIRED=2.4.63
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
 LIBDRM_FREEDRENO_REQUIRED=2.4.57
diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c 
b/src/mesa/drivers/dri/i965/gen8_misc_state.c
index b20038e..73eba06 100644
--- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
@@ -28,6 +28,10 @@
 
 /**
  * Define the base addresses which some state is referenced from.
+ *
+ * Use OUT_RELOC64_INSIDE_4G instead of OUT_RELOC64, the General State
+ * Offset and Instruction State Offset are limited to 32-bits by hardware,
+ * and must be located in the first 4GBs (32-bit offset).
  */
 void gen8_upload_state_base_address(struct brw_context *brw)
 {
@@ -41,19 +45,18 @@ void gen8_upload_state_base_address(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(mocs_wb << 16);
/* Surface state base address: */
-   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
-   mocs_wb << 4 | 1);
+   OUT_RELOC64_INSIDE_4G(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ mocs_wb << 4 | 1);
/* Dynamic state base address: */
-   OUT_RELOC64(brw->batch.bo,
-   I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
-   mocs_wb << 4 | 1);
+   OUT_RELOC64_INSIDE_4G(brw->batch.bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 
0,
+ mocs_wb << 4 | 1);
/* Indirect object base address: MEDIA_OBJECT data */
OUT_BATCH(mocs_wb << 4 | 1);
OUT_BATCH(0);
/* Instruction base address: shader kernels (incl. SIP) */
-   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-   mocs_wb << 4 | 1);
-
+   OUT_RELOC64_INSIDE_4G(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ mocs_wb << 4 | 1);
/* General state buffer size */
OUT_BATCH(0xf001);
/* Dynamic state buffer size */
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 54081a1..ca90784 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -409,11 +409,23 @@ bool
 intel_batchbuffer_emit_reloc64(struct brw_context *brw,
drm_intel_bo *buffer,
uint32_t read_domains, uint32_t write_domain,
-  uint32_t delta)
+   uint32_t delta,
+   bool support_48bit_offset)
 {
-   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
- buffer, delta,
- read_domains, write_domain);
+   int ret;
+
+   /* Not all buffers can be allocated outside the first 4GB, and
+* offset must be limited to 32-bits.
+*/
+   if (support_48bit_offset)
+  drm_intel_bo_emit_reloc_48bit(brw->batch.bo, 4*brw->batch.used,
+buffer, delta,
+read_domains, write_domain);
+   else
+  drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+  buffer, delta,
+  read_domains, write_domain);
+
assert(ret == 0);
(void) ret;
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h 
b/src/

[Mesa-dev] [PATCH libdrm v3 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag

2015-08-07 Thread Michel Thierry
Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

The i915 driver has been modified to provide a flag to set when the 4GB
limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
48-bit range will only be used when explicitly requested.

Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
set automatically, while calls to drm_intel_bo_emit_reloc will clear it.

v2: Make set/clear functions nops on pre-gen8 platforms, and use them
internally in emit_reloc functions (Ben)
s/48BADDRESS/48B_ADDRESS/ (Dave)
v3: Keep set/clear functions internal, no-one needs to use them directly.

References: 
http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
Cc: Ben Widawsky 
Cc: Dave Gordon 
Signed-off-by: Michel Thierry 
---
 include/drm/i915_drm.h|  3 ++-
 intel/intel_bufmgr.c  | 16 ++
 intel/intel_bufmgr.h  |  6 +-
 intel/intel_bufmgr_gem.c  | 54 +++
 intel/intel_bufmgr_priv.h | 11 ++
 5 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
 #define EXEC_OBJECT_WRITE  (1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
__u64 flags;
 
__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..0bd5191 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
drm_intel_bo *target_bo, uint32_t target_offset,
uint32_t read_domains, uint32_t write_domain)
 {
+   if (bo->bufmgr->bo_clear_supports_48b_address)
+   bo->bufmgr->bo_clear_supports_48b_address(target_bo);
+
+   return bo->bufmgr->bo_emit_reloc(bo, offset,
+target_bo, target_offset,
+read_domains, write_domain);
+}
+
+int
+drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+   drm_intel_bo *target_bo, uint32_t target_offset,
+   uint32_t read_domains, uint32_t write_domain)
+{
+   if (bo->bufmgr->bo_set_supports_48b_address)
+   bo->bufmgr->bo_set_supports_48b_address(target_bo);
+
return bo->bufmgr->bo_emit_reloc(bo, offset,
 target_bo, target_offset,
 read_domains, write_domain);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 285919e..8f91ffe 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -87,7 +87,8 @@ struct _drm_intel_bo {
/**
 * Last seen card virtual address (offset from the beginning of the
 * aperture) for the object.  This should be used to fill relocation
-* entries when calling drm_intel_bo_emit_reloc()
+* entries when calling drm_intel_bo_emit_reloc() or
+* drm_intel_bo_emit_reloc_48bit()
 */
uint64_t offset64;
 };
@@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** 
bo_array, int count);
 int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
drm_intel_bo *target_bo, uint32_t target_offset,
uint32_t read_domains, uint32_t write_domain);
+int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+ drm_intel_bo *target_bo, uint32_t 
target_offset,
+ uint32_t read_domains, uint32_t write_domain);
 int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
  drm_intel_bo *target_bo,
  uint32_t target_offset,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 41de396..713ed4e 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem {
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
+#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)
 
 typedef struct _drm_intel_reloc_target_info {
drm_intel_bo *bo;
@@ -237,6 +238,14 @@ struct _drm_intel_bo_gem {
bool is_userptr;
 
/**
+* Boolean of whether this buffer can be

Re: [Mesa-dev] [Intel-gfx] [PATCH libdrm v3 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag

2015-08-07 Thread Michał Winiarski
On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.
> 
> The i915 driver has been modified to provide a flag to set when the 4GB
> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
> 48-bit range will only be used when explicitly requested.
> 
> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
> set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
> 
> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
> internally in emit_reloc functions (Ben)
> s/48BADDRESS/48B_ADDRESS/ (Dave)
> v3: Keep set/clear functions internal, no-one needs to use them directly.
> 
> References: 
> http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
> Cc: Ben Widawsky 
> Cc: Dave Gordon 
> Signed-off-by: Michel Thierry 
> ---
>  include/drm/i915_drm.h|  3 ++-
>  intel/intel_bufmgr.c  | 16 ++
>  intel/intel_bufmgr.h  |  6 +-
>  intel/intel_bufmgr_gem.c  | 54 
> +++
>  intel/intel_bufmgr_priv.h | 11 ++
>  5 files changed, 84 insertions(+), 6 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index ded43b1..426b25c 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>  #define EXEC_OBJECT_NEEDS_GTT(1<<1)
>  #define EXEC_OBJECT_WRITE(1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>   __u64 flags;
>  
>   __u64 rsvd1;
> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> index 14ea9f9..0bd5191 100644
> --- a/intel/intel_bufmgr.c
> +++ b/intel/intel_bufmgr.c
> @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t 
> offset,
>   drm_intel_bo *target_bo, uint32_t target_offset,
>   uint32_t read_domains, uint32_t write_domain)
>  {
> + if (bo->bufmgr->bo_clear_supports_48b_address)
> + bo->bufmgr->bo_clear_supports_48b_address(target_bo);

This is always true - you assign func to this func pointer unconditionally.
Also - why not return some meaningful value if the user does not have
enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right
now. Check for gen >= 8 seems rather inadequate, and even then you're not
returning anything useful to the caller.

> +
> + return bo->bufmgr->bo_emit_reloc(bo, offset,
> +  target_bo, target_offset,
> +  read_domains, write_domain);
> +}
> +

Using emit_reloc to set a BO flag seems confusing and can be error prone,
emit_reloc can be called many times and caller needs to be careful and call the
right function for each reloc.

> +int
> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo, uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain)
> +{
> + if (bo->bufmgr->bo_set_supports_48b_address)
> + bo->bufmgr->bo_set_supports_48b_address(target_bo);

Same situation as with clear_supports_48b_address.

> +
>   return bo->bufmgr->bo_emit_reloc(bo, offset,
>target_bo, target_offset,
>read_domains, write_domain);
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 285919e..8f91ffe 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>   /**
>* Last seen card virtual address (offset from the beginning of the
>* aperture) for the object.  This should be used to fill relocation
> -  * entries when calling drm_intel_bo_emit_reloc()
> +  * entries when calling drm_intel_bo_emit_reloc() or
> +  * drm_intel_bo_emit_reloc_48bit()
>*/
>   uint64_t offset64;
>  };
> @@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** 
> bo_array, int count);
>  int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>   drm_intel_bo *target_bo, uint32_t target_offset,
>   uint32_t read_domains, uint32_t write_domain);
> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +   drm_intel_bo *target_bo, uint32_t 
> target_offset,
> +

[Mesa-dev] [PATCH libdrm v3 2/2] intel: add new function name to symbol-check test

2015-08-07 Thread Michel Thierry
Signed-off-by: Michel Thierry 
---
 intel/intel-symbol-check | 1 +
 1 file changed, 1 insertion(+)

diff --git a/intel/intel-symbol-check b/intel/intel-symbol-check
index c555e6d..6f8450b 100755
--- a/intel/intel-symbol-check
+++ b/intel/intel-symbol-check
@@ -18,6 +18,7 @@ drm_intel_bo_busy
 drm_intel_bo_disable_reuse
 drm_intel_bo_emit_reloc
 drm_intel_bo_emit_reloc_fence
+drm_intel_bo_emit_reloc_48bit
 drm_intel_bo_exec
 drm_intel_bo_fake_alloc_static
 drm_intel_bo_fake_disable_backing_store
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH libdrm v3 0/2] 48-bit virtual address support in i915

2015-08-07 Thread Michel Thierry
48-bit virtual address range will be enabled in i915 soon, but some objects
must be referenced by 32-bit offsets. These patches use a new kernel flag to
specify if this restriction applies or not.

I'm sending these patches to comply with the i915 merge process.
Once the kernel patch is merged, I'll make a new libdrm release and address
the mesa build dependency.

Michel Thierry (2):
  intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag
  intel: add new function name to symbol-check test

 include/drm/i915_drm.h|  3 ++-
 intel/intel-symbol-check  |  1 +
 intel/intel_bufmgr.c  | 16 ++
 intel/intel_bufmgr.h  |  6 +-
 intel/intel_bufmgr_gem.c  | 54 +++
 intel/intel_bufmgr_priv.h | 11 ++
 6 files changed, 85 insertions(+), 6 deletions(-)

-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC libdrm] intel: 48b ppgtt support

2015-08-07 Thread Michał Winiarski
---
 include/drm/i915_drm.h|  3 ++-
 intel/intel_bufmgr.c  | 11 +++
 intel/intel_bufmgr.h  |  1 +
 intel/intel_bufmgr_gem.c  | 43 +--
 intel/intel_bufmgr_priv.h |  8 
 5 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
 #define EXEC_OBJECT_WRITE  (1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
__u64 flags;
 
__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..97ea6ec 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -261,6 +261,17 @@ drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * 
tiling_mode,
 }
 
 int
+drm_intel_bo_use_full_range(drm_intel_bo *bo, uint32_t enable)
+{
+   if (bo->bufmgr->bo_use_full_range) {
+   bo->bufmgr->bo_use_full_range(bo, enable);
+   return 0;
+   }
+
+   return -ENODEV;
+}
+
+int
 drm_intel_bo_disable_reuse(drm_intel_bo *bo)
 {
if (bo->bufmgr->bo_disable_reuse)
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 285919e..2635fa4 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -160,6 +160,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * 
tiling_mode,
 int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
 int drm_intel_bo_busy(drm_intel_bo *bo);
 int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
+int drm_intel_bo_use_full_range(drm_intel_bo *bo, uint32_t enable);
 
 int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
 int drm_intel_bo_is_reusable(drm_intel_bo *bo);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 41de396..ef71686 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -237,6 +237,11 @@ struct _drm_intel_bo_gem {
bool is_userptr;
 
/**
+* Whether this buffer can be placed in full (2^48) ppgtt range on gen8+
+*/
+   bool uses_full_range;
+
+   /**
 * Size in bytes of this buffer and its relocation descendents.
 *
 * Used to avoid costly tree walking in
@@ -395,8 +400,8 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem 
*bufmgr_gem)
drm_intel_bo_gem *target_gem =
(drm_intel_bo_gem *) target_bo;
 
-   DBG("%2d: %d (%s)@0x%08llx -> "
-   "%d (%s)@0x%08lx + 0x%08x\n",
+   DBG("%2d: %d (%s)@0x%016llx -> "
+   "%d (%s)@0x%016lx + 0x%08x\n",
i,
bo_gem->gem_handle, bo_gem->name,
(unsigned long long)bo_gem->relocs[j].offset,
@@ -468,11 +473,15 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int 
need_fence)
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
int index;
+   int flags = 0;
+
+   if (need_fence)
+   flags |= EXEC_OBJECT_NEEDS_FENCE;
+   if (bo_gem->uses_full_range)
+   flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
if (bo_gem->validate_index != -1) {
-   if (need_fence)
-   bufmgr_gem->exec2_objects[bo_gem->validate_index].flags 
|=
-   EXEC_OBJECT_NEEDS_FENCE;
+   bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 
flags;
return;
}
 
@@ -501,13 +510,9 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int 
need_fence)
bufmgr_gem->exec2_objects[index].alignment = bo->align;
bufmgr_gem->exec2_objects[index].offset = 0;
bufmgr_gem->exec_bos[index] = bo;
-   bufmgr_gem->exec2_objects[index].flags = 0;
+   bufmgr_gem->exec2_objects[index].flags = flags;
bufmgr_gem->exec2_objects[index].rsvd1 = 0;
bufmgr_gem->exec2_objects[index].rsvd2 = 0;
-   if (need_fence) {
-   bufmgr_gem->exec2_objects[index].flags |=
-   EXEC_OBJECT_NEEDS_FENCE;
-   }
bufmgr_gem->exec_count++;
 }
 
@@ -780,6 +785,7 @@ retry:
bo_gem->used_as_reloc_target = false;
bo_gem->has_error = false;
bo_gem->reusable = true;
+   bo_gem->uses_full_range = false;
 
drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
 
@@ -926,6 +932,7 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
bo_gem->used_as_reloc_target = false;
bo_gem->has_error = false;
bo_gem->reusable = false;
+   bo_gem->uses_full_range = fa

Re: [Mesa-dev] [PATCH] util: Rename PURE to ATTRIBUTE_PURE.

2015-08-07 Thread Brian Paul

On 08/07/2015 06:10 AM, Jose Fonseca wrote:

To avoid collision with windows.h's PURE macro.

We could consider eventually renaming to __pure, but that would require
further care, so it's left to the future.
---
  src/util/macros.h | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/util/macros.h b/src/util/macros.h
index 5c5c92e..84e4f18 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -145,9 +145,9 @@ do {   \
   * return value.  As a result, calls to it can be dead code eliminated.
   */
  #ifdef HAVE_FUNC_ATTRIBUTE_PURE
-#define PURE __attribute__((__pure__))
+#define ATTRIBUTE_PURE __attribute__((__pure__))
  #else
-#define PURE
+#define ATTRIBUTE_PURE
  #endif

  #ifdef __cplusplus



Looks OK to me.  AFAICT, we're not using the macro anywhere yet.

Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] tnl: Maintain the _WindowMap matrix in TNLcontext v2.

2015-08-07 Thread Ilia Mirkin
On Fri, Aug 7, 2015 at 2:46 AM, Mathias Fröhlich 
wrote:

>
>
> Hi,
>
>
>
> On Thursday, August 06, 2015 12:32:18 Ilia Mirkin wrote:
>
> > > @@ -182,6 +187,13 @@ _tnl_InvalidateState( struct gl_context *ctx,
> GLuint new_state )
>
> > > }
>
> > > }
>
> > > }
>
> > > +
>
> > > + if (new_state & (_NEW_VIEWPORT | _NEW_BUFFERS)) {
>
> > > + double scale[3], translate[3];
>
> > > + _mesa_get_viewport_xform(ctx, 0, scale, translate);
>
> > > + _math_matrix_viewport(&tnl->_WindowMap, scale, translate,
>
> > > + ctx->DrawBuffer->_DepthMaxF);
>
> >
>
> > This appears to crash nouveau_vieux on startup. See
>
> > https://bugs.freedesktop.org/show_bug.cgi?id=91570 . I suspect that
>
> > ctx->DrawBuffer is null. What's the proper way to handle that
>
> > situation -- predicate on a drawbuffer being bound? Or make sure that
>
> > something is bound to the drawbuffer before we call tnl_wakeup?
>
> >
>
>
>
> It looks like nouveau_vieux uses _tnl_InvalidateState for early
> initialization.
>
> I assume that _mesa_make_current is called later on to set the DrawBuffer
>
> and there the _NEW_BUFFERS flag is set. So, when _tnl_InvalidateState is
> called
>
> while validating state before the first draw and past a bound draw buffer,
> we should
>
> get the desired update on the _WindowMap matrix.
>
> So just checking for the presence of the DrawBuffer before grabbing data
>
> from it seems to be safe.
>
>
>
> The aim of the change was to move the _WindowMap matrix out of the non tnl
> drivers
>
> and track it in the tnl module where it is exclusively used today.
>
> At the higher mesa level we did/do not invalidate DrawBuffer related state
> that early.
>
>
>
> One alternative I can see is to call _tnl_InvalidateState without the
> _NEW_VIEWPORT and
>
> _NEW_BUFFERS flag set instead of _tnl_wakeup in the two nouveau early
> initialization
>
> code paths in nouveau_swtnl_t.c and nv04_render.c. That should also bring
> us back to
>
> the original behavior.
>

That seems like a bit of a hack. If you think it's OK, I'm just going to
introduce the DrawBuffer check. Brian, does that seem OK to you? I'm mostly
unfamiliar with the TnL module, its APIs, and when/how they're supposed to
be used. But nouveau_vieux calls tnl_wakeup right in its context creation
pass. nv04/nv05 don't have hwtnl at all, nv10+ have it, but it has various
limitations, with which we end up using swtnl.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] New stable-branch 10.6 candidate pushed

2015-08-07 Thread Emil Velikov
Hello list,

The candidate for the Mesa 10.6.4 is now available. Currently we have:
 - 29 queued
 - 22 nominated (outstanding)
 - and 2 rejected/obsolete patches

The series covers quite a wide range of issues, but it's mostly
concentrated around the GL implementation - both dri drivers and 
the EGL loader.

I would like to kindly remind people that a sizeable hunk of the 
nominations are short on review or have not been updated based
on the review comments.

Take a look at section "Mesa stable queue" for more information.

Testing
---
The following results are against piglit 246791c51ec.


Changes - classic i965(snb)
---
Fixes:
 - spec
+ !opengl 1.1
   + getteximage-luminance fail > pass


Changes - swrast classic

None.


Changes - gallium softpipe
--
None.


Changes - gallium llvmpipe (LLVM 3.6.2)
---
Fixes:
 - spec
+ arb_texture_buffer_range
   + ranges-2 fail > pass


Testing reports/general approval

Any testing reports (or general approval of the state of the branch)
will be greatly appreciated.


Trivial merge conflicts
---
commit 736f6e16d9989f01cc55dcba15ba978ba90b7748
Author: Francisco Jerez 

i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file.

(cherry picked from commit 42a18ca76057621ae7d8812b29ea2245d6ff282d)

commit 791cf8a025ac0d610596cdfab17fc84b49df2288
Author: Eduardo Lima Mitev 

mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD

(cherry picked from commit 5d64cae8427b090c42d6d38da7fb474b3ddd4eb0)

commit 8f8c842338b11185e8432e4b44e31a85abcbf9c6
Author: Ben Widawsky 

i965/skl: Add production thread counts and URB size

(cherry picked from commit 7eaacc1678195738fab3bb98870828611cae066d)

commit 080c4713bcd4c0c3643b3fb3ede1aa09f891aecf
Author: Marek Olšák 

st/mesa: don't ignore texture buffer state changes

(cherry picked from commit e39ece0d7856d0532a0f011cd5cb17bc85ee82e2)

commit a31dfd91b500735c44bb89e194f02b718299bf2e
Author: Marek Olšák 

radeonsi: upload shader rodata after updating scratch relocations

(cherry picked from commit 50a957c5de842b18e10c361f7b0310aa46bb483f)

commit c33ca1696a0a967091937805fa198ffc3317a03f
Author: Emil Velikov 

configure.ac: do not set HAVE_DRI(23) when libdrm is missing

(cherry picked from commit 16f6d432de07dcb537dafd0c9f3ef7614891ed6b)



The plan is to have 10.6.4 this Monday(10th of August).

If you have any questions or comments that you would like to share
before the release, please go ahead.


Cheers,
Emil


Mesa stable queue
-

Nominated (22)
==

Adam Jackson (1):
  glx: Fix __glXWireToEvent for BufferSwapComplete

Boyan Ding (1):
  i915: Add XRGB format to intel_screen_make_configs

Brian Paul (1):
  configure: don't try to build gallium DRI drivers if --disable-dri is set

Chris Wilson (1):
  i965: Prevent coordinate overflow in intel_emit_linear_blit

Emil Velikov (6):
  mapi: automake: inline glapi_gen_mapi define
  xmlpool: remove LOCALEDIR variable/fix bmake
  vc4: add missing nir include, to fix the build

Frank Binns (1):
  egl/x11: don't abort when creating a DRI2 drawable fails

Jason Ekstrand (1):
  meta/copy_image: Stash off the scissor

Neil Roberts (3):
  i965/bdw: Fix setting the instancing state for the SGVS element
  i965: Swap the order of the vertex ID and edge flag attributes
  i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used

Oded Gabbay (2):
  mesa: clear existing swizzle info before bitwise-OR  
  mesa: _mesa_format_convert should be endian agnostic

Rob Clark (1):
  xa: add xa_surface_from_handle2

Timothy Arceri (1):
  glsl: fix atomic buffer index for bindings other than 0

Tom Stellard (3):
  clover: Call clBuildProgram() notification function when build completes 
v2
  gallium/drivers: Add threadsafe wrappers for pipe_context v2
  clover: Use threadsafe wrappers for pipe_context v2



Queued (29)
===

Anuj Phogat (6):
  mesa: Turn get_readpixels_transfer_ops() in to a global function
  meta: Fix transfer operations check in meta pbo path for readpixels
  meta: Abort meta pbo path if readpixels need signed-unsigned conversion
  meta: Don't do fragment color clamping in _mesa_meta_pbo_GetTexSubImage
  mesa: Add a helper function _mesa_need_luminance_to_rgb_conversion()
  meta: Fix reading luminance texture as rgba in 
_mesa_meta_pbo_GetTexSubImage()

Ben Widawsky (1):
  i965/skl: Add production thread counts and URB size

Eduardo Lima Mitev (3):
  mesa: Fix errors values returned by glShaderBinary()
  mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD
  mesa: Fix error ret

[Mesa-dev] [Bug 90264] [Regression, bisected] Tooltip corruption in Chrome

2015-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=90264

--- Comment #45 from Chris Wilson  ---
Alternatively,

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 5767026..01b5c28 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -656,6 +656,8 @@ dri2_wait_x(struct glx_context *gc)
struct dri2_drawable *priv = (struct dri2_drawable *)
   GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);

+   dri2InvalidateBuffers(gc->currentDpy, gc->currentDrawable);
+
if (priv == NULL || !priv->have_fake_front)
   return;

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] tnl: Maintain the _WindowMap matrix in TNLcontext v2.

2015-08-07 Thread Mathias Fröhlich
Hi,

On Friday, August 07, 2015 10:28:10 Ilia Mirkin wrote:
> > One alternative I can see is to call _tnl_InvalidateState without the
> > _NEW_VIEWPORT and
> >
> > _NEW_BUFFERS flag set instead of _tnl_wakeup in the two nouveau early
> > initialization
> >
> > code paths in nouveau_swtnl_t.c and nv04_render.c. That should also bring
> > us back to
> >
> > the original behavior.
> >
> 
> That seems like a bit of a hack. If you think it's OK, I'm just going to
> introduce the DrawBuffer check. Brian, does that seem OK to you? I'm mostly
> unfamiliar with the TnL module, its APIs, and when/how they're supposed to
> be used. But nouveau_vieux calls tnl_wakeup right in its context creation
> pass. nv04/nv05 don't have hwtnl at all, nv10+ have it, but it has various
> limitations, with which we end up using swtnl.

To be honest, I did not expect the _tnl_InvalidateState function to be called
much differently than through _mesa_update_state where it is usually called
through a driver provided UpdateState callback. If you look where
_tnl_InvalidateState is used you can see this pattern broadly used.
The only exception is that it is also called through _tnl_wakeup which sounds
to me like something that is not meant for initialization purpose. Actually,
only nouveau uses this call for initialization stuff. So, to me the question
looks like: what is the right way to initialize nouveau.
Either way, just skipping this part of _tnl_InvalidateState either by a
pointer check or something else that makes that part of _tnl_InvalidateState
happen only for the succeeding calls, should be safe in terms of crashes as
well as correctness. As I read the calling code we should end up calling
_tnl_InvalidateState a second time after we have bound a DrawBuffer
and before the first draw through nouveau_update_state. So the matrix should be
properly set when it is needed for the first draw.
The only aspect I can add to that is:
The additional pointer check happens on each Driver::UpdateState call. Maybe
it's not worth caring about that, but if we customize nouveaus startup
procedure we do not introduce any additional if code in state validation.
But, I also won't seriously object against your patch!

Thanks for taking care of the problem!

Greetings

Mathias
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] tnl: Maintain the _WindowMap matrix in TNLcontext v2.

2015-08-07 Thread Ilia Mirkin
On Fri, Aug 7, 2015 at 11:07 AM, Mathias Fröhlich  wrote:

>
>
> Hi,
>
>
>
> On Friday, August 07, 2015 10:28:10 Ilia Mirkin wrote:
>
> > > One alternative I can see is to call _tnl_InvalidateState without the
>
> > > _NEW_VIEWPORT and
>
> > >
>
> > > _NEW_BUFFERS flag set instead of _tnl_wakeup in the two nouveau early
>
> > > initialization
>
> > >
>
> > > code paths in nouveau_swtnl_t.c and nv04_render.c. That should also
> bring
>
> > > us back to
>
> > >
>
> > > the original behavior.
>
> > >
>
> >
>
> > That seems like a bit of a hack. If you think it's OK, I'm just going to
>
> > introduce the DrawBuffer check. Brian, does that seem OK to you? I'm
> mostly
>
> > unfamiliar with the TnL module, its APIs, and when/how they're supposed
> to
>
> > be used. But nouveau_vieux calls tnl_wakeup right in its context creation
>
> > pass. nv04/nv05 don't have hwtnl at all, nv10+ have it, but it has
> various
>
> > limitations, with which we end up using swtnl.
>
>
>
> To be honest, I did not expect the _tnl_InvalidateState function to be
> called
>
> much differently than through _mesa_update_state where it is usually called
>
> through a driver provided UpdateState callback. If you look where
>
> _tnl_InvalidateState is used you can see this pattern broadly used.
>
> The only exception is that it is also called through _tnl_wakeup which
> sounds
>
> to me like something that is not meant for initialization purpose.
> Actually,
>
> only nouveau uses this call for initialization stuff. So, to me the
> question
>
> looks like: what is the right way to initialize nouveau.
>

Yeah, I tend to agree with you -- it seems like _tnl_wakeup is meant for
"oh hey, I shut you down and haven't been sending you updates, but I want
to use you again, so please update your stuff to match reality". Does it
even need to be called at all? nouveau_state.c:nouveau_update_state
unconditionally calls _tnl_InvalidateState. i965, on the other hand, the
only other user of tnl_wakeup, calls it at draw time, for RenderMode !=
GL_RENDER, and never calls _tnl_InvalidateState, so the tnl module is
completely out-of-date there.

So I'm actually in favor of removing it entirely. Thoughts?

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/13] hash: Add _mesa_HashRemoveLocked() function.

2015-08-07 Thread Brian Paul

On 08/06/2015 06:11 PM, Matt Turner wrote:

---
  src/mesa/main/hash.c | 19 +++
  src/mesa/main/hash.h |  2 ++
  2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
index 315b5d6..aa1c6a1 100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -328,8 +328,8 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint key, 
void *data)
   * While holding the hash table's lock, searches the entry with the matching
   * key and unlinks it.
   */
-void
-_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
+static inline void
+_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key)


You can drop the _mesa_ prefix on static functions.



  {
 struct hash_entry *entry;

@@ -343,17 +343,28 @@ _mesa_HashRemove(struct _mesa_HashTable *table, GLuint 
key)
return;
 }

-   mtx_lock(&table->Mutex);
 if (key == DELETED_KEY_VALUE) {
table->deleted_key_data = NULL;
 } else {
entry = _mesa_hash_table_search(table->ht, uint_key(key));
_mesa_hash_table_remove(table->ht, entry);
 }
-   mtx_unlock(&table->Mutex);
  }


+void
+_mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key)
+{
+   _mesa_HashRemove_unlocked(table, key);
+}
+
+void
+_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
+{
+   mtx_lock(&table->Mutex);
+   _mesa_HashRemove_unlocked(table, key);
+   mtx_unlock(&table->Mutex);
+}

  /**
   * Delete all entries in a hash table, but don't delete the table itself.
diff --git a/src/mesa/main/hash.h b/src/mesa/main/hash.h
index da3b997..52a6c5d 100644
--- a/src/mesa/main/hash.h
+++ b/src/mesa/main/hash.h
@@ -54,6 +54,8 @@ extern void *_mesa_HashLookupLocked(struct _mesa_HashTable 
*table, GLuint key);
  extern void _mesa_HashInsertLocked(struct _mesa_HashTable *table,
 GLuint key, void *data);

+extern void _mesa_HashRemoveLocked(struct _mesa_HashTable *table, GLuint key);
+
  extern void
  _mesa_HashDeleteAll(struct _mesa_HashTable *table,
  void (*callback)(GLuint key, void *data, void *userData),



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/13] mesa: Remove unnecessary locking from container objects.

2015-08-07 Thread Brian Paul

On 08/06/2015 06:11 PM, Matt Turner wrote:

"Container objects" like vertex array objects, framebuffer objects, and
pipeline objects are not shared between contexts, so they require no
locking.


Framebuffer/renderbuffer objects _are_ shared among contexts.  Note that 
_mesa_lookup_framebuffer() accesses the ctx->Shared->FrameBuffers hash 
table.


Though, I recall something about EXT_fbo supporting sharing but not 
ARB_fbo but I'd have to read the specs to see for sure.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/13] mesa: Locking improvements and optimizations

2015-08-07 Thread Brian Paul

On 08/06/2015 06:10 PM, Matt Turner wrote:

Patches 1-11 improve performance of SynMark OglBatch7 by 6.29586% +/- 0.277734%
(n=337) and OglMultithread by 1.12564% +/- 0.424038% (n=209). I haven't
benchmarked individual patches because I'd like to not waste all that time if I
get review feedback that requires me to change things. :)

Patches 12-13 were supposed to improve performance, but seem to make an
existing thread-safety problem worse, so I'm not proposing them for inclusion.


[01/13] c11/threads: Assert that mtx is non-NULL and check
[02/13] mesa: Remove debugging code from _mesa_reference_*.

[03/13] mesa: Add locking to sampler objects.
[04/13] mesa: Add locking to programs.

These two add missing locks to sampler and program objects, which
I believe are supposed to be thread-safe.

[05/13] mesa: Replace buffer object locks with atomic inc/dec.
[06/13] mesa: Replace sampler object locks with atomic inc/dec.
[07/13] mesa: Replace program locks with atomic inc/dec.
[08/13] mesa: Replace renderbuffer object locks with atomic inc/dec.
[09/13] mesa: Replace texture buffer object locks with atomic inc/dec.

These five replace locks around RefCount++/-- with atomic increment
and decrement.

[10/13] hash: Add _mesa_HashRemoveLocked() function.
[11/13] mesa: Replace uses of Shared->Mutex with hash-table mutexes

These two replace uses of ctx->Shared->Mutex with the mutexes in the
hash tables.


1-13 look good to me, just a minor nit on #10.

I'd be OK w/ squashing 3 & 6, and 4 & 7, but not a big deal.

For the series,
Reviewed-by: Brian Paul 




[12/13] mesa: Remove unnecessary locking from container
[13/13] mesa: Remove deleteFlag pattern.

*I am not proposing these for inclusion*

These two remove some "unnecessary" locking from so called "container
objects" that are not shared between threads by the GL. While I expected
them to improve performance, they actually cause double-free errors in
SynMark OglMultithread. Valgrind's helgrind tool shows that there are many
thread-safety issues in the texture code, and removing these locks seems to
exacerbate the problem.

Specifically, multiple threads are reading and writing to gl_texture_objects
without any synchronization from places like intel_finalize_mipmap_tree(),
gen7_update_texture_surface(), brw_populate_sampler_prog_key_data(),
update_sampler_state(), and _mesa_BindTexture().

Suggestions for solving this (apparently quite longstanding) problem are
welcome.


gl_texture_object appears all over the place.  Adding locking everywhere 
could be a nightmare but I'm not sure what other solution there is off-hand.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] tnl: Maintain the _WindowMap matrix in TNLcontext v2.

2015-08-07 Thread Brian Paul

On 08/07/2015 08:28 AM, Ilia Mirkin wrote:

On Fri, Aug 7, 2015 at 2:46 AM, Mathias Fröhlich
<mathias.froehl...@gmx.net> wrote:

__

Hi,

On Thursday, August 06, 2015 12:32:18 Ilia Mirkin wrote:

> > @@ -182,6 +187,13 @@ _tnl_InvalidateState( struct gl_context *ctx, 
GLuint new_state )

> >   }

> >}

> > }

> > +

> > +   if (new_state & (_NEW_VIEWPORT | _NEW_BUFFERS)) {

> > +  double scale[3], translate[3];

> > +  _mesa_get_viewport_xform(ctx, 0, scale, translate);

> > +  _math_matrix_viewport(&tnl->_WindowMap, scale, translate,

> > +ctx->DrawBuffer->_DepthMaxF);

>

> This appears to crash nouveau_vieux on startup. See

> https://bugs.freedesktop.org/show_bug.cgi?id=91570 . I suspect that

> ctx->DrawBuffer is null. What's the proper way to handle that

> situation -- predicate on a drawbuffer being bound? Or make sure that

> something is bound to the drawbuffer before we call tnl_wakeup?

>

It looks like nouveau_vieux uses _tnl_InvalidateState for early
initialization.

I assume that _mesa_make_current is called later on to set the
DrawBuffer

and there the _NEW_BUFFERS flag is set. So, when
_tnl_InvalidateState is called

while validating state before the first draw and past a bound draw
buffer, we should

get the desired update on the _WindowMap matrix.

So just checking for the presence of the DrawBuffer before grabbing data

from it seems to be safe.

The aim of the change was to move the _WindowMap matrix out of the
non tnl drivers

and track it in the tnl module where it is exclusively used today.

At the higher mesa level we did/do not invalidate DrawBuffer related
state that early.

One alternative I can see is to call _tnl_InvalidateState without
the _NEW_VIEWPORT and

_NEW_BUFFERS flag set instead of _tnl_wakeup in the two nouveau
early initialization

code paths in nouveau_swtnl_t.c and nv04_render.c. That should also
bring us back to

the original behavior.


That seems like a bit of a hack. If you think it's OK, I'm just going to
introduce the DrawBuffer check. Brian, does that seem OK to you? I'm
mostly unfamiliar with the TnL module, its APIs, and when/how they're
supposed to be used.


I haven't looked at the tnl validation code in ages.  Off-hand, I'd say 
adding the null ctx->DrawBuffer check in _tnl_InvalidateState() is 
probably the simplest/best thing to do.


If at a later point ctx->DrawBuffer becomes non-null we should get a 
_NEW_BUFFERS signal and revalidate the matrix anyway.


I guess one other possibility is something like:

if (ctx->DrawBuffer)
   depthMaxF = ctx->DrawBuffer->_DepthMaxF;
else
   depthMaxF = _mesa_compute_depth_max_from_bits(ctx->Visual.depthBits);

_math_matrix_viewport(..., depthMaxF);


where _mesa_compute_depth_max_from_bits() would be a redo of 
compute_depth_max().





But nouveau_vieux calls tnl_wakeup right in its
context creation pass. nv04/nv05 don't have hwtnl at all, nv10+ have it,
but it has various limitations, with which we end up using swtnl.

   -ilia


Re:


it seems like _tnl_wakeup is meant for "oh hey, I shut you down and
haven't been sending you updates, but I want to use you again, so
please update your stuff to match reality"


I believe that interpretation is correct.

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 90264] [Regression, bisected] Tooltip corruption in Chrome

2015-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=90264

--- Comment #46 from Furkan  ---
I've tested both patches (#40 and #45) and they both seem to work for me.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] tnl: Maintain the _WindowMap matrix in TNLcontext v2.

2015-08-07 Thread Mathias Fröhlich
Ilia,

On Friday, August 07, 2015 11:20:42 Ilia Mirkin wrote:
> Yeah, I tend to agree with you -- it seems like _tnl_wakeup is meant for
> "oh hey, I shut you down and haven't been sending you updates, but I want
> to use you again, so please update your stuff to match reality". Does it
> even need to be called at all? nouveau_state.c:nouveau_update_state
> unconditionally calls _tnl_InvalidateState. i965, on the other hand, the
> only other user of tnl_wakeup, calls it at draw time, for RenderMode !=
> GL_RENDER, and never calls _tnl_InvalidateState, so the tnl module is
> completely out-of-date there.
>
> So I'm actually in favor of removing it entirely. Thoughts?
You mean remove _tnl_wakeup/_tnl_InvalidateState entirely from the
nouveau initialization? That may work - I have the same
impression, but I cannot exactly foresee what we might break.

Greetings

Mathias
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] tnl: Maintain the _WindowMap matrix in TNLcontext v2.

2015-08-07 Thread Ilia Mirkin
On Fri, Aug 7, 2015 at 12:03 PM, Mathias Fröhlich  wrote:

>
>
> Ilia,
>
>
>
> On Friday, August 07, 2015 11:20:42 Ilia Mirkin wrote:
>
> > Yeah, I tend to agree with you -- it seems like _tnl_wakeup is meant for
>
> > "oh hey, I shut you down and haven't been sending you updates, but I want
>
> > to use you again, so please update your stuff to match reality". Does it
>
> > even need to be called at all? nouveau_state.c:nouveau_update_state
>
> > unconditionally calls _tnl_InvalidateState. i965, on the other hand, the
>
> > only other user of tnl_wakeup, calls it at draw time, for RenderMode !=
>
> > GL_RENDER, and never calls _tnl_InvalidateState, so the tnl module is
>
> > completely out-of-date there.
>
> >
>
> > So I'm actually in favor of removing it entirely. Thoughts?
>
> You mean remove _tnl_wakeup/_tnl_InvalidateState entirely from the
>
> nouveau initialization? That may work - I have the same
>
> impression, but I cannot exactly foresee what we might break.
>

None of the other drivers appear to do it... should be safe. I'll def test
it out before pushing, of course... I've been meaning to plug a nv1x in so
I can play with a couple of minor items. Ideally it'd switch to the i965
method, and only call tnl_InvalidateState when in swtnl mode (as well as
calling tnl_wakeup on hwtnl -> !hwtnl transitions) but... meh. Actually it
looks like SWTNL is largely unimplemented and it falls straight back to
SWRAST? That's a bit unfortunate for nv04/nv05 :(

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nir: Add a function to determine if a source is dynamically uniform

2015-08-07 Thread Neil Roberts
Adds nir_src_is_dynamically_uniform which returns true if the source
is known to be dynamically uniform. This will be used in a later patch
to add a workaround for cases that only work with dynamically uniform
sources. Note that the function is not definitive, it can return false
negatives (but not false positives). Currently it only detects
constants and uniform accesses. It could easily be extended to include
more cases.
---
 src/glsl/nir/nir.c | 29 +
 src/glsl/nir/nir.h |  1 +
 2 files changed, 30 insertions(+)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 78ff886..242f0b4 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1784,6 +1784,35 @@ nir_src_as_const_value(nir_src src)
return &load->value;
 }
 
+/**
+ * Returns true if the source is known to be dynamically uniform. Otherwise it
+ * returns false which means it may or may not be dynamically uniform but it
+ * can't be determined.
+ */
+bool
+nir_src_is_dynamically_uniform(nir_src src)
+{
+   if (!src.is_ssa)
+  return false;
+
+   /* Constants are trivially dynamically uniform */
+   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
+  return true;
+
+   /* As are uniform variables */
+   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
+  nir_intrinsic_instr *intr = 
nir_instr_as_intrinsic(src.ssa->parent_instr);
+
+  if (intr->intrinsic == nir_intrinsic_load_uniform)
+ return true;
+   }
+
+   /* XXX: this could have many more tests, such as when a sampler function is
+* called with dynamically uniform arguments.
+*/
+   return false;
+}
+
 bool
 nir_srcs_equal(nir_src src1, nir_src src2)
 {
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 9aae6d7..bb75897 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1575,6 +1575,7 @@ bool nir_foreach_dest(nir_instr *instr, 
nir_foreach_dest_cb cb, void *state);
 bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
 
 nir_const_value *nir_src_as_const_value(nir_src src);
+bool nir_src_is_dynamically_uniform(nir_src src);
 bool nir_srcs_equal(nir_src src1, nir_src src2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] i965/fs: Handle non-const sample number in interpolateAtSample

2015-08-07 Thread Neil Roberts
If a non-const sample number is given to interpolateAtSample it will
now generate an indirect send message with the sample ID similar to
how non-const sampler array indexing works. Previously non-const
values were ignored and instead it ended up using a constant 0 value.

The generator will try to determine if the sample ID is dynamically
uniform via nir_src_is_dynamically_uniform. If not it will query the
pixel interpolator in a loop, once for each possible sample number.
This is necessary because the indirect send message doesn't seem to
have a way to specify a different value for each fragment.

The range of possible sample numbers is determined using
STATE_NUM_SAMPLES. When linking the shader it will now add a reference
to this state if any dynamically non-uniform calls to
interpolateAtSample are found.

This fixes the following two Piglit tests:

arb_gpu_shader5-interpolateAtSample-nonconst
arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform

v2: Handle dynamically non-uniform sample ids.
---
 src/mesa/drivers/dri/i965/brw_eu.h |   2 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c|  34 ---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |   5 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 119 +
 src/mesa/drivers/dri/i965/brw_program.c|  54 +++
 src/mesa/drivers/dri/i965/brw_program.h|   1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   2 +
 7 files changed, 185 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 761aa0e..0ac1ad9 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
  struct brw_reg mrf,
  bool noperspective,
  unsigned mode,
- unsigned data,
+ struct brw_reg data,
  unsigned msg_length,
  unsigned response_length);
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 4d39762..25524d4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3192,26 +3192,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
  struct brw_reg mrf,
  bool noperspective,
  unsigned mode,
- unsigned data,
+ struct brw_reg data,
  unsigned msg_length,
  unsigned response_length)
 {
const struct brw_device_info *devinfo = p->devinfo;
-   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+   struct brw_inst *insn;
+   uint16_t exec_size;
 
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, mrf);
-   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
-  msg_length, response_length,
-  false /* header is never present for PI */,
-  false);
+   if (data.file == BRW_IMMEDIATE_VALUE) {
+  insn = next_insn(p, BRW_OPCODE_SEND);
+  brw_set_dest(p, insn, dest);
+  brw_set_src0(p, insn, mrf);
+  brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
+ msg_length, response_length,
+ false /* header is never present for PI */,
+ false);
+  brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud);
+   } else {
+  insn = brw_send_indirect_message(p,
+   GEN7_SFID_PIXEL_INTERPOLATOR,
+   dest,
+   mrf,
+   vec1(data));
+  brw_inst_set_mlen(devinfo, insn, msg_length);
+  brw_inst_set_rlen(devinfo, insn, response_length);
+   }
 
-   brw_inst_set_pi_simd_mode(
- devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+   exec_size = brw_inst_exec_size(devinfo, p->current);
+   brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px 
dispatch */
brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
brw_inst_set_pi_message_type(devinfo, insn, mode);
-   brw_inst_set_pi_message_data(devinfo, insn, data);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c86ca04..88dbc62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1328,15 +1328,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst 
*inst,

Re: [Mesa-dev] [PATCH 05/13] mesa: Replace buffer object locks with atomic inc/dec.

2015-08-07 Thread Ian Romanick
So... the buffer object isn't locked during a call to glBufferData?
Since that allocates the backing storage for the BO, it doesn't seem
like anything good could happen...

On 08/06/2015 05:10 PM, Matt Turner wrote:
> ---
>  src/mesa/main/bufferobj.c | 17 +++--
>  src/mesa/main/mtypes.h|  1 -
>  2 files changed, 3 insertions(+), 15 deletions(-)
> 
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index cc035ff..78af229 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -47,6 +47,7 @@
>  #include "texstore.h"
>  #include "transformfeedback.h"
>  #include "dispatch.h"
> +#include "util/u_atomic.h"
>  
>  
>  /* Debug flags */
> @@ -424,7 +425,6 @@ _mesa_delete_buffer_object(struct gl_context *ctx,
> bufObj->RefCount = -1000;
> bufObj->Name = ~0;
>  
> -   mtx_destroy(&bufObj->Mutex);
> free(bufObj->Label);
> free(bufObj);
>  }
> @@ -443,16 +443,9 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
>  {
> if (*ptr) {
>/* Unreference the old buffer */
> -  GLboolean deleteFlag = GL_FALSE;
>struct gl_buffer_object *oldObj = *ptr;
>  
> -  mtx_lock(&oldObj->Mutex);
> -  assert(oldObj->RefCount > 0);
> -  oldObj->RefCount--;
> -  deleteFlag = (oldObj->RefCount == 0);
> -  mtx_unlock(&oldObj->Mutex);
> -
> -  if (deleteFlag) {
> +  if (p_atomic_dec_zero(&oldObj->RefCount)) {
>assert(ctx->Driver.DeleteBuffer);
>   ctx->Driver.DeleteBuffer(ctx, oldObj);
>}
> @@ -463,7 +456,6 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
>  
> if (bufObj) {
>/* reference new buffer */
> -  mtx_lock(&bufObj->Mutex);
>if (bufObj->RefCount == 0) {
>   /* this buffer's being deleted (look just above) */
>   /* Not sure this can every really happen.  Warn if it does. */
> @@ -471,10 +463,9 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
>   *ptr = NULL;
>}
>else {
> - bufObj->RefCount++;
> + p_atomic_inc(&bufObj->RefCount);
>   *ptr = bufObj;
>}
> -  mtx_unlock(&bufObj->Mutex);
> }
>  }
>  
> @@ -488,7 +479,6 @@ _mesa_initialize_buffer_object(struct gl_context *ctx,
> GLuint name)
>  {
> memset(obj, 0, sizeof(struct gl_buffer_object));
> -   mtx_init(&obj->Mutex, mtx_plain);
> obj->RefCount = 1;
> obj->Name = name;
> obj->Usage = GL_STATIC_DRAW_ARB;
> @@ -806,7 +796,6 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
> GLuint i;
>  
> memset(&DummyBufferObject, 0, sizeof(DummyBufferObject));
> -   mtx_init(&DummyBufferObject.Mutex, mtx_plain);
> DummyBufferObject.RefCount = 1000*1000*1000; /* never delete */
>  
> _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 19f65ee..2adfae2 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1477,7 +1477,6 @@ typedef enum {
>   */
>  struct gl_buffer_object
>  {
> -   mtx_t Mutex;
> GLint RefCount;
> GLuint Name;
> GLchar *Label;   /**< GL_KHR_debug */
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/13] mesa: Remove unnecessary locking from container objects.

2015-08-07 Thread Fredrik Höglund
On Friday 07 August 2015, Matt Turner wrote:
> "Container objects" like vertex array objects, framebuffer objects, and
> pipeline objects are not shared between contexts, so they require no
> locking.

Unfortunately it's not quite that simple when it comes to FBO's;
EXT_framebuffer_object defines FBO's to be shared, while
ARB_framebuffer_object defines them to not be shared.  There is a
note about this in section 2.6 of the OpenGL specification.

I see three possible solutions:

1) Use separate hash tables and namespaces for EXT_fbo and
   ARB_fbo.  The spec says that passing an FBO created by an
   EXT command to an ARB command and vice-versa results in
   undefined behavior, so this is legal.  But it doesn't appear
   to match what the proprietary drivers are doing, so some
   applications may break as a result.

2) Make FBO's non-shared in core contexts, where we don't expose
   EXT_fbo.

3) Keep doing what we're doing now, i.e. allow all FBO's to be shared.

> ---
>  src/mesa/main/arrayobj.c| 6 --
>  src/mesa/main/fbobject.c| 9 -
>  src/mesa/main/framebuffer.c | 9 -
>  src/mesa/main/mtypes.h  | 5 -
>  src/mesa/main/pipelineobj.c | 6 --
>  src/mesa/main/shaderapi.c   | 3 ---
>  6 files changed, 38 deletions(-)
> 
> diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
> index 2885143..f581e4c 100644
> --- a/src/mesa/main/arrayobj.c
> +++ b/src/mesa/main/arrayobj.c
> @@ -174,7 +174,6 @@ _mesa_delete_vao(struct gl_context *ctx, struct 
> gl_vertex_array_object *obj)
>  {
> unbind_array_object_vbos(ctx, obj);
> _mesa_reference_buffer_object(ctx, &obj->IndexBufferObj, NULL);
> -   mtx_destroy(&obj->Mutex);
> free(obj->Label);
> free(obj);
>  }
> @@ -197,11 +196,9 @@ _mesa_reference_vao_(struct gl_context *ctx,
>GLboolean deleteFlag = GL_FALSE;
>struct gl_vertex_array_object *oldObj = *ptr;
>  
> -  mtx_lock(&oldObj->Mutex);
>assert(oldObj->RefCount > 0);
>oldObj->RefCount--;
>deleteFlag = (oldObj->RefCount == 0);
> -  mtx_unlock(&oldObj->Mutex);
>  
>if (deleteFlag) {
>assert(ctx->Driver.DeleteArrayObject);
> @@ -214,7 +211,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
>  
> if (vao) {
>/* reference new array object */
> -  mtx_lock(&vao->Mutex);
>if (vao->RefCount == 0) {
>   /* this array's being deleted (look just above) */
>   /* Not sure this can every really happen.  Warn if it does. */
> @@ -225,7 +221,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
>   vao->RefCount++;
>   *ptr = vao;
>}
> -  mtx_unlock(&vao->Mutex);
> }
>  }
>  
> @@ -274,7 +269,6 @@ _mesa_initialize_vao(struct gl_context *ctx,
>  
> obj->Name = name;
>  
> -   mtx_init(&obj->Mutex, mtx_plain);
> obj->RefCount = 1;
>  
> /* Init the individual arrays */
> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
> index 6c6de2f..6bc8204 100644
> --- a/src/mesa/main/fbobject.c
> +++ b/src/mesa/main/fbobject.c
> @@ -89,8 +89,6 @@ delete_dummy_framebuffer(struct gl_framebuffer *fb)
>  void
>  _mesa_init_fbobjects(struct gl_context *ctx)
>  {
> -   mtx_init(&DummyFramebuffer.Mutex, mtx_plain);
> -   mtx_init(&IncompleteFramebuffer.Mutex, mtx_plain);
> DummyFramebuffer.Delete = delete_dummy_framebuffer;
> DummyRenderbuffer.Delete = delete_dummy_renderbuffer;
> IncompleteFramebuffer.Delete = delete_dummy_framebuffer;
> @@ -527,8 +525,6 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
>  {
> struct gl_renderbuffer_attachment *att;
>  
> -   mtx_lock(&fb->Mutex);
> -
> att = get_attachment(ctx, fb, attachment);
> assert(att);
> if (rb) {
> @@ -552,8 +548,6 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
> }
>  
> invalidate_framebuffer(fb);
> -
> -   mtx_unlock(&fb->Mutex);
>  }
>  
>  
> @@ -3084,7 +3078,6 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
> struct gl_framebuffer *fb,
>  
> FLUSH_VERTICES(ctx, _NEW_BUFFERS);
>  
> -   mtx_lock(&fb->Mutex);
> if (texObj) {
>if (attachment == GL_DEPTH_ATTACHMENT &&
>texObj == fb->Attachment[BUFFER_STENCIL].Texture &&
> @@ -3142,8 +3135,6 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
> struct gl_framebuffer *fb,
> }
>  
> invalidate_framebuffer(fb);
> -
> -   mtx_unlock(&fb->Mutex);
>  }
>  
>  
> diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
> index 37e2c29..475b01b 100644
> --- a/src/mesa/main/framebuffer.c
> +++ b/src/mesa/main/framebuffer.c
> @@ -130,8 +130,6 @@ _mesa_initialize_window_framebuffer(struct gl_framebuffer 
> *fb,
>  
> memset(fb, 0, sizeof(struct gl_framebuffer));
>  
> -   mtx_init(&fb->Mutex, mtx_plain);
> -
> fb->RefCount = 1;
>  
> /* save the visual */
> @@ -183,7 +181,6 @@ _mesa_initialize_user_framebuffer(struct gl_framebuffer 
> *fb, GLuint name)
> fb->ColorReadBuffer = GL_COLOR_ATTACHMENT0_EXT;
> 

Re: [Mesa-dev] [PATCH 12/13] mesa: Remove unnecessary locking from container objects.

2015-08-07 Thread Ian Romanick
On 08/06/2015 05:11 PM, Matt Turner wrote:
> "Container objects" like vertex array objects, framebuffer objects, and
> pipeline objects are not shared between contexts, so they require no
> locking.

Yes and no.  FBOs from EXT_framebuffer_object and VAOs from
APPLE_vertex_array_object are shared between contexts.  I don't know of
any applications that use APPLE_vao, so perhaps we could drop support
for it?

> ---
>  src/mesa/main/arrayobj.c| 6 --
>  src/mesa/main/fbobject.c| 9 -
>  src/mesa/main/framebuffer.c | 9 -
>  src/mesa/main/mtypes.h  | 5 -
>  src/mesa/main/pipelineobj.c | 6 --
>  src/mesa/main/shaderapi.c   | 3 ---
>  6 files changed, 38 deletions(-)
> 
> diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
> index 2885143..f581e4c 100644
> --- a/src/mesa/main/arrayobj.c
> +++ b/src/mesa/main/arrayobj.c
> @@ -174,7 +174,6 @@ _mesa_delete_vao(struct gl_context *ctx, struct 
> gl_vertex_array_object *obj)
>  {
> unbind_array_object_vbos(ctx, obj);
> _mesa_reference_buffer_object(ctx, &obj->IndexBufferObj, NULL);
> -   mtx_destroy(&obj->Mutex);
> free(obj->Label);
> free(obj);
>  }
> @@ -197,11 +196,9 @@ _mesa_reference_vao_(struct gl_context *ctx,
>GLboolean deleteFlag = GL_FALSE;
>struct gl_vertex_array_object *oldObj = *ptr;
>  
> -  mtx_lock(&oldObj->Mutex);
>assert(oldObj->RefCount > 0);
>oldObj->RefCount--;
>deleteFlag = (oldObj->RefCount == 0);
> -  mtx_unlock(&oldObj->Mutex);
>  
>if (deleteFlag) {
>assert(ctx->Driver.DeleteArrayObject);
> @@ -214,7 +211,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
>  
> if (vao) {
>/* reference new array object */
> -  mtx_lock(&vao->Mutex);
>if (vao->RefCount == 0) {
>   /* this array's being deleted (look just above) */
>   /* Not sure this can every really happen.  Warn if it does. */
> @@ -225,7 +221,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
>   vao->RefCount++;
>   *ptr = vao;
>}
> -  mtx_unlock(&vao->Mutex);
> }
>  }
>  
> @@ -274,7 +269,6 @@ _mesa_initialize_vao(struct gl_context *ctx,
>  
> obj->Name = name;
>  
> -   mtx_init(&obj->Mutex, mtx_plain);
> obj->RefCount = 1;
>  
> /* Init the individual arrays */
> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
> index 6c6de2f..6bc8204 100644
> --- a/src/mesa/main/fbobject.c
> +++ b/src/mesa/main/fbobject.c
> @@ -89,8 +89,6 @@ delete_dummy_framebuffer(struct gl_framebuffer *fb)
>  void
>  _mesa_init_fbobjects(struct gl_context *ctx)
>  {
> -   mtx_init(&DummyFramebuffer.Mutex, mtx_plain);
> -   mtx_init(&IncompleteFramebuffer.Mutex, mtx_plain);
> DummyFramebuffer.Delete = delete_dummy_framebuffer;
> DummyRenderbuffer.Delete = delete_dummy_renderbuffer;
> IncompleteFramebuffer.Delete = delete_dummy_framebuffer;
> @@ -527,8 +525,6 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
>  {
> struct gl_renderbuffer_attachment *att;
>  
> -   mtx_lock(&fb->Mutex);
> -
> att = get_attachment(ctx, fb, attachment);
> assert(att);
> if (rb) {
> @@ -552,8 +548,6 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
> }
>  
> invalidate_framebuffer(fb);
> -
> -   mtx_unlock(&fb->Mutex);
>  }
>  
>  
> @@ -3084,7 +3078,6 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
> struct gl_framebuffer *fb,
>  
> FLUSH_VERTICES(ctx, _NEW_BUFFERS);
>  
> -   mtx_lock(&fb->Mutex);
> if (texObj) {
>if (attachment == GL_DEPTH_ATTACHMENT &&
>texObj == fb->Attachment[BUFFER_STENCIL].Texture &&
> @@ -3142,8 +3135,6 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
> struct gl_framebuffer *fb,
> }
>  
> invalidate_framebuffer(fb);
> -
> -   mtx_unlock(&fb->Mutex);
>  }
>  
>  
> diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
> index 37e2c29..475b01b 100644
> --- a/src/mesa/main/framebuffer.c
> +++ b/src/mesa/main/framebuffer.c
> @@ -130,8 +130,6 @@ _mesa_initialize_window_framebuffer(struct gl_framebuffer 
> *fb,
>  
> memset(fb, 0, sizeof(struct gl_framebuffer));
>  
> -   mtx_init(&fb->Mutex, mtx_plain);
> -
> fb->RefCount = 1;
>  
> /* save the visual */
> @@ -183,7 +181,6 @@ _mesa_initialize_user_framebuffer(struct gl_framebuffer 
> *fb, GLuint name)
> fb->ColorReadBuffer = GL_COLOR_ATTACHMENT0_EXT;
> fb->_ColorReadBufferIndex = BUFFER_COLOR0;
> fb->Delete = _mesa_destroy_framebuffer;
> -   mtx_init(&fb->Mutex, mtx_plain);
>  }
>  
>  
> @@ -214,8 +211,6 @@ _mesa_free_framebuffer_data(struct gl_framebuffer *fb)
> assert(fb);
> assert(fb->RefCount == 0);
>  
> -   mtx_destroy(&fb->Mutex);
> -
> for (i = 0; i < BUFFER_COUNT; i++) {
>struct gl_renderbuffer_attachment *att = &fb->Attachment[i];
>if (att->Renderbuffer) {
> @@ -245,11 +240,9 @@ _mesa_reference_framebuffer_(struct gl_framebuffer **ptr,
>GLboolean deleteF

Re: [Mesa-dev] [PATCH 12/13] mesa: Remove unnecessary locking from container objects.

2015-08-07 Thread Ian Romanick
On 08/07/2015 09:50 AM, Fredrik Höglund wrote:
> On Friday 07 August 2015, Matt Turner wrote:
>> "Container objects" like vertex array objects, framebuffer objects, and
>> pipeline objects are not shared between contexts, so they require no
>> locking.
> 
> Unfortunately it's not quite that simple when it comes to FBO's;
> EXT_framebuffer_object defines FBO's to be shared, while
> ARB_framebuffer_object defines them to not be shared.  There is a
> note about this in section 2.6 of the OpenGL specification.
> 
> I see three possible solutions:
> 
> 1) Use separate hash tables and namespaces for EXT_fbo and
>ARB_fbo.  The spec says that passing an FBO created by an
>EXT command to an ARB command and vice-versa results in
>undefined behavior, so this is legal.  But it doesn't appear
>to match what the proprietary drivers are doing, so some
>applications may break as a result.

I seem to recall encountering some Valve games (maybe TF2?) that were
mixing ARB and EXT functions.  It seems like they were using EXT for
most things, but using some functions that were added by the ARB
extension.  It was quite a long time ago, so my memory is pretty fuzzy.

> 2) Make FBO's non-shared in core contexts, where we don't expose
>EXT_fbo.

That's an easy compromise that I hadn't considered.  I like it.

> 3) Keep doing what we're doing now, i.e. allow all FBO's to be shared.
> 
>> ---
>>  src/mesa/main/arrayobj.c| 6 --
>>  src/mesa/main/fbobject.c| 9 -
>>  src/mesa/main/framebuffer.c | 9 -
>>  src/mesa/main/mtypes.h  | 5 -
>>  src/mesa/main/pipelineobj.c | 6 --
>>  src/mesa/main/shaderapi.c   | 3 ---
>>  6 files changed, 38 deletions(-)
>>
>> diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
>> index 2885143..f581e4c 100644
>> --- a/src/mesa/main/arrayobj.c
>> +++ b/src/mesa/main/arrayobj.c
>> @@ -174,7 +174,6 @@ _mesa_delete_vao(struct gl_context *ctx, struct 
>> gl_vertex_array_object *obj)
>>  {
>> unbind_array_object_vbos(ctx, obj);
>> _mesa_reference_buffer_object(ctx, &obj->IndexBufferObj, NULL);
>> -   mtx_destroy(&obj->Mutex);
>> free(obj->Label);
>> free(obj);
>>  }
>> @@ -197,11 +196,9 @@ _mesa_reference_vao_(struct gl_context *ctx,
>>GLboolean deleteFlag = GL_FALSE;
>>struct gl_vertex_array_object *oldObj = *ptr;
>>  
>> -  mtx_lock(&oldObj->Mutex);
>>assert(oldObj->RefCount > 0);
>>oldObj->RefCount--;
>>deleteFlag = (oldObj->RefCount == 0);
>> -  mtx_unlock(&oldObj->Mutex);
>>  
>>if (deleteFlag) {
>>   assert(ctx->Driver.DeleteArrayObject);
>> @@ -214,7 +211,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
>>  
>> if (vao) {
>>/* reference new array object */
>> -  mtx_lock(&vao->Mutex);
>>if (vao->RefCount == 0) {
>>   /* this array's being deleted (look just above) */
>>   /* Not sure this can every really happen.  Warn if it does. */
>> @@ -225,7 +221,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
>>   vao->RefCount++;
>>   *ptr = vao;
>>}
>> -  mtx_unlock(&vao->Mutex);
>> }
>>  }
>>  
>> @@ -274,7 +269,6 @@ _mesa_initialize_vao(struct gl_context *ctx,
>>  
>> obj->Name = name;
>>  
>> -   mtx_init(&obj->Mutex, mtx_plain);
>> obj->RefCount = 1;
>>  
>> /* Init the individual arrays */
>> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
>> index 6c6de2f..6bc8204 100644
>> --- a/src/mesa/main/fbobject.c
>> +++ b/src/mesa/main/fbobject.c
>> @@ -89,8 +89,6 @@ delete_dummy_framebuffer(struct gl_framebuffer *fb)
>>  void
>>  _mesa_init_fbobjects(struct gl_context *ctx)
>>  {
>> -   mtx_init(&DummyFramebuffer.Mutex, mtx_plain);
>> -   mtx_init(&IncompleteFramebuffer.Mutex, mtx_plain);
>> DummyFramebuffer.Delete = delete_dummy_framebuffer;
>> DummyRenderbuffer.Delete = delete_dummy_renderbuffer;
>> IncompleteFramebuffer.Delete = delete_dummy_framebuffer;
>> @@ -527,8 +525,6 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
>>  {
>> struct gl_renderbuffer_attachment *att;
>>  
>> -   mtx_lock(&fb->Mutex);
>> -
>> att = get_attachment(ctx, fb, attachment);
>> assert(att);
>> if (rb) {
>> @@ -552,8 +548,6 @@ _mesa_FramebufferRenderbuffer_sw(struct gl_context *ctx,
>> }
>>  
>> invalidate_framebuffer(fb);
>> -
>> -   mtx_unlock(&fb->Mutex);
>>  }
>>  
>>  
>> @@ -3084,7 +3078,6 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
>> struct gl_framebuffer *fb,
>>  
>> FLUSH_VERTICES(ctx, _NEW_BUFFERS);
>>  
>> -   mtx_lock(&fb->Mutex);
>> if (texObj) {
>>if (attachment == GL_DEPTH_ATTACHMENT &&
>>texObj == fb->Attachment[BUFFER_STENCIL].Texture &&
>> @@ -3142,8 +3135,6 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
>> struct gl_framebuffer *fb,
>> }
>>  
>> invalidate_framebuffer(fb);
>> -
>> -   mtx_unlock(&fb->Mutex);
>>  }
>>  
>>  
>> diff --git a/src/mesa/main/framebuffer.c

Re: [Mesa-dev] [PATCH v4 (part2) 29/59] nir: Implement __intrinsic_store_ssbo

2015-08-07 Thread Connor Abbott
On Fri, Aug 7, 2015 at 1:15 AM, Iago Toral  wrote:
> On Fri, 2015-08-07 at 07:43 +0200, Iago Toral wrote:
>> On Thu, 2015-08-06 at 11:06 -0700, Connor Abbott wrote:
>> > On Thu, Aug 6, 2015 at 12:30 AM, Iago Toral  wrote:
>> > > On Wed, 2015-08-05 at 12:17 -0700, Connor Abbott wrote:
>> > >> On Wed, Aug 5, 2015 at 1:30 AM, Iago Toral Quiroga  
>> > >> wrote:
>> > >> > ---
>> > >> >  src/glsl/nir/glsl_to_nir.cpp  | 36 
>> > >> > 
>> > >> >  src/glsl/nir/nir_intrinsics.h | 12 ++--
>> > >> >  2 files changed, 42 insertions(+), 6 deletions(-)
>> > >> >
>> > >> > diff --git a/src/glsl/nir/glsl_to_nir.cpp 
>> > >> > b/src/glsl/nir/glsl_to_nir.cpp
>> > >> > index 642affd..cbec2df 100644
>> > >> > --- a/src/glsl/nir/glsl_to_nir.cpp
>> > >> > +++ b/src/glsl/nir/glsl_to_nir.cpp
>> > >> > @@ -641,6 +641,8 @@ nir_visitor::visit(ir_call *ir)
>> > >> >   op = nir_intrinsic_image_atomic_comp_swap;
>> > >> >} else if (strcmp(ir->callee_name(), 
>> > >> > "__intrinsic_memory_barrier") == 0) {
>> > >> >   op = nir_intrinsic_memory_barrier;
>> > >> > +  } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") 
>> > >> > == 0) {
>> > >> > + op = nir_intrinsic_store_ssbo;
>> > >> >} else {
>> > >> >   unreachable("not reached");
>> > >> >}
>> > >> > @@ -730,6 +732,40 @@ nir_visitor::visit(ir_call *ir)
>> > >> >}
>> > >> >case nir_intrinsic_memory_barrier:
>> > >> >   break;
>> > >> > +  case nir_intrinsic_store_ssbo: {
>> > >> > + exec_node *param = ir->actual_parameters.get_head();
>> > >> > + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
>> > >> > +
>> > >> > + param = param->get_next();
>> > >> > + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
>> > >> > +
>> > >> > + param = param->get_next();
>> > >> > + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
>> > >> > +
>> > >> > + param = param->get_next();
>> > >> > + ir_constant *write_mask = ((ir_instruction 
>> > >> > *)param)->as_constant();
>> > >> > + assert(write_mask);
>> > >> > +
>> > >> > + /* Check if we need the indirect version */
>> > >> > + ir_constant *const_offset = offset->as_constant();
>> > >> > + if (!const_offset) {
>> > >> > +op = nir_intrinsic_store_ssbo_indirect;
>> > >> > +ralloc_free(instr);
>> > >> > +instr = nir_intrinsic_instr_create(shader, op);
>> > >> > +instr->src[2] = evaluate_rvalue(offset);
>> > >> > +instr->const_index[0] = 0;
>> > >> > + } else {
>> > >> > +instr->const_index[0] = const_offset->value.u[0];
>> > >> > + }
>> > >> > +
>> > >> > + instr->const_index[1] = write_mask->value.u[0];
>> > >> > +
>> > >> > + instr->src[0] = evaluate_rvalue(val);
>> > >> > + instr->num_components = val->type->vector_elements;
>> > >> > +
>> > >> > + instr->src[1] = evaluate_rvalue(block);
>> > >> > + break;
>> > >> > +  }
>> > >> >default:
>> > >> >   unreachable("not reached");
>> > >> >}
>> > >> > diff --git a/src/glsl/nir/nir_intrinsics.h 
>> > >> > b/src/glsl/nir/nir_intrinsics.h
>> > >> > index f264f55..83eeecd 100644
>> > >> > --- a/src/glsl/nir/nir_intrinsics.h
>> > >> > +++ b/src/glsl/nir/nir_intrinsics.h
>> > >> > @@ -176,12 +176,12 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | 
>> > >> > NIR_INTRINSIC_CAN_REORDER)
>> > >> >   * offset.
>> > >> >   */
>> > >> >
>> > >> > -#define STORE(name, num_indices, flags) \
>> > >> > -   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, 
>> > >> > flags) \
>> > >> > -   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
>> > >> > +#define STORE(name, extra_srcs, num_indices, flags) \
>> > >> > +   INTRINSIC(store_##name, extra_srcs, ARR(0, 1), false, 0, 0, 
>> > >> > num_indices, flags) \
>> > >> > +   INTRINSIC(store_##name##_indirect, extra_srcs + 1, ARR(0, 1, 1), 
>> > >> > false, 0, 0, \
>> > >> >   num_indices, flags) \
>> > >> >
>> > >> > -STORE(output, 1, 0)
>> > >> > -/* STORE(ssbo, 2, 0) */
>> > >> > +STORE(output, 1, 2, 0)
>> > >> > +STORE(ssbo, 2, 2, 0)
>> > >>
>> > >> I don't think outputs should have any extra sources, since they only
>> > >> take a constant index, plus possibly an indirect source that's already
>> > >> covered by the STORE macro. SSBO stores should only have one extra
>> > >> source for the block index. Also, we should update the comment above
>> > >> to explain this similarly to the paragraph above the loads.
>> > >
>> > > SSBO stores need an extra source for the block index and an extra index
>> > > for a writemask.
>> > >
>> > > I'll leave the STORE() macro as it was and just define SSBO stores using
>> > > INTRINSIC() directly then.
>> >
>> > Ok, I see. I don't think you need a separate INTRINSIC(), but right
>> >

Re: [Mesa-dev] [PATCH 06/13] mesa: Replace sampler object locks with atomic inc/dec.

2015-08-07 Thread Ian Romanick
I know we've talked about this about 100 times, but something in the
back of my mind tells me that we have a pre-existing race.  What happens
if the p_atomic_dec_zero happens on thread A while thread B is between
the _mesa_lookup_renderbuffer call and the _mesa_reference_renderbuffer
call on the same object?  Won't thread A free the memory out from under
thread B?

On 08/06/2015 05:10 PM, Matt Turner wrote:
> ---
>  src/mesa/main/mtypes.h |  1 -
>  src/mesa/main/samplerobj.c | 16 +++-
>  2 files changed, 3 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 2adfae2..fcc527f 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1153,7 +1153,6 @@ typedef enum
>   */
>  struct gl_sampler_object
>  {
> -   mtx_t Mutex;
> GLuint Name;
> GLint RefCount;
> GLchar *Label;   /**< GL_KHR_debug */
> diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
> index c7b9666..dba2087 100644
> --- a/src/mesa/main/samplerobj.c
> +++ b/src/mesa/main/samplerobj.c
> @@ -38,6 +38,7 @@
>  #include "main/macros.h"
>  #include "main/mtypes.h"
>  #include "main/samplerobj.h"
> +#include "util/u_atomic.h"
>  
>  
>  struct gl_sampler_object *
> @@ -85,16 +86,9 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
>  
> if (*ptr) {
>/* Unreference the old sampler */
> -  GLboolean deleteFlag = GL_FALSE;
>struct gl_sampler_object *oldSamp = *ptr;
>  
> -  mtx_lock(&oldSamp->Mutex);
> -  assert(oldSamp->RefCount > 0);
> -  oldSamp->RefCount--;
> -  deleteFlag = (oldSamp->RefCount == 0);
> -  mtx_unlock(&oldSamp->Mutex);
> -
> -  if (deleteFlag) {
> +  if (p_atomic_dec_zero(&oldSamp->RefCount)) {
>assert(ctx->Driver.DeleteSamplerObject);
>   ctx->Driver.DeleteSamplerObject(ctx, oldSamp);
>}
> @@ -105,7 +99,6 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
>  
> if (samp) {
>/* reference new sampler */
> -  mtx_lock(&samp->Mutex);
>if (samp->RefCount == 0) {
>   /* this sampler's being deleted (look just above) */
>   /* Not sure this can every really happen.  Warn if it does. */
> @@ -113,10 +106,9 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
>   *ptr = NULL;
>}
>else {
> - samp->RefCount++;
> + p_atomic_inc(&samp->RefCount);
>   *ptr = samp;
>}
> -  mtx_unlock(&samp->Mutex);
> }
>  }
>  
> @@ -127,7 +119,6 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
>  static void
>  _mesa_init_sampler_object(struct gl_sampler_object *sampObj, GLuint name)
>  {
> -   mtx_init(&sampObj->Mutex, mtx_plain);
> sampObj->Name = name;
> sampObj->RefCount = 1;
> sampObj->WrapS = GL_REPEAT;
> @@ -170,7 +161,6 @@ static void
>  _mesa_delete_sampler_object(struct gl_context *ctx,
>  struct gl_sampler_object *sampObj)
>  {
> -   mtx_destroy(&sampObj->Mutex);
> free(sampObj->Label);
> free(sampObj);
>  }
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/13] hash: Add _mesa_HashRemoveLocked() function.

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 1:57 AM, Timothy Arceri  wrote:
> On Thu, 2015-08-06 at 17:11 -0700, Matt Turner wrote:
>> ---
>>  src/mesa/main/hash.c | 19 +++
>>  src/mesa/main/hash.h |  2 ++
>>  2 files changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
>> index 315b5d6..aa1c6a1 100644
>> --- a/src/mesa/main/hash.c
>> +++ b/src/mesa/main/hash.c
>> @@ -328,8 +328,8 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint
>> key, void *data)
>>   * While holding the hash table's lock, searches the entry with the
>> matching
>>   * key and unlinks it.
>>   */
>> -void
>> -_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
>> +static inline void
>> +_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key)
>
> Any reason why this isnt just _mesa_HashRemoveUnlocked to match the naming of
> locked?

Yes -- it's static. Also because the convention is that the unsuffixed name
will perform locking and Locked will not.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/13] hash: Add _mesa_HashRemoveLocked() function.

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 8:43 AM, Brian Paul  wrote:
> On 08/06/2015 06:11 PM, Matt Turner wrote:
>>
>> ---
>>   src/mesa/main/hash.c | 19 +++
>>   src/mesa/main/hash.h |  2 ++
>>   2 files changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
>> index 315b5d6..aa1c6a1 100644
>> --- a/src/mesa/main/hash.c
>> +++ b/src/mesa/main/hash.c
>> @@ -328,8 +328,8 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint
>> key, void *data)
>>* While holding the hash table's lock, searches the entry with the
>> matching
>>* key and unlinks it.
>>*/
>> -void
>> -_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
>> +static inline void
>> +_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key)
>
>
> You can drop the _mesa_ prefix on static functions.

Right, I think I named it that because existing static functions in
that file use _mesa_:

_mesa_HashInsert_unlocked
_mesa_HashLookup_unlocked

I can change them all if you like.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/13] mesa: Replace uses of Shared->Mutex with hash-table mutexes

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 2:09 AM, Timothy Arceri  wrote:
> On Thu, 2015-08-06 at 17:11 -0700, Matt Turner wrote:
>> We were locking the Shared->Mutex and then calling functions like
>> _mesa_HashInsert that do additional per-hash-table locking internally.
>>
>> Instead just lock each hash-table's mutex and use functions like
>> _mesa_HashInsertLocked and the new _mesa_HashRemoveLocked.
>>
>> In order to do this, we need to remove the locking from
>> _mesa_HashFindFreeKeyBlock since it will always be called with the
>> per-hash-table lock taken.
>> ---
>>  src/mesa/main/arbprogram.c|  7 ++-
>>  src/mesa/main/atifragshader.c |  6 +-
>>  src/mesa/main/bufferobj.c | 26 ++
>>  src/mesa/main/dlist.c |  8 
>>  src/mesa/main/fbobject.c  | 23 ++-
>>  src/mesa/main/hash.c  |  4 
>>  src/mesa/main/samplerobj.c| 23 ++-
>>  src/mesa/main/shaderapi.c | 10 --
>>  src/mesa/main/texobj.c| 14 +-
>>  9 files changed, 74 insertions(+), 47 deletions(-)
>>
>> diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c
>> index f474951..3f7acda 100644
>> --- a/src/mesa/main/arbprogram.c
>> +++ b/src/mesa/main/arbprogram.c
>> @@ -200,13 +200,18 @@ _mesa_GenProgramsARB(GLsizei n, GLuint *ids)
>> if (!ids)
>>return;
>>
>> +   _mesa_HashLockMutex(ctx->Shared->Programs);
>> +
>> first = _mesa_HashFindFreeKeyBlock(ctx->Shared->Programs, n);
>>
>> /* Insert pointer to dummy program as placeholder */
>> for (i = 0; i < (GLuint) n; i++) {
>> -  _mesa_HashInsert(ctx->Shared->Programs, first + i,
>> &_mesa_DummyProgram);
>> +  _mesa_HashInsertLocked(ctx->Shared->Programs, first + i,
>> + &_mesa_DummyProgram);
>> }
>>
>> +   _mesa_HashUnlockMutex(ctx->Shared->Programs);
>> +
>> /* Return the program names */
>> for (i = 0; i < (GLuint) n; i++) {
>>ids[i] = first + i;
>> diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c
>> index 935ba05..9dd4e21 100644
>> --- a/src/mesa/main/atifragshader.c
>> +++ b/src/mesa/main/atifragshader.c
>> @@ -199,11 +199,15 @@ _mesa_GenFragmentShadersATI(GLuint range)
>>return 0;
>> }
>>
>> +   _mesa_HashLockMutex(ctx->Shared->ATIShaders);
>> +
>> first = _mesa_HashFindFreeKeyBlock(ctx->Shared->ATIShaders, range);
>> for (i = 0; i < range; i++) {
>> -  _mesa_HashInsert(ctx->Shared->ATIShaders, first + i, &DummyShader);
>> +  _mesa_HashInsertLocked(ctx->Shared->ATIShaders, first + i,
>> &DummyShader);
>> }
>>
>> +   _mesa_HashUnlockMutex(ctx->Shared->ATIShaders);
>> +
>> return first;
>>  }
>>
>> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
>> index 78af229..6a995e7 100644
>> --- a/src/mesa/main/bufferobj.c
>> +++ b/src/mesa/main/bufferobj.c
>> @@ -994,8 +994,11 @@ _mesa_lookup_bufferobj(struct gl_context *ctx, GLuint
>> buffer)
>>  struct gl_buffer_object *
>>  _mesa_lookup_bufferobj_locked(struct gl_context *ctx, GLuint buffer)
>>  {
>> -   return (struct gl_buffer_object *)
>> -  _mesa_HashLookupLocked(ctx->Shared->BufferObjects, buffer);
>> +   if (buffer == 0)
>> +  return NULL;
>> +   else
>> +  return (struct gl_buffer_object *)
>> + _mesa_HashLookupLocked(ctx->Shared->BufferObjects, buffer);
>>  }
>>
>>  /**
>> @@ -1179,10 +1182,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
>>return;
>> }
>>
>> -   mtx_lock(&ctx->Shared->Mutex);
>> +   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
>>
>> for (i = 0; i < n; i++) {
>> -  struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx,
>> ids[i]);
>> +  struct gl_buffer_object *bufObj =
>> + _mesa_lookup_bufferobj_locked(ctx, ids[i]);
>>if (bufObj) {
>>   struct gl_vertex_array_object *vao = ctx->Array.VAO;
>>   GLuint j;
>> @@ -1276,7 +1280,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
>>   }
>>
>>   /* The ID is immediately freed for re-use */
>> - _mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
>> + _mesa_HashRemoveLocked(ctx->Shared->BufferObjects, ids[i]);
>>   /* Make sure we do not run into the classic ABA problem on bind.
>>* We don't want to allow re-binding a buffer object that's been
>>* "deleted" by glDeleteBuffers().
>> @@ -1292,7 +1296,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
>>}
>> }
>>
>> -   mtx_unlock(&ctx->Shared->Mutex);
>> +   _mesa_HashUnlockMutex(ctx->Shared->BufferObjects);
>>  }
>>
>>
>> @@ -1326,7 +1330,7 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa)
>> /*
>>  * This must be atomic (generation and allocation of buffer object IDs)
>>  */
>> -   mtx_lock(&ctx->Shared->Mutex);
>> +   _mesa_HashLockMutex(ctx->Shared->BufferObjects);
>>
>> first = _mesa_HashFindFreeKeyBlock(ctx->Shared->BufferObjects, n);
>>
>>

Re: [Mesa-dev] [PATCH v2 1/3] egl/x11: fix use of EGL_BAD_NATIVE_WINDOW

2015-08-07 Thread Emil Velikov
On 4 August 2015 at 14:32, Frank Binns  wrote:
> Commit 4ed23fd590 introduced some calls to _eglError inappropriately
> passing it EGL_BAD_NATIVE_WINDOW. This was actually harmless in two of the
> cases as _eglError gets called later on with a more appropriate error code
> but (just to be safe) switch these to _eglLog calls instead.
>
> The final case is a little trickier as it actually needs to set an error
> of which the following are available (according to the EGL spec):
> EGL_BAD_MATCH, EGL_BAD_CONFIG, EGL_BAD_NATIVE_(PIXMAP|WINDOW) and
> EGL_BAD_ALLOC.
>
> Of these, EGL_BAD_ALLOC seems to be the most appropriate given that
> failure can occur either as a result of xcb_get_setup failing due to an
> earlier error on the connection (where the most commonly occurring error
> code is XCB_CONN_CLOSED_MEM_INSUFFICIENT) or as a result of the
> xcb_screen_iterator_t 'rem' field being 0.
>
> In addition to this, commit af2aea40d2 unconditionally set the error to
> EGL_BAD_NATIVE_WINDOW when creating a window or pixmap surface with a NULL
> native handle. Change this to correctly set the error based on surface
> type.
>
> v2: Updated patch description (Emil Velikov)
> Return EGL_BAD_NATIVE_PIXMAP when eglCreatePixmapSurface is called
> with a NULL native pixmap handle
>
> Signed-off-by: Frank Binns 
Reviewed-by: Emil Velikov 

Thanks for the update.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] egl/x11: set EGL_BAD_NATIVE_(PIXMAP|WINDOW) for invalid pixmaps/windows

2015-08-07 Thread Emil Velikov
On 4 August 2015 at 14:32, Frank Binns  wrote:
> Both eglCreatePixmapSurface and eglCreateWindowSurface were incorrectly
> setting the EGL error to be EGL_BAD_ALLOC when an invalid native drawable
> handle was being passed in. The EGL spec states the following for
> eglCreatePixmapSurface:
>
> "If pixmap is not a valid native pixmap handle, then an EGL_BAD_-
>  NATIVE_PIXMAP error should be generated."
>
> (eglCreateWindowSurface has similar text)
>
> Correctly set the EGL error value based on xcb_get_geometry_reply returning
> an error structure containing something other than BadAlloc.
>
> v2: Check for BadAlloc error and update commit message to reflect this
>
> Signed-off-by: Frank Binns 
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2 3/3] egl/x11: don't abort when creating a DRI2 drawable fails

2015-08-07 Thread Emil Velikov
On 4 August 2015 at 14:32, Frank Binns  wrote:
> When calling either eglCreateWindowSurface or eglCreatePixmapSurface it
> was possible for an application to be aborted as a result of it failing
> to create a DRI2 drawable on the server. This could happen due to an
> application passing in an invalid native drawable handle, for example.
>
> v2: Handle the case where an error has been set on the connection
>
> Cc: 
> Signed-off-by: Frank Binns 
Reviewed-by: Emil Velikov 

I'll push these in a few days in case others want to comment/object on
the series.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 1/2] mesa: clear existing swizzle info before bitwise-OR

2015-08-07 Thread Emil Velikov
On 4 August 2015 at 19:39, Oded Gabbay  wrote:
> This patch fixes a bug in big-endian treatment, where the previous
> swizzle info wasn't cleared before a new swizzle info was inserted into
> the format field using a bitwise-OR operation.
>
> v2: use MESA_ARRAY_FORMAT_SWIZZLE_*_MASK instead of numeric constants
> v3: align according to coding style
>
> Signed-off-by: Oded Gabbay 
> CC: "10.5 10.6" 
I believe in the long term we might want to find out why things
differ wrt LE platforms.
Regardless this is a good fix imho.

Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: _mesa_format_convert should be endian agnostic

2015-08-07 Thread Emil Velikov
On 4 August 2015 at 17:57, Oded Gabbay  wrote:

>
>
> On Mon, Aug 3, 2015 at 6:40 PM, Emil Velikov 
> wrote:
>
>> Hi Oded,
>>
>> On 2 August 2015 at 11:37, Oded Gabbay  wrote:
>> > This patch fixes a bug that is manifested in the read path of mesa when
>> > running on big-endian machines. The effects can be seen when running
>> > piglit sanity test and/or taking a screen capture.
>> >
>> > The bug is caused when _mesa_format_convert receives src_format as
>> > mesa_format, which it then changes to mesa_array_format. During this
>> > change, it checks for endianness and swaps the bytes accordingly.
>> > However, because the bytes are _already_ swapped in the memory itself
>> > (being written there by llvmpipe), and src_format value matches the
>> > _actual_ contents of the memory, the result of the read is wrong.
>> >
>> I'm assuming that you're looked at swrast + softpipe as well - do they
>> use the same approach or is llvmpipe the odd one out ?
>>
>> ​Hi Emil,
> ​
>
> ​I checked it with swrast, softpipe AND llvmpipe.
> Without my patch, all methods fail piglit sanity on ppc64
> With my patch, all methods pass piglit sanity
>
I've mentioned the other drivers, as your commit message explicitly
mentions llvmpipe alone. Perhaps dropping that hunk or adding the other two
would be ok ? Props for checking all the drivers though !

I'm afraid that I've never looked into the BE codepaths so don't know how
useful of a review I can do here.

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: _mesa_format_convert should be endian agnostic

2015-08-07 Thread Emil Velikov
Hi Christian,

On 4 August 2015 at 08:23, Christian Zigotzky  wrote:
> Hi All,
>
> I know the false colors problems. Some Linux programs appear to be ABGR
> instead of RGBA, thus green becomes purple, red becomes light blue etc.
>
> I created a bug report on freedesktop.org. Bug report 72877:
> https://bugs.freedesktop.org/show_bug.cgi?id=72877 and
> http://lists.freedesktop.org/archives/mesa-dev/2013-December/050363.html.
>
> I also created a bug report on bugs.launchpad.net. Bug report #1275042:
> https://bugs.launchpad.net/ubuntu/+source/mesa/+bug/1275042.
>
> Additionally I posted the bug on the Mesa dev mailing list:
> http://lists.freedesktop.org/archives/mesa-dev/2014-March/055510.html.
>
> I also posted it in the ubuntuforums' PPC forum:
> http://ubuntuforums.org/showthread.php?t=2214923 and I posted it on the
> Debian PPC mailing list:
> https://lists.debian.org/debian-powerpc/2014/01/msg3.html.
>
> I figured out what the problem is and I have fixed the problem in the
> MesaLib source code. I released some unofficial Mesa packages. Downloads:
> http://www.supertuxkart-amiga.de/amiga/mesalib-unofficial.html.
>
> I fixed the wrong colors issues in SuperTuxKart. Downloads:
> http://www.supertuxkart-amiga.de/amiga/x1000.html#downloads. But I know, it
> affects a lot of other programs.
>
> Furthermore I posted some information about this problem in the following
> thread: http://forum.hyperion-entertainment.biz/viewtopic.php?f=35&t=2137
>
> There is a second problem with all Radeon HD 7XXX and higher. 3D for these
> requires LLVM to compile the shaders. We compiled the new Mesa versions with
> LLVM, but there was a bug in LLVM that meant it wouldn't work on PPC.
>
> At the time we are still using the unofficial Mesa versions for our NG
> Amigas and Power Macs. Unfortunately these work only with Radeon HD 6XXX and
> lower.
>
I believe you misread why I've CC'ed you.

My intention was to make you aware of this patch so that you can check
(if interested) if it helps or it does/doesn't make things worse for
r600. As you already noticed, Oded is working on software renderers so
he may not be able to test on r600.

On the topic of BE, there have been a few fixes for r100/r200 hardware
recently but that's about it.

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] utils: automake: remove uncommon $()

2015-08-07 Thread Emil Velikov
On 21 July 2015 at 13:12, Emil Velikov  wrote:
> On 20/07/15 21:25, Chad Versace wrote:
>> On Fri 17 Jul 2015, Emil Velikov wrote:
>>> On 17 July 2015 at 19:11, Ilia Mirkin  wrote:
 On Fri, Jul 17, 2015 at 2:11 PM, Eric Anholt  wrote:
> Matt Turner  writes:
>
>> On Fri, Jul 17, 2015 at 10:17 AM, Emil Velikov 
>>  wrote:
>>> Cc: Eric Anholt 
>>> Signed-off-by: Emil Velikov 
>>> ---
>>>  src/util/tests/hash_table/Makefile.am | 3 +--
>>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>>
>>> diff --git a/src/util/tests/hash_table/Makefile.am 
>>> b/src/util/tests/hash_table/Makefile.am
>>> index 04a77e3..0c99e7b 100644
>>> --- a/src/util/tests/hash_table/Makefile.am
>>> +++ b/src/util/tests/hash_table/Makefile.am
>>> @@ -38,7 +38,6 @@ TESTS = \
>>> null_destroy \
>>> random_entry \
>>> remove_null \
>>> -   replacement \
>>> -   $()
>>> +   replacement
>>
>> To get the benefit of $() without some unknown incompatibility, pixman
>> uses $(NULL) which of course relies on not having a variable named
>> NULL.
>>
>> I might suggest that instead of removing them, but I'm not much
>> opposed to removing them either.
>
> I do really like having a terminator on these lists.  I find that
> without them, I'll end up copy-and-pasting the wrong thing and missing
> the trailing backslash on a line.

 Also makes diffs easier to read since you don't have spurious changes
 which just add a \ .
>>
>> I second Anholt. I prefer the sentinel too, but don't feel too strongly
>> about it.
>>
>> I find that a sentinel helps me avoid making mistakes when adding new
>> list members or when sorting the lists in $EDITOR.
>>
> In case it's not (too) obvious I'm nuking these for consistency sake, as
> there are five of these in over 80 makefiles. If people like/prefer them
> can we have a volunteer that adds them everywhere ?
>
Can we have some volunteers, please ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/13] hash: Add _mesa_HashRemoveLocked() function.

2015-08-07 Thread Brian Paul

On 08/07/2015 11:33 AM, Matt Turner wrote:

On Fri, Aug 7, 2015 at 8:43 AM, Brian Paul  wrote:

On 08/06/2015 06:11 PM, Matt Turner wrote:


---
   src/mesa/main/hash.c | 19 +++
   src/mesa/main/hash.h |  2 ++
   2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c
index 315b5d6..aa1c6a1 100644
--- a/src/mesa/main/hash.c
+++ b/src/mesa/main/hash.c
@@ -328,8 +328,8 @@ _mesa_HashInsert(struct _mesa_HashTable *table, GLuint
key, void *data)
* While holding the hash table's lock, searches the entry with the
matching
* key and unlinks it.
*/
-void
-_mesa_HashRemove(struct _mesa_HashTable *table, GLuint key)
+static inline void
+_mesa_HashRemove_unlocked(struct _mesa_HashTable *table, GLuint key)



You can drop the _mesa_ prefix on static functions.


Right, I think I named it that because existing static functions in
that file use _mesa_:

_mesa_HashInsert_unlocked
_mesa_HashLookup_unlocked

I can change them all if you like.


Sure.

-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gles/es3.1: Implement glMemoryBarrierByRegion

2015-08-07 Thread Matt Turner
On Tue, Aug 4, 2015 at 1:22 AM, Marta Lofstedt
 wrote:
> From: Marta Lofstedt 
>
> Signed-off-by: Marta Lofstedt 
> ---
>  src/mapi/glapi/gen/gl_API.xml   |  4 
>  src/mesa/main/shaderimage.c | 40 
> +
>  src/mesa/main/shaderimage.h |  3 +++
>  src/mesa/main/tests/dispatch_sanity.cpp |  3 +--
>  4 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
> index 658efa4..3db4349 100644
> --- a/src/mapi/glapi/gen/gl_API.xml
> +++ b/src/mapi/glapi/gen/gl_API.xml
> @@ -2966,6 +2966,10 @@
>  
>  
>  
> +
> +
> +
> +
>  
>
>  
> diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
> index a348cdb..7337f22 100644
> --- a/src/mesa/main/shaderimage.c
> +++ b/src/mesa/main/shaderimage.c
> @@ -653,3 +653,43 @@ _mesa_MemoryBarrier(GLbitfield barriers)
> if (ctx->Driver.MemoryBarrier)
>ctx->Driver.MemoryBarrier(ctx, barriers);
>  }
> +
> +void GLAPIENTRY
> +_mesa_MemoryBarrierByRegion(GLbitfield barriers)
> +{
> +   GET_CURRENT_CONTEXT(ctx);
> +
> +   GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT |
> + GL_FRAMEBUFFER_BARRIER_BIT |
> + GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
> + GL_SHADER_STORAGE_BARRIER_BIT |
> + GL_TEXTURE_FETCH_BARRIER_BIT |
> + GL_UNIFORM_BARRIER_BIT;
> +
> +   if (ctx->Driver.MemoryBarrier) {
> +  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
> +   *
> +   *"When barriers is ALL_BARRIER_BITS, shader memory accesses will 
> be
> +   * synchronized relative to all these barrier bits, but not to 
> other
> +   * barrier bits specific to MemoryBarrier."
> +   *
> +   * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then 
> all
> +   * barriers allowed by glMemoryBarrierByRegion should be activated."
> +   */
> +  if (barriers == GL_ALL_BARRIER_BITS)
> + return ctx->Driver.MemoryBarrier(ctx, all_allowed_bits);
> +
> +  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
> +   *
> +   *"An INVALID_VALUE error is generated if barriers is not the 
> special
> +   * value ALL_BARRIER_BITS, and has any bits set other than those
> +   * described above."
> +   */
> +  if ((barriers & ~all_allowed_bits) != 0) {
> + _mesa_error(ctx, GL_INVALID_VALUE,
> + "glMemoryBarrierByRegion(unsupported barrier bit");
> +  }
> +
> +  ctx->Driver.MemoryBarrier(ctx, barriers);
> +   }

Would probably be nice to put an unreachable("not implemented") as an
else case for future implementors.

Reviewed-by: Matt Turner 

Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] utils: automake: remove uncommon $()

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 11:05 AM, Emil Velikov  wrote:
> On 21 July 2015 at 13:12, Emil Velikov  wrote:
>> On 20/07/15 21:25, Chad Versace wrote:
>>> On Fri 17 Jul 2015, Emil Velikov wrote:
 On 17 July 2015 at 19:11, Ilia Mirkin  wrote:
> On Fri, Jul 17, 2015 at 2:11 PM, Eric Anholt  wrote:
>> Matt Turner  writes:
>>
>>> On Fri, Jul 17, 2015 at 10:17 AM, Emil Velikov 
>>>  wrote:
 Cc: Eric Anholt 
 Signed-off-by: Emil Velikov 
 ---
  src/util/tests/hash_table/Makefile.am | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

 diff --git a/src/util/tests/hash_table/Makefile.am 
 b/src/util/tests/hash_table/Makefile.am
 index 04a77e3..0c99e7b 100644
 --- a/src/util/tests/hash_table/Makefile.am
 +++ b/src/util/tests/hash_table/Makefile.am
 @@ -38,7 +38,6 @@ TESTS = \
 null_destroy \
 random_entry \
 remove_null \
 -   replacement \
 -   $()
 +   replacement
>>>
>>> To get the benefit of $() without some unknown incompatibility, pixman
>>> uses $(NULL) which of course relies on not having a variable named
>>> NULL.
>>>
>>> I might suggest that instead of removing them, but I'm not much
>>> opposed to removing them either.
>>
>> I do really like having a terminator on these lists.  I find that
>> without them, I'll end up copy-and-pasting the wrong thing and missing
>> the trailing backslash on a line.
>
> Also makes diffs easier to read since you don't have spurious changes
> which just add a \ .
>>>
>>> I second Anholt. I prefer the sentinel too, but don't feel too strongly
>>> about it.
>>>
>>> I find that a sentinel helps me avoid making mistakes when adding new
>>> list members or when sorting the lists in $EDITOR.
>>>
>> In case it's not (too) obvious I'm nuking these for consistency sake, as
>> there are five of these in over 80 makefiles. If people like/prefer them
>> can we have a volunteer that adds them everywhere ?
>>
> Can we have some volunteers, please ?

I don't think we should require consistency here.

Just make them $(NULL) like I suggested. That'll keep people who want
the sentinels happy and it'll work for whatever silly version of Make
you're using. :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] egl/dri2: Fix include path of u_atomic.h introduced e7e29189

2015-08-07 Thread Alexander von Gluck IV
This was causing a failure to build on SCons due to a missing
-Isrc/egl. Instead of adding in that path, let's just add -Isrc/
and include "util/u_atomic.h".
---
 src/egl/Makefile.am | 1 +
 src/egl/SConscript  | 1 +
 src/egl/drivers/dri2/egl_dri2.c | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
index be7bfe9..5c2ba30 100644
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -25,6 +25,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/egl/main \
-I$(top_srcdir)/src/gbm/main \
+   -I$(top_srcdir)/src \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff --git a/src/egl/SConscript b/src/egl/SConscript
index a7f6282..1b2a427 100644
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -9,6 +9,7 @@ env = env.Clone()
 env.Append(CPPPATH = [
 '#/include',
 '#/src/egl/main',
+'#/src',
 ])
 
 
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 0290c07..461735f 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -52,7 +52,7 @@
 #endif
 
 #include "egl_dri2.h"
-#include "../util/u_atomic.h"
+#include "util/u_atomic.h"
 
 /* The kernel header drm_fourcc.h defines the DRM formats below.  We duplicate
  * some of the definitions here so that building Mesa won't bleeding-edge
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] egl/scons: Fix every platform getting dri and haiku added e7e29189

2015-08-07 Thread Alexander von Gluck IV
---
 src/egl/SConscript | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/egl/SConscript b/src/egl/SConscript
index 1b2a427..00761e8 100644
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -15,14 +15,17 @@ env.Append(CPPPATH = [
 
 # parse Makefile.sources
 egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-egl_sources.append(env.ParseSourceList('Makefile.sources', 
'dri2_backend_core_FILES'))
 
-env.Append(CPPDEFINES = [
-'_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
-'_EGL_BUILT_IN_DRIVER_HAIKU',
-'HAVE_HAIKU_PLATFORM',
-])
-egl_sources.append('drivers/haiku/egl_haiku.cpp')
+if env['dri']:
+egl_sources.append(env.ParseSourceList('Makefile.sources', 
'dri2_backend_core_FILES'))
+
+if env['platform'] == 'haiku':
+env.Append(CPPDEFINES = [
+'_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
+'_EGL_BUILT_IN_DRIVER_HAIKU',
+'HAVE_HAIKU_PLATFORM',
+])
+egl_sources.append('drivers/haiku/egl_haiku.cpp')
 
 egl = env.SharedLibrary(
 target = 'EGL',
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] egl/dri2: Fix include path of u_atomic.h introduced e7e29189

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 11:19 AM, Alexander von Gluck IV
 wrote:
> This was causing a failure to build on SCons due to a missing
> -Isrc/egl. Instead of adding in that path, lets just -Isrc/
> and include "utils/u_atomic.h".
> ---

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Intel-gfx] [PATCH mesa v3] i965/gen8+: bo in state base address must be in 32-bit address range

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 2:45 AM, Michel Thierry  wrote:
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 54081a1..ca90784 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -409,11 +409,23 @@ bool
>  intel_batchbuffer_emit_reloc64(struct brw_context *brw,

This patch needs to be rebased on commit 09348c12f (committed more
than 3 weeks ago).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] clover: Properly initialize LLVM targets when linking with component libs

2015-08-07 Thread Tom Stellard
Calls to LLVMInitialize* fail when we are linking against individual
component libraries rather than one large shared object, because
we only include component libraries that are required by the drivers.

We need to make sure to only initialize the targets that we need.

CC: 10.6 
---
 configure.ac  |  4 
 src/gallium/state_trackers/clover/Makefile.am |  3 ++-
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 17 +
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index 36197d3..e1a7d7a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2040,8 +2040,10 @@ require_egl_drm() {
 radeon_llvm_check() {
 if test ${LLVM_VERSION_INT} -lt 307; then
 amdgpu_llvm_target_name='r600'
+   CLOVER_CPP_FLAGS="${CLOVER_CPP_FLAGS} -DCLOVER_INIT_R600_TARGET"
 else
 amdgpu_llvm_target_name='amdgpu'
+   CLOVER_CPP_FLAGS="${CLOVER_CPP_FLAGS} -DCLOVER_INIT_AMDGPU_TARGET"
 fi
 if test "x$enable_gallium_llvm" != "xyes"; then
 AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
@@ -2285,6 +2287,8 @@ AC_SUBST([XA_MINOR], $XA_MINOR)
 AC_SUBST([XA_TINY], $XA_TINY)
 AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_TINY")
 
+AC_SUBST([CLOVER_CPP_FLAGS], $CLOVER_CPP_FLAGS)
+
 dnl Restore LDFLAGS and CPPFLAGS
 LDFLAGS="$_SAVE_LDFLAGS"
 CPPFLAGS="$_SAVE_CPPFLAGS"
diff --git a/src/gallium/state_trackers/clover/Makefile.am 
b/src/gallium/state_trackers/clover/Makefile.am
index fd0ccf8..975b36f 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -45,7 +45,8 @@ libclllvm_la_CXXFLAGS = \
$(DEFINES) \
-DLIBCLC_INCLUDEDIR=\"$(LIBCLC_INCLUDEDIR)/\" \
-DLIBCLC_LIBEXECDIR=\"$(LIBCLC_LIBEXECDIR)/\" \
-   -DCLANG_RESOURCE_DIR=\"$(CLANG_RESOURCE_DIR)\"
+   -DCLANG_RESOURCE_DIR=\"$(CLANG_RESOURCE_DIR)\" \
+   $(CLOVER_CPP_FLAGS)
 
 libclllvm_la_SOURCES = $(LLVM_SOURCES)
 
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 86859af..361a149 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -786,10 +786,19 @@ namespace {
init_targets() {
   static bool targets_initialized = false;
   if (!targets_initialized) {
- LLVMInitializeAllTargets();
- LLVMInitializeAllTargetInfos();
- LLVMInitializeAllTargetMCs();
- LLVMInitializeAllAsmPrinters();
+#ifdef CLOVER_INIT_AMDGPU_TARGET
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTargetMC();
+ LLVMInitializeAMDGPUAsmPrinter();
+#endif
+
+#ifdef CLOVER_INIT_R600_TARGET
+ LLVMInitializeR600Target();
+ LLVMInitializeR600TargetInfo();
+ LLVMInitializeR600TargetMC();
+ LLVMInitializeR600AsmPrinter();
+#endif
  targets_initialized = true;
   }
}
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: _mesa_format_convert should be endian agnostic

2015-08-07 Thread Oded Gabbay
On Fri, Aug 7, 2015 at 8:55 PM, Emil Velikov  wrote:
> On 4 August 2015 at 17:57, Oded Gabbay  wrote:
>>
>>
>>
>> On Mon, Aug 3, 2015 at 6:40 PM, Emil Velikov 
>> wrote:
>>>
>>> Hi Oded,
>>>
>>> On 2 August 2015 at 11:37, Oded Gabbay  wrote:
>>> > This patch fixes a bug that is manifested in the read path of mesa when
>>> > running on big-endian machines. The effects can be seen when running
>>> > piglit sanity test and/or taking a screen capture.
>>> >
>>> > The bug is caused when _mesa_format_convert receives src_format as
>>> > mesa_format, which it thens changes to mesa_array_format. During this
>>> > change, it checks for endianness and swaps the bytes accordingly.
>>> > However, because the bytes are _already_ swapped in the memory itself
>>> > (being written there by llvmpipe), and src_format value matches the
>>> > _actual_ contents of the memory, the result of the read is wrong.
>>> >
>>> I'm assuming that you've looked at swrast + softpipe as well - do they
>>> use the same approach or is llvmpipe the odd one out ?
>>>
>> Hi Emil,
>>
>> I checked it with swrast, softpipe AND llvmpipe.
>> Without my patch, all methods fail piglit sanity on ppc64
>> With my patch, all methods pass piglit sanity
>>
> I've mentioned the other drivers, as your commit message explicitly mentions
> llvmpipe alone. Perhaps dropping that hunk or adding the other two would be
> ok ? Props for checking all the drivers though !
>
Of course, thanks for reminding me to add it to the commit message.
I'll resend the patch shortly.


> I'm afraid that I've never looked into the BE codepaths so don't know how
> useful of a review I can do here.
So maybe just acked-by ?

Oded

>
> Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] mesa: _mesa_format_convert should be endian agnostic

2015-08-07 Thread Oded Gabbay
This patch fixes a bug that is manifested in the read path of mesa when
running on big-endian machines. The effects can be seen when running
piglit sanity test and/or taking a screen capture.

The bug is caused when _mesa_format_convert receives src_format as
mesa_format, which it then changes to mesa_array_format. During this
change, it checks for endianness and swaps the bytes accordingly.
However, because the bytes are _already_ swapped in the memory itself
(being written there by llvmpipe/softpipe/sw rast), and src_format
value matches the _actual_ contents of the memory, the result of the
read is wrong.

Therefore, because other layers/functions, such as llvm or
read_rgba_pixels(), take care of whether we are dealing with big-endian or
little-endian, _mesa_format_convert should be endian agnostic to avoid
duplicate swapping of bytes.

btw, it is also mentioned in the comment of the function that it doesn't
handle byte-swapping, so the original code contradicts the documentation.

Signed-off-by: Oded Gabbay 
CC: "10.5 10.6" 
Signed-off-by: Oded Gabbay 
---
 src/mesa/main/formats.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index baeb1bf..7a41cb0 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -372,10 +372,8 @@ uint32_t
 _mesa_format_to_array_format(mesa_format format)
 {
const struct gl_format_info *info = _mesa_get_format_info(format);
-   if (_mesa_little_endian())
-  return info->ArrayFormat;
-   else
-  return _mesa_array_format_flip_channels(info->ArrayFormat);
+
+   return info->ArrayFormat;
 }
 
 static struct hash_table *format_array_format_table;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] mesa: _mesa_format_convert should be endian agnostic

2015-08-07 Thread Matt Turner
On Fri, Aug 7, 2015 at 12:24 PM, Oded Gabbay  wrote:
> This patch fixes a bug that is manifested in the read path of mesa when
> running on big-endian machines. The effects can be seen when running
> piglit sanity test and/or taking a screen capture.
>
> The bug is caused when _mesa_format_convert receives src_format as
> mesa_format, which it thens changes to mesa_array_format. During this
> change, it checks for endianness and swaps the bytes accordingly.
> However, because the bytes are _already_ swapped in the memory itself
> (being written there by llvmpipe/softpipe/sw rast), and src_format
> value matches the _actual_ contents of the memory, the result of the
> read is wrong.
>
> Therefore, because other layers/functions, such as llvm or
> read_rgba_pixels() takes care whether we are dealing with big-endian or
> little-endian, _mesa_format_convert should be endian agnostic to avoid
> duplicate swapping of bytes.
>
> btw, it is also mentioned in the comment of the function that it doesn't
> handle byte-swapping, so the original code contradicts the documentation.
>
> Signed-off-by: Oded Gabbay 
> CC: "10.5 10.6" 
> Signed-off-by: Oded Gabbay 

Multiple S-o-b. Whoops. Just fix before committing. :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] mesa: _mesa_format_convert should be endian agnostic

2015-08-07 Thread Oded Gabbay
On Fri, Aug 7, 2015 at 10:30 PM, Matt Turner  wrote:
> On Fri, Aug 7, 2015 at 12:24 PM, Oded Gabbay  wrote:
>> This patch fixes a bug that is manifested in the read path of mesa when
>> running on big-endian machines. The effects can be seen when running
>> piglit sanity test and/or taking a screen capture.
>>
>> The bug is caused when _mesa_format_convert receives src_format as
>> mesa_format, which it thens changes to mesa_array_format. During this
>> change, it checks for endianness and swaps the bytes accordingly.
>> However, because the bytes are _already_ swapped in the memory itself
>> (being written there by llvmpipe/softpipe/sw rast), and src_format
>> value matches the _actual_ contents of the memory, the result of the
>> read is wrong.
>>
>> Therefore, because other layers/functions, such as llvm or
>> read_rgba_pixels() takes care whether we are dealing with big-endian or
>> little-endian, _mesa_format_convert should be endian agnostic to avoid
>> duplicate swapping of bytes.
>>
>> btw, it is also mentioned in the comment of the function that it doesn't
>> handle byte-swapping, so the original code contradicts the documentation.
>>
>> Signed-off-by: Oded Gabbay 
>> CC: "10.5 10.6" 
>> Signed-off-by: Oded Gabbay 
>
> Multiple S-o-b. Whoops. Just fix before committing. :)
Matt, thanks for catching that :)
Oded
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] egl/dri2: Fix include path of u_atomic.h introduced e7e29189

2015-08-07 Thread Emil Velikov
On 07/08/15 19:19, Alexander von Gluck IV wrote:
> This was causing a failure to build on SCons due to a missing
> -Isrc/egl. Instead of adding in that path, lets just -Isrc/
> and include "utils/u_atomic.h".

Hmm nice one! Before the commit dri2 wasn't attempted on Haiku. With the
patch one, I've added it by mistake causing this problem.
Seems that 2/2 should come first (in a shorter form) and this one ought
to be second.

Patch looks great, just a small request - please reword the commit
message a bit. I'm thinking of the following but feel free to tweak to
your liking.

"egl/dri2: straighten u_atomic.h include

Earlier commit e7e29189e27(scons: rework the EGL build) squashed the
different SConscripts, unintentionally adding dri2 sources for the haiku
build (fixed with previous commit).

That did point out that the way we include u_atomic.h is not so robust.
Add $(top)/src and fix the include.
"

> ---
>  src/egl/Makefile.am | 1 +
>  src/egl/SConscript  | 1 +
>  src/egl/drivers/dri2/egl_dri2.c | 2 +-
>  3 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
> index be7bfe9..5c2ba30 100644
> --- a/src/egl/Makefile.am
> +++ b/src/egl/Makefile.am
> @@ -25,6 +25,7 @@ AM_CFLAGS = \
>   -I$(top_srcdir)/include \
>   -I$(top_srcdir)/src/egl/main \
>   -I$(top_srcdir)/src/gbm/main \
> + -I$(top_srcdir)/src \
>   $(DEFINES) \
>   $(VISIBILITY_CFLAGS) \
>   $(LIBDRM_CFLAGS) \
> diff --git a/src/egl/SConscript b/src/egl/SConscript
> index a7f6282..1b2a427 100644
> --- a/src/egl/SConscript
> +++ b/src/egl/SConscript
> @@ -9,6 +9,7 @@ env = env.Clone()
>  env.Append(CPPPATH = [
>  '#/include',
>  '#/src/egl/main',
> +'#/src',
After we swap the order and this becomes patch 2/2 we can drop this change.

With that the patch is
Reviewed-by: Emil Velikov 

Thanks!
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl/scons: Fix every platform getting dri and haiku added e7e29189

2015-08-07 Thread Emil Velikov
Ouch seems like a git rebase slip, as I was nuking the final dri scons
target around the same time.

On 07/08/15 19:19, Alexander von Gluck IV wrote:> ---
>  src/egl/SConscript | 17 ++---
>  1 file changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/src/egl/SConscript b/src/egl/SConscript
> index 1b2a427..00761e8 100644
> --- a/src/egl/SConscript
> +++ b/src/egl/SConscript
> @@ -15,14 +15,17 @@ env.Append(CPPPATH = [
>  
>  # parse Makefile.sources
>  egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
> -egl_sources.append(env.ParseSourceList('Makefile.sources', 
> 'dri2_backend_core_FILES'))
>  
As this Sconscript is included only for haiku (see src/SConscript) we
can remove only the above line, and keep everything else intact. The
commit message could use an update though.

To make things easier, it would be better to use this as 1/2 of the series.

Sorry for breaking things. I tried really hard to avoid that but I seem
to have missed a line.

With the above suggestions
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gles/es3.1: Implement glMemoryBarrierByRegion

2015-08-07 Thread Ilia Mirkin
On Fri, Aug 7, 2015 at 2:18 PM, Matt Turner  wrote:
> On Tue, Aug 4, 2015 at 1:22 AM, Marta Lofstedt
>  wrote:
>> From: Marta Lofstedt 
>>
>> Signed-off-by: Marta Lofstedt 
>> ---
>>  src/mapi/glapi/gen/gl_API.xml   |  4 
>>  src/mesa/main/shaderimage.c | 40 
>> +
>>  src/mesa/main/shaderimage.h |  3 +++
>>  src/mesa/main/tests/dispatch_sanity.cpp |  3 +--
>>  4 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
>> index 658efa4..3db4349 100644
>> --- a/src/mapi/glapi/gen/gl_API.xml
>> +++ b/src/mapi/glapi/gen/gl_API.xml
>> @@ -2966,6 +2966,10 @@
>>  
>>  
>>  
>> +
>> +
>> +
>> +
>>  
>>
>>  
>> diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
>> index a348cdb..7337f22 100644
>> --- a/src/mesa/main/shaderimage.c
>> +++ b/src/mesa/main/shaderimage.c
>> @@ -653,3 +653,43 @@ _mesa_MemoryBarrier(GLbitfield barriers)
>> if (ctx->Driver.MemoryBarrier)
>>ctx->Driver.MemoryBarrier(ctx, barriers);
>>  }
>> +
>> +void GLAPIENTRY
>> +_mesa_MemoryBarrierByRegion(GLbitfield barriers)
>> +{
>> +   GET_CURRENT_CONTEXT(ctx);
>> +
>> +   GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT |
>> + GL_FRAMEBUFFER_BARRIER_BIT |
>> + GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
>> + GL_SHADER_STORAGE_BARRIER_BIT |
>> + GL_TEXTURE_FETCH_BARRIER_BIT |
>> + GL_UNIFORM_BARRIER_BIT;
>> +
>> +   if (ctx->Driver.MemoryBarrier) {
>> +  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
>> +   *
>> +   *"When barriers is ALL_BARRIER_BITS, shader memory accesses will 
>> be
>> +   * synchronized relative to all these barrier bits, but not to 
>> other
>> +   * barrier bits specific to MemoryBarrier."
>> +   *
>> +   * That is, if barriers is the special value GL_ALL_BARRIER_BITS, 
>> then all
>> +   * barriers allowed by glMemoryBarrierByRegion should be activated."
>> +   */
>> +  if (barriers == GL_ALL_BARRIER_BITS)
>> + return ctx->Driver.MemoryBarrier(ctx, all_allowed_bits);
>> +
>> +  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
>> +   *
>> +   *"An INVALID_VALUE error is generated if barriers is not the 
>> special
>> +   * value ALL_BARRIER_BITS, and has any bits set other than those
>> +   * described above."
>> +   */
>> +  if ((barriers & ~all_allowed_bits) != 0) {
>> + _mesa_error(ctx, GL_INVALID_VALUE,
>> + "glMemoryBarrierByRegion(unsupported barrier bit");
>> +  }
>> +
>> +  ctx->Driver.MemoryBarrier(ctx, barriers);
>> +   }
>
> Would probably be nice to put an unreachable("not implemented") as an
> else case for future implementors.
>
> Reviewed-by: Matt Turner 

I wonder if this shouldn't just be

if (!ctx->Driver.MemoryBarrier)
  INVALID_OPERATION

But this is largely hypothetical... I'm not too worried about it.

>
> Thanks!
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl/scons: Fix every platform getting dri and haiku added e7e29189

2015-08-07 Thread Alexander von Gluck IV

On 2015-08-07 14:49, Emil Velikov wrote:

Ouch seems like a git rebase slip, as I was nuking the final dri scons
target around the same time.

On 07/08/15 19:19, Alexander von Gluck IV wrote:> ---

 src/egl/SConscript | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/egl/SConscript b/src/egl/SConscript
index 1b2a427..00761e8 100644
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -15,14 +15,17 @@ env.Append(CPPPATH = [

 # parse Makefile.sources
 egl_sources = env.ParseSourceList('Makefile.sources', 
'LIBEGL_C_FILES')
-egl_sources.append(env.ParseSourceList('Makefile.sources', 
'dri2_backend_core_FILES'))



As this Sconscript is included only for haiku (see src/SConscript) we
can remove only the above line, and keep everything else intact. The
commit message could use an update though.

To keep make things easier would be better to use this as 1/2 of the 
series.


Sorry for breaking things. I tried really hard to avoid that but I 
seems

to have missed a line.

With the above suggestions
Reviewed-by: Emil Velikov 


ah.. I pushed 1/2 up as-is after Matt's ack before your review.

Are we sure we want to completely drop support for scons building EGL
on all platforms except Haiku?

I do plan on fixing Mesa's autotools build under Haiku... but every
time I start in I get a pretty bad headache on how messy autotools is.
(scons has spoiled me :-) )

 -- Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa v3] i965/gen8+: bo in state base address must be in 32-bit address range

2015-08-07 Thread Ilia Mirkin
On Fri, Aug 7, 2015 at 5:45 AM, Michel Thierry  wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
>
> In specific, any resource used with flat/heapless (0x-0xf000)
> General State Heap or Intruction State Heap must be in a 32-bit range
> (GSH / ISH), because the General State Offset and Instruction State Offset
> are limited to 32-bits.
>
> Use drm_intel_bo_emit_reloc_48bit when the 4GB limit is not necessary, and
> the bo can be in the full address space.
>
> This commit introduces a dependency of libdrm 2.4.63, which introduces the
> drm_intel_bo_emit_reloc_48bit function.
>
> v2: s/48baddress/48b_address/,
> Only use in OUT_RELOC64 cases, OUT_RELOC implies a 32-bit address offset
> is needed (Ben)
> v3: Added OUT_RELOC64_INSIDE_4G, so it stands out when a 64-bit relocation
> needs the 32-bit workaround (Chris)
>
> References: 
> http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
> Cc: Ben Widawsky 
> Cc: Chris Wilson 
> Signed-off-by: Michel Thierry 
> ---
>  configure.ac  |  2 +-
>  src/mesa/drivers/dri/i965/gen8_misc_state.c   | 19 +++
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 20 
>  src/mesa/drivers/dri/i965/intel_batchbuffer.h | 10 --
>  4 files changed, 36 insertions(+), 15 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index af61aa2..c92ca44 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -68,7 +68,7 @@ AC_SUBST([OSMESA_VERSION])
>  dnl Versions for external dependencies
>  LIBDRM_REQUIRED=2.4.38
>  LIBDRM_RADEON_REQUIRED=2.4.56
> -LIBDRM_INTEL_REQUIRED=2.4.60
> +LIBDRM_INTEL_REQUIRED=2.4.63

There is no such version. I think you need a release before you can
commit this. Otherwise you'll cause pain for a whole lot of people.

>  LIBDRM_NVVIEUX_REQUIRED=2.4.33
>  LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
>  LIBDRM_FREEDRENO_REQUIRED=2.4.57
> diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c 
> b/src/mesa/drivers/dri/i965/gen8_misc_state.c
> index b20038e..73eba06 100644
> --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
> @@ -28,6 +28,10 @@
>
>  /**
>   * Define the base addresses which some state is referenced from.
> + *
> + * Use OUT_RELOC64_INSIDE_4G instead of OUT_RELOC64, the General State
> + * Offset and Instruction State Offset are limited to 32-bits by hardware,
> + * and must be located in the first 4GBs (32-bit offset).
>   */
>  void gen8_upload_state_base_address(struct brw_context *brw)
>  {
> @@ -41,19 +45,18 @@ void gen8_upload_state_base_address(struct brw_context 
> *brw)
> OUT_BATCH(0);
> OUT_BATCH(mocs_wb << 16);
> /* Surface state base address: */
> -   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> -   mocs_wb << 4 | 1);
> +   OUT_RELOC64_INSIDE_4G(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> + mocs_wb << 4 | 1);
> /* Dynamic state base address: */
> -   OUT_RELOC64(brw->batch.bo,
> -   I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
> -   mocs_wb << 4 | 1);
> +   OUT_RELOC64_INSIDE_4G(brw->batch.bo,
> + I915_GEM_DOMAIN_RENDER | 
> I915_GEM_DOMAIN_INSTRUCTION, 0,
> + mocs_wb << 4 | 1);
> /* Indirect object base address: MEDIA_OBJECT data */
> OUT_BATCH(mocs_wb << 4 | 1);
> OUT_BATCH(0);
> /* Instruction base address: shader kernels (incl. SIP) */
> -   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> -   mocs_wb << 4 | 1);
> -
> +   OUT_RELOC64_INSIDE_4G(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> + mocs_wb << 4 | 1);
> /* General state buffer size */
> OUT_BATCH(0xf001);
> /* Dynamic state buffer size */
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 54081a1..ca90784 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -409,11 +409,23 @@ bool
>  intel_batchbuffer_emit_reloc64(struct brw_context *brw,
> drm_intel_bo *buffer,
> uint32_t read_domains, uint32_t write_domain,
> -  uint32_t delta)
> +   uint32_t delta,
> +   bool support_48bit_offset)
>  {
> -   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
> - buffer, delta,
> - read_domains, write_domain);
> +   int ret;
> +
> +   /* Not all buffers can be allocated outside the first 4GB, and
> +* offset must be limited to 32-bits.
> +*/
> +   if (support_48bit_offset)
> +  drm_intel_bo_emit_reloc_48bit(brw->batch.bo, 4*brw->batch.

Re: [Mesa-dev] [PATCH 2/2] egl/scons: Fix every platform getting dri and haiku added e7e29189

2015-08-07 Thread Emil Velikov
On 7 August 2015 at 20:56, Alexander von Gluck IV  wrote:
> On 2015-08-07 14:49, Emil Velikov wrote:
>>
>> Ouch seems like a git rebase slip, as I was nuking the final dri scons
>> target around the same time.
>>
>> On 07/08/15 19:19, Alexander von Gluck IV wrote:> ---
>>>
>>>  src/egl/SConscript | 17 ++---
>>>  1 file changed, 10 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/src/egl/SConscript b/src/egl/SConscript
>>> index 1b2a427..00761e8 100644
>>> --- a/src/egl/SConscript
>>> +++ b/src/egl/SConscript
>>> @@ -15,14 +15,17 @@ env.Append(CPPPATH = [
>>>
>>>  # parse Makefile.sources
>>>  egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
>>> -egl_sources.append(env.ParseSourceList('Makefile.sources',
>>> 'dri2_backend_core_FILES'))
>>>
>> As this Sconscript is included only for haiku (see src/SConscript) we
>> can remove only the above line, and keep everything else intact. The
>> commit message could use an update though.
>>
>> To keep make things easier would be better to use this as 1/2 of the
>> series.
>>
>> Sorry for breaking things. I tried really hard to avoid that but I seems
>> to have missed a line.
>>
>> With the above suggestions
>> Reviewed-by: Emil Velikov 
>
>
> ah.. I pushed 1/2 up as-is after Matt's ack before your review.
>
> Are we sure we want to completely drop support for scons building EGL
> on all platforms except Haiku?
>
To drop == has been dropped. The last target that we had was broken
for ~4 years. See commit 78674631a2d(egl: remove the non-haiku scons
build)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/70] i965: Pack read-only booleans into a bitfield

2015-08-07 Thread Chris Wilson
GCC's read access for single bits in a bitfield is reasonable (just a
move + flag comparison), so let's save some cachelines by packing the
write-once/read-many booleans together.

   text    data     bss     dec     hex filename
6490134  191992   26192 6708318  665c5e lib64/i965_dri.so
6491766  191992   26192 6709950  6662be lib64/i965_dri.so

Small inflation due to the extra immediate masks and entirely dubious as
to whether it is worth it.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_context.h | 76 +++--
 src/mesa/drivers/dri/i965/brw_device_info.h | 32 ++--
 src/mesa/drivers/dri/i965/intel_screen.h| 18 +++
 3 files changed, 65 insertions(+), 61 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index cd43ac5..62e39be 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1088,16 +1088,6 @@ struct brw_context
 
GLuint stats_wm;
 
-   /**
-* drirc options:
-* @{
-*/
-   bool no_rast;
-   bool always_flush_batch;
-   bool always_flush_cache;
-   bool disable_throttling;
-   bool precompile;
-
driOptionCache optionCache;
/** @} */
 
@@ -1105,36 +1095,33 @@ struct brw_context
 
GLenum reduced_primitive;
 
-   /**
-* Set if we're either a debug context or the INTEL_DEBUG=perf environment
-* variable is set, this is the flag indicating to do expensive work that
-* might lead to a perf_debug() call.
-*/
-   bool perf_debug;
-
uint32_t max_gtt_map_object_size;
 
int gen;
int gt;
 
-   bool is_g4x;
-   bool is_baytrail;
-   bool is_haswell;
-   bool is_cherryview;
-   bool is_broxton;
-
-   bool has_hiz;
-   bool has_separate_stencil;
-   bool must_use_separate_stencil;
-   bool has_llc;
-   bool has_swizzling;
-   bool has_surface_tile_offset;
-   bool has_compr4;
-   bool has_negative_rhw_bug;
-   bool has_pln;
-   bool no_simd8;
-   bool use_rep_send;
-   bool use_resource_streamer;
+   /* So long as we do not frequently write to these booleans, we can
+* pack them into a bitfield with fair efficient (GCC converting the
+* access into a mov + flag test).
+*/
+   bool is_g4x : 1;
+   bool is_baytrail : 1;
+   bool is_haswell : 1;
+   bool is_cherryview : 1;
+   bool is_broxton : 1;
+
+   bool has_hiz : 1;
+   bool has_separate_stencil : 1;
+   bool must_use_separate_stencil : 1;
+   bool has_llc : 1;
+   bool has_swizzling : 1;
+   bool has_surface_tile_offset : 1;
+   bool has_compr4 : 1;
+   bool has_negative_rhw_bug : 1;
+   bool has_pln : 1;
+   bool no_simd8 : 1;
+   bool use_rep_send : 1;
+   bool use_resource_streamer : 1;
 
/**
 * Some versions of Gen hardware don't do centroid interpolation correctly
@@ -1143,7 +1130,24 @@ struct brw_context
 * non-centroid interpolation for unlit pixels, at the expense of two extra
 * fragment shader instructions.
 */
-   bool needs_unlit_centroid_workaround;
+   bool needs_unlit_centroid_workaround : 1;
+
+   /**
+* Set if we're either a debug context or the INTEL_DEBUG=perf environment
+* variable is set, this is the flag indicating to do expensive work that
+* might lead to a perf_debug() call.
+*/
+   bool perf_debug : 1;
+
+   /**
+* drirc options:
+* @{
+*/
+   bool no_rast : 1;
+   bool always_flush_batch : 1;
+   bool always_flush_cache : 1;
+   bool disable_throttling : 1;
+   bool precompile : 1;
 
GLuint NewGLState;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h 
b/src/mesa/drivers/dri/i965/brw_device_info.h
index 2a73e93..b5502b8 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -30,29 +30,29 @@ struct brw_device_info
int gen; /**< Generation number: 4, 5, 6, 7, ... */
int gt;
 
-   bool is_g4x;
-   bool is_ivybridge;
-   bool is_baytrail;
-   bool is_haswell;
-   bool is_cherryview;
-   bool is_broxton;
+   bool is_g4x : 1;
+   bool is_ivybridge : 1;
+   bool is_baytrail : 1;
+   bool is_haswell : 1;
+   bool is_cherryview : 1;
+   bool is_broxton : 1;
 
-   bool has_hiz_and_separate_stencil;
-   bool must_use_separate_stencil;
+   bool has_hiz_and_separate_stencil : 1;
+   bool must_use_separate_stencil : 1;
 
-   bool has_llc;
+   bool has_llc : 1;
 
-   bool has_pln;
-   bool has_compr4;
-   bool has_surface_tile_offset;
-   bool supports_simd16_3src;
-   bool has_resource_streamer;
+   bool has_pln : 1;
+   bool has_compr4 : 1;
+   bool has_surface_tile_offset : 1;
+   bool supports_simd16_3src : 1;
+   bool has_resource_streamer : 1;
 
/**
 * Quirks:
 *  @{
 */
-   bool has_negative_rhw_bug;
+   bool has_negative_rhw_bug : 1;
 
/**
 * Some versions of Gen hardware don't do centroid interpolation correctly
@@ -61,7 +61,7 @@ struct brw_device_info
 * non-centroid interpolation for unlit pixels, at the expense of two extra
 * fragment shader inst

[Mesa-dev] [PATCH 01/70] util/list: Add convenience functions for moving a list element

2015-08-07 Thread Chris Wilson
Just a couple of functions for removing an element from one list and
adding to another (perhaps even the same list, just at the head or
tail).

Used in future patches.

Signed-off-by: Chris Wilson 
---
 src/util/list.h | 27 +--
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/util/list.h b/src/util/list.h
index b98ce59..cce1adc 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -55,6 +55,12 @@ static inline void list_inithead(struct list_head *item)
 item->next = item;
 }
 
+static inline void __list_del(struct list_head *item)
+{
+item->prev->next = item->next;
+item->next->prev = item->prev;
+}
+
 static inline void list_add(struct list_head *item, struct list_head *list)
 {
 item->prev = list;
@@ -63,6 +69,12 @@ static inline void list_add(struct list_head *item, struct 
list_head *list)
 list->next = item;
 }
 
+inline static void list_move(struct list_head *from, struct list_head *to)
+{
+   __list_del(from);
+   list_add(from, to);
+}
+
 static inline void list_addtail(struct list_head *item, struct list_head *list)
 {
 item->next = list;
@@ -71,6 +83,12 @@ static inline void list_addtail(struct list_head *item, 
struct list_head *list)
 list->prev = item;
 }
 
+inline static void list_movetail(struct list_head *from, struct list_head *to)
+{
+   __list_del(from);
+   list_addtail(from, to);
+}
+
 static inline void list_replace(struct list_head *from, struct list_head *to)
 {
 to->prev = from->prev;
@@ -81,17 +99,14 @@ static inline void list_replace(struct list_head *from, 
struct list_head *to)
 
 static inline void list_del(struct list_head *item)
 {
-item->prev->next = item->next;
-item->next->prev = item->prev;
+   __list_del(item);
 item->prev = item->next = NULL;
 }
 
 static inline void list_delinit(struct list_head *item)
 {
-item->prev->next = item->next;
-item->next->prev = item->prev;
-item->next = item;
-item->prev = item;
+   __list_del(item);
+   list_inithead(item);
 }
 
 static inline bool list_empty(struct list_head *list)
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] i965: Context-local batch manager

2015-08-07 Thread Chris Wilson
A new round for brw_batch.c now with lots and lots of baby steps
painstakingly individually tested.

Please tear to shreds.
-Chris

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/70] i965: Move struct intel_batchbuffer from brw_context.h to brw_batch.h

2015-08-07 Thread Chris Wilson
To ease future transitions.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h   | 31 +++
 src/mesa/drivers/dri/i965/brw_context.h | 31 ---
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index 7268e26..c38b92a 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -35,6 +35,37 @@ extern "C" {
 
 typedef drm_intel_bo brw_bo;
 
+enum brw_gpu_ring {
+   UNKNOWN_RING,
+   RENDER_RING,
+   BLT_RING,
+};
+
+struct intel_batchbuffer {
+   /** Current batchbuffer being queued up. */
+   brw_bo *bo;
+   /** Last BO submitted to the hardware.  Used for glFinish(). */
+   brw_bo *last_bo;
+
+#ifdef DEBUG
+   uint16_t emit, total;
+#endif
+   uint16_t reserved_space;
+   uint32_t *map_next;
+   uint32_t *map;
+   uint32_t *cpu_map;
+#define BATCH_SZ (8192*sizeof(uint32_t))
+
+   uint32_t state_batch_offset;
+   enum brw_gpu_ring ring;
+   bool needs_sol_reset;
+
+   struct {
+  uint32_t *map_next;
+  int reloc_count;
+   } saved;
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 7fc65e5..ce0ea94 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -859,37 +859,6 @@ struct brw_query_object {
bool flushed;
 };
 
-enum brw_gpu_ring {
-   UNKNOWN_RING,
-   RENDER_RING,
-   BLT_RING,
-};
-
-struct intel_batchbuffer {
-   /** Current batchbuffer being queued up. */
-   brw_bo *bo;
-   /** Last BO submitted to the hardware.  Used for glFinish(). */
-   brw_bo *last_bo;
-
-#ifdef DEBUG
-   uint16_t emit, total;
-#endif
-   uint16_t reserved_space;
-   uint32_t *map_next;
-   uint32_t *map;
-   uint32_t *cpu_map;
-#define BATCH_SZ (8192*sizeof(uint32_t))
-
-   uint32_t state_batch_offset;
-   enum brw_gpu_ring ring;
-   bool needs_sol_reset;
-
-   struct {
-  uint32_t *map_next;
-  int reloc_count;
-   } saved;
-};
-
 #define BRW_MAX_XFB_STREAMS 4
 
 struct brw_transform_feedback_object {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/70] i965: Rename intel_batchbuffer to brw_batch

2015-08-07 Thread Chris Wilson
In order to reduce future churn, rename the intel_batchbuffer struct.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h | 4 ++--
 src/mesa/drivers/dri/i965/brw_context.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_state_batch.c   | 6 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 4 ++--
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index c38b92a..c05f9b0 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -41,7 +41,7 @@ enum brw_gpu_ring {
BLT_RING,
 };
 
-struct intel_batchbuffer {
+typedef struct brw_batch {
/** Current batchbuffer being queued up. */
brw_bo *bo;
/** Last BO submitted to the hardware.  Used for glFinish(). */
@@ -64,7 +64,7 @@ struct intel_batchbuffer {
   uint32_t *map_next;
   int reloc_count;
} saved;
-};
+} brw_batch;
 
 #ifdef __cplusplus
 }
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index ce0ea94..cd8ea50 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1023,7 +1023,7 @@ struct brw_context
 */
uint32_t reset_count;
 
-   struct intel_batchbuffer batch;
+   brw_batch batch;
bool no_batch_wrap;
 
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c 
b/src/mesa/drivers/dri/i965/brw_state_batch.c
index d785c89..22dfbe5 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -40,14 +40,12 @@ brw_track_state_batch(struct brw_context *brw,
   int size,
   int index)
 {
-   struct intel_batchbuffer *batch = &brw->batch;
-
if (!brw->state_batch_list) {
   /* Our structs are always aligned to at least 32 bytes, so
* our array doesn't need to be any larger
*/
   brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) *
- batch->bo->size / 32);
+  brw->batch.bo->size / 32);
}
 
brw->state_batch_list[brw->state_batch_count].offset = offset;
@@ -124,7 +122,7 @@ __brw_state_batch(struct brw_context *brw,
   uint32_t *out_offset)
 
 {
-   struct intel_batchbuffer *batch = &brw->batch;
+   brw_batch *batch = &brw->batch;
uint32_t offset;
 
assert(size < batch->bo->size);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 44512be..ca65390 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -112,7 +112,7 @@ static void
 do_batch_dump(struct brw_context *brw)
 {
struct drm_intel_decode *decode;
-   struct intel_batchbuffer *batch = &brw->batch;
+   brw_batch *batch = &brw->batch;
int ret;
 
decode = drm_intel_decode_context_alloc(brw->intelScreen->deviceID);
@@ -290,7 +290,7 @@ throttle(struct brw_context *brw)
 static int
 do_flush_locked(struct brw_context *brw)
 {
-   struct intel_batchbuffer *batch = &brw->batch;
+   brw_batch *batch = &brw->batch;
int ret = 0;
 
if (brw->has_llc) {
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 48d6f5a..5ca3bf3 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -32,7 +32,7 @@ extern "C" {
  */
 #define BATCH_RESERVED 152
 
-struct intel_batchbuffer;
+struct brw_batch;
 struct brw_context;
 enum brw_gpu_ring;
 
@@ -154,7 +154,7 @@ static inline void
 intel_batchbuffer_advance(struct brw_context *brw)
 {
 #ifdef DEBUG
-   struct intel_batchbuffer *batch = &brw->batch;
+   brw_batch *batch = &brw->batch;
unsigned int _n = USED_BATCH(*batch) - batch->emit;
assert(batch->total != 0);
if (_n != batch->total) {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/70] i965: Substitute drm_intel_bo with a local name, brw_bo

2015-08-07 Thread Chris Wilson
In preparation for a local batch manager with a new buffer object, first
reduce the churn by renaming the existing buffer objects:
s/drm_intel_bo/brw_bo/

We only have to be careful to leave the global screen drm_intel_bo as
they are.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/Makefile.sources |  1 +
 src/mesa/drivers/dri/i965/brw_batch.h  | 42 +
 src/mesa/drivers/dri/i965/brw_context.c|  2 +-
 src/mesa/drivers/dri/i965/brw_context.h| 61 +--
 src/mesa/drivers/dri/i965/brw_draw.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_draw_upload.c|  6 +-
 src/mesa/drivers/dri/i965/brw_object_purgeable.c   |  4 +-
 .../drivers/dri/i965/brw_performance_monitor.c |  6 +-
 src/mesa/drivers/dri/i965/brw_pipe_control.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_program.c|  4 +-
 src/mesa/drivers/dri/i965/brw_queryobj.c   |  4 +-
 src/mesa/drivers/dri/i965/brw_sf_state.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_state_cache.c|  2 +-
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c   | 24 
 src/mesa/drivers/dri/i965/gen6_queryobj.c  |  8 +--
 src/mesa/drivers/dri/i965/gen7_sol_state.c |  2 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c  |  2 +-
 src/mesa/drivers/dri/i965/gen8_sol_state.c |  3 +-
 src/mesa/drivers/dri/i965/gen8_surface_state.c |  2 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  | 10 ++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.h  |  4 +-
 src/mesa/drivers/dri/i965/intel_blit.c | 68 +++---
 src/mesa/drivers/dri/i965/intel_blit.h | 34 +--
 src/mesa/drivers/dri/i965/intel_buffer_objects.c   | 14 ++---
 src/mesa/drivers/dri/i965/intel_buffer_objects.h   | 16 ++---
 src/mesa/drivers/dri/i965/intel_fbo.c  |  4 +-
 src/mesa/drivers/dri/i965/intel_fbo.h  |  4 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  |  6 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  | 12 ++--
 src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
 src/mesa/drivers/dri/i965/intel_pixel_read.c   |  2 +-
 src/mesa/drivers/dri/i965/intel_screen.h   |  6 +-
 src/mesa/drivers/dri/i965/intel_syncobj.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_tex.c  |  6 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c|  4 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c |  2 +-
 src/mesa/drivers/dri/i965/intel_upload.c   |  4 +-
 38 files changed, 213 insertions(+), 168 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_batch.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index dfdad75..a007440 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -1,4 +1,5 @@
 i965_FILES = \
+   brw_batch.h \
brw_binding_tables.c \
brw_blorp_blit.cpp \
brw_blorp_blit_eu.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
new file mode 100644
index 000..7268e26
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *Chris Wilson 
+ */
+
+#ifndef BRW_BATCH_H
+#define BRW_BATCH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include 
+
+typedef drm_intel_bo brw_bo;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BRW_BATCH_H */
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 8e02c69..8b2d006 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1350,7 +1350,7 @@ intel_process_dri2_buff

[Mesa-dev] [PATCH 05/70] i965: Move the pipelined test for SO register access to the screen

2015-08-07 Thread Chris Wilson
Moving the test to the screen places it alongside the other global HW
feature tests that want to be shared between contexts.

Signed-off-by: Chris Wilson 
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_context.c  |  2 +
 src/mesa/drivers/dri/i965/intel_extensions.c | 69 -
 src/mesa/drivers/dri/i965/intel_screen.c | 92 
 src/mesa/drivers/dri/i965/intel_screen.h |  8 +++
 4 files changed, 102 insertions(+), 69 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index ac744d7..e8d1396 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -728,6 +728,8 @@ brwCreateContext(gl_api api,
 
brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
brw->has_swizzling = screen->hw_has_swizzling;
+   brw->has_pipelined_so =
+  screen->hw_has_pipelined_register & HW_HAS_PIPELINED_SOL_OFFSET;
 
brw->vs.base.stage = MESA_SHADER_VERTEX;
brw->gs.base.stage = MESA_SHADER_GEOMETRY;
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index bf8fdae..6346fbc 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -32,74 +32,6 @@
 #include "intel_reg.h"
 #include "utils.h"
 
-/**
- * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
- *
- * Some combinations of hardware and kernel versions allow this feature,
- * while others don't.  Instead of trying to enumerate every case, just
- * try and write a register and see if works.
- */
-static bool
-can_do_pipelined_register_writes(struct brw_context *brw)
-{
-   /* Supposedly, Broadwell just works. */
-   if (brw->gen >= 8)
-  return true;
-
-   static int result = -1;
-   if (result != -1)
-  return result;
-
-   /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the
-* statistics registers), and we already reset it to zero before using it.
-*/
-   const int reg = GEN7_SO_WRITE_OFFSET(0);
-   const int expected_value = 0x1337d0d0;
-   const int offset = 100;
-
-   /* The register we picked only exists on Gen7+. */
-   assert(brw->gen == 7);
-
-   uint32_t *data;
-   /* Set a value in a BO to a known quantity.  The workaround BO already
-* exists and doesn't contain anything important, so we may as well use it.
-*/
-   drm_intel_bo_map(brw->workaround_bo, true);
-   data = brw->workaround_bo->virtual;
-   data[offset] = 0x;
-   drm_intel_bo_unmap(brw->workaround_bo);
-
-   /* Write the register. */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(reg);
-   OUT_BATCH(expected_value);
-   ADVANCE_BATCH();
-
-   brw_emit_mi_flush(brw);
-
-   /* Save the register's value back to the buffer. */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
-   OUT_BATCH(reg);
-   OUT_RELOC(brw->workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- offset * sizeof(uint32_t));
-   ADVANCE_BATCH();
-
-   intel_batchbuffer_flush(brw);
-
-   /* Check whether the value got written. */
-   drm_intel_bo_map(brw->workaround_bo, false);
-   data = brw->workaround_bo->virtual;
-   bool success = data[offset] == expected_value;
-   drm_intel_bo_unmap(brw->workaround_bo);
-
-   result = success;
-
-   return success;
-}
-
 static bool
 can_write_oacontrol(struct brw_context *brw)
 {
@@ -328,7 +260,6 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_texture_compression_bptc = true;
   ctx->Extensions.ARB_texture_view = true;
 
-  brw->has_pipelined_so = can_do_pipelined_register_writes(brw);
   if (brw->has_pipelined_so) {
  ctx->Extensions.ARB_draw_indirect = true;
  ctx->Extensions.ARB_transform_feedback2 = true;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 61f1dbe..0a64d2b 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1186,6 +1186,96 @@ intel_detect_timestamp(struct intel_screen *screen)
 }
 
 /**
+ * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
+ *
+ * Some combinations of hardware and kernel versions allow this feature,
+ * while others don't.  Instead of trying to enumerate every case, just
+ * try and write a register and see if works.
+ */
+static bool
+intel_detect_pipelined_register(struct intel_screen *screen,
+int reg, uint32_t expected_value, bool reset)
+{
+   drm_intel_bo *results, *bo;
+   uint32_t *batch;
+   uint32_t offset = 0;
+   bool success = false;
+
+   /* Create a zero'ed temporary buffer for reading our results */
+   results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0);
+   if (results == NULL)
+  goto err;
+
+   bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 

[Mesa-dev] [PATCH 08/70] i965: Remove early release of DRI2 miptree

2015-08-07 Thread Chris Wilson
intel_update_winsys_renderbuffer_miptree() will release the existing
miptree when wrapping a new DRI2 buffer, so we can remove the early
release and so prevent a NULL mt dereference should importing the new
DRI2 name fail for any reason. (Reusing the old DRI2 name will result
in the rendering going astray, to a stale buffer, and not shown on the
screen, but it allows us to issue a warning and not crash much later in
innocent code.)

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_context.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index e8d1396..72f3897 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1388,7 +1388,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
   buffer->cpp, buffer->pitch);
}
 
-   intel_miptree_release(&rb->mt);
bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
   buffer->name);
if (!bo) {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/70] i965: Combine the multiple pipelined register detection into one round-trip

2015-08-07 Thread Chris Wilson
Combining the multiple access checks into a few batches and a single
serialising read can reduce detection times from around 100us to 70us on
a fast Haswell system.

Signed-off-by: Chris Wilson 
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 165 +++
 1 file changed, 101 insertions(+), 64 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 36c7bb2..0b60f13 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1185,6 +1185,13 @@ intel_detect_timestamp(struct intel_screen *screen)
return 0;
 }
 
+struct detect_pipelined_register {
+   uint32_t reg;
+   uint32_t expected_value;
+   unsigned result;
+   bool reset;
+};
+
 /**
  * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
  *
@@ -1192,102 +1199,134 @@ intel_detect_timestamp(struct intel_screen *screen)
  * while others don't.  Instead of trying to enumerate every case, just
  * try and write a register and see if works.
  */
-static bool
-intel_detect_pipelined_register(struct intel_screen *screen,
-int reg, uint32_t expected_value, bool reset)
+static void
+__intel_detect_pipelined_registers(struct intel_screen *screen,
+   struct detect_pipelined_register *r,
+   int count)
 {
-   drm_intel_bo *results, *bo;
-   uint32_t *batch;
-   uint32_t offset = 0;
-   bool success = false;
+   drm_intel_bo *results;
+   int i;
+
+   if (count == 0)
+  return;
 
/* Create a zero'ed temporary buffer for reading our results */
results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0);
if (results == NULL)
-  goto err;
-
-   bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
-   if (bo == NULL)
-  goto err_results;
+  return;
 
-   if (drm_intel_bo_map(bo, 1))
-  goto err_batch;
+   /* Emit each access in a separate batch buffer so that if the kernel
+* rejects an individual access attempt, we don't incorrectly assume
+* all the register accesses are invalid.
+*/
+   for (i = 0; i < count; i++) {
+  drm_intel_bo *bo;
+  uint32_t *batch;
 
-   batch = bo->virtual;
+  bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
+  if (bo == NULL)
+ continue;
 
-   /* Write the register. */
-   *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
-   *batch++ = reg;
-   *batch++ = expected_value;
+  if (drm_intel_bo_map(bo, 1))
+ goto err_batch;
 
-   /* Save the register's value back to the buffer. */
-   *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
-   *batch++ = reg;
-   drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual,
-   results, offset*sizeof(uint32_t),
-   I915_GEM_DOMAIN_INSTRUCTION,
-   I915_GEM_DOMAIN_INSTRUCTION);
-   *batch++ = results->offset + offset*sizeof(uint32_t);
+  batch = bo->virtual;
 
-   /* And afterwards clear the register */
-   if (reset) {
+  /* Write the register. */
   *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
-  *batch++ = reg;
-  *batch++ = 0;
-   }
+  *batch++ = r[i].reg;
+  *batch++ = r[i].expected_value;
+
+  /* Save the register's value back to the buffer. */
+  *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
+  *batch++ = r[i].reg;
+  drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual,
+  results, i*sizeof(uint32_t),
+  I915_GEM_DOMAIN_INSTRUCTION,
+  I915_GEM_DOMAIN_INSTRUCTION);
+  *batch++ = results->offset + i*sizeof(uint32_t);
+
+  /* And afterwards clear the register */
+  if (r[i].reset) {
+ *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
+ *batch++ = r[i].reg;
+ *batch++ = 0;
+  }
 
-   *batch++ = MI_BATCH_BUFFER_END;
+  *batch++ = MI_BATCH_BUFFER_END;
 
-   drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8),
- NULL, 0, 0,
- I915_EXEC_RENDER);
+  drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8),
+NULL, 0, 0,
+I915_EXEC_RENDER);
 
-   /* Check whether the value got written. */
+err_batch:
+  drm_intel_bo_unreference(bo);
+   }
+
+   /* Check whether the values got written. */
if (drm_intel_bo_map(results, false) == 0) {
-  success = *((uint32_t *)results->virtual + offset) == expected_value;
+  uint32_t *data = results->virtual;
+  for (i = 0; i < count; i++)
+ if (data[i] == r[i].expected_value)
+screen->hw_has_pipelined_register |= r[i].result;
   drm_intel_bo_unmap(results);
}
 
-err_batch:
-   drm_intel_bo_unreference(bo);
-err_results:
drm_intel_bo_unreference(results);
-err:
-   return 

[Mesa-dev] [PATCH 13/70] i965: Add a couple of utility functions to ref/unref a brw_bo

2015-08-07 Thread Chris Wilson
To further reduce churn when replacing the buffer object implementation,
wrap the existing drm_intel_bo_reference/drm_intel_bo_unreference.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h  | 12 +++
 src/mesa/drivers/dri/i965/brw_context.c| 25 +++---
 src/mesa/drivers/dri/i965/brw_draw.c   | 11 +-
 src/mesa/drivers/dri/i965/brw_draw_upload.c| 10 -
 .../drivers/dri/i965/brw_performance_monitor.c |  8 +++
 src/mesa/drivers/dri/i965/brw_pipe_control.c   |  5 ++---
 src/mesa/drivers/dri/i965/brw_program.c|  4 ++--
 src/mesa/drivers/dri/i965/brw_queryobj.c   | 10 -
 src/mesa/drivers/dri/i965/brw_state_cache.c|  4 ++--
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c   |  2 +-
 src/mesa/drivers/dri/i965/gen6_queryobj.c  |  4 ++--
 src/mesa/drivers/dri/i965/gen6_sol.c   |  4 ++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  | 17 ++-
 src/mesa/drivers/dri/i965/intel_buffer_objects.c   | 14 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 13 ++-
 src/mesa/drivers/dri/i965/intel_screen.c   |  6 +++---
 src/mesa/drivers/dri/i965/intel_syncobj.c  | 10 -
 src/mesa/drivers/dri/i965/intel_upload.c   |  7 +++---
 18 files changed, 78 insertions(+), 88 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index c05f9b0..5745aa4 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -66,6 +66,18 @@ typedef struct brw_batch {
} saved;
 } brw_batch;
 
+inline static brw_bo *brw_bo_get(brw_bo *bo)
+{
+   drm_intel_bo_reference(bo);
+   return bo;
+}
+
+inline static void brw_bo_put(brw_bo *bo)
+{
+   if (bo)
+  drm_intel_bo_unreference(bo);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 8b2d006..583ce7f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -929,17 +929,11 @@ intelDestroyContext(__DRIcontext * driContextPriv)
brw_destroy_state(brw);
brw_draw_destroy(brw);
 
-   drm_intel_bo_unreference(brw->curbe.curbe_bo);
-   if (brw->vs.base.scratch_bo)
-  drm_intel_bo_unreference(brw->vs.base.scratch_bo);
-   if (brw->gs.base.scratch_bo)
-  drm_intel_bo_unreference(brw->gs.base.scratch_bo);
-   if (brw->wm.base.scratch_bo)
-  drm_intel_bo_unreference(brw->wm.base.scratch_bo);
-
-   gen7_reset_hw_bt_pool_offsets(brw);
-   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
-   brw->hw_bt_pool.bo = NULL;
+   brw_bo_put(brw->curbe.curbe_bo);
+   brw_bo_put(brw->vs.base.scratch_bo);
+   brw_bo_put(brw->gs.base.scratch_bo);
+   brw_bo_put(brw->wm.base.scratch_bo);
+   brw_bo_put(brw->hw_bt_pool.bo);
 
drm_intel_gem_context_destroy(brw->hw_ctx);
 
@@ -955,10 +949,8 @@ intelDestroyContext(__DRIcontext * driContextPriv)
brw_fini_pipe_control(brw);
intel_batchbuffer_free(brw);
 
-   drm_intel_bo_unreference(brw->throttle_batch[1]);
-   drm_intel_bo_unreference(brw->throttle_batch[0]);
-   brw->throttle_batch[1] = NULL;
-   brw->throttle_batch[0] = NULL;
+   brw_bo_put(brw->throttle_batch[1]);
+   brw_bo_put(brw->throttle_batch[0]);
 
driDestroyOptionCache(&brw->optionCache);
 
@@ -1402,6 +1394,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
 drawable->w, drawable->h,
 buffer->pitch);
+   brw_bo_put(bo);
 
if (brw_is_front_buffer_drawing(fb) &&
(buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
@@ -1411,8 +1404,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
}
 
assert(rb->mt);
-
-   drm_intel_bo_unreference(bo);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 6dc0fd8..1aa0093 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -291,7 +291,7 @@ brw_merge_inputs(struct brw_context *brw,
GLuint i;
 
for (i = 0; i < brw->vb.nr_buffers; i++) {
-  drm_intel_bo_unreference(brw->vb.buffers[i].bo);
+  brw_bo_put(brw->vb.buffers[i].bo);
   brw->vb.buffers[i].bo = NULL;
}
brw->vb.nr_buffers = 0;
@@ -482,13 +482,12 @@ brw_try_draw_prims(struct gl_context *ctx,
   brw->draw.gl_basevertex =
  prims[i].indexed ? prims[i].basevertex : prims[i].start;
 
-  drm_intel_bo_unreference(brw->draw.draw_params_bo);
+  brw_bo_put(brw->draw.draw_params_bo);
 
   if (prims[i].is_indirect) {
  /* Point draw_params_bo at the indirect buffer. */
  brw->draw.draw_params_bo =
-intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
- drm_intel_bo_reference(brw->draw.draw_params_bo);
+   

[Mesa-dev] [PATCH 06/70] i965: Move the OACONTROL pipelined access check from context to screen

2015-08-07 Thread Chris Wilson
Similarly to the pipelined SO_OFFSET check, this moves the global HW
compatibility check to the screen next to the other global checks.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 68 +---
 src/mesa/drivers/dri/i965/intel_screen.c | 17 +++
 src/mesa/drivers/dri/i965/intel_screen.h |  3 +-
 3 files changed, 21 insertions(+), 67 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 6346fbc..e7828c7 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -32,71 +32,6 @@
 #include "intel_reg.h"
 #include "utils.h"
 
-static bool
-can_write_oacontrol(struct brw_context *brw)
-{
-   if (brw->gen < 6 || brw->gen >= 8)
-  return false;
-
-   static int result = -1;
-   if (result != -1)
-  return result;
-
-   /* Set "Select Context ID" to a particular address (which is likely not a
-* context), but leave all counting disabled.  This should be harmless.
-*/
-   const int expected_value = 0x31337000;
-   const int offset = 110;
-
-   uint32_t *data;
-   /* Set a value in a BO to a known quantity.  The workaround BO already
-* exists and doesn't contain anything important, so we may as well use it.
-*/
-   drm_intel_bo_map(brw->workaround_bo, true);
-   data = brw->workaround_bo->virtual;
-   data[offset] = 0x;
-   drm_intel_bo_unmap(brw->workaround_bo);
-
-   /* Write OACONTROL. */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(OACONTROL);
-   OUT_BATCH(expected_value);
-   ADVANCE_BATCH();
-
-   brw_emit_mi_flush(brw);
-
-   /* Save the register's value back to the buffer. */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
-   OUT_BATCH(OACONTROL);
-   OUT_RELOC(brw->workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- offset * sizeof(uint32_t));
-   ADVANCE_BATCH();
-
-   brw_emit_mi_flush(brw);
-
-   /* Set OACONTROL back to zero (everything off). */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(OACONTROL);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   intel_batchbuffer_flush(brw);
-
-   /* Check whether the value got written. */
-   drm_intel_bo_map(brw->workaround_bo, false);
-   data = brw->workaround_bo->virtual;
-   bool success = data[offset] == expected_value;
-   drm_intel_bo_unmap(brw->workaround_bo);
-
-   result = success;
-
-   return success;
-}
-
 /**
  * Initializes potential list of extensions if ctx == NULL, or actually enables
  * extensions for a context.
@@ -207,7 +142,8 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
   ctx->Extensions.EXT_timer_query = true;
 
-  if (brw->gen == 5 || can_write_oacontrol(brw)) {
+  if (brw->gen == 5 ||
+  brw->intelScreen->hw_has_pipelined_register & 
HW_HAS_PIPELINED_OACONTROL) {
  ctx->Extensions.AMD_performance_monitor = true;
  ctx->Extensions.INTEL_performance_query = true;
   }
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 0a64d2b..36c7bb2 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1275,6 +1275,21 @@ intel_detect_pipelined_so(struct intel_screen *screen)
   false);
 }
 
+static bool
+intel_detect_pipelined_oacontrol(struct intel_screen *screen)
+{
+   if (screen->devinfo->gen < 6 || screen->devinfo->gen >= 8)
+  return false;
+
+   /* Set "Select Context ID" to a particular address (which is likely not a
+* context), but leave all counting disabled.  This should be harmless.
+*/
+   return intel_detect_pipelined_register(screen,
+  OACONTROL,
+  0x31337000,
+  true);
+}
+
 /**
  * Return array of MSAA modes supported by the hardware. The array is
  * zero-terminated and sorted in decreasing order.
@@ -1536,6 +1551,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
if (intel_detect_pipelined_so(intelScreen))
   intelScreen->hw_has_pipelined_register |= HW_HAS_PIPELINED_SOL_OFFSET;
+   if (intel_detect_pipelined_oacontrol(intelScreen))
+  intelScreen->hw_has_pipelined_register |= HW_HAS_PIPELINED_OACONTROL;
 
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h 
b/src/mesa/drivers/dri/i965/intel_screen.h
index 7890706..e054b69 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -70,7 +70,8 @@ struct intel_screen
 * for each register.
 */
unsigned hw_has_pipelined

[Mesa-dev] [PATCH 04/70] i965: Share the workaround bo between all contexts

2015-08-07 Thread Chris Wilson
Since the workaround bo is used strictly as a write-only buffer, we need
only allocate one per screen and use the same one from all contexts.

(The caveat here is during extension initialisation, where we write into
and read back register values from the buffer, but that is performed only
once for the first context - and barring synchronisation issues should not
be a problem. Safer would be to move that also to the screen.)

v2: Give the workaround bo its own init function and don't piggy back
intel_bufmgr_init() since it is not that related.

Signed-off-by: Chris Wilson 
Cc: Kenneth Graunke 
Cc: Martin Peres 
---
 src/mesa/drivers/dri/i965/brw_context.c  |  7 +--
 src/mesa/drivers/dri/i965/brw_context.h  |  4 ++--
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 13 -
 src/mesa/drivers/dri/i965/intel_screen.c | 15 +++
 src/mesa/drivers/dri/i965/intel_screen.h |  1 +
 5 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index efcd91a..ac744d7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -819,12 +819,7 @@ brwCreateContext(gl_api api,
   }
}
 
-   if (brw_init_pipe_control(brw, devinfo)) {
-  *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
-  intelDestroyContext(driContextPriv);
-  return false;
-   }
-
+   brw_init_pipe_control(brw, devinfo);
brw_init_state(brw);
 
intelInitExtensions(ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index ffdf821..166b852 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -2016,8 +2016,8 @@ gen9_use_linear_1d_layout(const struct brw_context *brw,
   const struct intel_mipmap_tree *mt);
 
 /* brw_pipe_control.c */
-int brw_init_pipe_control(struct brw_context *brw,
- const struct brw_device_info *info);
+void brw_init_pipe_control(struct brw_context *brw,
+   const struct brw_device_info *info);
 void brw_fini_pipe_control(struct brw_context *brw);
 
 void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 7ee3cb6..872bfe8 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -330,26 +330,21 @@ brw_emit_mi_flush(struct brw_context *brw)
brw_render_cache_set_clear(brw);
 }
 
-int
+void
 brw_init_pipe_control(struct brw_context *brw,
   const struct brw_device_info *devinfo)
 {
if (devinfo->gen < 6)
-  return 0;
+  return;
 
/* We can't just use brw_state_batch to get a chunk of space for
 * the gen6 workaround because it involves actually writing to
 * the buffer, and the kernel doesn't let us write to the batch.
 */
-   brw->workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
-   "pipe_control workaround",
-   4096, 4096);
-   if (brw->workaround_bo == NULL)
-  return -ENOMEM;
+   brw->workaround_bo = brw->intelScreen->workaround_bo;
+   drm_intel_bo_reference(brw->workaround_bo);
 
brw->pipe_controls_since_last_cs_stall = 0;
-
-   return 0;
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 147fa1e..61f1dbe 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -967,6 +967,7 @@ intelDestroyScreen(__DRIscreen * sPriv)
 {
struct intel_screen *intelScreen = sPriv->driverPrivate;
 
+   drm_intel_bo_unreference(intelScreen->workaround_bo);
dri_bufmgr_destroy(intelScreen->bufmgr);
driDestroyOptionInfo(&intelScreen->optionCache);
 
@@ -1106,6 +1107,17 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
 }
 
 static bool
+intel_init_workaround_bo(struct intel_screen *intelScreen)
+{
+   /* A small scratch bo shared by all contexts, primarily used
+* for doing PIPECONTROL serialisation writes that are discarded.
+*/
+   intelScreen->workaround_bo =
+  drm_intel_bo_alloc(intelScreen->bufmgr, "pipe_control w/a", 4096, 4096);
+   return intelScreen->workaround_bo != NULL;
+}
+
+static bool
 intel_detect_swizzling(struct intel_screen *screen)
 {
drm_intel_bo *buffer;
@@ -1417,6 +1429,9 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
if (!intel_init_bufmgr(intelScreen))
return false;
 
+   if (!intel_init_workaround_bo(intelScreen))
+   return false;
+
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
   brw_get_revision(psp->fd));
diff --git a/src/mesa/drivers/dri/i965/intel_scre

[Mesa-dev] [PATCH 09/70] i965: Remove direct includes of intel_batchbuffer.h

2015-08-07 Thread Chris Wilson
Upcoming patches eliminate the intel_batchbuffer interface and one of
the minor changes that causes a lot of churn is the removal of the
header, along with the occasional need to now include intel_reg.h
themselves. This patch moves the individual includes into brw_context.h.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c  | 3 ++-
 src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 -
 src/mesa/drivers/dri/i965/brw_cc.c  | 1 -
 src/mesa/drivers/dri/i965/brw_clear.c   | 1 -
 src/mesa/drivers/dri/i965/brw_clip.c| 2 --
 src/mesa/drivers/dri/i965/brw_clip_line.c   | 2 --
 src/mesa/drivers/dri/i965/brw_clip_point.c  | 2 --
 src/mesa/drivers/dri/i965/brw_clip_tri.c| 2 --
 src/mesa/drivers/dri/i965/brw_clip_unfilled.c   | 2 --
 src/mesa/drivers/dri/i965/brw_clip_util.c   | 2 --
 src/mesa/drivers/dri/i965/brw_compute.c | 1 -
 src/mesa/drivers/dri/i965/brw_conditional_render.c  | 2 +-
 src/mesa/drivers/dri/i965/brw_context.c | 1 -
 src/mesa/drivers/dri/i965/brw_context.h | 3 +++
 src/mesa/drivers/dri/i965/brw_cs.cpp| 1 -
 src/mesa/drivers/dri/i965/brw_curbe.c   | 1 -
 src/mesa/drivers/dri/i965/brw_draw.c| 2 +-
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 1 -
 src/mesa/drivers/dri/i965/brw_ff_gs.c   | 2 --
 src/mesa/drivers/dri/i965/brw_ff_gs_emit.c  | 1 -
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 1 -
 src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c   | 1 -
 src/mesa/drivers/dri/i965/brw_meta_updownsample.c   | 1 -
 src/mesa/drivers/dri/i965/brw_misc_state.c  | 2 +-
 src/mesa/drivers/dri/i965/brw_performance_monitor.c | 3 ++-
 src/mesa/drivers/dri/i965/brw_pipe_control.c| 1 -
 src/mesa/drivers/dri/i965/brw_primitive_restart.c   | 2 --
 src/mesa/drivers/dri/i965/brw_program.c | 3 ++-
 src/mesa/drivers/dri/i965/brw_queryobj.c| 2 +-
 src/mesa/drivers/dri/i965/brw_sampler_state.c   | 2 +-
 src/mesa/drivers/dri/i965/brw_sf.c  | 2 --
 src/mesa/drivers/dri/i965/brw_sf_emit.c | 2 --
 src/mesa/drivers/dri/i965/brw_state_batch.c | 1 -
 src/mesa/drivers/dri/i965/brw_state_cache.c | 1 -
 src/mesa/drivers/dri/i965/brw_state_dump.c  | 1 -
 src/mesa/drivers/dri/i965/brw_state_upload.c| 2 +-
 src/mesa/drivers/dri/i965/brw_urb.c | 3 ++-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c| 1 -
 src/mesa/drivers/dri/i965/gen6_blorp.cpp| 1 -
 src/mesa/drivers/dri/i965/gen6_cc.c | 1 -
 src/mesa/drivers/dri/i965/gen6_clip_state.c | 1 -
 src/mesa/drivers/dri/i965/gen6_depth_state.c| 1 -
 src/mesa/drivers/dri/i965/gen6_depthstencil.c   | 1 -
 src/mesa/drivers/dri/i965/gen6_gs_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen6_multisample_state.c  | 2 --
 src/mesa/drivers/dri/i965/gen6_queryobj.c   | 1 -
 src/mesa/drivers/dri/i965/gen6_sampler_state.c  | 1 -
 src/mesa/drivers/dri/i965/gen6_scissor_state.c  | 1 -
 src/mesa/drivers/dri/i965/gen6_sf_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen6_sol.c| 1 -
 src/mesa/drivers/dri/i965/gen6_surface_state.c  | 1 -
 src/mesa/drivers/dri/i965/gen6_urb.c| 1 -
 src/mesa/drivers/dri/i965/gen6_viewport_state.c | 1 -
 src/mesa/drivers/dri/i965/gen6_vs_state.c   | 2 +-
 src/mesa/drivers/dri/i965/gen6_wm_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen7_blorp.cpp| 1 -
 src/mesa/drivers/dri/i965/gen7_disable.c| 1 -
 src/mesa/drivers/dri/i965/gen7_gs_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen7_misc_state.c | 1 -
 src/mesa/drivers/dri/i965/gen7_sf_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen7_sol_state.c  | 2 +-
 src/mesa/drivers/dri/i965/gen7_urb.c| 1 -
 src/mesa/drivers/dri/i965/gen7_viewport_state.c | 1 -
 src/mesa/drivers/dri/i965/gen7_vs_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen7_wm_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen8_blend_state.c| 1 -
 src/mesa/drivers/dri/i965/gen8_depth_state.c| 2 +-
 src/mesa/drivers/dri/i965/gen8_disable.c| 1 -
 src/mesa/drivers/dri/i965/gen8_draw_upload.c| 1 -
 src/mesa/drivers/dri/i965/gen8_gs_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen8_misc_state.c | 1 -
 src/mesa/drivers/dri/i965/gen8_multisample_state.c  | 2 --
 src/mesa/drivers/dri/i965/gen8_ps_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen8_sf_state.c   | 1 -
 src/mesa/drivers/dri/i965/gen8_sol_state.c  | 1 -
 src/mesa/drivers/dri/i965/gen8_surface_state.c  | 1 -
 src/mesa/drivers/dri/i965/gen8_viewport_state.c | 1 -
 src/mesa/drivers/dri/i965/gen8_vs_state.c   | 1 -

[Mesa-dev] [PATCH 03/70] i965: Only flush the batchbuffer if we need to zero the SO offsets

2015-08-07 Thread Chris Wilson
If we don't have pipelined register access (e.g. Haswell before kernel
v4.2), then we can only implement EXT_transform_feedback by resetting the
SO offsets *between* batches. However, if we do have pipelined access to
the SO registers on gen7, we can simply emit an inline reset of the SO
registers without a full batch flush.

Signed-off-by: Chris Wilson 
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  1 +
 src/mesa/drivers/dri/i965/gen7_sol_state.c   | 13 -
 src/mesa/drivers/dri/i965/intel_extensions.c |  3 ++-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 62e39be..ffdf821 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1122,6 +1122,7 @@ struct brw_context
bool no_simd8 : 1;
bool use_rep_send : 1;
bool use_resource_streamer : 1;
+   bool has_pipelined_so : 1;
 
/**
 * Some versions of Gen hardware don't do centroid interpolation correctly
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 41573a8..da6f2dd 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -446,7 +446,7 @@ gen7_begin_transform_feedback(struct gl_context *ctx, 
GLenum mode,
/* Reset the SO buffer offsets to 0. */
if (brw->gen >= 8) {
   brw_obj->zero_offsets = true;
-   } else {
+   } else if (!brw->has_pipelined_so) {
   intel_batchbuffer_flush(brw);
   brw->batch.needs_sol_reset = true;
}
@@ -462,6 +462,17 @@ gen7_begin_transform_feedback(struct gl_context *ctx, 
GLenum mode,
   brw_obj->prims_generated[i] = 0;
}
 
+   /* Reset the SOL buffer offset registers. */
+   if (brw->gen == 7 && brw->has_pipelined_so) {
+  for (int i = 0; i < 4; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+  }
+   }
+
/* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
gen7_save_primitives_written_counters(brw, brw_obj);
 
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 4a0..bf8fdae 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -328,7 +328,8 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_texture_compression_bptc = true;
   ctx->Extensions.ARB_texture_view = true;
 
-  if (can_do_pipelined_register_writes(brw)) {
+  brw->has_pipelined_so = can_do_pipelined_register_writes(brw);
+  if (brw->has_pipelined_so) {
  ctx->Extensions.ARB_draw_indirect = true;
  ctx->Extensions.ARB_transform_feedback2 = true;
  ctx->Extensions.ARB_transform_feedback3 = true;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/70] i965: Wrap drm_intel_bo_madvise() for brw_bo

2015-08-07 Thread Chris Wilson
To reduce later churn, extract drm_intel_bo_madvise() with a smaller
wrapper.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h| 5 +
 src/mesa/drivers/dri/i965/brw_object_purgeable.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index 31f39b4..1d0f0dd 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -78,6 +78,11 @@ inline static void brw_bo_put(brw_bo *bo)
   drm_intel_bo_unreference(bo);
 }
 
+inline static int brw_bo_madvise(brw_bo *bo, int state)
+{
+   return drm_intel_bo_madvise(bo, state);
+}
+
 inline static uint32_t brw_bo_flink(brw_bo *bo)
 {
uint32_t name = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_object_purgeable.c 
b/src/mesa/drivers/dri/i965/brw_object_purgeable.c
index 8d7ebdb..bb588cd 100644
--- a/src/mesa/drivers/dri/i965/brw_object_purgeable.c
+++ b/src/mesa/drivers/dri/i965/brw_object_purgeable.c
@@ -43,7 +43,7 @@ intel_buffer_purgeable(brw_bo *buffer)
int retained = 0;
 
if (buffer != NULL)
-  retained = drm_intel_bo_madvise(buffer, I915_MADV_DONTNEED);
+  retained = brw_bo_madvise(buffer, I915_MADV_DONTNEED);
 
return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
 }
@@ -107,7 +107,7 @@ intel_buffer_unpurgeable(brw_bo *buffer)
 
retained = 0;
if (buffer != NULL)
-  retained = drm_intel_bo_madvise(buffer, I915_MADV_WILLNEED);
+  retained = brw_bo_madvise(buffer, I915_MADV_WILLNEED);
 
return retained ? GL_RETAINED_APPLE : GL_UNDEFINED_APPLE;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/70] i965: Add a simple utility function to wrap drm_intel_bo_flink()

2015-08-07 Thread Chris Wilson
Just to reduce some later churn, pull out the flink wrapper.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h|  7 +++
 src/mesa/drivers/dri/i965/brw_context.c  | 11 +--
 src/mesa/drivers/dri/i965/intel_screen.c |  6 ++
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index 5745aa4..31f39b4 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -78,6 +78,13 @@ inline static void brw_bo_put(brw_bo *bo)
   drm_intel_bo_unreference(bo);
 }
 
+inline static uint32_t brw_bo_flink(brw_bo *bo)
+{
+   uint32_t name = 0;
+   drm_intel_bo_flink(bo, &name);
+   return name;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 583ce7f..f8add33 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1361,12 +1361,11 @@ intel_process_dri2_buffer(struct brw_context *brw,
 
uint32_t old_name = 0;
if (last_mt) {
-   /* The bo already has a name because the miptree was created by a
-   * previous call to intel_process_dri2_buffer(). If a bo already has a
-   * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
-   * create a new name.
-   */
-  drm_intel_bo_flink(last_mt->bo, &old_name);
+  /* The bo already has a name because the miptree was created by a
+   * previous call to intel_process_dri2_buffer(). If a bo already has a
+   * name, then flink is a low-cost getter. It does not create a new name.
+   */
+  old_name = brw_bo_flink(last_mt->bo);
}
 
if (old_name == buffer->name)
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index b8e96be..c84f495 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -559,7 +559,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
   *value = image->bo->handle;
   return true;
case __DRI_IMAGE_ATTRIB_NAME:
-  return !drm_intel_bo_flink(image->bo, (uint32_t *) value);
+  return drm_intel_bo_flink(image->bo, (uint32_t *) value) == 0;
case __DRI_IMAGE_ATTRIB_FORMAT:
   *value = image->dri_format;
   return true;
@@ -575,9 +575,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
   *value = image->planar_format->components;
   return true;
case __DRI_IMAGE_ATTRIB_FD:
-  if (drm_intel_bo_gem_export_to_prime(image->bo, value) == 0)
- return true;
-  return false;
+  return drm_intel_bo_gem_export_to_prime(image->bo, value) == 0;
case __DRI_IMAGE_ATTRIB_FOURCC:
   if (intel_lookup_fourcc(image->dri_format, value))
  return true;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/70] i965: Move the render_cache dirty set from the context to the batch

2015-08-07 Thread Chris Wilson
To reduce churn later, move the brw->render_cache dirty set into the
batch (i.e. brw->batch.render_cache).

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h   |  7 +++
 src/mesa/drivers/dri/i965/brw_context.h |  7 ---
 src/mesa/drivers/dri/i965/intel_fbo.c   | 12 ++--
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index 1d0f0dd..3b8d354 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -64,6 +64,13 @@ typedef struct brw_batch {
   uint32_t *map_next;
   int reloc_count;
} saved;
+
+   /**
+* Set of brw_bo* that have been rendered to within this batchbuffer
+* and would need flushing before being used from another cache domain that
+* isn't coherent with it (i.e. the sampler).
+*/
+   struct set *render_cache;
 } brw_batch;
 
 inline static brw_bo *brw_bo_get(brw_bo *bo)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index cd8ea50..238d0eb 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1009,13 +1009,6 @@ struct brw_context
uint8_t pipe_controls_since_last_cs_stall;
 
/**
-* Set of brw_bo* that have been rendered to within this batchbuffer
-* and would need flushing before being used from another cache domain that
-* isn't coherent with it (i.e. the sampler).
-*/
-   struct set *render_cache;
-
-   /**
 * Number of resets observed in the system at context creation.
 *
 * This is tracked in the context so that we can determine that another
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index b22931e..fbccf7e 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -1050,15 +1050,15 @@ brw_render_cache_set_clear(struct brw_context *brw)
 {
struct set_entry *entry;
 
-   set_foreach(brw->render_cache, entry) {
-  _mesa_set_remove(brw->render_cache, entry);
+   set_foreach(brw->batch.render_cache, entry) {
+  _mesa_set_remove(brw->batch.render_cache, entry);
}
 }
 
 void
 brw_render_cache_set_add_bo(struct brw_context *brw, brw_bo *bo)
 {
-   _mesa_set_add(brw->render_cache, bo);
+   _mesa_set_add(brw->batch.render_cache, bo);
 }
 
 /**
@@ -1076,7 +1076,7 @@ brw_render_cache_set_add_bo(struct brw_context *brw, 
brw_bo *bo)
 void
 brw_render_cache_set_check_flush(struct brw_context *brw, brw_bo *bo)
 {
-   if (!_mesa_set_search(brw->render_cache, bo))
+   if (!_mesa_set_search(brw->batch.render_cache, bo))
   return;
 
brw_emit_mi_flush(brw);
@@ -1103,6 +1103,6 @@ intel_fbo_init(struct brw_context *brw)
dd->EGLImageTargetRenderbufferStorage =
   intel_image_target_renderbuffer_storage;
 
-   brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+   brw->batch.render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/70] i965: Move pipelined register access to its own file

2015-08-07 Thread Chris Wilson
Move the pipelined register access out of intel_batchbuffer into its
own utility file in preparation for replacing intel_batchbuffer. This
also gives us the opportunity to refactor a few similar routines for
writing registers, and so should prove useful in its own right.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/Makefile.sources |  2 +
 src/mesa/drivers/dri/i965/brw_conditional_render.c |  1 +
 src/mesa/drivers/dri/i965/brw_context.h| 12 
 src/mesa/drivers/dri/i965/brw_draw.c   |  1 +
 src/mesa/drivers/dri/i965/brw_pipelined_register.c | 79 ++
 src/mesa/drivers/dri/i965/brw_pipelined_register.h | 46 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  | 52 --
 7 files changed, 129 insertions(+), 64 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_pipelined_register.c
 create mode 100644 src/mesa/drivers/dri/i965/brw_pipelined_register.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index a007440..be80246 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -90,6 +90,8 @@ i965_FILES = \
brw_packed_float.c \
brw_performance_monitor.c \
brw_pipe_control.c \
+   brw_pipelined_register.c \
+   brw_pipelined_register.h \
brw_primitive_restart.c \
brw_program.c \
brw_program.h \
diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c 
b/src/mesa/drivers/dri/i965/brw_conditional_render.c
index ffd10a6..962c1f1 100644
--- a/src/mesa/drivers/dri/i965/brw_conditional_render.c
+++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
@@ -35,6 +35,7 @@
 
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_pipelined_register.h"
 #include "intel_reg.h"
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 2c9ac9a..a9bc5e2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1593,18 +1593,6 @@ void brw_store_register_mem64(struct brw_context *brw,
 void brw_init_conditional_render_functions(struct dd_function_table 
*functions);
 bool brw_check_conditional_render(struct brw_context *brw);
 
-/** intel_batchbuffer.c */
-void brw_load_register_mem(struct brw_context *brw,
-   uint32_t reg,
-   brw_bo *bo,
-   uint32_t read_domains, uint32_t write_domain,
-   uint32_t offset);
-void brw_load_register_mem64(struct brw_context *brw,
- uint32_t reg,
- brw_bo *bo,
- uint32_t read_domains, uint32_t write_domain,
- uint32_t offset);
-
 /*==
  * brw_state_dump.c
  */
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 8ffc1c5..c819bb7 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -45,6 +45,7 @@
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
+#include "brw_pipelined_register.h"
 #include "brw_state.h"
 #include "brw_vs.h"
 
diff --git a/src/mesa/drivers/dri/i965/brw_pipelined_register.c 
b/src/mesa/drivers/dri/i965/brw_pipelined_register.c
new file mode 100644
index 000..9424e4a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_pipelined_register.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_pipelined_register.h"
+
+#include "intel_reg.h"
+
+static void
+load_sized_register_mem(struct brw_context *brw,
+uint32_t reg,
+ 

[Mesa-dev] [PATCH 20/70] i965: Replace opencoded brw_load_register_mem()

2015-08-07 Thread Chris Wilson
gen7_sol_state loads the SOL_OFFSET registers from its scratch buffer
by hand, switch it over to the common routine for emitting that command.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/gen7_sol_state.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 396d029..c863dfc 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -31,6 +31,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_pipelined_register.h"
 #include "intel_buffer_objects.h"
 #include "intel_reg.h"
 #include "main/transformfeedback.h"
@@ -547,13 +548,12 @@ gen7_resume_transform_feedback(struct gl_context *ctx,
/* Reload the SOL buffer offset registers. */
if (brw->gen < 8) {
   for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo,
-   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-   i * sizeof(uint32_t));
- ADVANCE_BATCH();
+ brw_load_register_mem(brw,
+   GEN7_SO_WRITE_OFFSET(i),
+   brw_obj->offset_bo,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   i * sizeof(uint32_t));
   }
}
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/70] i965: Extract brw_batch_busy()

2015-08-07 Thread Chris Wilson
A simple helper to check whether the last batch buffer submitted to the
hardware is still busy. Extract it now to reduce churn later.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h  | 5 +
 src/mesa/drivers/dri/i965/brw_cs.cpp   | 5 ++---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 5 ++---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 5 ++---
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index b9e67dd..ac64f2f 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -102,6 +102,11 @@ inline static uint32_t brw_bo_flink(brw_bo *bo)
 void brw_batch_clear_dirty(brw_batch *batch);
 void brw_bo_mark_dirty(brw_batch *batch, brw_bo *bo);
 
+inline static bool brw_batch_busy(brw_batch *batch)
+{
+   return batch->last_bo && drm_intel_bo_busy(batch->last_bo);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
b/src/mesa/drivers/dri/i965/brw_cs.cpp
index b0c0272..bf1c073 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -67,8 +67,7 @@ brw_cs_emit(struct brw_context *brw,
double start_time = 0;
 
if (unlikely(brw->perf_debug)) {
-  start_busy = (brw->batch.last_bo &&
-drm_intel_bo_busy(brw->batch.last_bo));
+  start_busy = brw_batch_busy(&brw->batch);
   start_time = get_time();
}
 
@@ -151,7 +150,7 @@ brw_cs_emit(struct brw_context *brw,
   }
   shader->compiled_once = true;
 
-  if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+  if (start_busy && !brw_batch_busy(&brw->batch)) {
  perf_debug("CS compile took %.03f ms and stalled the GPU\n",
 (get_time() - start_time) * 1000);
   }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ce1edc3..ddd5452 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5150,8 +5150,7 @@ brw_wm_fs_emit(struct brw_context *brw,
double start_time = 0;
 
if (unlikely(brw->perf_debug)) {
-  start_busy = (brw->batch.last_bo &&
-drm_intel_bo_busy(brw->batch.last_bo));
+  start_busy = brw_batch_busy(&brw->batch);
   start_time = get_time();
}
 
@@ -5237,7 +5236,7 @@ brw_wm_fs_emit(struct brw_context *brw,
  brw_wm_debug_recompile(brw, prog, key);
   shader->compiled_once = true;
 
-  if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+  if (start_busy && !brw_batch_busy(&brw->batch)) {
  perf_debug("FS compile took %.03f ms and stalled the GPU\n",
 (get_time() - start_time) * 1000);
   }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f18915a..33eeedb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1903,8 +1903,7 @@ brw_vs_emit(struct brw_context *brw,
const unsigned *assembly = NULL;
 
if (unlikely(brw->perf_debug)) {
-  start_busy = (brw->batch.last_bo &&
-drm_intel_bo_busy(brw->batch.last_bo));
+  start_busy = brw_batch_busy(&brw->batch);
   start_time = get_time();
}
 
@@ -2000,7 +1999,7 @@ brw_vs_emit(struct brw_context *brw,
   if (shader->compiled_once) {
  brw_vs_debug_recompile(brw, prog, key);
   }
-  if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+  if (start_busy && !brw_batch_busy(&brw->batch)) {
  perf_debug("VS compile took %.03f ms and stalled the GPU\n",
 (get_time() - start_time) * 1000);
   }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/70] i965: Refactor batch buffer dumping

2015-08-07 Thread Chris Wilson
Move the computation of the state offset into a smaller helper to reduce
churn later.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_state_dump.c | 62 --
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c 
b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 620c7ca..a597c1f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -66,6 +66,11 @@ static const char *surface_tiling[] = {
"Y-tiled"
 };
 
+static void *batch_in(struct brw_context *brw, unsigned offset)
+{
+   return (void *)brw->batch.bo->virtual + offset;
+}
+
 static void
 batch_out(struct brw_context *brw, const char *name, uint32_t offset,
  int index, char *fmt, ...) PRINTFLIKE(5, 6);
@@ -74,7 +79,7 @@ static void
 batch_out(struct brw_context *brw, const char *name, uint32_t offset,
  int index, char *fmt, ...)
 {
-   uint32_t *data = brw->batch.bo->virtual + offset;
+   uint32_t *data = batch_in(brw, offset);
va_list va;
 
fprintf(stderr, "0x%08x:  0x%08x: %8s: ",
@@ -88,7 +93,7 @@ static void
 batch_out64(struct brw_context *brw, const char *name, uint32_t offset,
 int index, char *fmt, ...)
 {
-   uint32_t *tmp = brw->batch.bo->virtual + offset;
+   uint32_t *tmp = batch_in(brw, offset);
 
/* Swap the dwords since we want to handle this as a 64b value, but the data
 * is typically emitted as dwords.
@@ -120,7 +125,7 @@ get_965_surfacetype(unsigned int surfacetype)
 static void dump_vs_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "VS_STATE";
-   struct brw_vs_unit_state *vs = brw->batch.bo->virtual + offset;
+   struct brw_vs_unit_state *vs = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
@@ -135,7 +140,7 @@ static void dump_vs_state(struct brw_context *brw, uint32_t 
offset)
 static void dump_gs_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "GS_STATE";
-   struct brw_gs_unit_state *gs = brw->batch.bo->virtual + offset;
+   struct brw_gs_unit_state *gs = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
@@ -150,7 +155,7 @@ static void dump_gs_state(struct brw_context *brw, uint32_t 
offset)
 static void dump_clip_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "CLIP_STATE";
-   struct brw_clip_unit_state *clip = brw->batch.bo->virtual + offset;
+   struct brw_clip_unit_state *clip = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
@@ -169,7 +174,7 @@ static void dump_clip_state(struct brw_context *brw, 
uint32_t offset)
 static void dump_sf_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "SF_STATE";
-   struct brw_sf_unit_state *sf = brw->batch.bo->virtual + offset;
+   struct brw_sf_unit_state *sf = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
@@ -185,7 +190,7 @@ static void dump_sf_state(struct brw_context *brw, uint32_t 
offset)
 static void dump_wm_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "WM_STATE";
-   struct brw_wm_unit_state *wm = brw->batch.bo->virtual + offset;
+   struct brw_wm_unit_state *wm = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
@@ -212,7 +217,7 @@ static void dump_wm_state(struct brw_context *brw, uint32_t 
offset)
 static void dump_surface_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "SURF";
-   uint32_t *surf = brw->batch.bo->virtual + offset;
+   uint32_t *surf = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "%s %s\n",
 get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
@@ -236,7 +241,7 @@ static void dump_surface_state(struct brw_context *brw, 
uint32_t offset)
 static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset)
 {
const char *name = "SURF";
-   uint32_t *surf = brw->batch.bo->virtual + offset;
+   uint32_t *surf = batch_in(brw, offset);
 
batch_out(brw, name, offset, 0, "%s %s %s\n",
  get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
@@ -275,7 +280,7 @@ static float q_to_float(uint32_t data, int integer_end, int 
integer_start,
 static void
 dump_gen8_surface_state(struct brw_context *brw, uint32_t offset, int index)
 {
-   uint32_t *surf = brw->batch.bo->virtual + offset;
+   uint32_t *surf = batch_in(brw, offset);
int aux_mode = surf[6] & INTEL_MASK(2, 0);
const char *aux_str;
char *name;
@@ -349,8 +354,7 @@ dump_sdc(struct brw_context *brw, uint32_t offset)
const char *name = "SDC";
 
if (brw->gen >= 5 && brw->gen <= 6) {
-  struct gen5_sample

[Mesa-dev] [PATCH 30/70] i965: Pass can-use-active flag to brw_bo_create()

2015-08-07 Thread Chris Wilson
The introduction of brw_bo_create() allows us to pass a new flag down
when creating a linear buffer to allow the allocator to return a
currently active buffer. (Previously all linear buffers were presumed to
be allocated for CPU access and so the allocator only returned an idle
buffer.)

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c  |  3 ++-
 src/mesa/drivers/dri/i965/brw_performance_monitor.c | 15 +++
 src/mesa/drivers/dri/i965/brw_program.c |  2 +-
 src/mesa/drivers/dri/i965/brw_queryobj.c|  9 ++---
 src/mesa/drivers/dri/i965/gen6_queryobj.c   |  3 ++-
 src/mesa/drivers/dri/i965/gen6_sol.c|  6 --
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c   |  3 ++-
 7 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 1deed23..a1afdc2 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -328,7 +328,8 @@ gen7_enable_hw_binding_tables(struct brw_context *brw)
* "A maximum of 16,383 Binding tables are allowed in any batch buffer"
*/
   static const int max_size = 16383 * 4;
-  brw->hw_bt_pool.bo = brw_bo_create(&brw->batch, "hw_bt", max_size, 64, 
0);
+  brw->hw_bt_pool.bo =
+ brw_bo_create(&brw->batch, "hw_bt", max_size, 64, 
BO_ALLOC_FOR_RENDER);
   brw->hw_bt_pool.next_offset = 0;
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c 
b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index 4d54fa2..3168613 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -1081,6 +1081,12 @@ reinitialize_perf_monitor(struct brw_context *brw,
monitor->pipeline_stats_results = NULL;
 }
 
+#ifdef DEBUG
+#define DBG_BO_ALLOC_FLAG 0
+#else
+#define DBG_BO_ALLOC_FLAG BO_ALLOC_FOR_RENDER
+#endif
+
 /**
  * Driver hook for glBeginPerformanceMonitorAMD().
  */
@@ -1094,7 +1100,6 @@ brw_begin_perf_monitor(struct gl_context *ctx,
DBG("Begin(%d)\n", m->Name);
 
reinitialize_perf_monitor(brw, monitor);
-
if (monitor_needs_oa(brw, m)) {
   /* If the global OA bookend BO doesn't exist, allocate it.  This should
* only happen once, but we delay until BeginPerfMonitor time to avoid
@@ -1102,11 +1107,12 @@ brw_begin_perf_monitor(struct gl_context *ctx,
*/
   if (!brw->perfmon.bookend_bo) {
  brw->perfmon.bookend_bo = brw_bo_create(&brw->batch, "OA bookend BO",
- BOOKEND_BO_SIZE_BYTES, 64, 0);
+ BOOKEND_BO_SIZE_BYTES, 64,
+ DBG_BO_ALLOC_FLAG);
   }
 
   monitor->oa_bo = brw_bo_create(&brw->batch, "perf. monitor OA bo",
- 4096, 64, 0);
+ 4096, 64, DBG_BO_ALLOC_FLAG);
 #ifdef DEBUG
   /* Pre-filling the BO helps debug whether writes landed. */
   drm_intel_bo_map(monitor->oa_bo, true);
@@ -1141,7 +1147,8 @@ brw_begin_perf_monitor(struct gl_context *ctx,
 
if (monitor_needs_statistics_registers(brw, m)) {
   monitor->pipeline_stats_bo =
- brw_bo_create(&brw->batch, "perf. monitor stats bo", 4096, 64, 0);
+ brw_bo_create(&brw->batch, "perf. monitor stats bo",
+   4096, 64, BO_ALLOC_FOR_RENDER);
 
   /* Take starting snapshots. */
   snapshot_statistics_registers(brw, monitor, 0);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 22abea7..cd9cfc6 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -301,7 +301,7 @@ brw_init_shader_time(struct brw_context *brw)
const int max_entries = 2048;
brw->shader_time.bo = brw_bo_create(&brw->batch, "shader time",
max_entries * SHADER_TIME_STRIDE * 3,
-   4096, 0);
+   4096, BO_ALLOC_FOR_RENDER);
brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c 
b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 29ec56a..51ba2d6 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -236,7 +236,8 @@ brw_begin_query(struct gl_context *ctx, struct 
gl_query_object *q)
* the system was doing other work, such as running other applications.
*/
   brw_bo_put(query->bo);
-  query->bo = brw_bo_create(&brw->batch, "timer query", 4096, 4096, 0);
+  query->bo = brw_bo_create(&brw->batch, "

[Mesa-dev] [PATCH 21/70] i965: Refactor setting a register with an immediate constant

2015-08-07 Thread Chris Wilson
We have a few instances where we set a register to an immediate value
(MI_LOAD_REGISTER_IMM), so let's replace them with a simple routine.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_draw.c|  6 +-
 src/mesa/drivers/dri/i965/brw_performance_monitor.c | 17 ++---
 src/mesa/drivers/dri/i965/brw_pipelined_register.c  | 12 
 src/mesa/drivers/dri/i965/brw_pipelined_register.h  |  4 
 src/mesa/drivers/dri/i965/brw_state_upload.c| 11 +--
 src/mesa/drivers/dri/i965/gen7_sol_state.c  |  6 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c|  9 -
 7 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index c819bb7..611abea 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -248,11 +248,7 @@ brw_emit_prim(struct brw_context *brw,
  brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
I915_GEM_DOMAIN_VERTEX, 0,
prim->indirect_offset + 12);
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ brw_load_register_imm(brw, GEN7_3DPRIM_BASE_VERTEX, 0);
   }
} else {
   indirect_flag = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c 
b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index 1afc968..b92b1d7 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -54,6 +54,7 @@
 
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_pipelined_register.h"
 
 #include "intel_reg.h"
 
@@ -659,12 +660,10 @@ start_oa_counters(struct brw_context *brw)
   unreachable("Tried to enable OA counters on an unsupported generation.");
}
 
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(OACONTROL);
-   OUT_BATCH(counter_format << OACONTROL_COUNTER_SELECT_SHIFT |
- OACONTROL_ENABLE_COUNTERS);
-   ADVANCE_BATCH();
+   brw_load_register_imm(brw,
+ OACONTROL,
+ counter_format << OACONTROL_COUNTER_SELECT_SHIFT |
+ OACONTROL_ENABLE_COUNTERS);
 }
 
 /**
@@ -677,11 +676,7 @@ stop_oa_counters(struct brw_context *brw)
if (brw->gen == 5)
   return;
 
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(OACONTROL);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   brw_load_register_imm(brw, OACONTROL, 0);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_pipelined_register.c 
b/src/mesa/drivers/dri/i965/brw_pipelined_register.c
index 9424e4a..07335d9 100644
--- a/src/mesa/drivers/dri/i965/brw_pipelined_register.c
+++ b/src/mesa/drivers/dri/i965/brw_pipelined_register.c
@@ -77,3 +77,15 @@ brw_load_register_mem64(struct brw_context *brw,
 {
load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 
2);
 }
+
+void
+brw_load_register_imm(struct brw_context *brw,
+  uint32_t reg,
+  uint32_t value)
+{
+   BEGIN_BATCH(3);
+   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+   OUT_BATCH(reg);
+   OUT_BATCH(value);
+   ADVANCE_BATCH();
+}
diff --git a/src/mesa/drivers/dri/i965/brw_pipelined_register.h 
b/src/mesa/drivers/dri/i965/brw_pipelined_register.h
index c3dd02f..d3fea14 100644
--- a/src/mesa/drivers/dri/i965/brw_pipelined_register.h
+++ b/src/mesa/drivers/dri/i965/brw_pipelined_register.h
@@ -28,6 +28,10 @@
 extern "C" {
 #endif
 
+void brw_load_register_imm(struct brw_context *brw,
+   uint32_t reg,
+   uint32_t value);
+
 void brw_load_register_mem(struct brw_context *brw,
uint32_t reg,
brw_bo *bo,
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 03e9c3b..c2af48c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -41,6 +41,7 @@
 #include "brw_gs.h"
 #include "brw_wm.h"
 #include "brw_cs.h"
+#include "brw_pipelined_register.h"
 #include "main/framebuffer.h"
 
 static const struct brw_tracked_state *gen4_atoms[] =
@@ -359,12 +360,10 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
 
/* Recommended optimization for Victim Cache eviction in pixel backend. */
if (brw->gen >= 9) {
-  BEGIN_BATCH(3);
-  OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-  OUT_BATCH(GEN7_CACHE_MODE_1);
-  OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
-GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
-  ADVANCE_BATCH();
+  brw_load_register_imm(brw,
+GEN7_CACHE_MODE_1,
+(GEN9_PARTIAL_RE

[Mesa-dev] [PATCH 29/70] i965: Move brw_bo creation to brw_batch.h

2015-08-07 Thread Chris Wilson
Churn now to reduce churn later.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h  | 34 +++
 src/mesa/drivers/dri/i965/brw_binding_tables.c |  3 +-
 src/mesa/drivers/dri/i965/brw_context.c|  3 +-
 .../drivers/dri/i965/brw_performance_monitor.c | 11 +++--
 src/mesa/drivers/dri/i965/brw_program.c|  8 ++--
 src/mesa/drivers/dri/i965/brw_queryobj.c   |  6 +--
 src/mesa/drivers/dri/i965/brw_state_cache.c|  6 +--
 src/mesa/drivers/dri/i965/gen6_queryobj.c  |  2 +-
 src/mesa/drivers/dri/i965/gen6_sol.c   |  4 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  3 +-
 src/mesa/drivers/dri/i965/intel_buffer_objects.c   | 16 
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 48 +++---
 src/mesa/drivers/dri/i965/intel_upload.c   |  4 +-
 13 files changed, 87 insertions(+), 61 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index 849a442..705f2f9 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -99,6 +99,40 @@ typedef struct brw_batch {
struct set *render_cache;
 } brw_batch;
 
+inline static brw_bo *brw_bo_create(brw_batch *batch,
+const char *name,
+uint64_t size,
+uint64_t alignment,
+unsigned flags)
+{
+   return drm_intel_bo_alloc(batch->bufmgr, name, size, alignment);
+}
+
+inline static brw_bo *brw_bo_create_tiled(brw_batch *batch,
+  const char *name,
+  uint32_t width,
+  uint32_t height,
+  uint32_t cpp,
+  uint32_t *tiling,
+  uint32_t *pitch,
+  unsigned flags)
+{
+   unsigned long __pitch;
+   brw_bo *bo = drm_intel_bo_alloc_tiled(batch->bufmgr, name,
+ width, height, cpp,
+ tiling, &__pitch,
+ flags);
+   *pitch = __pitch;
+   return bo;
+}
+
+inline static brw_bo *brw_bo_create_from_name(brw_batch *batch,
+  const char *name,
+  uint32_t global_name)
+{
+   return drm_intel_bo_gem_create_from_name(batch->bufmgr, name, global_name);
+}
+
 inline static brw_bo *brw_bo_get(brw_bo *bo)
 {
drm_intel_bo_reference(bo);
diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index c03dc59..1deed23 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -328,8 +328,7 @@ gen7_enable_hw_binding_tables(struct brw_context *brw)
* "A maximum of 16,383 Binding tables are allowed in any batch buffer"
*/
   static const int max_size = 16383 * 4;
-  brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->batch.bufmgr, "hw_bt",
-  max_size, 64);
+  brw->hw_bt_pool.bo = brw_bo_create(&brw->batch, "hw_bt", max_size, 64, 
0);
   brw->hw_bt_pool.next_offset = 0;
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index ffc3b1f..21e6090 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1358,8 +1358,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
   buffer->cpp, buffer->pitch);
}
 
-   bo = drm_intel_bo_gem_create_from_name(brw->batch.bufmgr, buffer_name,
-  buffer->name);
+   bo = brw_bo_create_from_name(&brw->batch, buffer_name, buffer->name);
if (!bo) {
   fprintf(stderr,
   "Failed to open BO for returned DRI2 buffer "
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c 
b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index 4795861..4d54fa2 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -1101,13 +1101,12 @@ brw_begin_perf_monitor(struct gl_context *ctx,
* wasting memory for contexts that don't use performance monitors.
*/
   if (!brw->perfmon.bookend_bo) {
- brw->perfmon.bookend_bo = drm_intel_bo_alloc(brw->batch.bufmgr,
-  "OA bookend BO",
-  BOOKEND_BO_SIZE_BYTES, 
64);
+ brw->perfmon.bookend_bo = brw_bo_create(&brw->batch, "OA bookend BO",
+ BOOKEND_BO_SIZE_BYTES, 64, 0);
   }
 
-  mon

[Mesa-dev] [PATCH 25/70] i965: Move bufmgr from brw_context to brw_batch

2015-08-07 Thread Chris Wilson
Since brw_batch will become the dominant interface for brw_bo, move the
pointer now to reduce later churn.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h   |  2 ++
 src/mesa/drivers/dri/i965/brw_binding_tables.c  |  2 +-
 src/mesa/drivers/dri/i965/brw_context.c |  8 
 src/mesa/drivers/dri/i965/brw_context.h |  6 ++
 src/mesa/drivers/dri/i965/brw_performance_monitor.c |  6 +++---
 src/mesa/drivers/dri/i965/brw_program.c |  4 ++--
 src/mesa/drivers/dri/i965/brw_queryobj.c| 16 
 src/mesa/drivers/dri/i965/brw_state_cache.c |  4 ++--
 src/mesa/drivers/dri/i965/gen6_queryobj.c   |  2 +-
 src/mesa/drivers/dri/i965/gen6_sol.c|  4 ++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.c   |  2 +-
 src/mesa/drivers/dri/i965/intel_buffer_objects.c|  6 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c   | 10 +-
 src/mesa/drivers/dri/i965/intel_upload.c|  2 +-
 14 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index 6c24465..9e2d7fa 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -67,6 +67,8 @@ typedef struct brw_batch {
   int reloc_count;
} saved;
 
+   dri_bufmgr *bufmgr;
+
/**
 * Set of brw_bo* that have been rendered to within this batchbuffer
 * and would need flushing before being used from another cache domain that
diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 9fe4bc8..c03dc59 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -328,7 +328,7 @@ gen7_enable_hw_binding_tables(struct brw_context *brw)
* "A maximum of 16,383 Binding tables are allowed in any batch buffer"
*/
   static const int max_size = 16383 * 4;
-  brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+  brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->batch.bufmgr, "hw_bt",
   max_size, 64);
   brw->hw_bt_pool.next_offset = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 707bdf2..971d86d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -628,7 +628,7 @@ brw_process_driconf_options(struct brw_context *brw)
case DRI_CONF_BO_REUSE_DISABLED:
   break;
case DRI_CONF_BO_REUSE_ALL:
-  intel_bufmgr_gem_enable_reuse(brw->bufmgr);
+  intel_bufmgr_gem_enable_reuse(brw->intelScreen->bufmgr);
   break;
}
 
@@ -707,7 +707,7 @@ brwCreateContext(gl_api api,
driContextPriv->driverPrivate = brw;
brw->driContext = driContextPriv;
brw->intelScreen = screen;
-   brw->bufmgr = screen->bufmgr;
+   brw->batch.bufmgr = screen->bufmgr;
 
brw->gen = devinfo->gen;
brw->gt = devinfo->gt;
@@ -812,7 +812,7 @@ brwCreateContext(gl_api api,
* This is required for transform feedback buffer offsets, query objects,
* and also allows us to reduce how much state we have to emit.
*/
-  brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
+  brw->hw_ctx = drm_intel_gem_context_create(brw->batch.bufmgr);
 
   if (!brw->hw_ctx) {
  fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
@@ -1377,7 +1377,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
   buffer->cpp, buffer->pitch);
}
 
-   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
+   bo = drm_intel_bo_gem_create_from_name(brw->batch.bufmgr, buffer_name,
   buffer->name);
if (!bo) {
   fprintf(stderr,
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 31b8c3b..6301da4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1000,7 +1000,8 @@ struct brw_context
 
} vtbl;
 
-   dri_bufmgr *bufmgr;
+   brw_batch batch;
+   bool no_batch_wrap;
 
drm_intel_context *hw_ctx;
 
@@ -1016,9 +1017,6 @@ struct brw_context
 */
uint32_t reset_count;
 
-   brw_batch batch;
-   bool no_batch_wrap;
-
struct {
   brw_bo *bo;
   uint32_t next_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c 
b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index b92b1d7..4795861 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -1101,13 +1101,13 @@ brw_begin_perf_monitor(struct gl_context *ctx,
* wasting memory for contexts that don't use performance monitors.
*/
   if (!brw->perfmon.bookend_bo) {
- brw->perfmon.bookend_bo = drm_intel_bo_alloc(brw->bufmgr,
+ 

[Mesa-dev] [PATCH 27/70] i965: Move HW context into brw_batch

2015-08-07 Thread Chris Wilson
To reduce churn later, move the HW context variable from brw_context to
brw_batch.

Signed-off-by: Chris Wilson 
---
 src/mesa/drivers/dri/i965/brw_batch.h |  2 ++
 src/mesa/drivers/dri/i965/brw_context.c   | 22 +++---
 src/mesa/drivers/dri/i965/brw_context.h   |  2 --
 src/mesa/drivers/dri/i965/brw_queryobj.c  |  4 ++--
 src/mesa/drivers/dri/i965/brw_reset.c |  6 ++---
 src/mesa/drivers/dri/i965/brw_state_upload.c  |  2 +-
 src/mesa/drivers/dri/i965/gen7_misc_state.c   |  2 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 32 ++-
 src/mesa/drivers/dri/i965/intel_batchbuffer.h |  2 +-
 10 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.h 
b/src/mesa/drivers/dri/i965/brw_batch.h
index e3a83ea..849a442 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -89,6 +89,8 @@ typedef struct brw_batch {
bool always_flush : 1;
bool disable_throttling : 1;
 
+   drm_intel_context *hw_ctx;
+
/**
 * Set of brw_bo* that have been rendered to within this batchbuffer
 * and would need flushing before being used from another cache domain that
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 12313ec..ffc3b1f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -802,23 +802,9 @@ brwCreateContext(gl_api api,
 
intel_fbo_init(brw);
 
-   intel_batchbuffer_init(brw);
-
-   if (brw->gen >= 6) {
-  /* Create a new hardware context.  Using a hardware context means that
-   * our GPU state will be saved/restored on context switch, allowing us
-   * to assume that the GPU is in the same state we left it in.
-   *
-   * This is required for transform feedback buffer offsets, query objects,
-   * and also allows us to reduce how much state we have to emit.
-   */
-  brw->hw_ctx = drm_intel_gem_context_create(brw->batch.bufmgr);
-
-  if (!brw->hw_ctx) {
- fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
- intelDestroyContext(driContextPriv);
- return false;
-  }
+   if (!intel_batchbuffer_init(brw)) {
+  intelDestroyContext(driContextPriv);
+  return false;
}
 
brw_init_pipe_control(brw, devinfo);
@@ -936,8 +922,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
brw_bo_put(brw->wm.base.scratch_bo);
brw_bo_put(brw->hw_bt_pool.bo);
 
-   drm_intel_gem_context_destroy(brw->hw_ctx);
-
if (ctx->swrast_context) {
   _swsetup_DestroyContext(&brw->ctx);
   _tnl_DestroyContext(&brw->ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index a4169f4..f1fcbd8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1003,8 +1003,6 @@ struct brw_context
brw_batch batch;
bool no_batch_wrap;
 
-   drm_intel_context *hw_ctx;
-
/** BO for post-sync nonzero writes for gen6 workaround. */
brw_bo *workaround_bo;
uint8_t pipe_controls_since_last_cs_stall;
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c 
b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 6f29d04..cb0c210 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -425,7 +425,7 @@ brw_emit_query_begin(struct brw_context *brw)
struct gl_context *ctx = &brw->ctx;
struct brw_query_object *query = brw->query.obj;
 
-   if (brw->hw_ctx)
+   if (brw->batch.hw_ctx)
   return;
 
/* Skip if we're not doing any queries, or we've already recorded the
@@ -452,7 +452,7 @@ brw_emit_query_end(struct brw_context *brw)
 {
struct brw_query_object *query = brw->query.obj;
 
-   if (brw->hw_ctx)
+   if (brw->batch.hw_ctx)
   return;
 
if (!brw->query.begin_emitted)
diff --git a/src/mesa/drivers/dri/i965/brw_reset.c 
b/src/mesa/drivers/dri/i965/brw_reset.c
index e3182b1..f84df22 100644
--- a/src/mesa/drivers/dri/i965/brw_reset.c
+++ b/src/mesa/drivers/dri/i965/brw_reset.c
@@ -40,7 +40,7 @@ brw_get_graphics_reset_status(struct gl_context *ctx)
 * DRM_IOCTL_I915_GET_RESET_STATS is not supported), this function should
 * not be accessible.
 */
-   assert(brw->hw_ctx != NULL);
+   assert(brw->batch.hw_ctx);
 
/* A reset status other than NO_ERROR was returned last time. I915 returns
 * nonzero active/pending only if reset has been encountered and completed.
@@ -49,8 +49,8 @@ brw_get_graphics_reset_status(struct gl_context *ctx)
if (brw->reset_count != 0)
   return GL_NO_ERROR;
 
-   err = drm_intel_get_reset_stats(brw->hw_ctx, &reset_count, &active,
-   &pending);
+   err = drm_intel_get_reset_stats(brw->batch.hw_ctx,
+   &reset_count, &active, &pending);
if (err)
   return GL_NO_ERR

  1   2   >