Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2013-07-23 Thread Marek Olšák
Yes, absolutely, we should have a CAP for that.

Marek

On Wed, Jul 24, 2013 at 12:20 AM, Jose Fonseca  wrote:
> - Original Message -
>> FYI, OpenGL 4.4, which was released yesterday, adds GL_MAP_PERSISTENT
>> and GL_MAP_COHERENT bits as valid parameters of glMapBufferRange and
>> glBufferStorage, allowing to use buffers for rendering while they are
>> mapped and upload/download data to/from the buffers simultaneously.
>> It's now clear that Gallium will have to support it at some point,
>> it's only a matter of when. And once we have it, gallium can use it
>> for faster buffer uploads.
>
> IIRC, this sort of mappings are tricky, but not totally impossible on Windows 
> WDDM.  I'm OK adding this, but until we manage to get it working on Windows 
> this would need to be conditional though.
>
> Should we have a CAP, instead of having the st tracker systematically retry 
> with/without PERSISTENT flag?
>
> Jose
>
>>
>> Marek
>>
>> On Mon, Jan 2, 2012 at 1:22 AM, Marek Olšák  wrote:
>> > Please see the diff for further info.
>> >
>> > This paves the way for moving user buffer uploads out of drivers and should
>> > allow to clean up the mess in u_upload_mgr in the meantime.
>> >
>> > For now only allowed for buffers on r300 and r600.
>> > ---
>> >  src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
>> >  src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
>> >  src/gallium/drivers/llvmpipe/lp_texture.c|4 
>> >  src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
>> >  src/gallium/drivers/nv50/nv50_transfer.c |2 +-
>> >  src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
>> >  src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
>> >  src/gallium/drivers/r300/r300_transfer.c |4 
>> >  src/gallium/drivers/r600/r600_texture.c  |4 
>> >  src/gallium/drivers/svga/svga_resource_buffer.c  |4 
>> >  src/gallium/drivers/svga/svga_resource_texture.c |2 +-
>> >  src/gallium/include/pipe/p_defines.h |   16 
>> >  12 files changed, 57 insertions(+), 6 deletions(-)
>> >
>> > diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c
>> > b/src/gallium/drivers/i915/i915_resource_buffer.c
>> > index 77c0345..c54e481 100644
>> > --- a/src/gallium/drivers/i915/i915_resource_buffer.c
>> > +++ b/src/gallium/drivers/i915/i915_resource_buffer.c
>> > @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
>> >const struct pipe_box *box)
>> >  {
>> > struct i915_context *i915 = i915_context(pipe);
>> > -   struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
>> > +   struct pipe_transfer *transfer;
>> >
>> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> > +  return NULL;
>> > +   }
>> > +
>> > +   transfer = util_slab_alloc(&i915->transfer_pool);
>> > if (transfer == NULL)
>> >return NULL;
>> >
>> > diff --git a/src/gallium/drivers/i915/i915_resource_texture.c
>> > b/src/gallium/drivers/i915/i915_resource_texture.c
>> > index 8ff733a..64d071c 100644
>> > --- a/src/gallium/drivers/i915/i915_resource_texture.c
>> > +++ b/src/gallium/drivers/i915/i915_resource_texture.c
>> > @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe,
>> >  {
>> > struct i915_context *i915 = i915_context(pipe);
>> > struct i915_texture *tex = i915_texture(resource);
>> > -   struct i915_transfer *transfer =
>> > util_slab_alloc(&i915->texture_transfer_pool);
>> > +   struct i915_transfer *transfer;
>> > boolean use_staging_texture = FALSE;
>> >
>> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> > +  return NULL;
>> > +   }
>> > +
>> > +   transfer = util_slab_alloc(&i915->texture_transfer_pool);
>> > if (transfer == NULL)
>> >return NULL;
>> >
>> > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c
>> > b/src/gallium/drivers/llvmpipe/lp_texture.c
>> > index ca38571..d86d493 100644
>> > --- a/src/gallium/drivers/llvmpipe/lp_texture.c
>> > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
>> > @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
>> > assert(resource);
>> > assert(level <= resource->last_level);
>> >
>> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> > +  return NULL;
>> > +   }
>> > +
>> > /*
>> >  * Transfers, like other pipe operations, must happen in order, so
>> >  flush the
>> >  * context if necessary.
>> > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c
>> > b/src/gallium/drivers/nouveau/nouveau_buffer.c
>> > index f822625..02186ba 100644
>> > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c
>> > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
>> > @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe,
>> >  {
>> > struct nv04_resource *buf = nv04_resource(resource);
>> > struct nouveau_context *nv = nouveau_context(pipe);
>> > -   struct nouveau_transfer *xfr

Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2013-07-23 Thread Jose Fonseca
- Original Message -
> FYI, OpenGL 4.4, which was released yesterday, adds GL_MAP_PERSISTENT
> and GL_MAP_COHERENT bits as valid parameters of glMapBufferRange and
> glBufferStorage, allowing to use buffers for rendering while they are
> mapped and upload/download data to/from the buffers simultaneously.
> It's now clear that Gallium will have to support it at some point,
> it's only a matter of when. And once we have it, gallium can use it
> for faster buffer uploads.

IIRC, this sort of mappings are tricky, but not totally impossible on Windows 
WDDM.  I'm OK adding this, but until we manage to get it working on Windows 
this would need to be conditional though.

Should we have a CAP, instead of having the st tracker systematically retry 
with/without PERSISTENT flag?

Jose

> 
> Marek
> 
> On Mon, Jan 2, 2012 at 1:22 AM, Marek Olšák  wrote:
> > Please see the diff for further info.
> >
> > This paves the way for moving user buffer uploads out of drivers and should
> > allow to clean up the mess in u_upload_mgr in the meantime.
> >
> > For now only allowed for buffers on r300 and r600.
> > ---
> >  src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
> >  src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
> >  src/gallium/drivers/llvmpipe/lp_texture.c|4 
> >  src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
> >  src/gallium/drivers/nv50/nv50_transfer.c |2 +-
> >  src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
> >  src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
> >  src/gallium/drivers/r300/r300_transfer.c |4 
> >  src/gallium/drivers/r600/r600_texture.c  |4 
> >  src/gallium/drivers/svga/svga_resource_buffer.c  |4 
> >  src/gallium/drivers/svga/svga_resource_texture.c |2 +-
> >  src/gallium/include/pipe/p_defines.h |   16 
> >  12 files changed, 57 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c
> > b/src/gallium/drivers/i915/i915_resource_buffer.c
> > index 77c0345..c54e481 100644
> > --- a/src/gallium/drivers/i915/i915_resource_buffer.c
> > +++ b/src/gallium/drivers/i915/i915_resource_buffer.c
> > @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
> >const struct pipe_box *box)
> >  {
> > struct i915_context *i915 = i915_context(pipe);
> > -   struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
> > +   struct pipe_transfer *transfer;
> >
> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> > +  return NULL;
> > +   }
> > +
> > +   transfer = util_slab_alloc(&i915->transfer_pool);
> > if (transfer == NULL)
> >return NULL;
> >
> > diff --git a/src/gallium/drivers/i915/i915_resource_texture.c
> > b/src/gallium/drivers/i915/i915_resource_texture.c
> > index 8ff733a..64d071c 100644
> > --- a/src/gallium/drivers/i915/i915_resource_texture.c
> > +++ b/src/gallium/drivers/i915/i915_resource_texture.c
> > @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe,
> >  {
> > struct i915_context *i915 = i915_context(pipe);
> > struct i915_texture *tex = i915_texture(resource);
> > -   struct i915_transfer *transfer =
> > util_slab_alloc(&i915->texture_transfer_pool);
> > +   struct i915_transfer *transfer;
> > boolean use_staging_texture = FALSE;
> >
> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> > +  return NULL;
> > +   }
> > +
> > +   transfer = util_slab_alloc(&i915->texture_transfer_pool);
> > if (transfer == NULL)
> >return NULL;
> >
> > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c
> > b/src/gallium/drivers/llvmpipe/lp_texture.c
> > index ca38571..d86d493 100644
> > --- a/src/gallium/drivers/llvmpipe/lp_texture.c
> > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
> > @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
> > assert(resource);
> > assert(level <= resource->last_level);
> >
> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> > +  return NULL;
> > +   }
> > +
> > /*
> >  * Transfers, like other pipe operations, must happen in order, so
> >  flush the
> >  * context if necessary.
> > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c
> > b/src/gallium/drivers/nouveau/nouveau_buffer.c
> > index f822625..02186ba 100644
> > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c
> > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
> > @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe,
> >  {
> > struct nv04_resource *buf = nv04_resource(resource);
> > struct nouveau_context *nv = nouveau_context(pipe);
> > -   struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
> > +   struct nouveau_transfer *xfr;
> > +
> > +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> > +  return NULL;
> > +   }
> > +
> > +   xfr = CALLOC_STRUCT(nouveau_transfer);
> > 

Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2013-07-23 Thread Marek Olšák
FYI, OpenGL 4.4, which was released yesterday, adds GL_MAP_PERSISTENT
and GL_MAP_COHERENT bits as valid parameters of glMapBufferRange and
glBufferStorage, allowing to use buffers for rendering while they are
mapped and upload/download data to/from the buffers simultaneously.
It's now clear that Gallium will have to support it at some point,
it's only a matter of when. And once we have it, gallium can use it
for faster buffer uploads.

Marek

On Mon, Jan 2, 2012 at 1:22 AM, Marek Olšák  wrote:
> Please see the diff for further info.
>
> This paves the way for moving user buffer uploads out of drivers and should
> allow to clean up the mess in u_upload_mgr in the meantime.
>
> For now only allowed for buffers on r300 and r600.
> ---
>  src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
>  src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
>  src/gallium/drivers/llvmpipe/lp_texture.c|4 
>  src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
>  src/gallium/drivers/nv50/nv50_transfer.c |2 +-
>  src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
>  src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
>  src/gallium/drivers/r300/r300_transfer.c |4 
>  src/gallium/drivers/r600/r600_texture.c  |4 
>  src/gallium/drivers/svga/svga_resource_buffer.c  |4 
>  src/gallium/drivers/svga/svga_resource_texture.c |2 +-
>  src/gallium/include/pipe/p_defines.h |   16 
>  12 files changed, 57 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c 
> b/src/gallium/drivers/i915/i915_resource_buffer.c
> index 77c0345..c54e481 100644
> --- a/src/gallium/drivers/i915/i915_resource_buffer.c
> +++ b/src/gallium/drivers/i915/i915_resource_buffer.c
> @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
>const struct pipe_box *box)
>  {
> struct i915_context *i915 = i915_context(pipe);
> -   struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
> +   struct pipe_transfer *transfer;
>
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> +   transfer = util_slab_alloc(&i915->transfer_pool);
> if (transfer == NULL)
>return NULL;
>
> diff --git a/src/gallium/drivers/i915/i915_resource_texture.c 
> b/src/gallium/drivers/i915/i915_resource_texture.c
> index 8ff733a..64d071c 100644
> --- a/src/gallium/drivers/i915/i915_resource_texture.c
> +++ b/src/gallium/drivers/i915/i915_resource_texture.c
> @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe,
>  {
> struct i915_context *i915 = i915_context(pipe);
> struct i915_texture *tex = i915_texture(resource);
> -   struct i915_transfer *transfer = 
> util_slab_alloc(&i915->texture_transfer_pool);
> +   struct i915_transfer *transfer;
> boolean use_staging_texture = FALSE;
>
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> +   transfer = util_slab_alloc(&i915->texture_transfer_pool);
> if (transfer == NULL)
>return NULL;
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c 
> b/src/gallium/drivers/llvmpipe/lp_texture.c
> index ca38571..d86d493 100644
> --- a/src/gallium/drivers/llvmpipe/lp_texture.c
> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
> @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
> assert(resource);
> assert(level <= resource->last_level);
>
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> /*
>  * Transfers, like other pipe operations, must happen in order, so flush 
> the
>  * context if necessary.
> diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
> b/src/gallium/drivers/nouveau/nouveau_buffer.c
> index f822625..02186ba 100644
> --- a/src/gallium/drivers/nouveau/nouveau_buffer.c
> +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
> @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe,
>  {
> struct nv04_resource *buf = nv04_resource(resource);
> struct nouveau_context *nv = nouveau_context(pipe);
> -   struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
> +   struct nouveau_transfer *xfr;
> +
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> +   xfr = CALLOC_STRUCT(nouveau_transfer);
> if (!xfr)
>return NULL;
>
> diff --git a/src/gallium/drivers/nv50/nv50_transfer.c 
> b/src/gallium/drivers/nv50/nv50_transfer.c
> index 6f860e7..8ddebeb 100644
> --- a/src/gallium/drivers/nv50/nv50_transfer.c
> +++ b/src/gallium/drivers/nv50/nv50_transfer.c
> @@ -243,7 +243,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx,
> uint32_t size;
> int ret;
>
> -   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
> +   if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY))
>return NULL;
>
> tx = CALLOC_STRUCT(nv50_t

Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-28 Thread Jose Fonseca


- Original Message -
> On Don, 2012-01-26 at 20:45 +0100, Marek Olšák wrote:
> > On Tue, Jan 10, 2012 at 6:20 PM, Jose Fonseca 
> > wrote:
> > >> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca
> > >> 
> > >> wrote:
> > >> > 
> > >> > Also, please provide app name and performance figures w/ this
> > >> > change.
> > >>
> > >> OK. Torcs, the Forza track at the start.
> > >>
> > >> With u_upload_unmap before drawing:
> > >> 21.4 - 22.1 fps
> > >>
> > >> Without u_upload_unmap:
> > >> 22.7 - 23.1 fps
> > >
> > >
> > >> The improvement is approximately 1.1 fps, which is probably too
> > >> little
> > >> for other people to care, but why throw it away?
> > >
> > > This is roughly 5%. It is still significant.
> > >
> > > But it surprises me it is so high.
> > >
> > > Perhaps we should have a couple of fast entrypoints for
> > > transfering to/from buffers without the overhead of creating
> > > transfer objects.
> > 
> > Okay so the plan is to keep refactoring the transfer API until the
> > overhead disappears? Sounds good.
> > 
> > I propose these changes:
> > 1) Merge get_transfer with transfer_map.
> > 2) Merge transfer_unmap with transfer_destroy.
> 
> AFAIR everybody agreed a while ago this should be done.

I agree.

I know that in some places we pass transfer objects around, and then map/unmap 
elsewhere, which will make the refactoring harder, but it shouldn't be too hard.
 
> > 3) Make the pipe_transfer struct fully opaque, but then there must
> > be
> > another way to return the strides of mapped textures (out
> > parameters
> > of transfer_map?). Think of this as decoupling driver-private
> > transfer
> > objects from ordinary return values like the strides.
> > 4) The drivers which don't need transfer objects (e.g. non-texture
> > transfers) can return a NULL pipe_transfer struct, making transfer
> > objects fully optional. Only the returned pointer into the resource
> > determines whether transfer_map has been successful.
> > (1) and (2) should help reduce the call overhead. (3) is a
> > preparation
> > for (4). (4) should help kill all the code required to allocate,
> > initialize, and free the pipe_transfer struct when it's needed by
> > neither a driver nor a state tracker.
> 
> Sounds good to me.

Sounds great to me too.

One way to do this would be to make struct pipe_transfer a state tracker owned 
structure describing the transfer:

struct pipe_transfer
{
   struct pipe_resource *resource; /**< resource to transfer to/from  */
   unsigned level; /**< texture mipmap level */
   enum pipe_transfer_usage usage;
   struct pipe_box box;/**< region of the resource to access */
   unsigned stride;/**< row stride in bytes */
   unsigned layer_stride;  /**< image/layer stride in bytes */
   void *data;

   // Optional opaque driver private data associated with this transfer
   void *private;
};

And get/destroy_transfer would be disappear, being their functionality done 
inside map/unmap_transfer.  For example:

   struct pipe_transfer transfer;

   // TODO: write some inline helper functions to simplify most of this.
   transfer.resource = resource; // No need to reference count
   transfer.level = 0;
   transfer.usage = ...;
   ...

   if (pipe->map_transfer(pipe, &transfer)) {
   memcpy(transfer->data, buffer, size));
   pipe->unmap_transfer(pipe, &transfer);
   }

> Something else that might be useful at some point would be another
> entrypoint which passes in a pointer to the data to be transferred.
> This
> could allow saving a CPU copy in cases where the state tracker
> already
> has the data in the form appropriate for the transfer. For drivers
> that
> can't take advantage of that, there could be a utility function which
> uses the get_transfer and transfer_destroy hooks.

Yes. It would provide yet another way of doing transfers, but it could save 
intermediate copies on several hot spots.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-27 Thread Michel Dänzer
On Don, 2012-01-26 at 20:45 +0100, Marek Olšák wrote: 
> On Tue, Jan 10, 2012 at 6:20 PM, Jose Fonseca  wrote:
> >> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca 
> >> wrote:
> >> > 
> >> > Also, please provide app name and performance figures w/ this
> >> > change.
> >>
> >> OK. Torcs, the Forza track at the start.
> >>
> >> With u_upload_unmap before drawing:
> >> 21.4 - 22.1 fps
> >>
> >> Without u_upload_unmap:
> >> 22.7 - 23.1 fps
> >
> >
> >> The improvement is approximately 1.1 fps, which is probably too
> >> little
> >> for other people to care, but why throw it away?
> >
> > This is roughly 5%. It is still significant.
> >
> > But it surprises me it is so high.
> >
> > Perhaps we should have a couple of fast entrypoints for transfering to/from 
> > buffers without the overhead of creating transfer objects.
> 
> Okay so the plan is to keep refactoring the transfer API until the
> overhead disappears? Sounds good.
> 
> I propose these changes:
> 1) Merge get_transfer with transfer_map.
> 2) Merge transfer_unmap with transfer_destroy.

AFAIR everybody agreed a while ago this should be done.

> 3) Make the pipe_transfer struct fully opaque, but then there must be
> another way to return the strides of mapped textures (out parameters
> of transfer_map?). Think of this as decoupling driver-private transfer
> objects from ordinary return values like the strides.
> 4) The drivers which don't need transfer objects (e.g. non-texture
> transfers) can return a NULL pipe_transfer struct, making transfer
> objects fully optional. Only the returned pointer into the resource
> determines whether transfer_map has been successful.
> 
> (1) and (2) should help reduce the call overhead. (3) is a preparation
> for (4). (4) should help kill all the code required to allocate,
> initialize, and free the pipe_transfer struct when it's needed by
> neither a driver nor a state tracker.

Sounds good to me.

Something else that might be useful at some point would be another
entrypoint which passes in a pointer to the data to be transferred. This
could allow saving a CPU copy in cases where the state tracker already
has the data in the form appropriate for the transfer. For drivers that
can't take advantage of that, there could be a utility function which
uses the get_transfer and transfer_destroy hooks.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-26 Thread Marek Olšák
On Tue, Jan 10, 2012 at 6:20 PM, Jose Fonseca  wrote:
>
>
> - Original Message -
>> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca 
>> wrote:
>> > - Original Message -
>> >> The flag is optional, it doesn't have to implemented by everybody.
>> >> If
>> >> we do the uploads in the state tracker, we will also do any
>> >> required
>> >> data transformation so that drivers don't have to do it at all.
>> >
>> > The same can be said for everything.
>> >
>> > I don't object adding yet another code path specific to a subset of
>> > hardware, provided the benefits justify its existence.
>> >
>> > But honestly I'm not yet convinced of this, as there was no attempt
>> > to backup with solid arguments why this matters.
>> >
>> > Furthermore this violates one of the principles gallium (and all 3D
>> > apis) have of unmapping all resources when drawing.
>>
>> If my idea violates one principle of all 3D APIs, this violates them
>> all:
>> http://www.opengl.org/registry/specs/AMD/pinned_memory.txt
>
> Indeed.
>
>> It allows reading user memory by hardware, and neither the driver nor
>> the hardware is notified when the memory contents are changed by the
>> user.
>>
>> BTW, I don't insist on this. I thought it would be a nice addition
>> allowing user buffer uploads to be moved out of (especially radeon)
>> drivers. If you really believe it's a bad optimization, feel free to
>> revert. WIthout it, things wouldn't change for me at all...
>
> Fair enough.
>
>> >
>> >> > It looks to me that state trackers and/or drivers are not
>> >> > properly
>> >> > using PIPE_USAGE_STREAM flag.  As all cases where
>> >> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do
>> >> > it would be for the state tracker to set PIPE_USAGE_STREAM, the
>> >> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping
>> >> > permanently internally, making mapping/unmapping of such buffers
>> >> > mere no-ops.
>> >>
>> >> We were doing that already and it wasn't good enough. Avoiding
>> >> create+map+unmap+destroy *calls* have brought higher frame rates
>> >> in
>> >> apps with lots of draw operations.
>> >
>> > I understand the number map/unmap call is reduced. But how does
>> > this interface change affect in any way the number of
>> > create/destroy calls?
>>
>> create = get_transfer
>> destroy = transfer_destroy
>
> Ah. BTW, we should probably unify create+map and destroy+unmap to reduce 
> overhead. I would probably helps here.
>
>> > Also, please provide app name and performance figures w/ this
>> > change.
>>
>> OK. Torcs, the Forza track at the start.
>>
>> With u_upload_unmap before drawing:
>> 21.4 - 22.1 fps
>>
>> Without u_upload_unmap:
>> 22.7 - 23.1 fps
>
>
>> The improvement is approximately 1.1 fps, which is probably too
>> little
>> for other people to care, but why throw it away?
>
> This is roughly 5%. It is still significant.
>
> But it surprises me it is so high.
>
> Perhaps we should have a couple of fast entrypoints for transfering to/from 
> buffers without the overhead of creating transfer objects.

Okay so the plan is to keep refactoring the transfer API until the
overhead disappears? Sounds good.

I propose these changes:
1) Merge get_transfer with transfer_map.
2) Merge transfer_unmap with transfer_destroy.
3) Make the pipe_transfer struct fully opaque, but then there must be
another way to return the strides of mapped textures (out parameters
of transfer_map?). Think of this as decoupling driver-private transfer
objects from ordinary return values like the strides.
4) The drivers which don't need transfer objects (e.g. non-texture
transfers) can return a NULL pipe_transfer struct, making transfer
objects fully optional. Only the returned pointer into the resource
determines whether transfer_map has been successful.

(1) and (2) should help reduce the call overhead. (3) is a preparation
for (4). (4) should help kill all the code required to allocate,
initialize, and free the pipe_transfer struct when it's needed by
neither a driver nor a state tracker.

Comments welcome.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Jose Fonseca


- Original Message -
> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca 
> wrote:
> > - Original Message -
> >> The flag is optional, it doesn't have to implemented by everybody.
> >> If
> >> we do the uploads in the state tracker, we will also do any
> >> required
> >> data transformation so that drivers don't have to do it at all.
> >
> > The same can be said for everything.
> >
> > I don't object adding yet another code path specific to a subset of
> > hardware, provided the benefits justify its existence.
> >
> > But honestly I'm not yet convinced of this, as there was no attempt
> > to backup with solid arguments why this matters.
> >
> > Furthermore this violates one of the principles gallium (and all 3D
> > apis) have of unmapping all resources when drawing.
> 
> If my idea violates one principle of all 3D APIs, this violates them
> all:
> http://www.opengl.org/registry/specs/AMD/pinned_memory.txt

Indeed.

> It allows reading user memory by hardware, and neither the driver nor
> the hardware is notified when the memory contents are changed by the
> user.
>
> BTW, I don't insist on this. I thought it would be a nice addition
> allowing user buffer uploads to be moved out of (especially radeon)
> drivers. If you really believe it's a bad optimization, feel free to
> revert. WIthout it, things wouldn't change for me at all...

Fair enough.

> >
> >> > It looks to me that state trackers and/or drivers are not
> >> > properly
> >> > using PIPE_USAGE_STREAM flag.  As all cases where
> >> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do
> >> > it would be for the state tracker to set PIPE_USAGE_STREAM, the
> >> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping
> >> > permanently internally, making mapping/unmapping of such buffers
> >> > mere no-ops.
> >>
> >> We were doing that already and it wasn't good enough. Avoiding
> >> create+map+unmap+destroy *calls* have brought higher frame rates
> >> in
> >> apps with lots of draw operations.
> >
> > I understand the number map/unmap call is reduced. But how does
> > this interface change affect in any way the number of
> > create/destroy calls?
> 
> create = get_transfer
> destroy = transfer_destroy

Ah. BTW, we should probably unify create+map and destroy+unmap to reduce 
overhead. I would probably helps here.

> > Also, please provide app name and performance figures w/ this
> > change.
> 
> OK. Torcs, the Forza track at the start.
> 
> With u_upload_unmap before drawing:
> 21.4 - 22.1 fps
> 
> Without u_upload_unmap:
> 22.7 - 23.1 fps


> The improvement is approximately 1.1 fps, which is probably too
> little
> for other people to care, but why throw it away?

This is roughly 5%. It is still significant.

But it surprises me it is so high.

Perhaps we should have a couple of fast entrypoints for transfering to/from 
buffers without the overhead of creating transfer objects.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Marek Olšák
On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca  wrote:
> - Original Message -
>> The flag is optional, it doesn't have to implemented by everybody. If
>> we do the uploads in the state tracker, we will also do any required
>> data transformation so that drivers don't have to do it at all.
>
> The same can be said for everything.
>
> I don't object adding yet another code path specific to a subset of hardware, 
> provided the benefits justify its existence.
>
> But honestly I'm not yet convinced of this, as there was no attempt to backup 
> with solid arguments why this matters.
>
> Furthermore this violates one of the principles gallium (and all 3D apis) 
> have of unmapping all resources when drawing.

If my idea violates one principle of all 3D APIs, this violates them all:
http://www.opengl.org/registry/specs/AMD/pinned_memory.txt

It allows reading user memory by hardware, and neither the driver nor
the hardware is notified when the memory contents are changed by the
user.

BTW, I don't insist on this. I thought it would be a nice addition
allowing user buffer uploads to be moved out of (especially radeon)
drivers. If you really believe it's a bad optimization, feel free to
revert. WIthout it, things wouldn't change for me at all...

>
>> > It looks to me that state trackers and/or drivers are not properly
>> > using PIPE_USAGE_STREAM flag.  As all cases where
>> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do
>> > it would be for the state tracker to set PIPE_USAGE_STREAM, the
>> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping
>> > permanently internally, making mapping/unmapping of such buffers
>> > mere no-ops.
>>
>> We were doing that already and it wasn't good enough. Avoiding
>> create+map+unmap+destroy *calls* have brought higher frame rates in
>> apps with lots of draw operations.
>
> I understand the number map/unmap call is reduced. But how does this 
> interface change affect in any way the number of create/destroy calls?

create = get_transfer
destroy = transfer_destroy

>
> Also, please provide app name and performance figures w/ this change.

OK. Torcs, the Forza track at the start.

With u_upload_unmap before drawing:
21.4 - 22.1 fps

Without u_upload_unmap:
22.7 - 23.1 fps

The improvement is approximately 1.1 fps, which is probably too little
for other people to care, but why throw it away?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Jose Fonseca


- Original Message -
> On 01/10/2012 01:13 PM, Jose Fonseca wrote:
> > - Original Message -
> >> On 10.01.2012 12:29, Jose Fonseca wrote:
> >>> Still catching up on email traffic during holidays...
> >>>
> >>> I agree that user buffer uploads should be moved out of drivers,
> >>> but I don't think this is the way to go.
> >>
> >> I don't. If the state tracker uploads user buffers and presents
> >> them
> >> to
> >> the driver as normal buffers, it will have to change the
> >> pipe_vertex_buffers for each draw, causing annoying re-validation
> >> overhead.
> > 
> > This is not strictly necessary.  We could simply use index_bias
> > provided the vertex strides don't change (the likely scenario for
> > the sort of cases you are talking here). Thomas did that something
> > along those lines for the svga driver.
> > 
> Changing the index bias changes the VertexID, so you can't. But I
> suppose we could add a second index bias that doesn't affect it ...
> 
> > And we could do much better than that: I'm talking about proper
> > reverse-engineering of the application intention, such as using
> > one-shot dynamic vertex buffers for one-shot data, and reusable
> > static vertex buffers for immutable data. Employing both
> > heuristics like scanning the user memory for changes and end of
> > data; and also get information from the OS virtual memory
> > subsystem (e.g., if the user memory pointer is in a read-only
> > segment then it won't likely change, if it's in a stack segment it
> > will likely be one-off, if its in malloc, then we can get an upper
> > bound from the OS).   This analysis / memory tracking is something
> > too complex to do for each driver, but worthwhile if its done in
> > the state tracker for all drivers benefit.
> > 
> > I honestly think that's unnecessary to bother pipe drivers with
> > user pointers and that we should put our bets on solving this
> > problem efficiently once for all -- but I'm fine leaving these
> > user pointers in the interface eternally, for sake of both
> > skeptics and software renderers (who can always benefit from
> > accessing user memory directly).
> > 
> > 
> >> This is very bad if there are a lot of small draw calls.
> >> Unfortunately many applications do this, and we do care about
> >> these
> >> cases.
> >>
> >> Also, another example I'm dealing with atm, it will be difficult
> >> to
> >> extract a small set of wide-spread vertices from a user buffer
> >> into a
> >> oneshot vertex buffer (there are apps where this really helps a
> >> lot)
> >> because you're changing the vertex index / gl_VertexID.
> >> I can do that, because I know the hw has a special vertex-index
> >> buffer
> >> (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to
> >> the
> >> VERTEXID system value so I don't need to modify the shader.
> > 
> > Backing user buffers on hardware buffers does not necessary imply
> > remapping vertices -- that's merely how it is implemented now. We
> > could either create a one-shot hardware buffer with the same size
> > and fill in just the necessary spots; or better to recognize the
> > underlying user memory buffer start and size and track changes. We
> > could also expose this vertex-index buffer in the interface (other
> > drivers could implement it as vertex texture).
> > 
> 
> Filling in just the necessary spots wastes a lot of space, especially
> if
> these spots are re-used with different data (you need to reallocate
> space), and checking if the relevant data changed is a performance
> loss
> in itself.

Don't forget that the alternative to checking is to copy blindly, which also 
implies reading the source.  If one uses hashes instead of compares, and one 
can determine statistically that certain contents is usually immutable, then 
one can guarantee that checking to be a win, especially considering the 
bandwidth difference on PCIe vs VRAM.

After a couple of frames it should be clear whether a user memory buffer at a 
given address is static or dynamic.

> 
> > 
> > Probably the best approach is a mixture: back big dynamic/static
> > user buffers in individual hardware buffers, back small dynamic
> > user buffers in linearly suballocated ranges of coarse sized
> > hardware buffers (i.e., what we do now).
> > 
> 
> In the end, I'll only be convinced removing user buffers is good if
> the
> new solution performs equally or better in all cases (or at least not
> significantly worse in the ugliest of cases) ...

Of course. As I said, I'm fine w/ leaving user buffers interface if just for 
software renderers sake. And this is all just an idea at this stage.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Christoph Bumiller
On 01/10/2012 01:13 PM, Jose Fonseca wrote:
> - Original Message -
>> On 10.01.2012 12:29, Jose Fonseca wrote:
>>> Still catching up on email traffic during holidays...
>>>
>>> I agree that user buffer uploads should be moved out of drivers,
>>> but I don't think this is the way to go.
>>
>> I don't. If the state tracker uploads user buffers and presents them
>> to
>> the driver as normal buffers, it will have to change the
>> pipe_vertex_buffers for each draw, causing annoying re-validation
>> overhead. 
> 
> This is not strictly necessary.  We could simply use index_bias provided the 
> vertex strides don't change (the likely scenario for the sort of cases you 
> are talking here). Thomas did that something along those lines for the svga 
> driver.
> 
Changing the index bias changes the VertexID, so you can't. But I
suppose we could add a second index bias that doesn't affect it ...

> And we could do much better than that: I'm talking about proper 
> reverse-engineering of the application intention, such as using one-shot 
> dynamic vertex buffers for one-shot data, and reusable static vertex buffers 
> for immutable data. Employing both heuristics like scanning the user memory 
> for changes and end of data; and also get information from the OS virtual 
> memory subsystem (e.g., if the user memory pointer is in a read-only segment 
> then it won't likely change, if it's in a stack segment it will likely be 
> one-off, if its in malloc, then we can get an upper bound from the OS).   
> This analysis / memory tracking is something too complex to do for each 
> driver, but worthwhile if its done in the state tracker for all drivers 
> benefit.
> 
> I honestly think that's unnecessary to bother pipe drivers with user pointers 
> and that we should put our bets on solving this problem efficiently once for 
> all -- but I'm fine leaving these user pointers in the interface eternally, 
> for sake of both skeptics and software renderers (who can always benefit from 
> accessing user memory directly). 
> 
> 
>> This is very bad if there are a lot of small draw calls.
>> Unfortunately many applications do this, and we do care about these
>> cases.
>>
>> Also, another example I'm dealing with atm, it will be difficult to
>> extract a small set of wide-spread vertices from a user buffer into a
>> oneshot vertex buffer (there are apps where this really helps a lot)
>> because you're changing the vertex index / gl_VertexID.
>> I can do that, because I know the hw has a special vertex-index
>> buffer
>> (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to the
>> VERTEXID system value so I don't need to modify the shader.
> 
> Backing user buffers on hardware buffers does not necessary imply remapping 
> vertices -- that's merely how it is implemented now. We could either create a 
> one-shot hardware buffer with the same size and fill in just the necessary 
> spots; or better to recognize the underlying user memory buffer start and 
> size and track changes. We could also expose this vertex-index buffer in the 
> interface (other drivers could implement it as vertex texture).
> 

Filling in just the necessary spots wastes a lot of space, especially if
these spots are re-used with different data (you need to reallocate
space), and checking if the relevant data changed is a performance loss
in itself.

> 
> Probably the best approach is a mixture: back big dynamic/static user buffers 
> in individual hardware buffers, back small dynamic user buffers in linearly 
> suballocated ranges of coarse sized hardware buffers (i.e., what we do now).
> 

In the end, I'll only be convinced removing user buffers is good if the
new solution performs equally or better in all cases (or at least not
significantly worse in the ugliest of cases) ...

> 
> Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Jose Fonseca
- Original Message -
> On Tue, Jan 10, 2012 at 12:29 PM, Jose Fonseca 
> wrote:
> > Still catching up on email traffic during holidays...
> >
> > I agree that user buffer uploads should be moved out of drivers,
> > but I don't think this is the way to go.
> >
> > This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes
> > the ability to transform this data in any way before reashing the
> > GPU -- i.e., prevents any sort of workarounds -- something can't
> > be always guaranteed. Flushing with map helps is also
> > non-portable.
> 
> The flag is optional, it doesn't have to implemented by everybody. If
> we do the uploads in the state tracker, we will also do any required
> data transformation so that drivers don't have to do it at all.

The same can be said for everything.

I don't object adding yet another code path specific to a subset of hardware, 
provided the benefits justify its existence.

But honestly I'm not yet convinced of this, as there was no attempt to backup 
with solid arguments why this matters.

Furthermore this violates one of the principles gallium (and all 3D apis) have 
of unmapping all resources when drawing.

> > It looks to me that state trackers and/or drivers are not properly
> > using PIPE_USAGE_STREAM flag.  As all cases where
> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do
> > it would be for the state tracker to set PIPE_USAGE_STREAM, the
> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping
> > permanently internally, making mapping/unmapping of such buffers
> > mere no-ops.
> 
> We were doing that already and it wasn't good enough. Avoiding
> create+map+unmap+destroy *calls* have brought higher frame rates in
> apps with lots of draw operations.

I understand the number map/unmap call is reduced. But how does this interface 
change affect in any way the number of create/destroy calls?

Also, please provide app name and performance figures w/ this change.

Jose

PS: Keith Whitwell de-fact no longer is the Gallium's "Benevolent Dictator for 
Life", but we'll need to institute/enforce more due process with Gallium 
interface changes to ensure that gallium doesn't become something radically 
different from its vision & future requirements.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Marek Olšák
On Tue, Jan 10, 2012 at 12:29 PM, Jose Fonseca  wrote:
> Still catching up on email traffic during holidays...
>
> I agree that user buffer uploads should be moved out of drivers, but I don't 
> think this is the way to go.
>
> This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes the 
> ability to transform this data in any way before reashing the GPU -- i.e., 
> prevents any sort of workarounds -- something can't be always guaranteed. 
> Flushing with map helps is also non-portable.

The flag is optional, it doesn't have to implemented by everybody. If
we do the uploads in the state tracker, we will also do any required
data transformation so that drivers don't have to do it at all.

>
>
> It looks to me that state trackers and/or drivers are not properly using 
> PIPE_USAGE_STREAM flag.  As all cases where PIPE_TRANSFER_MAP_PERMANENTLY 
> would be used, the right way to do it would be for the state tracker to set 
> PIPE_USAGE_STREAM, the pipe driver to recognize PIPE_USAGE_STREAM, and keep 
> the mapping permanently internally, making mapping/unmapping of such buffers 
> mere no-ops.

We were doing that already and it wasn't good enough. Avoiding
create+map+unmap+destroy *calls* have brought higher frame rates in
apps with lots of draw operations. It may not be the ideal
optimization, but so far it's the best we have.

Marek

>
>
> In summary, for me PIPE_TRANSFER_MAP_PERMANENTLY is premature/bad 
> optmization.  Before we are worried about saving a couple of map/unmap calls, 
> we should ensure that PIPE_USAGE_STREAM/PIPE_USAGE_DYNAMIC code paths are 
> fully optimal.
>
>
> Jose
>
> - Original Message -
>> Please see the diff for further info.
>>
>> This paves the way for moving user buffer uploads out of drivers and
>> should
>> allow to clean up the mess in u_upload_mgr in the meantime.
>>
>> For now only allowed for buffers on r300 and r600.
>> ---
>>  src/gallium/drivers/i915/i915_resource_buffer.c  |    7 ++-
>>  src/gallium/drivers/i915/i915_resource_texture.c |    7 ++-
>>  src/gallium/drivers/llvmpipe/lp_texture.c        |    4 
>>  src/gallium/drivers/nouveau/nouveau_buffer.c     |    8 +++-
>>  src/gallium/drivers/nv50/nv50_transfer.c         |    2 +-
>>  src/gallium/drivers/nvc0/nvc0_transfer.c         |    2 +-
>>  src/gallium/drivers/nvfx/nvfx_transfer.c         |    3 +++
>>  src/gallium/drivers/r300/r300_transfer.c         |    4 
>>  src/gallium/drivers/r600/r600_texture.c          |    4 
>>  src/gallium/drivers/svga/svga_resource_buffer.c  |    4 
>>  src/gallium/drivers/svga/svga_resource_texture.c |    2 +-
>>  src/gallium/include/pipe/p_defines.h             |   16
>>  
>>  12 files changed, 57 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c
>> b/src/gallium/drivers/i915/i915_resource_buffer.c
>> index 77c0345..c54e481 100644
>> --- a/src/gallium/drivers/i915/i915_resource_buffer.c
>> +++ b/src/gallium/drivers/i915/i915_resource_buffer.c
>> @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
>>                    const struct pipe_box *box)
>>  {
>>     struct i915_context *i915 = i915_context(pipe);
>> -   struct pipe_transfer *transfer =
>> util_slab_alloc(&i915->transfer_pool);
>> +   struct pipe_transfer *transfer;
>>
>> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> +      return NULL;
>> +   }
>> +
>> +   transfer = util_slab_alloc(&i915->transfer_pool);
>>     if (transfer == NULL)
>>        return NULL;
>>
>> diff --git a/src/gallium/drivers/i915/i915_resource_texture.c
>> b/src/gallium/drivers/i915/i915_resource_texture.c
>> index 8ff733a..64d071c 100644
>> --- a/src/gallium/drivers/i915/i915_resource_texture.c
>> +++ b/src/gallium/drivers/i915/i915_resource_texture.c
>> @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context
>> *pipe,
>>  {
>>     struct i915_context *i915 = i915_context(pipe);
>>     struct i915_texture *tex = i915_texture(resource);
>> -   struct i915_transfer *transfer =
>> util_slab_alloc(&i915->texture_transfer_pool);
>> +   struct i915_transfer *transfer;
>>     boolean use_staging_texture = FALSE;
>>
>> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> +      return NULL;
>> +   }
>> +
>> +   transfer = util_slab_alloc(&i915->texture_transfer_pool);
>>     if (transfer == NULL)
>>        return NULL;
>>
>> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c
>> b/src/gallium/drivers/llvmpipe/lp_texture.c
>> index ca38571..d86d493 100644
>> --- a/src/gallium/drivers/llvmpipe/lp_texture.c
>> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
>> @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
>>     assert(resource);
>>     assert(level <= resource->last_level);
>>
>> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> +      return NULL;
>> +   }
>> +
>>     /*
>>      * Transfers, like other pipe operations, must happen in order,
>>      so flush the
>>      * context if necessary.
>> 

Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Jose Fonseca
- Original Message -
> On 10.01.2012 12:29, Jose Fonseca wrote:
> > Still catching up on email traffic during holidays...
> >
> > I agree that user buffer uploads should be moved out of drivers,
> > but I don't think this is the way to go.
> 
> I don't. If the state tracker uploads user buffers and presents them
> to
> the driver as normal buffers, it will have to change the
> pipe_vertex_buffers for each draw, causing annoying re-validation
> overhead. 

This is not strictly necessary.  We could simply use index_bias provided the 
vertex strides don't change (the likely scenario for the sort of cases you are 
talking here). Thomas did that something along those lines for the svga driver.

And we could do much better than that: I'm talking about proper 
reverse-engineering of the application intention, such as using one-shot 
dynamic vertex buffers for one-shot data, and reusable static vertex buffers 
for immutable data. Employing both heuristics like scanning the user memory for 
changes and end of data; and also get information from the OS virtual memory 
subsystem (e.g., if the user memory pointer is in a read-only segment then it 
won't likely change, if it's in a stack segment it will likely be one-off, if 
its in malloc, then we can get an upper bound from the OS).   This analysis / 
memory tracking is something too complex to do for each driver, but worthwhile 
if its done in the state tracker for all drivers benefit.

I honestly think that's unnecessary to bother pipe drivers with user pointers 
and that we should put our bets on solving this problem efficiently once for 
all -- but I'm fine leaving these user pointers in the interface eternally, for 
sake of both skeptics and software renderers (who can always benefit from 
accessing user memory directly). 


> This is very bad if there are a lot of small draw calls.
> Unfortunately many applications do this, and we do care about these
> cases.
> 
> Also, another example I'm dealing with atm, it will be difficult to
> extract a small set of wide-spread vertices from a user buffer into a
> oneshot vertex buffer (there are apps where this really helps a lot)
> because you're changing the vertex index / gl_VertexID.
> I can do that, because I know the hw has a special vertex-index
> buffer
> (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to the
> VERTEXID system value so I don't need to modify the shader.

Backing user buffers on hardware buffers does not necessary imply remapping 
vertices -- that's merely how it is implemented now. We could either create a 
one-shot hardware buffer with the same size and fill in just the necessary 
spots; or better to recognize the underlying user memory buffer start and size 
and track changes. We could also expose this vertex-index buffer in the 
interface (other drivers could implement it as vertex texture).


Probably the best approach is a mixture: back big dynamic/static user buffers 
in individual hardware buffers, back small dynamic user buffers in linearly 
suballocated ranges of coarse sized hardware buffers (i.e., what we do now).


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Christoph Bumiller
On 10.01.2012 12:29, Jose Fonseca wrote:
> Still catching up on email traffic during holidays...
>
> I agree that user buffer uploads should be moved out of drivers, but I don't 
> think this is the way to go. 

I don't. If the state tracker uploads user buffers and presents them to
the driver as normal buffers, it will have to change the
pipe_vertex_buffers for each draw, causing annoying re-validation
overhead. This is very bad if there are a lot of small draw calls.
Unfortunately many applications do this, and we do care about these cases.

Also, another example I'm dealing with atm, it will be difficult to
extract a small set of wide-spread vertices from a user buffer into a
oneshot vertex buffer (there are apps where this really helps a lot)
because you're changing the vertex index / gl_VertexID.
I can do that, because I know the hw has a special vertex-index buffer
(nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to the
VERTEXID system value so I don't need to modify the shader.

> This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes the 
> ability to transform this data in any way before reashing the GPU -- i.e., 
> prevents any sort of workarounds -- something can't be always guaranteed. 
> Flushing with map helps is also non-portable.
>
>
> It looks to me that state trackers and/or drivers are not properly using 
> PIPE_USAGE_STREAM flag.  As all cases where PIPE_TRANSFER_MAP_PERMANENTLY 
> would be used, the right way to do it would be for the state tracker to set 
> PIPE_USAGE_STREAM, the pipe driver to recognize PIPE_USAGE_STREAM, and keep 
> the mapping permanently internally, making mapping/unmapping of such buffers 
> mere no-ops.
>
>
> In summary, for me PIPE_TRANSFER_MAP_PERMANENTLY is premature/bad 
> optmization.  Before we are worried about saving a couple of map/unmap calls, 
> we should ensure that PIPE_USAGE_STREAM/PIPE_USAGE_DYNAMIC code paths are 
> fully optimal.
>
>
> Jose
>
> - Original Message -
>> Please see the diff for further info.
>>
>> This paves the way for moving user buffer uploads out of drivers and
>> should
>> allow to clean up the mess in u_upload_mgr in the meantime.
>>
>> For now only allowed for buffers on r300 and r600.
>> ---
>>  src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
>>  src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
>>  src/gallium/drivers/llvmpipe/lp_texture.c|4 
>>  src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
>>  src/gallium/drivers/nv50/nv50_transfer.c |2 +-
>>  src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
>>  src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
>>  src/gallium/drivers/r300/r300_transfer.c |4 
>>  src/gallium/drivers/r600/r600_texture.c  |4 
>>  src/gallium/drivers/svga/svga_resource_buffer.c  |4 
>>  src/gallium/drivers/svga/svga_resource_texture.c |2 +-
>>  src/gallium/include/pipe/p_defines.h |   16
>>  
>>  12 files changed, 57 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c
>> b/src/gallium/drivers/i915/i915_resource_buffer.c
>> index 77c0345..c54e481 100644
>> --- a/src/gallium/drivers/i915/i915_resource_buffer.c
>> +++ b/src/gallium/drivers/i915/i915_resource_buffer.c
>> @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
>>const struct pipe_box *box)
>>  {
>> struct i915_context *i915 = i915_context(pipe);
>> -   struct pipe_transfer *transfer =
>> util_slab_alloc(&i915->transfer_pool);
>> +   struct pipe_transfer *transfer;
>>  
>> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> +  return NULL;
>> +   }
>> +
>> +   transfer = util_slab_alloc(&i915->transfer_pool);
>> if (transfer == NULL)
>>return NULL;
>>  
>> diff --git a/src/gallium/drivers/i915/i915_resource_texture.c
>> b/src/gallium/drivers/i915/i915_resource_texture.c
>> index 8ff733a..64d071c 100644
>> --- a/src/gallium/drivers/i915/i915_resource_texture.c
>> +++ b/src/gallium/drivers/i915/i915_resource_texture.c
>> @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context
>> *pipe,
>>  {
>> struct i915_context *i915 = i915_context(pipe);
>> struct i915_texture *tex = i915_texture(resource);
>> -   struct i915_transfer *transfer =
>> util_slab_alloc(&i915->texture_transfer_pool);
>> +   struct i915_transfer *transfer;
>> boolean use_staging_texture = FALSE;
>>  
>> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
>> +  return NULL;
>> +   }
>> +
>> +   transfer = util_slab_alloc(&i915->texture_transfer_pool);
>> if (transfer == NULL)
>>return NULL;
>>  
>> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c
>> b/src/gallium/drivers/llvmpipe/lp_texture.c
>> index ca38571..d86d493 100644
>> --- a/src/gallium/drivers/llvmpipe/lp_texture.c
>> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
>> @@ -587,6 +587,10 @@ llvm

Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-10 Thread Jose Fonseca
Still catching up on email traffic during holidays...

I agree that user buffer uploads should be moved out of drivers, but I don't 
think this is the way to go. 

This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes the ability 
to transform this data in any way before reashing the GPU -- i.e., prevents any 
sort of workarounds -- something can't be always guaranteed. Flushing with map 
helps is also non-portable.


It looks to me that state trackers and/or drivers are not properly using 
PIPE_USAGE_STREAM flag.  As all cases where PIPE_TRANSFER_MAP_PERMANENTLY would 
be used, the right way to do it would be for the state tracker to set 
PIPE_USAGE_STREAM, the pipe driver to recognize PIPE_USAGE_STREAM, and keep the 
mapping permanently internally, making mapping/unmapping of such buffers mere 
no-ops.


In summary, for me PIPE_TRANSFER_MAP_PERMANENTLY is premature/bad optmization.  
Before we are worried about saving a couple of map/unmap calls, we should 
ensure that PIPE_USAGE_STREAM/PIPE_USAGE_DYNAMIC code paths are fully optimal.


Jose

- Original Message -
> Please see the diff for further info.
> 
> This paves the way for moving user buffer uploads out of drivers and
> should
> allow to clean up the mess in u_upload_mgr in the meantime.
> 
> For now only allowed for buffers on r300 and r600.
> ---
>  src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
>  src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
>  src/gallium/drivers/llvmpipe/lp_texture.c|4 
>  src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
>  src/gallium/drivers/nv50/nv50_transfer.c |2 +-
>  src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
>  src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
>  src/gallium/drivers/r300/r300_transfer.c |4 
>  src/gallium/drivers/r600/r600_texture.c  |4 
>  src/gallium/drivers/svga/svga_resource_buffer.c  |4 
>  src/gallium/drivers/svga/svga_resource_texture.c |2 +-
>  src/gallium/include/pipe/p_defines.h |   16
>  
>  12 files changed, 57 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c
> b/src/gallium/drivers/i915/i915_resource_buffer.c
> index 77c0345..c54e481 100644
> --- a/src/gallium/drivers/i915/i915_resource_buffer.c
> +++ b/src/gallium/drivers/i915/i915_resource_buffer.c
> @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
>const struct pipe_box *box)
>  {
> struct i915_context *i915 = i915_context(pipe);
> -   struct pipe_transfer *transfer =
> util_slab_alloc(&i915->transfer_pool);
> +   struct pipe_transfer *transfer;
>  
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> +   transfer = util_slab_alloc(&i915->transfer_pool);
> if (transfer == NULL)
>return NULL;
>  
> diff --git a/src/gallium/drivers/i915/i915_resource_texture.c
> b/src/gallium/drivers/i915/i915_resource_texture.c
> index 8ff733a..64d071c 100644
> --- a/src/gallium/drivers/i915/i915_resource_texture.c
> +++ b/src/gallium/drivers/i915/i915_resource_texture.c
> @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context
> *pipe,
>  {
> struct i915_context *i915 = i915_context(pipe);
> struct i915_texture *tex = i915_texture(resource);
> -   struct i915_transfer *transfer =
> util_slab_alloc(&i915->texture_transfer_pool);
> +   struct i915_transfer *transfer;
> boolean use_staging_texture = FALSE;
>  
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> +   transfer = util_slab_alloc(&i915->texture_transfer_pool);
> if (transfer == NULL)
>return NULL;
>  
> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c
> b/src/gallium/drivers/llvmpipe/lp_texture.c
> index ca38571..d86d493 100644
> --- a/src/gallium/drivers/llvmpipe/lp_texture.c
> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
> @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
> assert(resource);
> assert(level <= resource->last_level);
>  
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NULL;
> +   }
> +
> /*
>  * Transfers, like other pipe operations, must happen in order,
>  so flush the
>  * context if necessary.
> diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c
> b/src/gallium/drivers/nouveau/nouveau_buffer.c
> index f822625..02186ba 100644
> --- a/src/gallium/drivers/nouveau/nouveau_buffer.c
> +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
> @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context
> *pipe,
>  {
> struct nv04_resource *buf = nv04_resource(resource);
> struct nouveau_context *nv = nouveau_context(pipe);
> -   struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
> +   struct nouveau_transfer *xfr;
> +
> +   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
> +  return NU

Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-02 Thread Christian König
Looks good on first sign. I'm waiting for this for quite some time now, 
cause it makes XvMC state tracker implementation more cleaner and faster.


Acked-by: Christian König 

On 02.01.2012 01:22, Marek Olšák wrote:

Please see the diff for further info.

This paves the way for moving user buffer uploads out of drivers and should
allow to clean up the mess in u_upload_mgr in the meantime.

For now only allowed for buffers on r300 and r600.
---
  src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
  src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
  src/gallium/drivers/llvmpipe/lp_texture.c|4 
  src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
  src/gallium/drivers/nv50/nv50_transfer.c |2 +-
  src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
  src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
  src/gallium/drivers/r300/r300_transfer.c |4 
  src/gallium/drivers/r600/r600_texture.c  |4 
  src/gallium/drivers/svga/svga_resource_buffer.c  |4 
  src/gallium/drivers/svga/svga_resource_texture.c |2 +-
  src/gallium/include/pipe/p_defines.h |   16 
  12 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c 
b/src/gallium/drivers/i915/i915_resource_buffer.c
index 77c0345..c54e481 100644
--- a/src/gallium/drivers/i915/i915_resource_buffer.c
+++ b/src/gallium/drivers/i915/i915_resource_buffer.c
@@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
const struct pipe_box *box)
  {
 struct i915_context *i915 = i915_context(pipe);
-   struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
+   struct pipe_transfer *transfer;

+   if (usage&  PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
+   transfer = util_slab_alloc(&i915->transfer_pool);
 if (transfer == NULL)
return NULL;

diff --git a/src/gallium/drivers/i915/i915_resource_texture.c 
b/src/gallium/drivers/i915/i915_resource_texture.c
index 8ff733a..64d071c 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe,
  {
 struct i915_context *i915 = i915_context(pipe);
 struct i915_texture *tex = i915_texture(resource);
-   struct i915_transfer *transfer = 
util_slab_alloc(&i915->texture_transfer_pool);
+   struct i915_transfer *transfer;
 boolean use_staging_texture = FALSE;

+   if (usage&  PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
+   transfer = util_slab_alloc(&i915->texture_transfer_pool);
 if (transfer == NULL)
return NULL;

diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c 
b/src/gallium/drivers/llvmpipe/lp_texture.c
index ca38571..d86d493 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
 assert(resource);
 assert(level<= resource->last_level);

+   if (usage&  PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
 /*
  * Transfers, like other pipe operations, must happen in order, so flush 
the
  * context if necessary.
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
b/src/gallium/drivers/nouveau/nouveau_buffer.c
index f822625..02186ba 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe,
  {
 struct nv04_resource *buf = nv04_resource(resource);
 struct nouveau_context *nv = nouveau_context(pipe);
-   struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
+   struct nouveau_transfer *xfr;
+
+   if (usage&  PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
+   xfr = CALLOC_STRUCT(nouveau_transfer);
 if (!xfr)
return NULL;

diff --git a/src/gallium/drivers/nv50/nv50_transfer.c 
b/src/gallium/drivers/nv50/nv50_transfer.c
index 6f860e7..8ddebeb 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -243,7 +243,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx,
 uint32_t size;
 int ret;

-   if (usage&  PIPE_TRANSFER_MAP_DIRECTLY)
+   if (usage&  (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY))
return NULL;

 tx = CALLOC_STRUCT(nv50_transfer);
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c 
b/src/gallium/drivers/nvc0/nvc0_transfer.c
index f168637..c04f41f 100644
--- a/src/gallium/drivers/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -243,7 +243,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
 uint32_t size;
 int ret;

-   if (usage&  PIPE_TRANSFER_MAP_DIRECTLY)
+   if (usage&  (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY))

[Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY

2012-01-01 Thread Marek Olšák
Please see the diff for further info.

This paves the way for moving user buffer uploads out of drivers and should
allow to clean up the mess in u_upload_mgr in the meantime.

For now only allowed for buffers on r300 and r600.
---
 src/gallium/drivers/i915/i915_resource_buffer.c  |7 ++-
 src/gallium/drivers/i915/i915_resource_texture.c |7 ++-
 src/gallium/drivers/llvmpipe/lp_texture.c|4 
 src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++-
 src/gallium/drivers/nv50/nv50_transfer.c |2 +-
 src/gallium/drivers/nvc0/nvc0_transfer.c |2 +-
 src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++
 src/gallium/drivers/r300/r300_transfer.c |4 
 src/gallium/drivers/r600/r600_texture.c  |4 
 src/gallium/drivers/svga/svga_resource_buffer.c  |4 
 src/gallium/drivers/svga/svga_resource_texture.c |2 +-
 src/gallium/include/pipe/p_defines.h |   16 
 12 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c 
b/src/gallium/drivers/i915/i915_resource_buffer.c
index 77c0345..c54e481 100644
--- a/src/gallium/drivers/i915/i915_resource_buffer.c
+++ b/src/gallium/drivers/i915/i915_resource_buffer.c
@@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe,
   const struct pipe_box *box)
 {
struct i915_context *i915 = i915_context(pipe);
-   struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool);
+   struct pipe_transfer *transfer;
 
+   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
+   transfer = util_slab_alloc(&i915->transfer_pool);
if (transfer == NULL)
   return NULL;
 
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c 
b/src/gallium/drivers/i915/i915_resource_texture.c
index 8ff733a..64d071c 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe,
 {
struct i915_context *i915 = i915_context(pipe);
struct i915_texture *tex = i915_texture(resource);
-   struct i915_transfer *transfer = 
util_slab_alloc(&i915->texture_transfer_pool);
+   struct i915_transfer *transfer;
boolean use_staging_texture = FALSE;
 
+   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
+   transfer = util_slab_alloc(&i915->texture_transfer_pool);
if (transfer == NULL)
   return NULL;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c 
b/src/gallium/drivers/llvmpipe/lp_texture.c
index ca38571..d86d493 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe,
assert(resource);
assert(level <= resource->last_level);
 
+   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
/*
 * Transfers, like other pipe operations, must happen in order, so flush the
 * context if necessary.
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
b/src/gallium/drivers/nouveau/nouveau_buffer.c
index f822625..02186ba 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe,
 {
struct nv04_resource *buf = nv04_resource(resource);
struct nouveau_context *nv = nouveau_context(pipe);
-   struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer);
+   struct nouveau_transfer *xfr;
+
+   if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) {
+  return NULL;
+   }
+
+   xfr = CALLOC_STRUCT(nouveau_transfer);
if (!xfr)
   return NULL;
 
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c 
b/src/gallium/drivers/nv50/nv50_transfer.c
index 6f860e7..8ddebeb 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -243,7 +243,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx,
uint32_t size;
int ret;
 
-   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+   if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY))
   return NULL;
 
tx = CALLOC_STRUCT(nv50_transfer);
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c 
b/src/gallium/drivers/nvc0/nvc0_transfer.c
index f168637..c04f41f 100644
--- a/src/gallium/drivers/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -243,7 +243,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
uint32_t size;
int ret;
 
-   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+   if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY))
   return NULL;
 
tx = CALLOC_STRUCT(nvc0_transfer);
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c 
b/src/gallium/drivers/nvfx/nvfx_transfer.c
index 7a218b1..605af8e 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/ga