Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
Yes, absolutely, we should have a CAP for that. Marek On Wed, Jul 24, 2013 at 12:20 AM, Jose Fonseca wrote: > - Original Message - >> FYI, OpenGL 4.4, which was released yesterday, adds GL_MAP_PERSISTENT >> and GL_MAP_COHERENT bits as valid parameters of glMapBufferRange and >> glBufferStorage, allowing to use buffers for rendering while they are >> mapped and upload/download data to/from the buffers simultaneously. >> It's now clear that Gallium will have to support it at some point, >> it's only a matter of when. And once we have it, gallium can use it >> for faster buffer uploads. > > IIRC, this sort of mappings are tricky, but not totally impossible on Windows > WDDM. I'm OK adding this, but until we manage to get it working on Windows > this would need to be conditional though. > > Should we have a CAP, instead of having the st tracker systematically retry > with/without PERSISTENT flag? > > Jose > >> >> Marek >> >> On Mon, Jan 2, 2012 at 1:22 AM, Marek Olšák wrote: >> > Please see the diff for further info. >> > >> > This paves the way for moving user buffer uploads out of drivers and should >> > allow to clean up the mess in u_upload_mgr in the meantime. >> > >> > For now only allowed for buffers on r300 and r600. >> > --- >> > src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- >> > src/gallium/drivers/i915/i915_resource_texture.c |7 ++- >> > src/gallium/drivers/llvmpipe/lp_texture.c|4 >> > src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- >> > src/gallium/drivers/nv50/nv50_transfer.c |2 +- >> > src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- >> > src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ >> > src/gallium/drivers/r300/r300_transfer.c |4 >> > src/gallium/drivers/r600/r600_texture.c |4 >> > src/gallium/drivers/svga/svga_resource_buffer.c |4 >> > src/gallium/drivers/svga/svga_resource_texture.c |2 +- >> > src/gallium/include/pipe/p_defines.h | 16 >> > 12 files changed, 57 insertions(+), 6 deletions(-) >> > >> > diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c >> > b/src/gallium/drivers/i915/i915_resource_buffer.c >> > index 77c0345..c54e481 100644 >> > --- a/src/gallium/drivers/i915/i915_resource_buffer.c >> > +++ b/src/gallium/drivers/i915/i915_resource_buffer.c >> > @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, >> >const struct pipe_box *box) >> > { >> > struct i915_context *i915 = i915_context(pipe); >> > - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); >> > + struct pipe_transfer *transfer; >> > >> > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> > + return NULL; >> > + } >> > + >> > + transfer = util_slab_alloc(&i915->transfer_pool); >> > if (transfer == NULL) >> >return NULL; >> > >> > diff --git a/src/gallium/drivers/i915/i915_resource_texture.c >> > b/src/gallium/drivers/i915/i915_resource_texture.c >> > index 8ff733a..64d071c 100644 >> > --- a/src/gallium/drivers/i915/i915_resource_texture.c >> > +++ b/src/gallium/drivers/i915/i915_resource_texture.c >> > @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe, >> > { >> > struct i915_context *i915 = i915_context(pipe); >> > struct i915_texture *tex = i915_texture(resource); >> > - struct i915_transfer *transfer = >> > util_slab_alloc(&i915->texture_transfer_pool); >> > + struct i915_transfer *transfer; >> > boolean use_staging_texture = FALSE; >> > >> > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> > + return NULL; >> > + } >> > + >> > + transfer = util_slab_alloc(&i915->texture_transfer_pool); >> > if (transfer == NULL) >> >return NULL; >> > >> > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c >> > b/src/gallium/drivers/llvmpipe/lp_texture.c >> > index ca38571..d86d493 100644 >> > --- a/src/gallium/drivers/llvmpipe/lp_texture.c >> > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c >> > @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, >> > assert(resource); >> > assert(level <= resource->last_level); >> > >> > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> > + return NULL; >> > + } >> > + >> > /* >> > * Transfers, like other pipe operations, must happen in order, so >> > flush the >> > * context if necessary. >> > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c >> > b/src/gallium/drivers/nouveau/nouveau_buffer.c >> > index f822625..02186ba 100644 >> > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c >> > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c >> > @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe, >> > { >> > struct nv04_resource *buf = nv04_resource(resource); >> > struct nouveau_context *nv = nouveau_context(pipe); >> > - struct nouveau_transfer *xfr
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
- Original Message - > FYI, OpenGL 4.4, which was released yesterday, adds GL_MAP_PERSISTENT > and GL_MAP_COHERENT bits as valid parameters of glMapBufferRange and > glBufferStorage, allowing to use buffers for rendering while they are > mapped and upload/download data to/from the buffers simultaneously. > It's now clear that Gallium will have to support it at some point, > it's only a matter of when. And once we have it, gallium can use it > for faster buffer uploads. IIRC, this sort of mappings are tricky, but not totally impossible on Windows WDDM. I'm OK adding this, but until we manage to get it working on Windows this would need to be conditional though. Should we have a CAP, instead of having the st tracker systematically retry with/without PERSISTENT flag? Jose > > Marek > > On Mon, Jan 2, 2012 at 1:22 AM, Marek Olšák wrote: > > Please see the diff for further info. > > > > This paves the way for moving user buffer uploads out of drivers and should > > allow to clean up the mess in u_upload_mgr in the meantime. > > > > For now only allowed for buffers on r300 and r600. > > --- > > src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- > > src/gallium/drivers/i915/i915_resource_texture.c |7 ++- > > src/gallium/drivers/llvmpipe/lp_texture.c|4 > > src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- > > src/gallium/drivers/nv50/nv50_transfer.c |2 +- > > src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- > > src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ > > src/gallium/drivers/r300/r300_transfer.c |4 > > src/gallium/drivers/r600/r600_texture.c |4 > > src/gallium/drivers/svga/svga_resource_buffer.c |4 > > src/gallium/drivers/svga/svga_resource_texture.c |2 +- > > src/gallium/include/pipe/p_defines.h | 16 > > 12 files changed, 57 insertions(+), 6 deletions(-) > > > > diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c > > b/src/gallium/drivers/i915/i915_resource_buffer.c > > index 77c0345..c54e481 100644 > > --- a/src/gallium/drivers/i915/i915_resource_buffer.c > > +++ b/src/gallium/drivers/i915/i915_resource_buffer.c > > @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, > >const struct pipe_box *box) > > { > > struct i915_context *i915 = i915_context(pipe); > > - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); > > + struct pipe_transfer *transfer; > > > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > > + return NULL; > > + } > > + > > + transfer = util_slab_alloc(&i915->transfer_pool); > > if (transfer == NULL) > >return NULL; > > > > diff --git a/src/gallium/drivers/i915/i915_resource_texture.c > > b/src/gallium/drivers/i915/i915_resource_texture.c > > index 8ff733a..64d071c 100644 > > --- a/src/gallium/drivers/i915/i915_resource_texture.c > > +++ b/src/gallium/drivers/i915/i915_resource_texture.c > > @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe, > > { > > struct i915_context *i915 = i915_context(pipe); > > struct i915_texture *tex = i915_texture(resource); > > - struct i915_transfer *transfer = > > util_slab_alloc(&i915->texture_transfer_pool); > > + struct i915_transfer *transfer; > > boolean use_staging_texture = FALSE; > > > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > > + return NULL; > > + } > > + > > + transfer = util_slab_alloc(&i915->texture_transfer_pool); > > if (transfer == NULL) > >return NULL; > > > > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c > > b/src/gallium/drivers/llvmpipe/lp_texture.c > > index ca38571..d86d493 100644 > > --- a/src/gallium/drivers/llvmpipe/lp_texture.c > > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c > > @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, > > assert(resource); > > assert(level <= resource->last_level); > > > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > > + return NULL; > > + } > > + > > /* > > * Transfers, like other pipe operations, must happen in order, so > > flush the > > * context if necessary. > > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c > > b/src/gallium/drivers/nouveau/nouveau_buffer.c > > index f822625..02186ba 100644 > > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c > > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c > > @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe, > > { > > struct nv04_resource *buf = nv04_resource(resource); > > struct nouveau_context *nv = nouveau_context(pipe); > > - struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer); > > + struct nouveau_transfer *xfr; > > + > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > > + return NULL; > > + } > > + > > + xfr = CALLOC_STRUCT(nouveau_transfer); > >
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
FYI, OpenGL 4.4, which was released yesterday, adds GL_MAP_PERSISTENT and GL_MAP_COHERENT bits as valid parameters of glMapBufferRange and glBufferStorage, allowing to use buffers for rendering while they are mapped and upload/download data to/from the buffers simultaneously. It's now clear that Gallium will have to support it at some point, it's only a matter of when. And once we have it, gallium can use it for faster buffer uploads. Marek On Mon, Jan 2, 2012 at 1:22 AM, Marek Olšák wrote: > Please see the diff for further info. > > This paves the way for moving user buffer uploads out of drivers and should > allow to clean up the mess in u_upload_mgr in the meantime. > > For now only allowed for buffers on r300 and r600. > --- > src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- > src/gallium/drivers/i915/i915_resource_texture.c |7 ++- > src/gallium/drivers/llvmpipe/lp_texture.c|4 > src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- > src/gallium/drivers/nv50/nv50_transfer.c |2 +- > src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- > src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ > src/gallium/drivers/r300/r300_transfer.c |4 > src/gallium/drivers/r600/r600_texture.c |4 > src/gallium/drivers/svga/svga_resource_buffer.c |4 > src/gallium/drivers/svga/svga_resource_texture.c |2 +- > src/gallium/include/pipe/p_defines.h | 16 > 12 files changed, 57 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c > b/src/gallium/drivers/i915/i915_resource_buffer.c > index 77c0345..c54e481 100644 > --- a/src/gallium/drivers/i915/i915_resource_buffer.c > +++ b/src/gallium/drivers/i915/i915_resource_buffer.c > @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, >const struct pipe_box *box) > { > struct i915_context *i915 = i915_context(pipe); > - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); > + struct pipe_transfer *transfer; > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > + transfer = util_slab_alloc(&i915->transfer_pool); > if (transfer == NULL) >return NULL; > > diff --git a/src/gallium/drivers/i915/i915_resource_texture.c > b/src/gallium/drivers/i915/i915_resource_texture.c > index 8ff733a..64d071c 100644 > --- a/src/gallium/drivers/i915/i915_resource_texture.c > +++ b/src/gallium/drivers/i915/i915_resource_texture.c > @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe, > { > struct i915_context *i915 = i915_context(pipe); > struct i915_texture *tex = i915_texture(resource); > - struct i915_transfer *transfer = > util_slab_alloc(&i915->texture_transfer_pool); > + struct i915_transfer *transfer; > boolean use_staging_texture = FALSE; > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > + transfer = util_slab_alloc(&i915->texture_transfer_pool); > if (transfer == NULL) >return NULL; > > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c > b/src/gallium/drivers/llvmpipe/lp_texture.c > index ca38571..d86d493 100644 > --- a/src/gallium/drivers/llvmpipe/lp_texture.c > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c > @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, > assert(resource); > assert(level <= resource->last_level); > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > /* > * Transfers, like other pipe operations, must happen in order, so flush > the > * context if necessary. > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c > b/src/gallium/drivers/nouveau/nouveau_buffer.c > index f822625..02186ba 100644 > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c > @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe, > { > struct nv04_resource *buf = nv04_resource(resource); > struct nouveau_context *nv = nouveau_context(pipe); > - struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer); > + struct nouveau_transfer *xfr; > + > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > + xfr = CALLOC_STRUCT(nouveau_transfer); > if (!xfr) >return NULL; > > diff --git a/src/gallium/drivers/nv50/nv50_transfer.c > b/src/gallium/drivers/nv50/nv50_transfer.c > index 6f860e7..8ddebeb 100644 > --- a/src/gallium/drivers/nv50/nv50_transfer.c > +++ b/src/gallium/drivers/nv50/nv50_transfer.c > @@ -243,7 +243,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, > uint32_t size; > int ret; > > - if (usage & PIPE_TRANSFER_MAP_DIRECTLY) > + if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY)) >return NULL; > > tx = CALLOC_STRUCT(nv50_t
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
- Original Message - > On Don, 2012-01-26 at 20:45 +0100, Marek Olšák wrote: > > On Tue, Jan 10, 2012 at 6:20 PM, Jose Fonseca > > wrote: > > >> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca > > >> > > >> wrote: > > >> > > > >> > Also, please provide app name and performance figures w/ this > > >> > change. > > >> > > >> OK. Torcs, the Forza track at the start. > > >> > > >> With u_upload_unmap before drawing: > > >> 21.4 - 22.1 fps > > >> > > >> Without u_upload_unmap: > > >> 22.7 - 23.1 fps > > > > > > > > >> The improvement is approximately 1.1 fps, which is probably too > > >> little > > >> for other people to care, but why throw it away? > > > > > > This is roughly 5%. It is still significant. > > > > > > But it surprises me it is so high. > > > > > > Perhaps we should have a couple of fast entrypoints for > > > transfering to/from buffers without the overhead of creating > > > transfer objects. > > > > Okay so the plan is to keep refactoring the transfer API until the > > overhead disappears? Sounds good. > > > > I propose these changes: > > 1) Merge get_transfer with transfer_map. > > 2) Merge transfer_unmap with transfer_destroy. > > AFAIR everybody agreed a while ago this should be done. I agree. I know that in some places we pass transfer objects around, and then map/unmap elsewhere, which will make the refactoring harder, but it shouldn't be too hard. > > 3) Make the pipe_transfer struct fully opaque, but then there must > > be > > another way to return the strides of mapped textures (out > > parameters > > of transfer_map?). Think of this as decoupling driver-private > > transfer > > objects from ordinary return values like the strides. > > 4) The drivers which don't need transfer objects (e.g. non-texture > > transfers) can return a NULL pipe_transfer struct, making transfer > > objects fully optional. Only the returned pointer into the resource > > determines whether transfer_map has been successful. > > (1) and (2) should help reduce the call overhead. (3) is a > > preparation > > for (4). (4) should help kill all the code required to allocate, > > initialize, and free the pipe_transfer struct when it's needed by > > neither a driver nor a state tracker. > > Sounds good to me. Sounds great to me too. One way to do this would be to make struct pipe_transfer a state tracker owned structure describing the transfer: struct pipe_transfer { struct pipe_resource *resource; /**< resource to transfer to/from */ unsigned level; /**< texture mipmap level */ enum pipe_transfer_usage usage; struct pipe_box box;/**< region of the resource to access */ unsigned stride;/**< row stride in bytes */ unsigned layer_stride; /**< image/layer stride in bytes */ void *data; // Optional opaque driver private data associated with this transfer void *private; }; And get/destroy_transfer would be disappear, being their functionality done inside map/unmap_transfer. For example: struct pipe_transfer transfer; // TODO: write some inline helper functions to simplify most of this. transfer.resource = resource; // No need to reference count transfer.level = 0; transfer.usage = ...; ... if (pipe->map_transfer(pipe, &transfer)) { memcpy(transfer->data, buffer, size)); pipe->unmap_transfer(pipe, &transfer); } > Something else that might be useful at some point would be another > entrypoint which passes in a pointer to the data to be transferred. > This > could allow saving a CPU copy in cases where the state tracker > already > has the data in the form appropriate for the transfer. For drivers > that > can't take advantage of that, there could be a utility function which > uses the get_transfer and transfer_destroy hooks. Yes. It would provide yet another way of doing transfers, but it could save intermediate copies on several hot spots. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
On Don, 2012-01-26 at 20:45 +0100, Marek Olšák wrote: > On Tue, Jan 10, 2012 at 6:20 PM, Jose Fonseca wrote: > >> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca > >> wrote: > >> > > >> > Also, please provide app name and performance figures w/ this > >> > change. > >> > >> OK. Torcs, the Forza track at the start. > >> > >> With u_upload_unmap before drawing: > >> 21.4 - 22.1 fps > >> > >> Without u_upload_unmap: > >> 22.7 - 23.1 fps > > > > > >> The improvement is approximately 1.1 fps, which is probably too > >> little > >> for other people to care, but why throw it away? > > > > This is roughly 5%. It is still significant. > > > > But it surprises me it is so high. > > > > Perhaps we should have a couple of fast entrypoints for transfering to/from > > buffers without the overhead of creating transfer objects. > > Okay so the plan is to keep refactoring the transfer API until the > overhead disappears? Sounds good. > > I propose these changes: > 1) Merge get_transfer with transfer_map. > 2) Merge transfer_unmap with transfer_destroy. AFAIR everybody agreed a while ago this should be done. > 3) Make the pipe_transfer struct fully opaque, but then there must be > another way to return the strides of mapped textures (out parameters > of transfer_map?). Think of this as decoupling driver-private transfer > objects from ordinary return values like the strides. > 4) The drivers which don't need transfer objects (e.g. non-texture > transfers) can return a NULL pipe_transfer struct, making transfer > objects fully optional. Only the returned pointer into the resource > determines whether transfer_map has been successful. > > (1) and (2) should help reduce the call overhead. (3) is a preparation > for (4). (4) should help kill all the code required to allocate, > initialize, and free the pipe_transfer struct when it's needed by > neither a driver nor a state tracker. Sounds good to me. Something else that might be useful at some point would be another entrypoint which passes in a pointer to the data to be transferred. This could allow saving a CPU copy in cases where the state tracker already has the data in the form appropriate for the transfer. For drivers that can't take advantage of that, there could be a utility function which uses the get_transfer and transfer_destroy hooks. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Debian, X and DRI developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
On Tue, Jan 10, 2012 at 6:20 PM, Jose Fonseca wrote: > > > - Original Message - >> On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca >> wrote: >> > - Original Message - >> >> The flag is optional, it doesn't have to implemented by everybody. >> >> If >> >> we do the uploads in the state tracker, we will also do any >> >> required >> >> data transformation so that drivers don't have to do it at all. >> > >> > The same can be said for everything. >> > >> > I don't object adding yet another code path specific to a subset of >> > hardware, provided the benefits justify its existence. >> > >> > But honestly I'm not yet convinced of this, as there was no attempt >> > to backup with solid arguments why this matters. >> > >> > Furthermore this violates one of the principles gallium (and all 3D >> > apis) have of unmapping all resources when drawing. >> >> If my idea violates one principle of all 3D APIs, this violates them >> all: >> http://www.opengl.org/registry/specs/AMD/pinned_memory.txt > > Indeed. > >> It allows reading user memory by hardware, and neither the driver nor >> the hardware is notified when the memory contents are changed by the >> user. >> >> BTW, I don't insist on this. I thought it would be a nice addition >> allowing user buffer uploads to be moved out of (especially radeon) >> drivers. If you really believe it's a bad optimization, feel free to >> revert. WIthout it, things wouldn't change for me at all... > > Fair enough. > >> > >> >> > It looks to me that state trackers and/or drivers are not >> >> > properly >> >> > using PIPE_USAGE_STREAM flag. As all cases where >> >> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do >> >> > it would be for the state tracker to set PIPE_USAGE_STREAM, the >> >> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping >> >> > permanently internally, making mapping/unmapping of such buffers >> >> > mere no-ops. >> >> >> >> We were doing that already and it wasn't good enough. Avoiding >> >> create+map+unmap+destroy *calls* have brought higher frame rates >> >> in >> >> apps with lots of draw operations. >> > >> > I understand the number map/unmap call is reduced. But how does >> > this interface change affect in any way the number of >> > create/destroy calls? >> >> create = get_transfer >> destroy = transfer_destroy > > Ah. BTW, we should probably unify create+map and destroy+unmap to reduce > overhead. I would probably helps here. > >> > Also, please provide app name and performance figures w/ this >> > change. >> >> OK. Torcs, the Forza track at the start. >> >> With u_upload_unmap before drawing: >> 21.4 - 22.1 fps >> >> Without u_upload_unmap: >> 22.7 - 23.1 fps > > >> The improvement is approximately 1.1 fps, which is probably too >> little >> for other people to care, but why throw it away? > > This is roughly 5%. It is still significant. > > But it surprises me it is so high. > > Perhaps we should have a couple of fast entrypoints for transfering to/from > buffers without the overhead of creating transfer objects. Okay so the plan is to keep refactoring the transfer API until the overhead disappears? Sounds good. I propose these changes: 1) Merge get_transfer with transfer_map. 2) Merge transfer_unmap with transfer_destroy. 3) Make the pipe_transfer struct fully opaque, but then there must be another way to return the strides of mapped textures (out parameters of transfer_map?). Think of this as decoupling driver-private transfer objects from ordinary return values like the strides. 4) The drivers which don't need transfer objects (e.g. non-texture transfers) can return a NULL pipe_transfer struct, making transfer objects fully optional. Only the returned pointer into the resource determines whether transfer_map has been successful. (1) and (2) should help reduce the call overhead. (3) is a preparation for (4). (4) should help kill all the code required to allocate, initialize, and free the pipe_transfer struct when it's needed by neither a driver nor a state tracker. Comments welcome. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
- Original Message - > On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca > wrote: > > - Original Message - > >> The flag is optional, it doesn't have to implemented by everybody. > >> If > >> we do the uploads in the state tracker, we will also do any > >> required > >> data transformation so that drivers don't have to do it at all. > > > > The same can be said for everything. > > > > I don't object adding yet another code path specific to a subset of > > hardware, provided the benefits justify its existence. > > > > But honestly I'm not yet convinced of this, as there was no attempt > > to backup with solid arguments why this matters. > > > > Furthermore this violates one of the principles gallium (and all 3D > > apis) have of unmapping all resources when drawing. > > If my idea violates one principle of all 3D APIs, this violates them > all: > http://www.opengl.org/registry/specs/AMD/pinned_memory.txt Indeed. > It allows reading user memory by hardware, and neither the driver nor > the hardware is notified when the memory contents are changed by the > user. > > BTW, I don't insist on this. I thought it would be a nice addition > allowing user buffer uploads to be moved out of (especially radeon) > drivers. If you really believe it's a bad optimization, feel free to > revert. WIthout it, things wouldn't change for me at all... Fair enough. > > > >> > It looks to me that state trackers and/or drivers are not > >> > properly > >> > using PIPE_USAGE_STREAM flag. As all cases where > >> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do > >> > it would be for the state tracker to set PIPE_USAGE_STREAM, the > >> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping > >> > permanently internally, making mapping/unmapping of such buffers > >> > mere no-ops. > >> > >> We were doing that already and it wasn't good enough. Avoiding > >> create+map+unmap+destroy *calls* have brought higher frame rates > >> in > >> apps with lots of draw operations. > > > > I understand the number map/unmap call is reduced. But how does > > this interface change affect in any way the number of > > create/destroy calls? > > create = get_transfer > destroy = transfer_destroy Ah. BTW, we should probably unify create+map and destroy+unmap to reduce overhead. I would probably helps here. > > Also, please provide app name and performance figures w/ this > > change. > > OK. Torcs, the Forza track at the start. > > With u_upload_unmap before drawing: > 21.4 - 22.1 fps > > Without u_upload_unmap: > 22.7 - 23.1 fps > The improvement is approximately 1.1 fps, which is probably too > little > for other people to care, but why throw it away? This is roughly 5%. It is still significant. But it surprises me it is so high. Perhaps we should have a couple of fast entrypoints for transfering to/from buffers without the overhead of creating transfer objects. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
On Tue, Jan 10, 2012 at 5:15 PM, Jose Fonseca wrote: > - Original Message - >> The flag is optional, it doesn't have to implemented by everybody. If >> we do the uploads in the state tracker, we will also do any required >> data transformation so that drivers don't have to do it at all. > > The same can be said for everything. > > I don't object adding yet another code path specific to a subset of hardware, > provided the benefits justify its existence. > > But honestly I'm not yet convinced of this, as there was no attempt to backup > with solid arguments why this matters. > > Furthermore this violates one of the principles gallium (and all 3D apis) > have of unmapping all resources when drawing. If my idea violates one principle of all 3D APIs, this violates them all: http://www.opengl.org/registry/specs/AMD/pinned_memory.txt It allows reading user memory by hardware, and neither the driver nor the hardware is notified when the memory contents are changed by the user. BTW, I don't insist on this. I thought it would be a nice addition allowing user buffer uploads to be moved out of (especially radeon) drivers. If you really believe it's a bad optimization, feel free to revert. WIthout it, things wouldn't change for me at all... > >> > It looks to me that state trackers and/or drivers are not properly >> > using PIPE_USAGE_STREAM flag. As all cases where >> > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do >> > it would be for the state tracker to set PIPE_USAGE_STREAM, the >> > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping >> > permanently internally, making mapping/unmapping of such buffers >> > mere no-ops. >> >> We were doing that already and it wasn't good enough. Avoiding >> create+map+unmap+destroy *calls* have brought higher frame rates in >> apps with lots of draw operations. > > I understand the number map/unmap call is reduced. But how does this > interface change affect in any way the number of create/destroy calls? create = get_transfer destroy = transfer_destroy > > Also, please provide app name and performance figures w/ this change. OK. Torcs, the Forza track at the start. With u_upload_unmap before drawing: 21.4 - 22.1 fps Without u_upload_unmap: 22.7 - 23.1 fps The improvement is approximately 1.1 fps, which is probably too little for other people to care, but why throw it away? Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
- Original Message - > On 01/10/2012 01:13 PM, Jose Fonseca wrote: > > - Original Message - > >> On 10.01.2012 12:29, Jose Fonseca wrote: > >>> Still catching up on email traffic during holidays... > >>> > >>> I agree that user buffer uploads should be moved out of drivers, > >>> but I don't think this is the way to go. > >> > >> I don't. If the state tracker uploads user buffers and presents > >> them > >> to > >> the driver as normal buffers, it will have to change the > >> pipe_vertex_buffers for each draw, causing annoying re-validation > >> overhead. > > > > This is not strictly necessary. We could simply use index_bias > > provided the vertex strides don't change (the likely scenario for > > the sort of cases you are talking here). Thomas did that something > > along those lines for the svga driver. > > > Changing the index bias changes the VertexID, so you can't. But I > suppose we could add a second index bias that doesn't affect it ... > > > And we could do much better than that: I'm talking about proper > > reverse-engineering of the application intention, such as using > > one-shot dynamic vertex buffers for one-shot data, and reusable > > static vertex buffers for immutable data. Employing both > > heuristics like scanning the user memory for changes and end of > > data; and also get information from the OS virtual memory > > subsystem (e.g., if the user memory pointer is in a read-only > > segment then it won't likely change, if it's in a stack segment it > > will likely be one-off, if its in malloc, then we can get an upper > > bound from the OS). This analysis / memory tracking is something > > too complex to do for each driver, but worthwhile if its done in > > the state tracker for all drivers benefit. > > > > I honestly think that's unnecessary to bother pipe drivers with > > user pointers and that we should put our bets on solving this > > problem efficiently once for all -- but I'm fine leaving these > > user pointers in the interface eternally, for sake of both > > skeptics and software renderers (who can always benefit from > > accessing user memory directly). > > > > > >> This is very bad if there are a lot of small draw calls. > >> Unfortunately many applications do this, and we do care about > >> these > >> cases. > >> > >> Also, another example I'm dealing with atm, it will be difficult > >> to > >> extract a small set of wide-spread vertices from a user buffer > >> into a > >> oneshot vertex buffer (there are apps where this really helps a > >> lot) > >> because you're changing the vertex index / gl_VertexID. > >> I can do that, because I know the hw has a special vertex-index > >> buffer > >> (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to > >> the > >> VERTEXID system value so I don't need to modify the shader. > > > > Backing user buffers on hardware buffers does not necessary imply > > remapping vertices -- that's merely how it is implemented now. We > > could either create a one-shot hardware buffer with the same size > > and fill in just the necessary spots; or better to recognize the > > underlying user memory buffer start and size and track changes. We > > could also expose this vertex-index buffer in the interface (other > > drivers could implement it as vertex texture). > > > > Filling in just the necessary spots wastes a lot of space, especially > if > these spots are re-used with different data (you need to reallocate > space), and checking if the relevant data changed is a performance > loss > in itself. Don't forget that the alternative to checking is to copy blindly, which also implies reading the source. If one uses hashes instead of compares, and one can determine statistically that certain contents is usually immutable, then one can guarantee that checking to be a win, especially considering the bandwidth difference on PCIe vs VRAM. After a couple of frames it should be clear whether a user memory buffer at a given address is static or dynamic. > > > > > Probably the best approach is a mixture: back big dynamic/static > > user buffers in individual hardware buffers, back small dynamic > > user buffers in linearly suballocated ranges of coarse sized > > hardware buffers (i.e., what we do now). > > > > In the end, I'll only be convinced removing user buffers is good if > the > new solution performs equally or better in all cases (or at least not > significantly worse in the ugliest of cases) ... Of course. As I said, I'm fine w/ leaving user buffers interface if just for software renderers sake. And this is all just an idea at this stage. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
On 01/10/2012 01:13 PM, Jose Fonseca wrote: > - Original Message - >> On 10.01.2012 12:29, Jose Fonseca wrote: >>> Still catching up on email traffic during holidays... >>> >>> I agree that user buffer uploads should be moved out of drivers, >>> but I don't think this is the way to go. >> >> I don't. If the state tracker uploads user buffers and presents them >> to >> the driver as normal buffers, it will have to change the >> pipe_vertex_buffers for each draw, causing annoying re-validation >> overhead. > > This is not strictly necessary. We could simply use index_bias provided the > vertex strides don't change (the likely scenario for the sort of cases you > are talking here). Thomas did that something along those lines for the svga > driver. > Changing the index bias changes the VertexID, so you can't. But I suppose we could add a second index bias that doesn't affect it ... > And we could do much better than that: I'm talking about proper > reverse-engineering of the application intention, such as using one-shot > dynamic vertex buffers for one-shot data, and reusable static vertex buffers > for immutable data. Employing both heuristics like scanning the user memory > for changes and end of data; and also get information from the OS virtual > memory subsystem (e.g., if the user memory pointer is in a read-only segment > then it won't likely change, if it's in a stack segment it will likely be > one-off, if its in malloc, then we can get an upper bound from the OS). > This analysis / memory tracking is something too complex to do for each > driver, but worthwhile if its done in the state tracker for all drivers > benefit. > > I honestly think that's unnecessary to bother pipe drivers with user pointers > and that we should put our bets on solving this problem efficiently once for > all -- but I'm fine leaving these user pointers in the interface eternally, > for sake of both skeptics and software renderers (who can always benefit from > accessing user memory directly). > > >> This is very bad if there are a lot of small draw calls. >> Unfortunately many applications do this, and we do care about these >> cases. >> >> Also, another example I'm dealing with atm, it will be difficult to >> extract a small set of wide-spread vertices from a user buffer into a >> oneshot vertex buffer (there are apps where this really helps a lot) >> because you're changing the vertex index / gl_VertexID. >> I can do that, because I know the hw has a special vertex-index >> buffer >> (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to the >> VERTEXID system value so I don't need to modify the shader. > > Backing user buffers on hardware buffers does not necessary imply remapping > vertices -- that's merely how it is implemented now. We could either create a > one-shot hardware buffer with the same size and fill in just the necessary > spots; or better to recognize the underlying user memory buffer start and > size and track changes. We could also expose this vertex-index buffer in the > interface (other drivers could implement it as vertex texture). > Filling in just the necessary spots wastes a lot of space, especially if these spots are re-used with different data (you need to reallocate space), and checking if the relevant data changed is a performance loss in itself. > > Probably the best approach is a mixture: back big dynamic/static user buffers > in individual hardware buffers, back small dynamic user buffers in linearly > suballocated ranges of coarse sized hardware buffers (i.e., what we do now). > In the end, I'll only be convinced removing user buffers is good if the new solution performs equally or better in all cases (or at least not significantly worse in the ugliest of cases) ... > > Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
- Original Message - > On Tue, Jan 10, 2012 at 12:29 PM, Jose Fonseca > wrote: > > Still catching up on email traffic during holidays... > > > > I agree that user buffer uploads should be moved out of drivers, > > but I don't think this is the way to go. > > > > This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes > > the ability to transform this data in any way before reashing the > > GPU -- i.e., prevents any sort of workarounds -- something can't > > be always guaranteed. Flushing with map helps is also > > non-portable. > > The flag is optional, it doesn't have to implemented by everybody. If > we do the uploads in the state tracker, we will also do any required > data transformation so that drivers don't have to do it at all. The same can be said for everything. I don't object adding yet another code path specific to a subset of hardware, provided the benefits justify its existence. But honestly I'm not yet convinced of this, as there was no attempt to backup with solid arguments why this matters. Furthermore this violates one of the principles gallium (and all 3D apis) have of unmapping all resources when drawing. > > It looks to me that state trackers and/or drivers are not properly > > using PIPE_USAGE_STREAM flag. As all cases where > > PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do > > it would be for the state tracker to set PIPE_USAGE_STREAM, the > > pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping > > permanently internally, making mapping/unmapping of such buffers > > mere no-ops. > > We were doing that already and it wasn't good enough. Avoiding > create+map+unmap+destroy *calls* have brought higher frame rates in > apps with lots of draw operations. I understand the number map/unmap call is reduced. But how does this interface change affect in any way the number of create/destroy calls? Also, please provide app name and performance figures w/ this change. Jose PS: Keith Whitwell de-fact no longer is the Gallium's "Benevolent Dictator for Life", but we'll need to institute/enforce more due process with Gallium interface changes to ensure that gallium doesn't become something radically different from its vision & future requirements. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
On Tue, Jan 10, 2012 at 12:29 PM, Jose Fonseca wrote: > Still catching up on email traffic during holidays... > > I agree that user buffer uploads should be moved out of drivers, but I don't > think this is the way to go. > > This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes the > ability to transform this data in any way before reashing the GPU -- i.e., > prevents any sort of workarounds -- something can't be always guaranteed. > Flushing with map helps is also non-portable. The flag is optional, it doesn't have to implemented by everybody. If we do the uploads in the state tracker, we will also do any required data transformation so that drivers don't have to do it at all. > > > It looks to me that state trackers and/or drivers are not properly using > PIPE_USAGE_STREAM flag. As all cases where PIPE_TRANSFER_MAP_PERMANENTLY > would be used, the right way to do it would be for the state tracker to set > PIPE_USAGE_STREAM, the pipe driver to recognize PIPE_USAGE_STREAM, and keep > the mapping permanently internally, making mapping/unmapping of such buffers > mere no-ops. We were doing that already and it wasn't good enough. Avoiding create+map+unmap+destroy *calls* have brought higher frame rates in apps with lots of draw operations. It may not be the ideal optimization, but so far it's the best we have. Marek > > > In summary, for me PIPE_TRANSFER_MAP_PERMANENTLY is premature/bad > optmization. Before we are worried about saving a couple of map/unmap calls, > we should ensure that PIPE_USAGE_STREAM/PIPE_USAGE_DYNAMIC code paths are > fully optimal. > > > Jose > > - Original Message - >> Please see the diff for further info. >> >> This paves the way for moving user buffer uploads out of drivers and >> should >> allow to clean up the mess in u_upload_mgr in the meantime. >> >> For now only allowed for buffers on r300 and r600. >> --- >> src/gallium/drivers/i915/i915_resource_buffer.c | 7 ++- >> src/gallium/drivers/i915/i915_resource_texture.c | 7 ++- >> src/gallium/drivers/llvmpipe/lp_texture.c | 4 >> src/gallium/drivers/nouveau/nouveau_buffer.c | 8 +++- >> src/gallium/drivers/nv50/nv50_transfer.c | 2 +- >> src/gallium/drivers/nvc0/nvc0_transfer.c | 2 +- >> src/gallium/drivers/nvfx/nvfx_transfer.c | 3 +++ >> src/gallium/drivers/r300/r300_transfer.c | 4 >> src/gallium/drivers/r600/r600_texture.c | 4 >> src/gallium/drivers/svga/svga_resource_buffer.c | 4 >> src/gallium/drivers/svga/svga_resource_texture.c | 2 +- >> src/gallium/include/pipe/p_defines.h | 16 >> >> 12 files changed, 57 insertions(+), 6 deletions(-) >> >> diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c >> b/src/gallium/drivers/i915/i915_resource_buffer.c >> index 77c0345..c54e481 100644 >> --- a/src/gallium/drivers/i915/i915_resource_buffer.c >> +++ b/src/gallium/drivers/i915/i915_resource_buffer.c >> @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, >> const struct pipe_box *box) >> { >> struct i915_context *i915 = i915_context(pipe); >> - struct pipe_transfer *transfer = >> util_slab_alloc(&i915->transfer_pool); >> + struct pipe_transfer *transfer; >> >> + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> + return NULL; >> + } >> + >> + transfer = util_slab_alloc(&i915->transfer_pool); >> if (transfer == NULL) >> return NULL; >> >> diff --git a/src/gallium/drivers/i915/i915_resource_texture.c >> b/src/gallium/drivers/i915/i915_resource_texture.c >> index 8ff733a..64d071c 100644 >> --- a/src/gallium/drivers/i915/i915_resource_texture.c >> +++ b/src/gallium/drivers/i915/i915_resource_texture.c >> @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context >> *pipe, >> { >> struct i915_context *i915 = i915_context(pipe); >> struct i915_texture *tex = i915_texture(resource); >> - struct i915_transfer *transfer = >> util_slab_alloc(&i915->texture_transfer_pool); >> + struct i915_transfer *transfer; >> boolean use_staging_texture = FALSE; >> >> + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> + return NULL; >> + } >> + >> + transfer = util_slab_alloc(&i915->texture_transfer_pool); >> if (transfer == NULL) >> return NULL; >> >> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c >> b/src/gallium/drivers/llvmpipe/lp_texture.c >> index ca38571..d86d493 100644 >> --- a/src/gallium/drivers/llvmpipe/lp_texture.c >> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c >> @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, >> assert(resource); >> assert(level <= resource->last_level); >> >> + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> + return NULL; >> + } >> + >> /* >> * Transfers, like other pipe operations, must happen in order, >> so flush the >> * context if necessary. >>
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
- Original Message - > On 10.01.2012 12:29, Jose Fonseca wrote: > > Still catching up on email traffic during holidays... > > > > I agree that user buffer uploads should be moved out of drivers, > > but I don't think this is the way to go. > > I don't. If the state tracker uploads user buffers and presents them > to > the driver as normal buffers, it will have to change the > pipe_vertex_buffers for each draw, causing annoying re-validation > overhead. This is not strictly necessary. We could simply use index_bias provided the vertex strides don't change (the likely scenario for the sort of cases you are talking here). Thomas did that something along those lines for the svga driver. And we could do much better than that: I'm talking about proper reverse-engineering of the application intention, such as using one-shot dynamic vertex buffers for one-shot data, and reusable static vertex buffers for immutable data. Employing both heuristics like scanning the user memory for changes and end of data; and also get information from the OS virtual memory subsystem (e.g., if the user memory pointer is in a read-only segment then it won't likely change, if it's in a stack segment it will likely be one-off, if its in malloc, then we can get an upper bound from the OS). This analysis / memory tracking is something too complex to do for each driver, but worthwhile if its done in the state tracker for all drivers benefit. I honestly think that's unnecessary to bother pipe drivers with user pointers and that we should put our bets on solving this problem efficiently once for all -- but I'm fine leaving these user pointers in the interface eternally, for sake of both skeptics and software renderers (who can always benefit from accessing user memory directly). > This is very bad if there are a lot of small draw calls. > Unfortunately many applications do this, and we do care about these > cases. > > Also, another example I'm dealing with atm, it will be difficult to > extract a small set of wide-spread vertices from a user buffer into a > oneshot vertex buffer (there are apps where this really helps a lot) > because you're changing the vertex index / gl_VertexID. > I can do that, because I know the hw has a special vertex-index > buffer > (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to the > VERTEXID system value so I don't need to modify the shader. Backing user buffers on hardware buffers does not necessary imply remapping vertices -- that's merely how it is implemented now. We could either create a one-shot hardware buffer with the same size and fill in just the necessary spots; or better to recognize the underlying user memory buffer start and size and track changes. We could also expose this vertex-index buffer in the interface (other drivers could implement it as vertex texture). Probably the best approach is a mixture: back big dynamic/static user buffers in individual hardware buffers, back small dynamic user buffers in linearly suballocated ranges of coarse sized hardware buffers (i.e., what we do now). Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
On 10.01.2012 12:29, Jose Fonseca wrote: > Still catching up on email traffic during holidays... > > I agree that user buffer uploads should be moved out of drivers, but I don't > think this is the way to go. I don't. If the state tracker uploads user buffers and presents them to the driver as normal buffers, it will have to change the pipe_vertex_buffers for each draw, causing annoying re-validation overhead. This is very bad if there are a lot of small draw calls. Unfortunately many applications do this, and we do care about these cases. Also, another example I'm dealing with atm, it will be difficult to extract a small set of wide-spread vertices from a user buffer into a oneshot vertex buffer (there are apps where this really helps a lot) because you're changing the vertex index / gl_VertexID. I can do that, because I know the hw has a special vertex-index buffer (nvc0) or manual VERTEX_ID attribute (nv50) that can be routed to the VERTEXID system value so I don't need to modify the shader. > This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes the > ability to transform this data in any way before reashing the GPU -- i.e., > prevents any sort of workarounds -- something can't be always guaranteed. > Flushing with map helps is also non-portable. > > > It looks to me that state trackers and/or drivers are not properly using > PIPE_USAGE_STREAM flag. As all cases where PIPE_TRANSFER_MAP_PERMANENTLY > would be used, the right way to do it would be for the state tracker to set > PIPE_USAGE_STREAM, the pipe driver to recognize PIPE_USAGE_STREAM, and keep > the mapping permanently internally, making mapping/unmapping of such buffers > mere no-ops. > > > In summary, for me PIPE_TRANSFER_MAP_PERMANENTLY is premature/bad > optmization. Before we are worried about saving a couple of map/unmap calls, > we should ensure that PIPE_USAGE_STREAM/PIPE_USAGE_DYNAMIC code paths are > fully optimal. > > > Jose > > - Original Message - >> Please see the diff for further info. >> >> This paves the way for moving user buffer uploads out of drivers and >> should >> allow to clean up the mess in u_upload_mgr in the meantime. >> >> For now only allowed for buffers on r300 and r600. >> --- >> src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- >> src/gallium/drivers/i915/i915_resource_texture.c |7 ++- >> src/gallium/drivers/llvmpipe/lp_texture.c|4 >> src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- >> src/gallium/drivers/nv50/nv50_transfer.c |2 +- >> src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- >> src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ >> src/gallium/drivers/r300/r300_transfer.c |4 >> src/gallium/drivers/r600/r600_texture.c |4 >> src/gallium/drivers/svga/svga_resource_buffer.c |4 >> src/gallium/drivers/svga/svga_resource_texture.c |2 +- >> src/gallium/include/pipe/p_defines.h | 16 >> >> 12 files changed, 57 insertions(+), 6 deletions(-) >> >> diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c >> b/src/gallium/drivers/i915/i915_resource_buffer.c >> index 77c0345..c54e481 100644 >> --- a/src/gallium/drivers/i915/i915_resource_buffer.c >> +++ b/src/gallium/drivers/i915/i915_resource_buffer.c >> @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, >>const struct pipe_box *box) >> { >> struct i915_context *i915 = i915_context(pipe); >> - struct pipe_transfer *transfer = >> util_slab_alloc(&i915->transfer_pool); >> + struct pipe_transfer *transfer; >> >> + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> + return NULL; >> + } >> + >> + transfer = util_slab_alloc(&i915->transfer_pool); >> if (transfer == NULL) >>return NULL; >> >> diff --git a/src/gallium/drivers/i915/i915_resource_texture.c >> b/src/gallium/drivers/i915/i915_resource_texture.c >> index 8ff733a..64d071c 100644 >> --- a/src/gallium/drivers/i915/i915_resource_texture.c >> +++ b/src/gallium/drivers/i915/i915_resource_texture.c >> @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context >> *pipe, >> { >> struct i915_context *i915 = i915_context(pipe); >> struct i915_texture *tex = i915_texture(resource); >> - struct i915_transfer *transfer = >> util_slab_alloc(&i915->texture_transfer_pool); >> + struct i915_transfer *transfer; >> boolean use_staging_texture = FALSE; >> >> + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { >> + return NULL; >> + } >> + >> + transfer = util_slab_alloc(&i915->texture_transfer_pool); >> if (transfer == NULL) >>return NULL; >> >> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c >> b/src/gallium/drivers/llvmpipe/lp_texture.c >> index ca38571..d86d493 100644 >> --- a/src/gallium/drivers/llvmpipe/lp_texture.c >> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c >> @@ -587,6 +587,10 @@ llvm
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
Still catching up on email traffic during holidays... I agree that user buffer uploads should be moved out of drivers, but I don't think this is the way to go. This "PIPE_TRANSFER_MAP_PERMANENTLY" means the driver relinquishes the ability to transform this data in any way before reashing the GPU -- i.e., prevents any sort of workarounds -- something can't be always guaranteed. Flushing with map helps is also non-portable. It looks to me that state trackers and/or drivers are not properly using PIPE_USAGE_STREAM flag. As all cases where PIPE_TRANSFER_MAP_PERMANENTLY would be used, the right way to do it would be for the state tracker to set PIPE_USAGE_STREAM, the pipe driver to recognize PIPE_USAGE_STREAM, and keep the mapping permanently internally, making mapping/unmapping of such buffers mere no-ops. In summary, for me PIPE_TRANSFER_MAP_PERMANENTLY is premature/bad optmization. Before we are worried about saving a couple of map/unmap calls, we should ensure that PIPE_USAGE_STREAM/PIPE_USAGE_DYNAMIC code paths are fully optimal. Jose - Original Message - > Please see the diff for further info. > > This paves the way for moving user buffer uploads out of drivers and > should > allow to clean up the mess in u_upload_mgr in the meantime. > > For now only allowed for buffers on r300 and r600. > --- > src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- > src/gallium/drivers/i915/i915_resource_texture.c |7 ++- > src/gallium/drivers/llvmpipe/lp_texture.c|4 > src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- > src/gallium/drivers/nv50/nv50_transfer.c |2 +- > src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- > src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ > src/gallium/drivers/r300/r300_transfer.c |4 > src/gallium/drivers/r600/r600_texture.c |4 > src/gallium/drivers/svga/svga_resource_buffer.c |4 > src/gallium/drivers/svga/svga_resource_texture.c |2 +- > src/gallium/include/pipe/p_defines.h | 16 > > 12 files changed, 57 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c > b/src/gallium/drivers/i915/i915_resource_buffer.c > index 77c0345..c54e481 100644 > --- a/src/gallium/drivers/i915/i915_resource_buffer.c > +++ b/src/gallium/drivers/i915/i915_resource_buffer.c > @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, >const struct pipe_box *box) > { > struct i915_context *i915 = i915_context(pipe); > - struct pipe_transfer *transfer = > util_slab_alloc(&i915->transfer_pool); > + struct pipe_transfer *transfer; > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > + transfer = util_slab_alloc(&i915->transfer_pool); > if (transfer == NULL) >return NULL; > > diff --git a/src/gallium/drivers/i915/i915_resource_texture.c > b/src/gallium/drivers/i915/i915_resource_texture.c > index 8ff733a..64d071c 100644 > --- a/src/gallium/drivers/i915/i915_resource_texture.c > +++ b/src/gallium/drivers/i915/i915_resource_texture.c > @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context > *pipe, > { > struct i915_context *i915 = i915_context(pipe); > struct i915_texture *tex = i915_texture(resource); > - struct i915_transfer *transfer = > util_slab_alloc(&i915->texture_transfer_pool); > + struct i915_transfer *transfer; > boolean use_staging_texture = FALSE; > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > + transfer = util_slab_alloc(&i915->texture_transfer_pool); > if (transfer == NULL) >return NULL; > > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c > b/src/gallium/drivers/llvmpipe/lp_texture.c > index ca38571..d86d493 100644 > --- a/src/gallium/drivers/llvmpipe/lp_texture.c > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c > @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, > assert(resource); > assert(level <= resource->last_level); > > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NULL; > + } > + > /* > * Transfers, like other pipe operations, must happen in order, > so flush the > * context if necessary. > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c > b/src/gallium/drivers/nouveau/nouveau_buffer.c > index f822625..02186ba 100644 > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c > @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context > *pipe, > { > struct nv04_resource *buf = nv04_resource(resource); > struct nouveau_context *nv = nouveau_context(pipe); > - struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer); > + struct nouveau_transfer *xfr; > + > + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { > + return NU
Re: [Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
Looks good on first sign. I'm waiting for this for quite some time now, cause it makes XvMC state tracker implementation more cleaner and faster. Acked-by: Christian König On 02.01.2012 01:22, Marek Olšák wrote: Please see the diff for further info. This paves the way for moving user buffer uploads out of drivers and should allow to clean up the mess in u_upload_mgr in the meantime. For now only allowed for buffers on r300 and r600. --- src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- src/gallium/drivers/i915/i915_resource_texture.c |7 ++- src/gallium/drivers/llvmpipe/lp_texture.c|4 src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- src/gallium/drivers/nv50/nv50_transfer.c |2 +- src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ src/gallium/drivers/r300/r300_transfer.c |4 src/gallium/drivers/r600/r600_texture.c |4 src/gallium/drivers/svga/svga_resource_buffer.c |4 src/gallium/drivers/svga/svga_resource_texture.c |2 +- src/gallium/include/pipe/p_defines.h | 16 12 files changed, 57 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c index 77c0345..c54e481 100644 --- a/src/gallium/drivers/i915/i915_resource_buffer.c +++ b/src/gallium/drivers/i915/i915_resource_buffer.c @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, const struct pipe_box *box) { struct i915_context *i915 = i915_context(pipe); - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + struct pipe_transfer *transfer; + if (usage& PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + + transfer = util_slab_alloc(&i915->transfer_pool); if (transfer == NULL) return NULL; diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 8ff733a..64d071c 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); struct i915_texture *tex = i915_texture(resource); - struct i915_transfer *transfer = util_slab_alloc(&i915->texture_transfer_pool); + struct i915_transfer *transfer; boolean use_staging_texture = FALSE; + if (usage& PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + + transfer = util_slab_alloc(&i915->texture_transfer_pool); if (transfer == NULL) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index ca38571..d86d493 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, assert(resource); assert(level<= resource->last_level); + if (usage& PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + /* * Transfers, like other pipe operations, must happen in order, so flush the * context if necessary. diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index f822625..02186ba 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe, { struct nv04_resource *buf = nv04_resource(resource); struct nouveau_context *nv = nouveau_context(pipe); - struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer); + struct nouveau_transfer *xfr; + + if (usage& PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + + xfr = CALLOC_STRUCT(nouveau_transfer); if (!xfr) return NULL; diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 6f860e7..8ddebeb 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -243,7 +243,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, uint32_t size; int ret; - if (usage& PIPE_TRANSFER_MAP_DIRECTLY) + if (usage& (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY)) return NULL; tx = CALLOC_STRUCT(nv50_transfer); diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index f168637..c04f41f 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -243,7 +243,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, uint32_t size; int ret; - if (usage& PIPE_TRANSFER_MAP_DIRECTLY) + if (usage& (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY))
[Mesa-dev] [PATCH 2/2] gallium: add flag PIPE_TRANSFER_MAP_PERMANENTLY
Please see the diff for further info. This paves the way for moving user buffer uploads out of drivers and should allow to clean up the mess in u_upload_mgr in the meantime. For now only allowed for buffers on r300 and r600. --- src/gallium/drivers/i915/i915_resource_buffer.c |7 ++- src/gallium/drivers/i915/i915_resource_texture.c |7 ++- src/gallium/drivers/llvmpipe/lp_texture.c|4 src/gallium/drivers/nouveau/nouveau_buffer.c |8 +++- src/gallium/drivers/nv50/nv50_transfer.c |2 +- src/gallium/drivers/nvc0/nvc0_transfer.c |2 +- src/gallium/drivers/nvfx/nvfx_transfer.c |3 +++ src/gallium/drivers/r300/r300_transfer.c |4 src/gallium/drivers/r600/r600_texture.c |4 src/gallium/drivers/svga/svga_resource_buffer.c |4 src/gallium/drivers/svga/svga_resource_texture.c |2 +- src/gallium/include/pipe/p_defines.h | 16 12 files changed, 57 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c index 77c0345..c54e481 100644 --- a/src/gallium/drivers/i915/i915_resource_buffer.c +++ b/src/gallium/drivers/i915/i915_resource_buffer.c @@ -68,8 +68,13 @@ i915_get_transfer(struct pipe_context *pipe, const struct pipe_box *box) { struct i915_context *i915 = i915_context(pipe); - struct pipe_transfer *transfer = util_slab_alloc(&i915->transfer_pool); + struct pipe_transfer *transfer; + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + + transfer = util_slab_alloc(&i915->transfer_pool); if (transfer == NULL) return NULL; diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 8ff733a..64d071c 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -720,9 +720,14 @@ i915_texture_get_transfer(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); struct i915_texture *tex = i915_texture(resource); - struct i915_transfer *transfer = util_slab_alloc(&i915->texture_transfer_pool); + struct i915_transfer *transfer; boolean use_staging_texture = FALSE; + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + + transfer = util_slab_alloc(&i915->texture_transfer_pool); if (transfer == NULL) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index ca38571..d86d493 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -587,6 +587,10 @@ llvmpipe_get_transfer(struct pipe_context *pipe, assert(resource); assert(level <= resource->last_level); + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + /* * Transfers, like other pipe operations, must happen in order, so flush the * context if necessary. diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index f822625..02186ba 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -172,7 +172,13 @@ nouveau_buffer_transfer_get(struct pipe_context *pipe, { struct nv04_resource *buf = nv04_resource(resource); struct nouveau_context *nv = nouveau_context(pipe); - struct nouveau_transfer *xfr = CALLOC_STRUCT(nouveau_transfer); + struct nouveau_transfer *xfr; + + if (usage & PIPE_TRANSFER_MAP_PERMANENTLY) { + return NULL; + } + + xfr = CALLOC_STRUCT(nouveau_transfer); if (!xfr) return NULL; diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 6f860e7..8ddebeb 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -243,7 +243,7 @@ nv50_miptree_transfer_new(struct pipe_context *pctx, uint32_t size; int ret; - if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY)) return NULL; tx = CALLOC_STRUCT(nv50_transfer); diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index f168637..c04f41f 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -243,7 +243,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, uint32_t size; int ret; - if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_MAP_PERMANENTLY)) return NULL; tx = CALLOC_STRUCT(nvc0_transfer); diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index 7a218b1..605af8e 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/ga