VERSION | 2 src/egl/SConscript | 1 src/gallium/auxiliary/gallivm/lp_bld_const.c | 2 src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 5 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 42 +++++- src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 2 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 2 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 5 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 10 - src/gallium/drivers/freedreno/adreno_common.xml.h | 2 src/gallium/drivers/freedreno/adreno_pm4.xml.h | 2 src/gallium/drivers/nouveau/nouveau_buffer.c | 12 + src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 29 ++-- src/gallium/drivers/nouveau/nv30/nv30_resource.h | 3 src/gallium/drivers/nouveau/nv50/nv50_context.c | 6 src/gallium/drivers/nouveau/nv50/nv50_context.h | 2 src/gallium/drivers/nouveau/nv50/nv50_program.c | 1 src/gallium/drivers/nouveau/nv50/nv50_program.h | 1 src/gallium/drivers/nouveau/nv50/nv50_query.c | 15 +- src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 7 - src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 3 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 21 +-- src/gallium/drivers/r600/r600_pipe.h | 26 ---- src/gallium/drivers/r600/r600_state.c | 2 src/gallium/drivers/r600/r600_state_common.c | 25 ++++ src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 3 src/gallium/drivers/radeonsi/si_hw_context.c | 17 +- src/gallium/drivers/radeonsi/si_shader.h | 1 src/gallium/drivers/radeonsi/si_state.c | 24 +++ src/gallium/drivers/radeonsi/si_state.h | 2 src/gallium/drivers/radeonsi/si_state_shaders.c | 10 + src/gallium/drivers/vc4/vc4_qir.c | 1 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 3 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 3 src/glsl/linker.cpp | 64 +++++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 8 - src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 1 src/mesa/drivers/dri/i965/intel_blit.c | 72 +++++------ src/mesa/drivers/dri/i965/intel_pixel_read.c | 4 src/mesa/drivers/dri/i965/intel_tex_subimage.c | 4 src/mesa/main/formats.c | 106 
+++++++++++++++++ src/mesa/main/formats.csv | 16 ++ src/mesa/main/formats.h | 17 ++ src/mesa/main/glformats.c | 32 +++++ src/mesa/main/image.c | 58 ++++++++- src/mesa/main/image.h | 20 +-- src/mesa/main/readpix.c | 14 -- src/mesa/main/texcompress_fxt1.c | 2 src/mesa/main/texcompress_s3tc.c | 8 - src/mesa/main/texgetimage.c | 21 +-- src/mesa/main/texstore.c | 28 ++-- src/mesa/main/uniform_query.cpp | 35 ++++- src/mesa/main/uniforms.c | 72 +++++------ src/mesa/main/uniforms.h | 2 src/mesa/state_tracker/st_cb_readpixels.c | 4 src/mesa/swrast/s_drawpix.c | 14 +- src/mesa/swrast/s_texfetch.c | 16 ++ 57 files changed, 637 insertions(+), 273 deletions(-)
New commits: commit 271290f0774e123f221d6415e4b158e4d4b958cc Author: Emil Velikov <emil.l.veli...@gmail.com> Date: Sun Sep 6 19:30:23 2015 +0100 Update version to 11.0.0-rc3 Signed-off-by: Emil Velikov <emil.l.veli...@gmail.com> diff --git a/VERSION b/VERSION index f25fb60..c4bfb78 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -11.0.0-rc2 +11.0.0-rc3 commit 7bf27c2393e3d07f6293b30cc859a6ef2aa07212 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Thu Jul 2 18:44:18 2015 -0400 nouveau: don't mark full range as used on unmap with explicit flush Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit a778831735ea45f789c247c40677cd26adc78e3e) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 67e181e..912b778 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -532,8 +532,13 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe, struct nv04_resource *buf = nv04_resource(transfer->resource); if (tx->base.usage & PIPE_TRANSFER_WRITE) { - if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map) - nouveau_transfer_write(nv, tx, 0, tx->base.box.width); + if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + if (tx->map) + nouveau_transfer_write(nv, tx, 0, tx->base.box.width); + + util_range_add(&buf->valid_buffer_range, + tx->base.box.x, tx->base.box.x + tx->base.box.width); + } if (likely(buf->domain)) { const uint8_t bind = buf->base.bind; @@ -541,9 +546,6 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe, if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) nv->vbo_dirty = true; } - - util_range_add(&buf->valid_buffer_range, - tx->base.box.x, tx->base.box.x + tx->base.box.width); } if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE)) commit 7f80a2383ea4ecdf85ea16eed1d3aac2acc0a5f4 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Mon Aug 24 11:49:05 2015 
-0400 nv50: avoid using inline vertex data submit when gl_VertexID is used The hardware only generates vertexid when vertices come from a VBO. This fixes: vertexid-drawelements vertexid-drawarrays Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: "11.0" <mesa-sta...@lists.freedesktop.org> (cherry picked from commit c830d193db5c90cf0af57ff73606e2aa12aed9a8) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 02dc367..eff4477 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; + prog->vp.vertexid = 1; continue; default: break; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 5d3ff56..f4e8e94 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,6 +76,7 @@ struct nv50_program { ubyte psiz; /* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; + ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; } vp; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b304a17..66dcf43 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,7 +503,8 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, + { 
nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | + NV50_NEW_VERTPROG }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 600b973..e798473 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -293,7 +293,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) uint64_t addrs[PIPE_MAX_ATTRIBS]; uint32_t limits[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_vertex_stateobj *vertex = nv50->vertex; + struct nv50_vertex_stateobj dummy = {}; + struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy; struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; uint32_t mask; @@ -301,6 +302,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); + /* A vertexid is not generated for inline data uploads. Have to use a + * VBO. This check must come after the vertprog has been validated, + * otherwise vertexid may be unset. + */ + assert(nv50->vertprog->translated); + if (nv50->vertprog->vp.vertexid) + nv50->vbo_push_hint = 0; + if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; else commit 3e1fde76b6eea459ff4a22231c1d3cc73d9b6f9a Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Fri Jul 3 20:32:53 2015 -0400 nv50: don't flush vertex arrays when index buffer changes The index buffer is fed in inline over a pushbuf. It's not related to vertices or any caching that might be done on them. 
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 4a025c6bc835387a31007fdf30a130e612e54e19) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index f35326d..600b973 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -836,10 +836,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50->base.vbo_dirty = true; } - if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer && - nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) - nv50->base.vbo_dirty = true; - if (nv50->base.vbo_dirty) { BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); PUSH_DATA (push, 0); commit 747e1b03bfac3e32878a6f68002b5bb83194fad3 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Fri Jul 3 20:16:48 2015 -0400 nv50: rebind bo to bufctx when invalidating idxbuf storage There is nothing to be done on a dirty idxbuf, but the bo may have changed, so we have to rebind it to the bufctx. 
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 1f62d36ae21043c472fc182fd4b738ec1d54a2d2) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index f8d46db..152c2ce 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -199,9 +199,13 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (nv50->idxbuf.buffer == res) + if (nv50->idxbuf.buffer == res) { + /* Just rebind to the bufctx as there is no separate dirty bit */ + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); + BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD); if (!--ref) return ref; + } for (s = 0; s < 3; ++s) { assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); commit b85ec1e34b317accd7f69bb5f23bf9a7a8d84561 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Fri Jul 3 19:21:21 2015 -0400 nv50: clear buffer status on all vertex bufs, not just the first one Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 114cc18b98b6e016ab1986577aa3df12acc22cca) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index ca51ea1..f35326d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -317,7 +317,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; nv50->base.vbo_dirty = true; - break; } } } commit acb822f1bdb7f8f812d6b3f1196b5913c066e15a Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Thu Jan 1 06:09:59 2015 -0500 nv50: fix drawing from tfb, direct-to-pushbuf submits The stride was being set to 0, which is illegal (and also non-sensical). 
Also we must wait for the buffer to become available for reading as otherwise a wrong value may be prefetched. Since we must wait for the buffer anyways, and it's mapped and in GART, we may as well avoid the annoyance of the indirect pushbuf submit. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 75e34d1df8b0ab56e5e658b8ef90ff6057ec954e) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index ce12e71..82a728f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -197,7 +197,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); /* nv50_query.c */ void nv50_init_query_functions(struct nv50_context *); -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, +void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method, struct pipe_query *, unsigned result_offset); void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); void nva0_so_target_save_offset(struct pipe_context *, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index f4adbf8..5368ee7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -266,6 +266,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) nv50_query_get(push, q, 0, 0x1000f010); break; case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + q->sequence++; nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); break; case PIPE_QUERY_TIMESTAMP_DISJOINT: @@ -451,18 +452,18 @@ nv50_render_condition(struct pipe_context *pipe, } void -nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, +nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, struct pipe_query *pq, unsigned result_offset) { struct nv50_query *q = nv50_query(pq); - /* XXX: does this exist ? 
*/ -#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8)) + nv50_query_update(q); + if (q->state != NV50_QUERY_STATE_READY) + nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); + q->state = NV50_QUERY_STATE_READY; - PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART); - nouveau_pushbuf_space(push, 0, 0, 1); - nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | - NV50_IB_ENTRY_1_NO_PREFETCH); + BEGIN_NV04(push, SUBC_3D(method), 1); + PUSH_DATA (push, q->data[result_offset / 4]); } void diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index b033ce5..fdde11f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -641,12 +641,12 @@ nv50_stream_output_validate(struct nv50_context *nv50) PUSH_DATA (push, so->num_attribs[i]); if (n == 4) { PUSH_DATA(push, targ->pipe.buffer_size); - - BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); if (!targ->clean) { assert(targ->pq); - nv50_query_pushbuf_submit(push, targ->pq, 0x4); + nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), + targ->pq, 0x4); } else { + BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); PUSH_DATA(push, 0); targ->clean = false; } @@ -655,6 +655,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) (so->stride[i] * nv50->state.prim_size); prims = MIN2(prims, limit); } + targ->stride = so->stride[i]; BCTX_REFN(nv50->bufctx_3d, SO, buf, WR); } if (prims != ~0) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 6324726..ca51ea1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -736,9 +736,8 @@ nva0_draw_stream_output(struct nv50_context *nv50, BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); - PUSH_DATA (push, 0); - BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1); - 
nv50_query_pushbuf_submit(push, so->pq, 0x4); + PUSH_DATA (push, so->stride); + nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4); BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); commit ddf459492d04df02c9e3ed6471de0b21269694a4 Author: Oded Gabbay <oded.gab...@gmail.com> Date: Thu Sep 3 19:00:26 2015 +0300 llvmpipe: convert double to long long instead of unsigned long long round(val*dscale) produces a double result, as val and dscale are double. However, LLVMConstInt receives unsigned long long, so there is an implicit conversion from double to unsigned long long. This is an undefined behavior. Therefore, we need to first explicitly convert the round result to long long, and then let the compiler handle conversion from that to unsigned long long. This bug manifests itself in POWER, where all IMM values of -1 are being converted to 0 implicitly, causing a wrong LLVM IR output. Signed-off-by: Oded Gabbay <oded.gab...@gmail.com> CC: "10.6 11.0" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Tom Stellard <thomas.stell...@amd.com> Reviewed-by: Roland Scheidegger <srol...@vmware.com> (cherry picked from commit 4f2290d1612569686284609059d29a85c9de67cf) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index 0f5a8f8..9cd7c55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm, else { double dscale = lp_const_scale(type); - elem = LLVMConstInt(elem_type, round(val*dscale), 0); + elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0); } return elem; commit fcdaa190e558241607595d91cb1e6aa9e28746fc Author: Hans de Goede <hdego...@redhat.com> Date: Thu Sep 3 12:38:01 2015 +0200 nv30: Implement color resolve for msaa Note this is not ideal. 
Since the sifm can only do source sizes up to 1024x1024 we end up using the blitter on nv4x, which is not that fast. And on nv3x we end up using the cpu which is really slow. Cc: "10.6 11.0" <mesa-sta...@lists.freedesktop.org> Signed-off-by: Hans de Goede <hdego...@redhat.com> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> (cherry picked from commit 3c6c4d4f298ec81fe57992790a68aaab2e573519) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c index 2276347..76bb8b8 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c @@ -145,21 +145,18 @@ nv30_resource_copy_region(struct pipe_context *pipe, nv30_transfer_rect(nv30, NEAREST, &src, &dst); } -void -nv30_resource_resolve(struct pipe_context *pipe, - const struct pipe_resolve_info *info) +static void +nv30_resource_resolve(struct nv30_context *nv30, + const struct pipe_blit_info *info) { -#if 0 - struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rect src, dst; - define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0, - info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src); - define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0, - info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst); + define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x, + info->src.box.y, info->src.box.width, info->src.box.height, &src); + define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x, + info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst); nv30_transfer_rect(nv30, BILINEAR, &src, &dst); -#endif } void @@ -173,7 +170,7 @@ nv30_blit(struct pipe_context *pipe, info.dst.resource->nr_samples <= 1 && !util_format_is_depth_or_stencil(info.src.resource->format) && !util_format_is_pure_integer(info.src.resource->format)) { - debug_printf("nv30: color resolve unimplemented\n"); + nv30_resource_resolve(nv30, blit_info); return; } diff
--git a/src/gallium/drivers/nouveau/nv30/nv30_resource.h b/src/gallium/drivers/nouveau/nv30/nv30_resource.h index 8dac779..20d86b6 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h +++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h @@ -66,9 +66,6 @@ nv30_resource_copy_region(struct pipe_context *pipe, const struct pipe_box *src_box); void -nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *); - -void nv30_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info); commit 0abcd9c8fcc74428299630fa96bc51551b952324 Author: Hans de Goede <hdego...@redhat.com> Date: Wed Aug 12 13:39:42 2015 +0200 nv30: Fix creation of scanout buffers Scanout buffers on nv30 must always be non-swizzled and have special width alignment constraints. These constraints have been taken from the xf86-video-nouveau src/nv_accel_common.c: nouveau_allocate_surface() function. nouveau_allocate_surface() applies these width constraints only when a tiled attribute is set, which it sets for all surfaces allocated via dri, and this "tiling" is not the same as swizzling, scanout surfaces must be linear / have a uniform_pitch or only complete garbage is shown. This commit fixes dri3 on nv30 showing a garbled display, with dri3 the scanout buffers are allocated by mesa, rather than by the ddx, and the wrong stride of these buffers was causing the garbled display.
Cc: "10.6 11.0" <mesa-sta...@lists.freedesktop.org> Signed-off-by: Hans de Goede <hdego...@redhat.com> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> (cherry picked from commit 3329703eb116a7ad73bc694356b43e014532240b) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c index c75b4b9..2276347 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c @@ -28,6 +28,7 @@ #include "util/u_surface.h" #include "nv_m2mf.xml.h" +#include "nv_object.xml.h" #include "nv30/nv30_screen.h" #include "nv30/nv30_context.h" #include "nv30/nv30_resource.h" @@ -362,6 +363,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, blocksz = util_format_get_blocksize(pt->format); if ((pt->target == PIPE_TEXTURE_RECT) || + (pt->bind & PIPE_BIND_SCANOUT) || !util_is_power_of_two(pt->width0) || !util_is_power_of_two(pt->height0) || !util_is_power_of_two(pt->depth0) || @@ -369,6 +371,14 @@ nv30_miptree_create(struct pipe_screen *pscreen, util_format_is_float(pt->format) || mt->ms_mode) { mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz; mt->uniform_pitch = align(mt->uniform_pitch, 64); + if (pt->bind & PIPE_BIND_SCANOUT) { + struct nv30_screen *screen = nv30_screen(pscreen); + int pitch_align = MAX2( + screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256, + /* round_down_pow2(mt->uniform_pitch / 4) */ + 1 << (util_last_bit(mt->uniform_pitch / 4) - 1)); + mt->uniform_pitch = align(mt->uniform_pitch, pitch_align); + } } if (!mt->uniform_pitch) commit 0b14d3586338f304e2816e3395cdcc940d6073c1 Author: Boyan Ding <boyan.j.d...@gmail.com> Date: Wed Aug 26 19:52:50 2015 +0800 vc4: Initialize pack field of qreg to 0 in qir_get_temp This avoids generation of undefined packing in qir and qpu instructions, fixing a lot of rendering errors. Fixes 8b36d107fdd (vc4: Pack the unorm-packing bits into a src MUL instruction when possible.) 
Cc: mesa-sta...@lists.freedesktop.org Signed-off-by: Boyan Ding <boyan.j.d...@gmail.com> Reviewed-by: Eric Anholt <e...@anholt.net> Reviewed-by: Emil Velikov <emil.l.veli...@gmail.com> (cherry picked from commit 48de40ce9c45de154965490843f9e50407970c26) diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 9d93071..073ba5f 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -314,6 +314,7 @@ qir_get_temp(struct vc4_compile *c) reg.file = QFILE_TEMP; reg.index = c->num_temps++; + reg.pack = 0; if (c->num_temps > c->defs_array_size) { uint32_t old_size = c->defs_array_size; commit a6710090af7bfda005388d9ee8f108b3aeb15e57 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Fri Sep 4 19:02:28 2015 +0100 i965: Disallow PixelTransfer operations for tiled-memcpy TexImage/ReadPixels The tiled memcpy fast paths perform a simple blit (with only a couple of trivial pixel conversion routines) and do not accommodate PixelTransfer operations. Therefore if any are set, fallback to the regular routines. Note that PixelTransfer only applies to TexImage and ReadPixels, not to GetTexImage. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Jason Ekstrand <jason.ekstr...@intel.com> Cc: Kenneth Graunke <kenn...@whitecape.org> Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 099f5b3a62be1919add02a4cb887841c9f0f2fe4) diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 3fe506e..eb366cd 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -109,6 +109,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, pack->Invert) return false; + /* Only a simple blit, no scale, bias or other mapping. 
*/ + if (ctx->_ImageTransferState) + return false; + /* This renderbuffer can come from a texture. In this case, we impose * some of the same restrictions we have for textures and adjust for * miplevels. diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 31e511f..44921e5 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -118,6 +118,10 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, packing->Invert) return false; + /* Only a simple blit, no scale, bias or other mapping. */ + if (ctx->_ImageTransferState) + return false; + if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp, INTEL_UPLOAD)) return false; commit 0c98ba7abffd91a127c7bc5cc0ceaa1f2d2106fd Author: Kenneth Graunke <kenn...@whitecape.org> Date: Wed Sep 2 16:39:27 2015 -0700 i965: Fix copy propagation type changes. commit 472ef9a02f2e5c5d0caa2809cb736a0f4f0d4693 introduced code to change the types of SEL and MOV instructions for moves that simply "copy bits around". It didn't account for type conversion moves, however. So it would happily turn this: mov(8) vgrf6:D, -vgrf5:D mov(8) vgrf7:F, vgrf6:UD into this: mov(8) vgrf6:D, -vgrf5:D mov(8) vgrf7:D, -vgrf5:D which erroneously drops the conversion to float. 
Cc: "11.0 10.6" <mesa-sta...@lists.freedesktop.org> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> Reviewed-by: Matt Turner <matts...@gmail.com> (cherry picked from commit 2ace64fd598816fd1be9877962734242fc27b87b) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 5445ad5..230b0ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -279,6 +279,7 @@ static bool can_change_source_types(fs_inst *inst) { return !inst->src[0].abs && !inst->src[0].negate && + inst->dst.type == inst->src[0].type && (inst->opcode == BRW_OPCODE_MOV || (inst->opcode == BRW_OPCODE_SEL && inst->predicate != BRW_PREDICATE_NONE && commit eef8258a86b6df103cb31cfa6feeddc32ac4eb95 Author: Marek Olšák <marek.ol...@amd.com> Date: Tue Sep 1 04:14:43 2015 +0200 winsys/radeon: remove exported buffers from the cache Cc: 11.0 <mesa-sta...@lists.freedesktop.org> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> (cherry picked from commit efea7c3a3f91219db6e2fa3588388b6be4ecfa40) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 3a9ac44..7c19876 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -1126,6 +1126,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, memset(&flink, 0, sizeof(flink)); + if ((void*)bo != (void*)buffer) + pb_cache_manager_remove_buffer(buffer); + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!bo->flink_name) { flink.handle = bo->handle; commit 747cd2c27382f4cdd1cb9149447b677af340335e Author: Marek Olšák <marek.ol...@amd.com> Date: Tue Sep 1 04:14:33 2015 +0200 winsys/amdgpu: remove exported buffers from the cache Cc: 11.0 <mesa-sta...@lists.freedesktop.org> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> 
(cherry picked from commit 54964c77510b060806615c842692c0f393e807e6) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 50c42e3..fe55dc3 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -684,6 +684,9 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer, enum amdgpu_bo_handle_type type; int r; + if ((void*)bo != (void*)buffer) + pb_cache_manager_remove_buffer(buffer); + switch (whandle->type) { case DRM_API_HANDLE_TYPE_SHARED: type = amdgpu_bo_handle_type_gem_flink_name; commit ecdd69cd0509119adfd01c4fed512609963d0720 Author: Marek Olšák <marek.ol...@amd.com> Date: Tue Sep 1 04:07:54 2015 +0200 gallium/pb_bufmgr_cache: add a way to remove buffers from the cache explicitly This must be done before exporting a buffer as dmabuf fds, because we lose track of who is using it and can't trust the reference counter. Cc: 11.0 <mesa-sta...@lists.freedesktop.org> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> (cherry picked from commit 35d0f12797237cdd38e7fd2c39d3c19e875875ca) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 147ce39..1638d96 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider, unsigned bypass_usage, uint64_t maximum_cache_size); +/** + * Remove a buffer from the cache, but keep it alive. 
+ */ +void +pb_cache_manager_remove_buffer(struct pb_buffer *buf); struct pb_fence_ops; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 3b35049..cc8ae84 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr) } +static void +_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf) +{ + struct pb_cache_manager *mgr = buf->mgr; + + if (buf->head.next) { + LIST_DEL(&buf->head); + assert(mgr->numDelayed); + --mgr->numDelayed; + mgr->cache_size -= buf->base.size; + } + buf->mgr = NULL; +} + +void +pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf) +{ + struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf; + struct pb_cache_manager *mgr = buf->mgr; + + if (!mgr) + return; + + pipe_mutex_lock(mgr->mutex); + _pb_cache_manager_remove_buffer_locked(buf); + pipe_mutex_unlock(mgr->mutex); +} + /** * Actually destroy the buffer. */ static inline void _pb_cache_buffer_destroy(struct pb_cache_buffer *buf) { - struct pb_cache_manager *mgr = buf->mgr; - - LIST_DEL(&buf->head); - assert(mgr->numDelayed); - --mgr->numDelayed; - mgr->cache_size -= buf->base.size; + if (buf->mgr) + _pb_cache_manager_remove_buffer_locked(buf); assert(!pipe_is_referenced(&buf->base.reference)); pb_reference(&buf->buffer, NULL); FREE(buf); @@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf) struct pb_cache_buffer *buf = pb_cache_buffer(_buf); struct pb_cache_manager *mgr = buf->mgr; + if (!mgr) { + pb_reference(&buf->buffer, NULL); + FREE(buf); + return; + } + pipe_mutex_lock(mgr->mutex); assert(!pipe_is_referenced(&buf->base.reference)); commit 74fa10693227c08d227957e9544f60ee68b5762c Author: Kenneth Graunke <kenn...@whitecape.org> Date: Wed Sep 2 10:42:57 2015 -0700 glsl: Handle attribute aliasing in attribute storage limit check. 
In various versions of OpenGL and GLSL, it's possible to declare multiple VS input variables with aliasing attribute locations. So, when computing the storage requirements for vertex attributes, we can't simply add up the sizes. Instead, we need to look at the enabled slots. This patch begins tracking which attributes are double types that are larger than 128-bits (i.e. take up two vec4 slots). We then count normal attributes once, and count the double-size attributes a second time. Fixes deQP functional.attribute_location.bind_aliasing.max_cond_* tests on i965, which regressed with commit ad208d975a6d3aebe14f7c2c16039ee20. No Piglit changes on llvmpipe (which actually supports dvecs). Cc: "10.6 11.0" <mesa-sta...@lists.freedesktop.org> Tested-by: Mark Janes <mark.a.ja...@intel.com> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> Reviewed-by: Dave Airlie <airl...@redhat.com> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> (cherry picked from commit c3294ca5a13cf3f0eb3d9907a46ff8ce4bc2963b) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 47f7d25..934062f 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2339,6 +2339,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, */ unsigned used_locations = (max_index >= 32) ? ~0 : ~((1 << max_index) - 1); + unsigned double_storage_locations = 0; assert((target_index == MESA_SHADER_VERTEX) || (target_index == MESA_SHADER_FRAGMENT)); @@ -2452,34 +2453,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog, const unsigned slots = var->type->count_attribute_slots(); - /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes): - * - * "A program with more than the value of MAX_VERTEX_ATTRIBS active - * attribute variables may fail to link, unless device-dependent - * optimizations are able to make the program fit within available - * hardware resources. 
For the purposes of this test, attribute variables - * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, - * and dmat4 may count as consuming twice as many attributes as equivalent - * single-precision types. While these types use the same number of - * generic attributes as their single-precision equivalents, - * implementations are permitted to consume two single-precision vectors - * of internal storage for each three- or four-component double-precision - * vector." - * Until someone has a good reason in Mesa, enforce that now. - */ - if (target_index == MESA_SHADER_VERTEX) { - total_attribs_size += slots; - if (var->type->without_array() == glsl_type::dvec3_type || - var->type->without_array() == glsl_type::dvec4_type || - var->type->without_array() == glsl_type::dmat2x3_type || - var->type->without_array() == glsl_type::dmat2x4_type || - var->type->without_array() == glsl_type::dmat3_type || - var->type->without_array() == glsl_type::dmat3x4_type || - var->type->without_array() == glsl_type::dmat4x3_type || - var->type->without_array() == glsl_type::dmat4_type) - total_attribs_size += slots; - } - /* If the variable is not a built-in and has a location statically * assigned in the shader (presumably via a layout qualifier), make sure * that it doesn't collide with other assigned locations. Otherwise, @@ -2594,6 +2567,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } used_locations |= (use_mask << attr); + + /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes): + * + * "A program with more than the value of MAX_VERTEX_ATTRIBS + * active attribute variables may fail to link, unless + * device-dependent optimizations are able to make the program + * fit within available hardware resources. 
For the purposes + * of this test, attribute variables of the type dvec3, dvec4, + * dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may + * count as consuming twice as many attributes as equivalent + * single-precision types. While these types use the same number + * of generic attributes as their single-precision equivalents, + * implementations are permitted to consume two single-precision + * vectors of internal storage for each three- or four-component + * double-precision vector." + * + * Mark this attribute slot as taking up twice as much space + * so we can count it properly against limits. According to + * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this + * is optional behavior, but it seems preferable. + */ + const glsl_type *type = var->type->without_array(); + if (type == glsl_type::dvec3_type || + type == glsl_type::dvec4_type || + type == glsl_type::dmat2x3_type || + type == glsl_type::dmat2x4_type || + type == glsl_type::dmat3_type || + type == glsl_type::dmat3x4_type || + type == glsl_type::dmat4x3_type || + type == glsl_type::dmat4_type) { + double_storage_locations |= (use_mask << attr); + } } continue; @@ -2605,6 +2610,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } if (target_index == MESA_SHADER_VERTEX) { + unsigned total_attribs_size = + _mesa_bitcount(used_locations & ((1 << max_index) - 1)) + + _mesa_bitcount(double_storage_locations); if (total_attribs_size > max_index) { linker_error(prog, "attempt to use %d vertex attribute slots only %d available ", commit 1153420017873011a91367a4fd81cad6a3878023 Author: Ian Romanick <ian.d.roman...@intel.com> Date: Mon Aug 31 18:44:42 2015 -0700 mesa: Don't allow wrong type setters for matrix uniforms Previously we would allow glUniformMatrix4fv on a dmat4 and glUniformMatrix4dv on a mat4. Both are illegal. The latter also overwrites the storage for the mat4 and causes bad things to happen. Should fix the (new) arb_gpu_shader_fp64-wrong-type-setter piglit test.
Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> Reviewed-by: Timothy Arceri <t_arc...@yahoo.com.au> Cc: Dave Airlie <airl...@redhat.com> Cc: "10.6 11.0" <mesa-sta...@lists.freedesktop.org> (cherry picked from commit 7237c937af3b495191bee2f7240901e3a9daf1fb) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index fc2b5f5..0bee594 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -919,6 +919,31 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg, } } + /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec + * says: + * + * "If any of the following conditions occur, an INVALID_OPERATION + * error is generated by the Uniform* commands, and no uniform values + * are changed: + * + * ... + * + * - if the uniform declared in the shader is not of type boolean and