On Sun, 2015-10-18 at 21:31 -0700, Kristian Høgsberg Kristensen wrote: > I've been giving the SSBO functionality a closer look and I found a > few problems, each addressed by a patch in this series. The commit > messages describe the issues and the fixes, but the net effect takes > my vertex shader case from this: > > add(8) g7<1>D g2<0,1,0>D 16D { align1 1Q > compacted }; > mov(1) g9<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g16<1>UD 0D { align1 > WE_all 1Q }; > mov(1) g11<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g18<1>UD 0D { align1 > WE_all 1Q }; > mov(1) g13<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g20<1>UD 0D { align1 > WE_all 1Q }; > mov(1) g15<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g29<1>UD 0D { align1 > WE_all 1Q }; > mov(8) g32<1>UD g2<0,1,0>D { align1 1Q }; > mov(1) g22<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g31<1>UD 0D { align1 > WE_all 1Q }; > mov(1) g24<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g33<1>UD 0D { align1 > WE_all 1Q }; > mov(1) g26<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g35<1>UD 0D { align1 > WE_all 1Q }; > mov(1) g28<1>UD 0x00000000UD { align1 > WE_all compacted }; > mov(8) g37<1>UD 0D { align1 > WE_all 1Q }; > mov(8) g119<1>UD g1<8,8,1>UD { align1 > WE_all 1Q compacted }; > mov(8) g17<1>UD g7<8,8,1>D { align1 1Q }; > mov(1) g16.7<1>UD 65535D { align1 > WE_all }; > mov(1) g18.7<1>UD 65535D { align1 > WE_all }; > mov(1) g20.7<1>UD 65535D { align1 > WE_all }; > mov(1) g29.7<1>UD 65535D { align1 > WE_all }; > add(8) g34<1>UD g32<8,8,1>UD 0x00000004UD { align1 1Q > compacted }; > mov(1) g31.7<1>UD 65535D { align1 > WE_all }; > mov(1) g33.7<1>UD 65535D { align1 > WE_all }; > mov(1) g35.7<1>UD 65535D { align1 > WE_all }; > mov(1) g37.7<1>UD 65535D { align1 > WE_all }; > add(8) g19<1>UD g17<8,8,1>UD 0x00000004UD { align1 1Q > compacted }; > and(1) a0<1>UD g9<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD 
a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g10<1>UD g16<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > add(8) g36<1>UD g34<8,8,1>UD 0x00000004UD { align1 1Q > compacted }; > and(1) a0<1>UD g22<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g124<1>UD g31<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > and(1) a0<1>UD g24<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g125<1>UD g33<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > and(1) a0<1>UD g11<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g12<1>UD g18<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > add(8) g21<1>UD g19<8,8,1>UD 0x00000004UD { align1 1Q > compacted }; > add(8) g120<1>F g3<8,8,1>F g10<8,8,1>F { align1 1Q > compacted }; > and(1) a0<1>UD g26<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g126<1>UD g35<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > add(8) g38<1>UD g36<8,8,1>UD 0x00000004UD { align1 1Q > compacted }; > add(8) g121<1>F g4<8,8,1>F g12<8,8,1>F { align1 1Q > compacted }; > and(1) a0<1>UD g13<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g14<1>UD g20<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > add(8) g30<1>UD g21<8,8,1>UD 0x00000004UD { align1 1Q > compacted }; > and(1) a0<1>UD g28<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g127<1>UD g37<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > add(8) g122<1>F g5<8,8,1>F g14<8,8,1>F { align1 1Q > compacted }; > 
and(1) a0<1>UD g15<0,1,0>UD 0x000000ffUD { align1 > WE_all compacted }; > or(1) a0<1>UD a0<0,1,0>UD 0x04186e00UD { align1 > WE_all }; > send(8) g16<1>UD g29<8,8,1>UD a0<0,1,0>UD > dp data 1 indirect { > align1 1Q compacted }; > add(8) g123<1>F g6<8,8,1>F g16<8,8,1>F { align1 1Q > compacted }; > send(8) null g119<8,8,1>F > urb 1 SIMD8 write mlen 9 rlen 0 { > align1 1Q EOT }; > > to this: > > add(8) g7<1>D g2<0,1,0>D 16D { align1 1Q > compacted }; > mov(8) g9<1>UD g2<0,1,0>D { align1 1Q }; > mov(8) g119<1>UD g1<8,8,1>UD { align1 > WE_all 1Q compacted }; > mov(8) g8<1>UD g7<8,8,1>D { align1 1Q }; > send(8) g124<1>UD g9<8,8,1>UD > dp data 1 ( untyped surface read, Surface = 0, > SIMD8, Mask = 0x0) mlen 1 rlen 4 { align1 1Q }; > send(8) g7<1>UD g8<8,8,1>UD > dp data 1 ( untyped surface read, Surface = 0, > SIMD8, Mask = 0x0) mlen 1 rlen 4 { align1 1Q }; > add(8) g120<1>F g3<8,8,1>F g7<8,8,1>F { align1 1Q > compacted }; > add(8) g121<1>F g4<8,8,1>F g8<8,8,1>F { align1 1Q > compacted }; > add(8) g122<1>F g5<8,8,1>F g9<8,8,1>F { align1 1Q > compacted }; > add(8) g123<1>F g6<8,8,1>F g10<8,8,1>F { align1 1Q > compacted }; > send(8) null g119<8,8,1>F > urb 1 SIMD8 write mlen 9 rlen 0 { > align1 1Q EOT };
Thanks Kristian, that looks like a big win! I gave my Reviewed-by to patches 1 and 3. Patch 2 looks OK to me as well (I only had a minor comment), but I am not familiar enough with that code, so I'd rather let Curro give the Reviewed-by for that one. Iago > Kristian Høgsberg Kristensen (3): > i965: Don't use message headers for typed/untyped reads > i965/fs: Make emit_uniformize a no-op for immediates > i965/fs: Read all components of a SSBO field with one send > > src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +-- > src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- > src/mesa/drivers/dri/i965/brw_fs_builder.h | 3 +++ > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 25 +++++++------------------ > 4 files changed, 13 insertions(+), 22 deletions(-) > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev