On Thu, Aug 14, 2014 at 12:21 PM, Roland Scheidegger <srol...@vmware.com> wrote: > Am 14.08.2014 16:39, schrieb Ilia Mirkin: >> I guess a question is whether we should even bother with the fine >> version at all then? Just map everything to DDX/DDY... Although I >> guess if llvmpipe does the coarse version sometimes, at least the fine >> version is warranted. > I think it's nice to have both versions. llvmpipe only does the coarse > version for its internal use.
Yeah, I also just found out that r600 evergreen+ can do either one, and at present does the coarse one. I'm going to push these out tonight unless there are further comments. (Pretty sure all the patches got R-b'd... if not, I'll obviously wait on those.) > If a shader would do a ddx and ddy and then use the values for a texture > instruction with explicit derivatives, some slower path is used for > sampling (which can handle different mip levels in a quad) (though this > currently depends a lot on debug vars such as no_quad_lod). The > problem is that even if you'd do a coarse_ddx, we still would fall back > to that slower path anyway, because (unlike intel hw where it really > matters if the actual lod values are different) we won't detect that > there is in fact just one lod per quad, so right now there would not > really be a benefit. Obviously, if you do the derivatives calculations > as part of the sampling itself, this is not a problem. FWIW the slow > path isn't actually all THAT much more complicated than the per-quad lod path > - strides, mip image offsets etc. need to be looked up per pixel rather > than per quad, plus some slowness comes from the fact that stupid > sse/avx doesn't have a true vector shift (only avx2 does)... There's also the > fact that the tex filter may be different per pixel too (with different > min/mag filter) though since we (in some cases at least with avx) do > texture sampling for multiple quads at once this is something which > needs to be handled in any case. I suspect hw being slower with > different effective lods per pixel has similar reasons - there's just > more work to be done. > > Roland > > > >> >> On Thu, Aug 14, 2014 at 10:12 AM, Roland Scheidegger <srol...@vmware.com> >> wrote: >>> Reviewed-by: Roland Scheidegger <srol...@vmware.com> >>> >>> llvmpipe also already does the fine version.
A coarse version (which we >>> indeed do when used implicitly for sampling though with some other >>> changes) might be minimally simpler though not even sure (might save a >>> shuffle instruction somewhere), but probably not worth it (plus, d3d10 >>> sm4 had deriv_rtx and sm5 deriv_rtx_coarse/deriv_rtx_fine but the sm4 >>> versions correspond to the fine versions so this was required). >>> >>> Roland >>> >>> Am 14.08.2014 06:52, schrieb Ilia Mirkin: >>>> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >>>> --- >>>> src/gallium/auxiliary/tgsi/tgsi_info.c | 3 +++ >>>> src/gallium/auxiliary/tgsi/tgsi_util.c | 2 ++ >>>> src/gallium/docs/source/screen.rst | 2 ++ >>>> src/gallium/docs/source/tgsi.rst | 12 ++++++++++-- >>>> src/gallium/drivers/freedreno/freedreno_screen.c | 1 + >>>> src/gallium/drivers/i915/i915_screen.c | 1 + >>>> src/gallium/drivers/ilo/ilo_screen.c | 1 + >>>> src/gallium/drivers/llvmpipe/lp_screen.c | 1 + >>>> src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + >>>> src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + >>>> src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + >>>> src/gallium/drivers/r300/r300_screen.c | 1 + >>>> src/gallium/drivers/r600/r600_pipe.c | 1 + >>>> src/gallium/drivers/radeonsi/si_pipe.c | 1 + >>>> src/gallium/drivers/softpipe/sp_screen.c | 1 + >>>> src/gallium/drivers/svga/svga_screen.c | 1 + >>>> src/gallium/drivers/vc4/vc4_screen.c | 1 + >>>> src/gallium/include/pipe/p_defines.h | 1 + >>>> src/gallium/include/pipe/p_shader_tokens.h | 5 ++++- >>>> 19 files changed, 35 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c >>>> b/src/gallium/auxiliary/tgsi/tgsi_info.c >>>> index e24348f..35f9747 100644 >>>> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c >>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c >>>> @@ -235,6 +235,9 @@ static const struct tgsi_opcode_info >>>> opcode_info[TGSI_OPCODE_LAST] = >>>> { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", >>>> TGSI_OPCODE_INTERP_CENTROID }, 
>>>> { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE }, >>>> { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET }, >>>> + >>>> + { 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE }, >>>> + { 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE }, >>>> }; >>>> >>>> const struct tgsi_opcode_info * >>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c >>>> b/src/gallium/auxiliary/tgsi/tgsi_util.c >>>> index e48159c..e1cba95 100644 >>>> --- a/src/gallium/auxiliary/tgsi/tgsi_util.c >>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c >>>> @@ -245,6 +245,8 @@ tgsi_util_get_inst_usage_mask(const struct >>>> tgsi_full_instruction *inst, >>>> case TGSI_OPCODE_USNE: >>>> case TGSI_OPCODE_IMUL_HI: >>>> case TGSI_OPCODE_UMUL_HI: >>>> + case TGSI_OPCODE_DDX_FINE: >>>> + case TGSI_OPCODE_DDY_FINE: >>>> /* Channel-wise operations */ >>>> read_mask = write_mask; >>>> break; >>>> diff --git a/src/gallium/docs/source/screen.rst >>>> b/src/gallium/docs/source/screen.rst >>>> index 814e3ae..6fecc15 100644 >>>> --- a/src/gallium/docs/source/screen.rst >>>> +++ b/src/gallium/docs/source/screen.rst >>>> @@ -213,6 +213,8 @@ The integer capabilities: >>>> * ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw >>>> arguments >>>> { count, instance_count, start, index_bias } from a PIPE_BUFFER >>>> resource. >>>> See pipe_draw_info. >>>> +* ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader >>>> supports >>>> + the FINE versions of DDX/DDY. >>>> >>>> >>>> .. _pipe_capf: >>>> diff --git a/src/gallium/docs/source/tgsi.rst >>>> b/src/gallium/docs/source/tgsi.rst >>>> index ac0ea54..7d5918f 100644 >>>> --- a/src/gallium/docs/source/tgsi.rst >>>> +++ b/src/gallium/docs/source/tgsi.rst >>>> @@ -433,7 +433,11 @@ This instruction replicates its result. >>>> dst = \cos{src.x} >>>> >>>> >>>> -.. opcode:: DDX - Derivative Relative To X >>>> +.. 
opcode:: DDX, DDX_FINE - Derivative Relative To X >>>> + >>>> +The fine variant is only used when ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE`` is >>>> +advertised. When it is, the fine version guarantees one derivative per row >>>> +while DDX is allowed to be the same for the entire 2x2 quad. >>>> >>>> .. math:: >>>> >>>> @@ -446,7 +450,11 @@ This instruction replicates its result. >>>> dst.w = partialx(src.w) >>>> >>>> >>>> -.. opcode:: DDY - Derivative Relative To Y >>>> +.. opcode:: DDY, DDY_FINE - Derivative Relative To Y >>>> + >>>> +The fine variant is only used when ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE`` is >>>> +advertised. When it is, the fine version guarantees one derivative per >>>> column >>>> +while DDY is allowed to be the same for the entire 2x2 quad. >>>> >>>> .. math:: >>>> >>>> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c >>>> b/src/gallium/drivers/freedreno/freedreno_screen.c >>>> index de69b14..b156d8b 100644 >>>> --- a/src/gallium/drivers/freedreno/freedreno_screen.c >>>> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c >>>> @@ -216,6 +216,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum >>>> pipe_cap param) >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> /* Stream output. 
*/ >>>> diff --git a/src/gallium/drivers/i915/i915_screen.c >>>> b/src/gallium/drivers/i915/i915_screen.c >>>> index ca3dd4a..53d5e75 100644 >>>> --- a/src/gallium/drivers/i915/i915_screen.c >>>> +++ b/src/gallium/drivers/i915/i915_screen.c >>>> @@ -231,6 +231,7 @@ i915_get_param(struct pipe_screen *screen, enum >>>> pipe_cap cap) >>>> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: >>>> case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: >>>> diff --git a/src/gallium/drivers/ilo/ilo_screen.c >>>> b/src/gallium/drivers/ilo/ilo_screen.c >>>> index bd6d8dd..991d2d0 100644 >>>> --- a/src/gallium/drivers/ilo/ilo_screen.c >>>> +++ b/src/gallium/drivers/ilo/ilo_screen.c >>>> @@ -433,6 +433,7 @@ ilo_get_param(struct pipe_screen *screen, enum >>>> pipe_cap param) >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> case PIPE_CAP_MAX_VERTEX_STREAMS: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> default: >>>> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c >>>> b/src/gallium/drivers/llvmpipe/lp_screen.c >>>> index 347b1af..f4f3257 100644 >>>> --- a/src/gallium/drivers/llvmpipe/lp_screen.c >>>> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c >>>> @@ -248,6 +248,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum >>>> pipe_cap param) >>>> case PIPE_CAP_SAMPLE_SHADING: >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> case PIPE_CAP_FAKE_SW_MSAA: >>>> return 1; >>>> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c >>>> b/src/gallium/drivers/nouveau/nv30/nv30_screen.c >>>> index 2860188..4766955 100644 >>>> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c >>>> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c >>>> @@ -148,6 +148,7 @@ 
nv30_screen_get_param(struct pipe_screen *pscreen, >>>> enum pipe_cap param) >>>> case PIPE_CAP_USER_VERTEX_BUFFERS: >>>> case PIPE_CAP_COMPUTE: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> } >>>> >>>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c >>>> b/src/gallium/drivers/nouveau/nv50/nv50_screen.c >>>> index 7b1b112..34cca3d 100644 >>>> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c >>>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c >>>> @@ -200,6 +200,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, >>>> enum pipe_cap param) >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> case PIPE_CAP_COMPUTE: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> } >>>> >>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >>>> index d372a0f..17aee63 100644 >>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >>>> @@ -184,6 +184,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, >>>> enum pipe_cap param) >>>> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: >>>> case PIPE_CAP_FAKE_SW_MSAA: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> } >>>> >>>> diff --git a/src/gallium/drivers/r300/r300_screen.c >>>> b/src/gallium/drivers/r300/r300_screen.c >>>> index 4e46f77..ad599e9 100644 >>>> --- a/src/gallium/drivers/r300/r300_screen.c >>>> +++ b/src/gallium/drivers/r300/r300_screen.c >>>> @@ -178,6 +178,7 @@ static int r300_get_param(struct pipe_screen* pscreen, >>>> enum pipe_cap param) >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> /* SWTCL-only features. 
*/ >>>> diff --git a/src/gallium/drivers/r600/r600_pipe.c >>>> b/src/gallium/drivers/r600/r600_pipe.c >>>> index 20d9f95..8a5ba79 100644 >>>> --- a/src/gallium/drivers/r600/r600_pipe.c >>>> +++ b/src/gallium/drivers/r600/r600_pipe.c >>>> @@ -319,6 +319,7 @@ static int r600_get_param(struct pipe_screen* pscreen, >>>> enum pipe_cap param) >>>> case PIPE_CAP_SAMPLE_SHADING: >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> /* Stream output. */ >>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c >>>> b/src/gallium/drivers/radeonsi/si_pipe.c >>>> index 879387f..0f10f3b 100644 >>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c >>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c >>>> @@ -254,6 +254,7 @@ static int si_get_param(struct pipe_screen* pscreen, >>>> enum pipe_cap param) >>>> case PIPE_CAP_FAKE_SW_MSAA: >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: >>>> diff --git a/src/gallium/drivers/softpipe/sp_screen.c >>>> b/src/gallium/drivers/softpipe/sp_screen.c >>>> index 7be39d4..5e2640d 100644 >>>> --- a/src/gallium/drivers/softpipe/sp_screen.c >>>> +++ b/src/gallium/drivers/softpipe/sp_screen.c >>>> @@ -194,6 +194,7 @@ softpipe_get_param(struct pipe_screen *screen, enum >>>> pipe_cap param) >>>> case PIPE_CAP_SAMPLE_SHADING: >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> case PIPE_CAP_FAKE_SW_MSAA: >>>> return 1; >>>> diff --git a/src/gallium/drivers/svga/svga_screen.c >>>> b/src/gallium/drivers/svga/svga_screen.c >>>> index 2fcc75c..d140f56 100644 >>>> --- a/src/gallium/drivers/svga/svga_screen.c >>>> +++ b/src/gallium/drivers/svga/svga_screen.c >>>> @@ -278,6 +278,7 @@ svga_get_param(struct 
pipe_screen *screen, enum >>>> pipe_cap param) >>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS: >>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: >>>> return 64; >>>> diff --git a/src/gallium/drivers/vc4/vc4_screen.c >>>> b/src/gallium/drivers/vc4/vc4_screen.c >>>> index c044c8e..7e59613 100644 >>>> --- a/src/gallium/drivers/vc4/vc4_screen.c >>>> +++ b/src/gallium/drivers/vc4/vc4_screen.c >>>> @@ -157,6 +157,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum >>>> pipe_cap param) >>>> case PIPE_CAP_MAX_TEXEL_OFFSET: >>>> case PIPE_CAP_MAX_VERTEX_STREAMS: >>>> case PIPE_CAP_DRAW_INDIRECT: >>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: >>>> return 0; >>>> >>>> /* Stream output. */ >>>> diff --git a/src/gallium/include/pipe/p_defines.h >>>> b/src/gallium/include/pipe/p_defines.h >>>> index 7a10d98..53d5d4b 100644 >>>> --- a/src/gallium/include/pipe/p_defines.h >>>> +++ b/src/gallium/include/pipe/p_defines.h >>>> @@ -562,6 +562,7 @@ enum pipe_cap { >>>> PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION = 99, >>>> PIPE_CAP_MAX_VERTEX_STREAMS = 100, >>>> PIPE_CAP_DRAW_INDIRECT = 101, >>>> + PIPE_CAP_TGSI_FS_FINE_DERIVATIVE = 102, >>>> }; >>>> >>>> #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) >>>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h >>>> b/src/gallium/include/pipe/p_shader_tokens.h >>>> index 0d3ad6a..2921f81 100644 >>>> --- a/src/gallium/include/pipe/p_shader_tokens.h >>>> +++ b/src/gallium/include/pipe/p_shader_tokens.h >>>> @@ -481,7 +481,10 @@ struct tgsi_property_data { >>>> #define TGSI_OPCODE_INTERP_SAMPLE 193 >>>> #define TGSI_OPCODE_INTERP_OFFSET 194 >>>> >>>> -#define TGSI_OPCODE_LAST 195 >>>> +#define TGSI_OPCODE_DDX_FINE 195 >>>> +#define TGSI_OPCODE_DDY_FINE 196 >>>> + >>>> +#define TGSI_OPCODE_LAST 197 >>>> >>>> #define TGSI_SAT_NONE 0 /* do not saturate */ >>>> #define TGSI_SAT_ZERO_ONE 1 /* 
clamp to [0,1] */ >>>> >>> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev