On Tue, 2014-01-07 at 21:49 +0100, Marek Olšák wrote: > FYI, Evergreen has dedicated instructions for both MAD and FMA. FMA > seems to be available on DX11 chips only.
FWIW, not all Evergreen chips support FMA, only high-end chips that support FP64 (I guess Cypress only), according to the ISA docs: > Instructions > FMA > Description > Fused single-precision multiply-add. Only for double-precision parts. > dst = src0 * src1 + src2 > Vadim > > Marek > > On Tue, Jan 7, 2014 at 8:20 PM, Roland Scheidegger <srol...@vmware.com> wrote: > > Yes that is certainly related. I'm actually not entirely sure what is > > allowed in GLSL by default as OpenGL seems to have some lax rules > > regarding precision in any case (float calculations not required but > > allowed to use denorms, at least earlier versions weren't required to > > support Infs either and so on). > > It is quite possible the "MAD" we were always using would have been > > allowed to really do fma (at least with OpenGL), unless the "precise" > > qualifier was used (which isn't supported yet?). > > TGSI also isn't really watertight about such issues either (that is if > > you use it with hw such as r300 then you certainly don't expect ieee754 > > rules to be followed but if you've got a d3d10-capable backend then you > > are expected to follow rules specified there which are _mostly_ > > ieee754-2008). > > So I'm not really sure if TGSI MAD should be allowed to do either > > rounding or not, but someday it should be figured out and spelled out > > explicitly in docs. > > > > Roland > > > > > > Am 07.01.2014 19:24, schrieb Maxence Le Doré: > >> I forgot the link: > >> > >> https://urldefense.proofpoint.com/v1/url?u=http://www.geeks3d.com/20120106/precise-qualifier-in-glsl-and-nvidia-geforce-cards/&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0A&m=%2FzSAl55KOH0z7T5qkRj6BX164wf6QpYOnJLIzojXBQc%3D%0A&s=0ac5e0fbd69867705f0c52090c9ddf84e7832be80e724a0983c5aa2f5dde72e0 > >> > >> 2014/1/7 Maxence Le Doré <maxence.led...@gmail.com>: > >>> For this reason, GLSL 4.0 introduces the 'precise' qualifier. 
I invite > >>> you to take a look at this article. > >>> > >>> 2014/1/6 Roland Scheidegger <srol...@vmware.com>: > >>>> Am 05.01.2014 01:34, schrieb Maxence Le Doré: > >>>>> FMA(a,b,c) keeps extra precision (usually 1 more bit of mantissa, > >>>>> afaik) for the result a*b and adds this to c, to finally produce an > >>>>> IEEE754 32-bit float result. > >>>>> > >>>>> MAD(a,b,c) produces an IEEE754 32-bit float product a*b and adds it to c. > >>>>> > >>>>> So, fma can be slightly more accurate. That accuracy is something > >>>>> very much appreciated. > >>>> > >>>> Actually in "newer" languages (such as OpenCL) "mad" is used to indicate > >>>> intermediate rounding does not matter, so if your cpu can do fma but not > >>>> mul+add in a single cycle it is allowed to use fma instead. > >>>> FMA OTOH of course forces no intermediate rounding. > >>>> Our TGSI definitions certainly initially meant that intermediate > >>>> rounding should take place; I don't know if we need to keep it that way > >>>> or could repurpose that slightly (so if you require the intermediate > >>>> rounding you'd just use mul+add). > >>>> > >>>> Roland > >>>> > >>>> > >>>> > >>>>> > >>>>> > >>>>> 2014/1/5 Marek Olšák <mar...@gmail.com>: > >>>>>> How is FMA different from MAD? > >>>>>> > >>>>>> Please document the new opcodes in src/gallium/docs/source/tgsi.rst. 
> >>>>>> > >>>>>> Marek > >>>>>> > >>>>>> On Sun, Jan 5, 2014 at 12:42 AM, Maxence Le Doré > >>>>>> <maxence.led...@gmail.com> wrote: > >>>>>>> From: Maxence Le Doré <Maxence Le Doré> > >>>>>>> > >>>>>>> --- > >>>>>>> src/gallium/auxiliary/tgsi/tgsi_info.c | 16 ++++++++++++++++ > >>>>>>> src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 6 ++++++ > >>>>>>> src/gallium/include/pipe/p_shader_tokens.h | 9 ++++++++- > >>>>>>> 3 files changed, 30 insertions(+), 1 deletion(-) > >>>>>>> > >>>>>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > >>>>>>> b/src/gallium/auxiliary/tgsi/tgsi_info.c > >>>>>>> index 0beef44..ed55940 100644 > >>>>>>> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > >>>>>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > >>>>>>> @@ -221,6 +221,12 @@ static const struct tgsi_opcode_info > >>>>>>> opcode_info[TGSI_OPCODE_LAST] = > >>>>>>> { 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, > >>>>>>> { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, > >>>>>>> { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, > >>>>>>> + { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, > >>>>>>> + { 1, 1, 0, 0, 0, 0, COMP, "POPCNT", TGSI_OPCODE_POPCNT }, > >>>>>>> + { 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB }, > >>>>>>> + { 1, 1, 0, 0, 0, 0, COMP, "ILSB", TGSI_OPCODE_ILSB }, > >>>>>>> + { 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB }, > >>>>>>> + { 1, 1, 0, 0, 0, 0, COMP, "ULSB", TGSI_OPCODE_ULSB }, > >>>>>>> }; > >>>>>>> > >>>>>>> const struct tgsi_opcode_info * > >>>>>>> @@ -321,6 +327,11 @@ tgsi_opcode_infer_type( uint opcode ) > >>>>>>> case TGSI_OPCODE_IABS: > >>>>>>> case TGSI_OPCODE_ISSG: > >>>>>>> case TGSI_OPCODE_IMUL_HI: > >>>>>>> + case TGSI_OPCODE_POPCNT: > >>>>>>> + case TGSI_OPCODE_ILSB: > >>>>>>> + case TGSI_OPCODE_IMSB: > >>>>>>> + case TGSI_OPCODE_ULSB: > >>>>>>> + case TGSI_OPCODE_UMSB: > >>>>>>> return TGSI_TYPE_SIGNED; > >>>>>>> default: > >>>>>>> return TGSI_TYPE_FLOAT; > >>>>>>> @@ -344,9 +355,14 @@ 
tgsi_opcode_infer_src_type( uint opcode ) > >>>>>>> case TGSI_OPCODE_SAMPLE_I: > >>>>>>> case TGSI_OPCODE_SAMPLE_I_MS: > >>>>>>> case TGSI_OPCODE_UMUL_HI: > >>>>>>> + case TGSI_OPCODE_POPCNT: > >>>>>>> + case TGSI_OPCODE_ULSB: > >>>>>>> + case TGSI_OPCODE_UMSB: > >>>>>>> return TGSI_TYPE_UNSIGNED; > >>>>>>> case TGSI_OPCODE_IMUL_HI: > >>>>>>> case TGSI_OPCODE_I2F: > >>>>>>> + case TGSI_OPCODE_ILSB: > >>>>>>> + case TGSI_OPCODE_IMSB: > >>>>>>> return TGSI_TYPE_SIGNED; > >>>>>>> case TGSI_OPCODE_ARL: > >>>>>>> case TGSI_OPCODE_ARR: > >>>>>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h > >>>>>>> b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h > >>>>>>> index 1ef78dd..cba0975 100644 > >>>>>>> --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h > >>>>>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h > >>>>>>> @@ -206,6 +206,12 @@ OP13(UCMP) > >>>>>>> > >>>>>>> OP12(IMUL_HI) > >>>>>>> OP12(UMUL_HI) > >>>>>>> +OP13(FMA) > >>>>>>> +OP11(POPCNT) > >>>>>>> +OP11(IMSB) > >>>>>>> +OP11(ILSB) > >>>>>>> +OP11(UMSB) > >>>>>>> +OP11(ULSB) > >>>>>>> > >>>>>>> #undef OP00 > >>>>>>> #undef OP01 > >>>>>>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h > >>>>>>> b/src/gallium/include/pipe/p_shader_tokens.h > >>>>>>> index 8010902..5ed0c34 100644 > >>>>>>> --- a/src/gallium/include/pipe/p_shader_tokens.h > >>>>>>> +++ b/src/gallium/include/pipe/p_shader_tokens.h > >>>>>>> @@ -453,7 +453,14 @@ struct tgsi_property_data { > >>>>>>> #define TGSI_OPCODE_IMUL_HI 180 > >>>>>>> #define TGSI_OPCODE_UMUL_HI 181 > >>>>>>> > >>>>>>> -#define TGSI_OPCODE_LAST 182 > >>>>>>> +#define TGSI_OPCODE_FMA 182 > >>>>>>> +#define TGSI_OPCODE_POPCNT 183 > >>>>>>> +#define TGSI_OPCODE_IMSB 184 > >>>>>>> +#define TGSI_OPCODE_ILSB 185 > >>>>>>> +#define TGSI_OPCODE_UMSB 186 > >>>>>>> +#define TGSI_OPCODE_ULSB 187 > >>>>>>> + > >>>>>>> +#define TGSI_OPCODE_LAST 188 > >>>>>>> > >>>>>>> #define TGSI_SAT_NONE 0 /* do not saturate */ > >>>>>>> #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ 
> >>>>>>> -- > >>>>>>> 1.8.5.2 > >>>>>>> > >>>>>>> _______________________________________________ > >>>>>>> mesa-dev mailing list > >>>>>>> mesa-dev@lists.freedesktop.org > >>>>>>> https://urldefense.proofpoint.com/v1/url?u=http://lists.freedesktop.org/mailman/listinfo/mesa-dev&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0A&m=UbogYegyhJOd8MD5pXDHVLpkdfH0gvvci5OW50vTexw%3D%0A&s=4da499b672e3e50826f923f340c51cfaa5c79830951ccc3034abd52a1b7402a8 > >>>>> _______________________________________________ > >>>>> mesa-dev mailing list > >>>>> mesa-dev@lists.freedesktop.org > >>>>> https://urldefense.proofpoint.com/v1/url?u=http://lists.freedesktop.org/mailman/listinfo/mesa-dev&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0A&m=UbogYegyhJOd8MD5pXDHVLpkdfH0gvvci5OW50vTexw%3D%0A&s=4da499b672e3e50826f923f340c51cfaa5c79830951ccc3034abd52a1b7402a8 > >>>>> > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev