[FFmpeg-devel] [PATCH] checkasm/lpc: test compute_autocorr
--- tests/checkasm/lpc.c | 36 1 file changed, 36 insertions(+) diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c index 592e34c03d..8e92a9e1b4 100644 --- a/tests/checkasm/lpc.c +++ b/tests/checkasm/lpc.c @@ -57,10 +57,40 @@ static void test_window(int len) bench_new(src, len, dst1); } +static void test_compute_autocorr(int lag) +{ +LOCAL_ALIGNED(16, double, src, [5000]); +LOCAL_ALIGNED(16, double, dst0, [32]); +LOCAL_ALIGNED(16, double, dst1, [32]); +const size_t len = 5000; + +declare_func(void, const double *in, ptrdiff_t len, int lag, double *out); + +for (size_t i = 0; i < len; i++) { +src[i] = (double)rnd() / (double)UINT_MAX; +} + +call_ref(src, len, lag, dst0); +call_new(src, len, lag, dst1); + +for (size_t i = 0; i < lag; i++) { +if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) { +fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n", +i, dst0[i], dst1[i], dst0[i] - dst1[i]); +fail(); +break; +} +} + +bench_new(src, len, lag, dst1); +} + void checkasm_check_lpc(void) { LPCContext ctx; int len = rnd() % 5000; +static const int lags[] = { 10, 30, 32 }; + ff_lpc_init(, 32, 16, FF_LPC_TYPE_DEFAULT); if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) { @@ -73,5 +103,11 @@ void checkasm_check_lpc(void) } report("apply_welch_window_odd"); +for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) { +if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i])) +test_compute_autocorr(lags[i]); +report("compute_autocorr_%d", lags[i]); +} + ff_lpc_end(); } -- 2.43.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm/lpc: test compute_autocorr
Le maanantaina 11. joulukuuta 2023, 22.41.03 EET Rémi Denis-Courmont a écrit : > --- > tests/checkasm/lpc.c | 36 > 1 file changed, 36 insertions(+) > > diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c > index 592e34c03d..8e92a9e1b4 100644 > --- a/tests/checkasm/lpc.c > +++ b/tests/checkasm/lpc.c > @@ -57,10 +57,40 @@ static void test_window(int len) > bench_new(src, len, dst1); > } > > +static void test_compute_autocorr(int lag) > +{ > +LOCAL_ALIGNED(16, double, src, [5000]); > +LOCAL_ALIGNED(16, double, dst0, [32]); > +LOCAL_ALIGNED(16, double, dst1, [32]); > +const size_t len = 5000; > + > +declare_func(void, const double *in, ptrdiff_t len, int lag, double > *out); + > +for (size_t i = 0; i < len; i++) { > +src[i] = (double)rnd() / (double)UINT_MAX; Not sure if we should test negative numbers here. > +} > + > +call_ref(src, len, lag, dst0); > +call_new(src, len, lag, dst1); Presumably src needs to be offset by one element, as the first iteration of the loop reads at offset minus one (in C code: sum1 += ...). 
> + > +for (size_t i = 0; i < lag; i++) { > +if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) { > +fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n", > +i, dst0[i], dst1[i], dst0[i] - dst1[i]); > +fail(); > +break; > +} > +} > + > +bench_new(src, len, lag, dst1); > +} > + > void checkasm_check_lpc(void) > { > LPCContext ctx; > int len = rnd() % 5000; > +static const int lags[] = { 10, 30, 32 }; > + > ff_lpc_init(, 32, 16, FF_LPC_TYPE_DEFAULT); > > if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) > { @@ -73,5 +103,11 @@ void checkasm_check_lpc(void) > } > report("apply_welch_window_odd"); > > +for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) { > +if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i])) > +test_compute_autocorr(lags[i]); > +report("compute_autocorr_%d", lags[i]); > +} > + > ff_lpc_end(); > } -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg a écrit : >C908 >ssd_int8_vs_int16_c: 207.7 >ssd_int8_vs_int16_rvv_i32: 28.0 At a quick glance, it won't work if the input length is not a multiple of the vector length. Also do you really need to extend accumulators to 32 bits? ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
Le 30 décembre 2023 15:00:53 GMT+01:00, flow gg a écrit : >> At a quick glance, it won't work if the input length is not a multiple of >the vector length. > >Why? You're not handling tails as far as I see. > I tried 1024, 32*3, 32*7 and all passed the test. They're all multiples of the vector length. >> Also do you really need to extend accumulators to 32 bits? > >It won't overflow after the test is changed, so it's not needed anymore. >I have modified it in this reply. > >Rémi Denis-Courmont 于2023年12月30日周六 20:15写道: > >> >> >> Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg a >> écrit : >> >C908 >> >ssd_int8_vs_int16_c: 207.7 >> >ssd_int8_vs_int16_rvv_i32: 28.0 >> >> At a quick glance, it won't work if the input length is not a multiple of >> the vector length. >> >> Also do you really need to extend accumulators to 32 bits? >> ___ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> >> To unsubscribe, visit link above, or email >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". >> >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 2/3] checkasm/svqenc: add ssd_int8_vs_int16 test
Le 29 décembre 2023 12:57:01 GMT+01:00, flow gg a écrit : >Tests on x86 might fail, possibly due to a 16-bit sub overflow I don't know anything about the SVQ encoder. Still, especially for an encoder, overflows are probably not expected. So then it is as Martin wrote. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate: Allow running multiple rounds of tests with differing settings
Le tiistaina 12. joulukuuta 2023, 0.14.06 EET Martin Storsjö a écrit : > This can be used to run tests multple times, with e.g. differing > QEMU settings, by adding something like this to the FATE configuration > file: > > target_exec="qemu-aarch64-static" > fate_targets="fate-checkasm fate-cpu" > > fate_environments="sve128 sve256 sve512" > sve128_env="QEMU_CPU=max,sve128=on" > sve256_env="QEMU_CPU=max,sve256=on" > sve512_env="QEMU_CPU=max,sve512=on" I'm fine with that, but for the sake of generality, shouldn't rather the entire target_exec prefix be indirected? Some runners may want to use command line flags rather than environment variables. -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
Le lauantaina 30. joulukuuta 2023, 18.20.15 EET flow gg a écrit : > I mistook it, seeing the vector length as the length of the vector register > .. > I have modified it in this reply. Setting element size to 8-bit is unnecessary, and a widening subtraction can presumably avoid the sign extension. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le keskiviikkona 3. tammikuuta 2024, 2.56.12 EET Lynne a écrit : > As some of you know, my laptop died nearly 2 years ago, and > I've been working on a desktop machine, which is currently a Zen 3. > AVX512 has become more popular in the meantime, with Zen 4 > and future AMD CPUs shipping with it, but currently, we have very > little AVX512. Frankly, generally speaking, I don't think it makes sense to buy laptops for development *unless* desktop systems are not an option. And here, a desktop system is not only an option, but it is the technically better and already purchased option. A desktop is cheaper, faster, more serviceable and more incrementally upgradeable. More prosaically a desktop system is much more suitable to occupational well-being - laptops are awfully inadequate in terms of ergonomy, unless they are docked, at which point they become expensive under-provisioned desktops. A laptop would of course be necessary whilst spending extended periods of your time away from home. But if so, that would be a discretionary lifestyle choice. There is nothing wrong with doing that per se, but I really don't think that an open-source foundation should be addressing discretionary lifestyle choices. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le lauantaina 6. tammikuuta 2024, 19.59.47 EET Michael Niedermayer a écrit : > What i do with my laptop is i have it on this thing: > https://www.amazon.de/gp/product/B072PZLZ25 > That can adjust tilt, rotate and height (and of course it can be moved > around on the table) > put a good keyboard below it and a good mouse to its right. I think that I already addressed that up-thread? A docked laptop is basically the same as a desktop system, but more expensive and less powerful. I can't imagine that Lynne would use the laptop for development from her home, whilst she has already gotten quite the S-class monster of a desktop workstation for that purpose, whose technical specifications are sure to outclass any contemporary laptop. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le 6 janvier 2024 20:26:42 GMT+02:00, Michael Niedermayer a écrit : > >I think some kind of remotely usable system does make sense for every volunteer >who wants to work. It simply results in more available time for that work. > >Even i (who doesnt travel volunteerly around) have needed and used my notebook >for FFmpeg away from my desktop system many times. >When ive spend some time in appartments of other familiy members, when i had to >change my own apartment due to very noisy neighbors and so forth > >Maybe a compromise would be a cheap laptop that is just used to login and >access the more powerfull hardware via SSH ? That sounds much more sensible indeed. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le lauantaina 6. tammikuuta 2024, 12.38.28 EET Lynne a écrit : > Emergencies could happen, but progress must always happen. Laptops are more prone to breaking, and as already noted less serviceable. The whole premise is that your current laptop broke after just 2 years, while the normally (fiscally) expected lifetime of a laptop is 3 years. Don't get me wrong! I don't deny that emergencies of that sort do happen to software engineers. Considering the "running cost" of a skilled software engineer, many employers will want to minimise the risk that they get bogged down by lack of a development computer, and the inability to carry a laptop with them on business trips. But then, whose emergency would that be exactly? If an entity has dire need of your continued ability to work, then they should take the measures and costs. That's just not something that the FFmpeg foundation should bear. Except maybe for Michael, I think the project will do just fine if any developer is out of a computer for a week, to be honest. Finally, the flip side of this is that the ergonomy and performance of your FFmpeg development environment is at least as critical, if not more, to your continued ability to work. In other words, if a developer is critical to the project, then it is detrimental to the project if they use a laptop, because laptops are slower and less healthy. > Also, I think some developers here would disagree with the notion that > desktop machines are always the best option, and I think I that subjects > such as ergonomy, uselessness when not docked, That's simply not a matter of subjective opinion of a hypothetical developer. The point about ergonomy is generally accepted among specialists based on serious studies. And by specialists I mean medical doctors and occupational healthcare therapists, not FFmpeg developers. That's not "subjective" in my book. > being less serviceable, are > subjective metrics. 
How about you count the number of parts that can be independently replaced in a laptop vs a desktop. Care to explain how that metric is "subjective" exactly? And that's not even counting that some of the serviceable laptop parts are more or less model-dependent. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le lauantaina 6. tammikuuta 2024, 18.21.00 EET Lynne a écrit : > As for whether this is a lifestyle choice, we generally pay for anything > that involves conferences, from train tickets, planes, parking, and > sometimes for location/stand rent. I would personally agree that representing FFmpeg at (non-FFmpeg-specific) conferences is a choice of life style. But it is normal to refund reasonable expenses made to represent the project. > I'm asking for a useful bit of permanent hardware. I don't question that providing you with one development system with the relevant Vulkan hardware support and AVX-512 is (or was) justified. If you do all the work for free (or paid by some other entity than FFmpeg), that's indeed excellent ROI. But the "business" case for a *second* system with all the disadvantages of a laptop is frankly not so clear. -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le lauantaina 6. tammikuuta 2024, 18.13.33 EET Lynne a écrit : > A fire would put me out for much more than a week tbh. Whataboutism much? In this case, you would lose your internet access, and potentially spend a long time hospitalised. You're dodging the real issues here: why should *you* get a laptop? Sure, some of the stuff that you do will with high probability become important in the medium term (e.g. Vulkan video decoding stuff), but it does not seem so urgent as to justify purchasing a second computer. The foundation already invested in a well-above average price to equip you with a suitable desktop system for your development. It would be far more sensible to spend on updated or replacement parts for that system as needed, than to buy a whole new system just in case. Furthermore, there are quite a few key developers and system administrators in the project whose continued ability to work is at least equally critical. > Other than that, occasional trips, during which reviews still have to be > made. Plus, power analysis of whether AVX512 helps on current-gen mobile > devices. I don't see this as being too big of a thing to ask for, > considering how much we have and how much we receive each year, and how > very rarely requests are done. Nor something that deserves a lengthy > article on the benefits, ergonomics, and ongoing maintenance of desktop > versus mobile systems. Indeed, you should not have made the preposterous argument that my points were "subjective" when they were not. Then I would not have had to waste time elaborating. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
Le perjantaina 5. tammikuuta 2024, 2.56.18 EET flow gg a écrit : > One vset can be reduced, but vwsub should not be used in this case. I > modified it in this reply. Fair enough, but are you sure that that's faster than keeping the vsetvli and removing the sign extension? > Rémi Denis-Courmont 于2024年1月5日周五 00:00写道: > > > Le lauantaina 30. joulukuuta 2023, 18.20.15 EET flow gg a écrit : > > > I mistook it, seeing the vector length as the length of the vector > > > > register > > > > > .. > > > I have modified it in this reply. > > > > Setting element size to 8-bit is unnecessary, and a widening subtraction > > can > > presumably avoid the sign extension. > > > > -- > > レミ・デニ-クールモン > > http://www.remlab.net/ > > > > > > > > ___ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v2] checkasm: Generalize crash handling
Looks OK (not tested). -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
Le sunnuntaina 7. tammikuuta 2024, 3.33.39 EET flow gg a écrit : > I tested it, and indeed using vwsub is faster. Updated it in the reply. > > --- > > I have a question: if I tweak the load order a bit, using one less vset, it > leads to being slower (the patch I submitted is 13.2, if I make the > following change, the time would be 15.2). > But I thought it would be faster. I would guess that v0 is needed before v8 in the internal implementation of vwsub. This kind of makes sense as the element still need to be sign-extended. Thus vwsub ends up stalling the pipeline in wait for vle8 to complete. That's just a guess though, as I don't have internal cycle timing documentation. > - vsetvli t0, a2, e8, m2, tu, ma > - vle8.v v0, (a0) > - sub a2, a2, t0 > - vsetvli zero, t0, e16, m4, tu, ma > - vle16.v v8, (a1) > - vsetvli zero, t0, e8, m2, tu, ma > - vwsub.wv v16, v8, v0 > > + vsetvli t0, a2, e16, m4, tu, ma > + vle16.v v8, (a1) > + sub a2, a2, t0 > + vsetvli zero, t0, e8, m2, tu, ma > + vle8.v v0, (a0) > + vwsub.wv v16, v8, v0 -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
Le sunnuntaina 7. tammikuuta 2024, 10.36.23 EET flow gg a écrit : > Alright, I learned a bit more, so should we not consider the internal > implementation? You asked what the reason was for your counter-intuitive observations, and I provided a plausible hypothesis. Nothing more, nothing less. Of course we should take performance characteristics of real hardware into account, as is done on all other ISAs. The flip side however is that we might have to make tradeoffs when designs from other vendors come out exhibiting different characteristics. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
+vsetvli t0, a2, e8, m2, tu, ma +vle8.v v0, (a0) +sub a2, a2, t0 +vsetvli zero, t0, e16, m4, tu, ma +vle16.v v8, (a1) +vsetvli zero, t0, e8, m2, tu, ma +vwsub.wv v16, v8, v0 +vsetvli zero, t0, e16, m4, tu, ma It looks to me like the second vsetvli is unnecessary, and consequently the third as well. As for the later ones, please use `vsetvli zero, zero` if you intend to change SEW while preserving VL and the LMUL:SEW ratio. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le maanantaina 15. tammikuuta 2024, 16.06.32 EET Paul B Mahol a écrit : > > I agree with Remi's objections to this. > > > > Kieran > > Poor and irrelevant devs object and want to keep money for themself. Neither of us are poor, which makes this defamatory. While we may subjectively be irrelevant, that is completely inappropriate wording. For your reference, Nicolas was able to articulate that characterisation in a much more business-compatible fashion. So this is being reported to the CC. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Hardware purchase request: AVX512-capable laptop
Le maanantaina 15. tammikuuta 2024, 16.59.40 EET Lynne a écrit : > I've been pinging this for a week now and he hasn't reiterated > his position again or made it clearer. I think my position was clear. I don't see the point in reiterating it whilst we are evidently not going to reach an agreement. Besides, you have previously complained that my explanations were unnecessarily long. But since you bring it up and to sum up, I find it completely reasonable for FFmpeg to provision hardware with the features necessary to test your work, such as AVX-512 and Vulkan video decoding. But: 1) You already have been provided such hardware in the form of a desktop computer (and I am told that it was extremely expensive). 2) In general it makes more sense to get a desktop than a laptop for that purpose. Leaving aside those specific hardware requirements, I think it is completely reasonable for you to have a laptop, as most of us probably do. But I also think that it is not reasonable for the foundation to pay for personal laptops. Maybe you need a laptop specifically to work on FFmpeg for whatever reason. Then a cheap laptop for remote access, as Michael suggested, sounds like a reasonable compromise to me. Nevertheless, I think that: - If your employment requires you to work away from your desktop a lot, then your employer should provide the laptop. - If you want to work from your couch or from the beach (figuratively), that is really on you. -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Back port riscv: test for assembler support to 6.1
Le sunnuntaina 7. tammikuuta 2024, 6.20.29 EET Brad Smith a écrit : > I don't have a system. But I have attached what should be there or close > to back ports for 6.1 and 6.0. If someone could please build test these > patches. I have no objections but I do not have a test system either. In any case, the RISC-V support requires OS adaptation to detect multi-lettered extensions, and it is very unlikely that I will be able to test OpenBSD (I don't even know how it's supposed to work). -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm: Test whether direct cycle counter access works
Le torstaina 11. tammikuuta 2024, 14.53.05 EET Martin Storsjö a écrit : > This should print a nicer error message than crashing due to > an illegal instruction, if direct cycle counter access isn't > allowed. > > This matches the dav1d checkasm commit > 95a192549a448b70d9542e840c4e34b60d09b093. > --- > tests/checkasm/checkasm.c | 12 +++- > 1 file changed, 11 insertions(+), 1 deletion(-) > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index 994d64e96b..9c5abb53dc 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -754,6 +754,14 @@ static int bench_init_kperf(void) > static int bench_init_ffmpeg(void) > { > #ifdef AV_READ_TIME > +if (!checkasm_save_context()) { > +checkasm_set_signal_handler_state(1); > +AV_READ_TIME(); > +checkasm_set_signal_handler_state(0); > +} else { > +fprintf(stderr, "checkasm: unable to access cycle counter\n"); AV_READ_TIME() reads time, not cycles. If we want cycle count, then we should add a separate macro, as the two are different performance counters at least on RISC-V. As things stand, this code won't do anything on RISC-V, since AV_READ_TIME() actually reads, well, time, not cycles. > +return -1; > +} > printf("benchmarking with native FFmpeg timers\n"); > return 0; > #else > @@ -927,7 +935,9 @@ int checkasm_bench_func(void) > /* Indicate that the current test has failed */ > void checkasm_fail_func(const char *msg, ...) > { > -if (state.current_func_ver->cpu && state.current_func_ver->ok) { > +if (state.current_func_ver && state.current_func_ver->cpu && > +state.current_func_ver->ok) > +{ > va_list arg; > > print_cpu_name(); -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm: Test whether direct cycle counter access works
Le torstaina 11. tammikuuta 2024, 16.15.29 EET Martin Storsjö a écrit : > > AV_READ_TIME() reads time, not cycles. > > Right, I can adjust the wording. Exactly what kind of measurement > AV_READ_TIME returns varies between architectures and environments indeed. In practice, yes, but I would argue that it's a bug if it does not measure time. At the very least because the name is extremely misleading. > What about: > > checkasm: unable to execute platform specific timer > > > If we want cycle count, then we should add a separate macro, as the two > > are different performance counters at least on RISC-V. > > That's not what I try to do here, I just want to test whether the timer, > whatever we have in AV_READ_TIME, is usable. Sure, I can live with that, but I thought that checkasm actually preferred to measure cycles rather than time periods. > > As things stand, this code won't do anything on RISC-V, sinec > > AV_READ_TIME() actually reads, well, time, not cycles. > > Should I interpret this, as, the current AV_READ_TIME implementation on > RISC-V always succeeds, contrary to the previous implementation (with > rdcycle) which is unavailable on some systems, referencing > 05115a77e012331b6ff5e24bab40e75848447c62? Yes. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm: Generalize crash handling
Le 19 décembre 2023 14:02:00 GMT+02:00, "Martin Storsjö" a écrit : >This replaces the riscv specific handling from >7212466e735aa187d82f51dadbce957fe3da77f0 (which essentially is >reverted, together with 286d6742218ba0235c32876b50bf593cb1986353) >with a different implementation of the same (plus a bit more), based >on the corresponding feature in dav1d's checkasm, supporting both Unix >and Windows. > >See in particular dav1d commits >0b6ee30eab2400e4f85b735ad29a68a842c34e21 and >0421f787ea592fd2cc74c887f20b8dc31393788b, authored by >Henrik Gramner. > >The overall approach is the same; set up a signal handler, >store the state with setjmp/sigsetjmp, jump out of the crashing >function with longjmp/siglongjmp. > >The main difference is in what happens when the signal handler >is invoked. In the previous implementation, it would resume from >right before calling the crashing function, and then skip that call >based on the setjmp return value. > >In the imported implementation from dav1d, we return to right before >the check_func() call, which will skip testing the current function >(as the pointer is the same as it was before). 
> >Other differences are: >- Support for other signal handling mechanisms (Windows > AddVectoredExceptionHandler) >- Using RtlCaptureContext/RtlRestoreContext instead of setjmp/longjmp > on Windows with SEH (which adds the design limitation that it doesn't > return a value like setjmp does) >- Only catching signals once per function - if more than one > signal is delivered before signal handling is reenabled, any > signal is handled as it would without our handler >- Not using an arch specific signal handler written in assembly >--- > tests/checkasm/checkasm.c | 100 ++-- > tests/checkasm/checkasm.h | 79 ++--- > tests/checkasm/riscv/checkasm.S | 12 > 3 files changed, 140 insertions(+), 51 deletions(-) > >diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c >index 6318d9296b..668034c67f 100644 >--- a/tests/checkasm/checkasm.c >+++ b/tests/checkasm/checkasm.c >@@ -23,8 +23,10 @@ > #include "config.h" > #include "config_components.h" > >-#ifndef _GNU_SOURCE >-# define _GNU_SOURCE // for syscall (performance monitoring API), strsignal() >+#if CONFIG_LINUX_PERF >+# ifndef _GNU_SOURCE >+# define _GNU_SOURCE // for syscall (performance monitoring API) >+# endif > #endif > > #include >@@ -326,6 +328,7 @@ static struct { > const char *cpu_flag_name; > const char *test_name; > int verbose; >+int catch_signals; > } state; > > /* PRNG state */ >@@ -627,6 +630,64 @@ static CheckasmFunc *get_func(CheckasmFunc **root, const >char *name) > return f; > } > >+checkasm_context checkasm_context_buf; >+ >+/* Crash handling: attempt to catch crashes and handle them >+ * gracefully instead of just aborting abruptly. 
*/ >+#ifdef _WIN32 >+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) >+static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) { >+const char *err; >+ >+if (!state.catch_signals) >+return EXCEPTION_CONTINUE_SEARCH; >+ >+switch (e->ExceptionRecord->ExceptionCode) { >+case EXCEPTION_FLT_DIVIDE_BY_ZERO: >+case EXCEPTION_INT_DIVIDE_BY_ZERO: >+err = "fatal arithmetic error"; >+break; >+case EXCEPTION_ILLEGAL_INSTRUCTION: >+case EXCEPTION_PRIV_INSTRUCTION: >+err = "illegal instruction"; >+break; >+case EXCEPTION_ACCESS_VIOLATION: >+case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: >+case EXCEPTION_DATATYPE_MISALIGNMENT: >+case EXCEPTION_STACK_OVERFLOW: >+err = "segmentation fault"; >+break; >+case EXCEPTION_IN_PAGE_ERROR: >+err = "bus error"; >+break; >+default: >+return EXCEPTION_CONTINUE_SEARCH; >+} >+state.catch_signals = 0; >+checkasm_fail_func("%s", err); >+checkasm_load_context(); >+return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */ >+} >+#endif >+#else >+static void signal_handler(const int s) { >+if (state.catch_signals) { >+state.catch_signals = 0; >+checkasm_fail_func("%s", >+ s == SIGFPE ? "fatal arithmetic error" : >+ s == SIGILL ? "illegal instruction" : >+ s == SIGBUS ? "bus error" : >+ "segmentation fault"); >+checkasm_load_context(); Use of format string is probably not async-signal-safe. I would also be surprised if the load_context() function was safe in signal context. That's why the current code does pretty much nothing other than a long jump. >+} else { >+/* fall back to the default signal handler */ >+static const struct sigaction default_sa = { .sa_handler = SIG_DFL }; >+sigaction(s, _sa, NULL); >+raise(s); >+} >+} >+#endif >+ > /* Perform tests and benchmarks for the specified cpu flag if supported by >
Re: [FFmpeg-devel] [RFC] fftools/ffmpeg and libavdevice/sdl issue
Le 19 décembre 2023 14:51:21 GMT+02:00, Nicolas George a écrit : >Rémi Denis-Courmont (12023-12-19): >> Anton's objections are against the horrible hacks necessary to support >> Mac and Windows, as far as I understand him. > >I have not read that. If that is true, maybe he could start with >refraining from using expressions like “horrible hacks”. > >> Of course it's also objectionable for SDL to be modelled as a muxer, > >Sigh. Do we have to explain this once again? Devices have to present as >muxers and demuxers in order to be usable transparently by applications >designed for plain files. And anyway, the manner frames enter or leave a >device is orthogonal to the implementation of said device, so bringing >this question in the discussion is irrelevant. That's a horrible hack of the kind that makes one infer that whoever wrote the library doesn't understand API design. >> Running on the main thread (the initial thread of an address space) >> requires an external executable > >No. Or [citation needed]. I don't care if you disagree with the definition of "main thread" in the context of SDL. >> Besides, starting a new process without execution of an executable, in >> other words, forking without executing, is essentially impossible in a >> multithreaded Unix-like environment, > >It is less than standards-compliant and portable, but it is doable. You could certainly engineer a custom OS that would allow this, but I don't think that's really relevant, whilst the issue at stake is support for Apple's OS. Hence "essentially impossible" as opposed to "impossible". > >> since FFmpeg is not async-fork-safe. > >This is something that should be fixed, do you not think? First, good luck with that. Making FFmpeg work under POSIX fork-safe constraints is simply not realistic, not to mention the underlying libraries that FFmpeg would have to fork (pun unintended). 
If it were feasible, we wouldn't need to have this argument: somebody could just fix the SDL muxer internals without messing with the FFmpeg APIs. Second, even if you did succeed at this, the result would be unmaintainable, as you'd have to mind those constraints for all future code changes. And third, you would leak memory and resources of other threads that just happened to be allocated to the parent process at the time of fork. This is highly undesirable. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm: add test for dcmul_add
Will push soon unless there are objections ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm: add test for dcmul_add
Le sunnuntaina 19. marraskuuta 2023, 0.28.10 EET flow gg a écrit : > From 2785ce57f68dbb2373c951b9432afa73796f7cc1 Mon Sep 17 00:00:00 2001 > From: sunyuechi > Date: Sat, 18 Nov 2023 10:58:17 +0800 > Subject: [PATCH] checkasm: test for dcmul_add git-am reports the patch corrupt. -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm: Fix the signature of float_to_fixed24
Lgtm ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [ANNOUNCE] upcoming vote: TC/CC elections
Le 5 décembre 2023 15:28:54 GMT+02:00, James Almer a écrit : >On 12/5/2023 7:07 AM, Anton Khirnov wrote: >> Hi all, >> Both elections have now concluded. >> >> We have 36 votes for the CC election (70% turnout) and 38 votes for TC >> (75% turnout); raw votes in CSV format are attached. >> >> The CC members now are: >> * James Almer >> * Jean-Baptiste Kempf >> * Anton Khirnov >> * Ronald Bultje >> * Michael Niedermayer >> >> For TC, it seems that we have a tie. The system reports two winning >> sets, both of which contain: >> * Michael Niedermayer >> * Martin Storsjö >> * Mark Thompson >> * Anton Khirnov >> >> The final member is Jan Ekström in one set and Niklas Haas in the other. >> We should now consider how to break this tie. Some options suggested on >> IRC were: >> * run a new vote with just the two of them >> * randomly >> * have Rémi break the tie, as he said he accidentally voted incorrectly >>due to misinterpreting the documentation > >This doesn't feel nice, having one person decide like this. I don't want to do that anyway as that feels like a very uncomfortable position to be in. Nobody can force me. So that option is off the table. >> * expand the committee > >Six members mean a vote could end up in a tie. > >IMO, either we do a new vote with the two of them as options, or one of them >steps down. >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] doc: mention that for RISC-V, we prefer .S files
Le 5 décembre 2023 11:59:39 GMT+02:00, Jean-Baptiste Kempf a écrit : >$subject > >See attachment. I think that the non-ISA specification is a better reference than GNU/binutils. The latter takes some controversial liberties with the former. And while I blame LLVM as a project for sitting on the `.option arch` support patch set for months and months, I don't blame them for adhering to the specification where binutils doesn't. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] riscv: test for assembler support
This should fix the build on LLVM 16 and earlier, at the cost of turning all non-RVV optimisations off. --- Makefile| 6 +++--- configure | 5 - ffbuild/arch.mak| 1 + libavcodec/riscv/Makefile | 16 libavcodec/riscv/ac3dsp_init.c | 2 ++ libavcodec/riscv/audiodsp_init.c| 2 ++ libavcodec/riscv/bswapdsp_init.c| 2 ++ libavcodec/riscv/pixblockdsp_init.c | 2 ++ libswscale/riscv/Makefile | 2 +- libswscale/riscv/rgb2rgb.c | 2 ++ tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 2 +- tests/checkasm/checkasm.h | 5 - 13 files changed, 33 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 78652c47bd..2fc3e538c1 100644 --- a/Makefile +++ b/Makefile @@ -93,10 +93,10 @@ ffbuild/.config: $(CONFIGURABLE_COMPONENTS) SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS\ ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \ - ALTIVEC-OBJS VSX-OBJS RVV-OBJS MMX-OBJS X86ASM-OBJS \ + ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS\ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ - MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS SHLIBOBJS \ - STLIBOBJS HOSTOBJS TESTOBJS + MMI-OBJS LSX-OBJS LASX-OBJS RV-OBJS RVV-OBJS \ + OBJS SLIBOBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS define RESET $(1) := diff --git a/configure b/configure index d77c053226..7d2ee66000 100755 --- a/configure +++ b/configure @@ -2154,6 +2154,7 @@ ARCH_EXT_LIST_PPC=" " ARCH_EXT_LIST_RISCV=" +rv rvv " @@ -2679,7 +2680,8 @@ ppc4xx_deps="ppc" vsx_deps="altivec" power8_deps="vsx" -rvv_deps="riscv" +rv_deps="riscv" +rvv_deps="rv" loongson2_deps="mips" loongson3_deps="mips" @@ -6243,6 +6245,7 @@ elif enabled ppc; then elif enabled riscv; then +enabled rv && check_inline_asm rv '".option arch, +zbb\nrev8 t0, t1"' enabled rvv && check_inline_asm rvv '".option arch, +v\nvsetivli zero, 0, e8, m1, ta, ma"' elif enabled x86; then diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak index 39d76ee152..23a3feb090 100644 --- a/ffbuild/arch.mak +++ b/ffbuild/arch.mak 
@@ -15,6 +15,7 @@ OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes) OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes) +OBJS-$(HAVE_RV) += $(RV-OBJS) $(RV-OBJS-yes) OBJS-$(HAVE_RVV) += $(RVV-OBJS) $(RVV-OBJS-yes) OBJS-$(HAVE_MMX) += $(MMX-OBJS) $(MMX-OBJS-yes) diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index 2d0e6c19c8..74381e3648 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -1,14 +1,14 @@ OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o -OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \ - riscv/ac3dsp_rvb.o +OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o +RV-OBJS-$(CONFIG_AC3DSP) +=riscv/ac3dsp_rvb.o OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o -OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \ - riscv/audiodsp_rvf.o +OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o +RV-OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_rvf.o RVV-OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_rvv.o -OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_init.o \ - riscv/bswapdsp_rvb.o +OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_init.o +RV-OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_rvb.o RVV-OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_rvv.o OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_init.o RVV-OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_rvv.o @@ -34,8 +34,8 @@ OBJS-$(CONFIG_LLVIDENCDSP) += riscv/llvidencdsp_init.o RVV-OBJS-$(CONFIG_LLVIDENCDSP) += riscv/llvidencdsp_rvv.o OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_init.o RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o -OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \ - riscv/pixblockdsp_rvi.o +OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o +RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o 
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o RVV-OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_rvv.o diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c index 20f294f1de..92678ea810 100644 --- a/libavcodec/riscv/ac3dsp_init.c +++ b/libavcodec/riscv/ac3dsp_init.c @@ -29,10 +29,12 @@ void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
Re: [FFmpeg-devel] [PATCH] lavc/vc1dsp: R-V V inv_trans
Instruction scheduling could be better, especially on in-order CPUs. > +vzext.vf2 v8, v0 > +vadd.vx v8, v8, t2 > +vmax.vx v8, v8, zero > +vsetvli zero, t0, e8, m4, ta, ma You don't need to reset the AVL here, just pass zero. > +vnclipu.wiv0, v8, 0 > +vsetivli zero, 8, e8, mf2, ta, ma > +vsse64.v v0, (a0), a1 > +ret > +endfunc > + > +func ff_vc1_inv_trans_4x8_dc_rvv, zve32x > +lht2, (a2) > +slli t1, t2, 4 > +add t2, t2, t1 > +addi t2, t2, 4 > +srai t2, t2, 3 > +sh1addt2, t2, t2 > +slli t2, t2, 2 > +addi t2, t2, 64 > +srai t2, t2, 7 > +vsetivli zero, 8, e8, mf2, ta, ma > +vlse32.v v0, (a0), a1 > +lit0, 4*8 > +vsetvli zero, t0, e16, m4, ta, ma > +vzext.vf2 v4, v0 > +vadd.vx v4, v4, t2 > +vmax.vx v4, v4, zero > +vsetvli zero, t0, e8, m2, ta, ma > +vnclipu.wiv0, v4, 0 > +vsetivli zero, 8, e8, mf2, ta, ma > +vsse32.v v0, (a0), a1 > +ret > +endfunc > + > +func ff_vc1_inv_trans_8x4_dc_rvv, zve64x > +lht2, (a2) > +sh1addt2, t2, t2 > +addi t2, t2, 1 > +srai t2, t2, 1 > +slli t1, t2, 4 > +add t2, t2, t1 > +addi t2, t2, 64 > +srai t2, t2, 7 > +vsetivli zero, 8, e8, mf2, ta, ma > +vlse64.v v0, (a0), a1 > +lit0, 8*4 > +vsetvli zero, t0, e16, m4, ta, ma > +vzext.vf2 v4, v0 > +vadd.vx v4, v4, t2 > +vmax.vx v4, v4, zero > +vsetvli zero, t0, e8, m2, ta, ma > +vnclipu.wiv0, v4, 0 > +vsetivli zero, 8, e8, mf2, ta, ma > +vsse64.v v0, (a0), a1 > +ret > +endfunc > + > +func ff_vc1_inv_trans_4x4_dc_rvv, zve32x > +lht2, (a2) > +slli t1, t2, 4 > +add t2, t2, t1 > +addi t2, t2, 4 > +srai t2, t2, 3 > +slli t1, t2, 4 > +add t2, t2, t1 > +addi t2, t2, 64 > +srai t2, t2, 7 > +vsetivli zero, 4, e8, mf2, ta, ma > +vlse32.v v0, (a0), a1 > +lit0, 4*4 > +vsetvli zero, t0, e16, m2, ta, ma vsetivli > +vzext.vf2 v2, v0 > +vadd.vx v2, v2, t2 > +vmax.vx v2, v2, zero > +vsetvli zero, t0, e8, m1, ta, ma > +vnclipu.wiv0, v2, 0 > +vsetivli zero, 4, e8, mf2, ta, ma > +vsse32.v v0, (a0), a1 > +ret > +endfunc > diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c > index 62c8eb21fa..2caa3c6863 100644 > --- 
a/libavcodec/vc1dsp.c > +++ b/libavcodec/vc1dsp.c > @@ -1039,6 +1039,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) > ff_vc1dsp_init_arm(dsp); > #elif ARCH_PPC > ff_vc1dsp_init_ppc(dsp); > +#elif ARCH_RISCV > +ff_vc1dsp_init_riscv(dsp); > #elif ARCH_X86 > ff_vc1dsp_init_x86(dsp); > #elif ARCH_MIPS > diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h > index 7ed1776ca7..e3b90d2b62 100644 > --- a/libavcodec/vc1dsp.h > +++ b/libavcodec/vc1dsp.h > @@ -89,6 +89,7 @@ void ff_vc1dsp_init(VC1DSPContext* c); > void ff_vc1dsp_init_aarch64(VC1DSPContext* dsp); > void ff_vc1dsp_init_arm(VC1DSPContext* dsp); > void ff_vc1dsp_init_ppc(VC1DSPContext *c); > +void ff_vc1dsp_init_riscv(VC1DSPContext *c); > void ff_vc1dsp_init_x86(VC1DSPContext* dsp); > void ff_vc1dsp_init_mips(VC1DSPContext* dsp); > void ff_vc1dsp_init_loongarch(VC1DSPContext* dsp); -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] lavc/vc1dsp: R-V V inv_trans
Le tiistaina 5. joulukuuta 2023, 21.25.12 EET flow gg a écrit : > > This block can be folded into the next. You don't need to check VLENB > > twice. > > Changed. > > > Instruction scheduling could be better, especially on in-order CPUs. > > I put the vload at the front, and then proceeded with the t2 operation, but > I'm not sure... > > > You don't need to reset the AVL here, just pass zero. > > Changed. > > > vsetivli > > Changed. You changed more than I asked for. The immediate AVL is a 5-bit unsigned integer, so it should not be possible to assemble 32 or 64, unless you have a preprocessor that silently rewrites `vsetivli` into `vsetvli` (If so, that sounds very iffy because `vsetivli zero` has no scratch X register to work with). FWIW CanMV-K230 boards are on sale for under 500 RMB. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le 30 novembre 2023 23:13:59 GMT+02:00, "Martin Storsjö" a écrit : >On Thu, 30 Nov 2023, Rémi Denis-Courmont wrote: > >> You can already test it properly as things stand, and reporting is trivial, >> just not to the FATE website. The question is whether this is worth adding >> to FATE. > >More public test coverage is better than less, isn't it? That's a false dichotomy. >> In other words, is publishing on the FATE website worth making the tests >> coverage and/or the build time worse? > >By making the test coverage worse, you mean if I'd be doing the full testing >of many combinations already, and I'd stop doing that in order to do this >lesser testing instead? If I'd be doing it (I currently don't) I guess that >would be my concern, not others? No. The point is that this is adding a small hack that works for one specific case for a short while (testing Armv8 IMM8 and DP), but is known not to be sufficient anyway (for SVE, PAuth, RVV, etc). In the end, it's all about not adding inadequate interfaces and supporting/publishing bad solutions. It's certainly not as bad as if it were a public C API, but that doesn't make it good. Normally "insufficient" interfaces don't get merged for a variety of reasons. >>> Again, for SVE, I'd rather have testing with 1 config (the default, which >>> is longer vectors than one usually encounters in HW) rather than none at >>> all. It won't catch every theoretical issue but practically would catch >>> many things at least. >> >> I find that statement very misleading. This is not a question of testing 1 >> config vs 0. It's a question of testing 1 configuration vs all of them(*), >> and reporting that one vs reporting all of them elsewhere than >> FATE.ffmpeg.org. Until/unless somebody does the missing integration. > >Currently I test 0 of these configurations. I would like to test 1 such >config, and publish those results on the FATE website. I don't currently test >any form of "all configs". 
And if I wanted to make a private setup for testing >"all configs", I really don't see how it would be mutually exclusive with the >publicly posted test results from the one config? > >>> And in order to actually test BTI, one has to link with a sysroot that >>> also was built with BTI enabled - I currently use a sysroot extracted from >>> fedora for that. (And my tests for it use -Wl,-z,force-bti.) >> >> I can readily believe how much of a PITA that would be to set up. I can also >> believe that glibc won't allow masking the guarded page bit in mmap()/ >> mprotect(). >> >> That does not mean you need different builds to test each of the 4 possible >> combinations (or 3 if you ignore the case of BTI without PAC, which does not >> exist in real hardware). Once you have that build, you can test it with >> whichever QEMU CPU settings. > >I didn't mean to imply that one would have to do separate builds for all of >those. I currently don't do any testing with builds with >-mbranch-protection=standard (and with different sysroots), but I was >considering adding one such build, with the fedora sysroot - and only test one >single configuration with it (with QEMU's defaults of all features enabled). > > >So, to spell out your objection in simpler terms. You are firmly against >anybody posting test results on FATE that only include checkasm but not the >rest of the tests, because you consider that this can be misleading/confusing >to people reading the test results - is that right? > >Or would such a setup be acceptable to you, if someone would implement a way >of running the tests (either the full set or only a subset such as chckasm) >multiple times with different QEMU configurations, with the same build of >ffmpeg, within the same FATE run? > >// Martin >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". 
___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le 1 décembre 2023 09:55:15 GMT+02:00, "Martin Storsjö" a écrit : >On Fri, 1 Dec 2023, Rémi Denis-Courmont wrote: > >> Le 30 novembre 2023 23:13:59 GMT+02:00, "Martin Storsjö" >> a écrit : >>> On Thu, 30 Nov 2023, Rémi Denis-Courmont wrote: >> >>>> In other words, is publishing on the FATE website worth making the tests >>>> coverage and/or the build time worse? >>> >>> By making the test coverage worse, you mean if I'd be doing the full >>> testing of many combinations already, and I'd stop doing that in order to >>> do this lesser testing instead? If I'd be doing it (I currently don't) I >>> guess that would be my concern, not others? >> >> No. The point is that this is adding a small hack that works for one >> specific case for a short while (testing Armv8 IMM8 and DP), but is known >> not to be sufficient anyway (for SVE, PAuth, RVV, etc). > >I'll reiterate the question from the bottom of the mail, that you didn't >respond to. > >Would you be ok with a setup, where a FATE instance optionally can run a >subset of tests instead of the full suite, but run them multiple times with >e.g. different QEMU settings? That would allow repeating checkasm for all the >interesting cases - and if one really wanted to spend a lot of CPU time on it, >also could run the full FATE suite in all those configurations. Being able to run tests under a different runner/wrapper or the same runner with different settings, would be a lot more viable, indeed IMO > >// Martin >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
Le perjantaina 24. marraskuuta 2023, 0.39.39 EET flow gg a écrit : > Okay, changed src/libavcodec/riscv/ac3dsp_init.c: In function ‘ff_ac3dsp_init_riscv’: src/libavcodec/riscv/ac3dsp_init.c:39:33: warning: assignment to ‘void (*) (int32_t *, const float *, size_t)’ {aka ‘void (*)(int *, const float *, long unsigned int)’} from incompatible pointer type ‘void (*)(int32_t *, const float *, unsigned int)’ {aka ‘void (*)(int *, const float *, unsigned int)’} [- Wincompatible-pointer-types] 39 | c->float_to_fixed24 = ff_float_to_fixed24_rvv; | ^ Also the Makefile precondition is inaccurate. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
Le perjantaina 1. joulukuuta 2023, 20.35.10 EET Rémi Denis-Courmont a écrit : > Le perjantaina 24. marraskuuta 2023, 0.39.39 EET flow gg a écrit : > > Okay, changed > > src/libavcodec/riscv/ac3dsp_init.c: In function ‘ff_ac3dsp_init_riscv’: > src/libavcodec/riscv/ac3dsp_init.c:39:33: warning: assignment to ‘void (*) > (int32_t *, const float *, size_t)’ {aka ‘void (*)(int *, const float *, > long unsigned int)’} from incompatible pointer type ‘void (*)(int32_t *, > const float *, unsigned int)’ {aka ‘void (*)(int *, const float *, unsigned > int)’} [- Wincompatible-pointer-types] >39 | c->float_to_fixed24 = ff_float_to_fixed24_rvv; > > | ^ > > Also the Makefile precondition is inaccurate. Oh, and on C908, LMUL=8 is actually faster than LMUL=4. Generally speaking, you should maximise the LMUL unless there is a *specific* reason not to. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] RISC-V dev kit recommendations
Le perjantaina 1. joulukuuta 2023, 21.44.24 EET Sean McGovern a écrit : > If I wanted to purchase a RISC-V developer kit, does anyone have > suggestions of what to buy? Or even what to steer clear of? As this is FFmpeg-devel, I don't suppose you are looking for a microcontroller. To run Linux, the best option at the moment is the StarFive VisionFive 2. However it lacks all the new fancy extensions such as vectors, virtualisation and/or cryptography. In fact, I am not aware of commercial hardware with either of the latter two. The otherwise modest Canaan CanMV-K230 constitutes the only commercial device currently with vectors, including half-precision, at least to my knowledge. However it is currently on back-order. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm/ac3dsp: add float_to_fixed24 test
Le torstaina 23. marraskuuta 2023, 9.08.16 EET flow gg a écrit : > You should probably add the test case to tests/fate/checkasm.mak -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
Le torstaina 23. marraskuuta 2023, 1.17.03 EET flow gg a écrit : > Hello, I saw the new commit "avcodec/ac3dsp: make len a size_t in > float_to_fixed24." > > So I removed the part #if (__riscv_xlen == 64) and restored the patch. You're not checking for Zba. Also 'bnez' would be more logical than 'bgtz' for an unsigned counter. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 3/3] avcodec/ac3dsp: make len a size_t in float_to_fixed24
Le keskiviikkona 22. marraskuuta 2023, 21.49.13 EET James Almer a écrit : > Should simplify asm implementations, and prevent UB on at least win64. > > Signed-off-by: James Almer This one looks good to me, but I am utterly incompetent for the previous two. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] lavc/vc1dsp: R-V V inv_trans
Le sunnuntaina 3. joulukuuta 2023, 16.40.08 EET flow gg a écrit : > c910 > vc1dsp.vc1_inv_trans_4x4_dc_c: 84.0 > vc1dsp.vc1_inv_trans_4x4_dc_rvv_i32: 74.0 > vc1dsp.vc1_inv_trans_4x8_dc_c: 150.2 > vc1dsp.vc1_inv_trans_4x8_dc_rvv_i32: 83.5 > vc1dsp.vc1_inv_trans_8x4_dc_c: 129.0 > vc1dsp.vc1_inv_trans_8x4_dc_rvv_i64: 75.7 > vc1dsp.vc1_inv_trans_8x8_dc_c: 254.7 > vc1dsp.vc1_inv_trans_8x8_dc_rvv_i64: 90.5 The code below uses fractional multipliers, so I infer that the benchmarked code was significantly different, and the measurements are not really worth the bother. I know that supply is a problem at the moment, but I if you are going to keep this up, I would hope that ISCAS can get you access to an RVV 1.0 board. In-line... > diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c > new file mode 100644 > index 00..88e0434f0e > --- /dev/null > +++ b/libavcodec/riscv/vc1dsp_init.c > @@ -0,0 +1,47 @@ > +/* > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences > (ISCAS). > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include > + > +#include "libavutil/attributes.h" > +#include "libavutil/cpu.h" > +#include "libavutil/riscv/cpu.h" > +#include "libavcodec/vc1.h" > + > +void ff_vc1_inv_trans_8x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t > *block); > +void ff_vc1_inv_trans_4x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t > *block); > +void ff_vc1_inv_trans_8x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t > *block); > +void ff_vc1_inv_trans_4x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t > *block); > + > +av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) > +{ > +#if HAVE_RVV > +int flags = av_get_cpu_flags(); > + > +if (flags & AV_CPU_FLAG_RVV_I64) { > +dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; > +dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; > +} > +if (flags & AV_CPU_FLAG_RVV_I32) { > +dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; > +dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; > +} Probably missing VLENB checks. > +#endif > +} > diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S > new file mode 100644 > index 00..8a6b27192a > --- /dev/null > +++ b/libavcodec/riscv/vc1dsp_rvv.S > @@ -0,0 +1,123 @@ > +/* > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences > (ISCAS). > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/riscv/asm.S" > + > +func ff_vc1_inv_trans_8x8_dc_rvv, zve64x > +lht2, (a2) > +lit1, 3 > +mul t2, t2, t1 You can multiply by 3, 5 or 9 with shift-and-add. By 12 with shift-and-add then shift, and by 17 with shift then add. You don't need multiplications. > +addi t2, t2, 1 > +srai t2, t2, 1 > +mul t2, t2, t1 > +addi t2, t2, 16 > +srai t2, t2, 5 > +vsetivli zero, 8, e8, mf2, ta, ma > +vlse64.v v0, (a0), a1 > +lit0, 8*8 > +vsetvli zero, t0, e16, m8, ta, ma > +vmv.v.x v8, t2 Do you really need to splat? Can't .vx or .wx be used instead? > +vsetvli zero, t0, e8, m4, ta, ma > +vwaddu.wv v8, v8, v0 > +vsetvli zero, t0, e16, m8, ta, ma > +vmax.vx
Re: [FFmpeg-devel] [PATCH] lavc/ac3: add R-V Zbb extract_exponents
Le 3 décembre 2023 19:50:18 GMT+02:00, Zhao Zhili a écrit : > > >> On Oct 3, 2023, at 00:47, Rémi Denis-Courmont wrote: >> >> >> diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S >> new file mode 100644 >> index 00..48f8bb101e >> --- /dev/null >> +++ b/libavcodec/riscv/ac3dsp_rvb.S >> >> +func ff_extract_exponents_rvb, zbb >> +1: >> +lw t0, (a1) >> +addi a0, a0, 1 >> +neg t1, t0 >> +addi a1, a1, 4 >> +max t0, t0, t1 >> +addi a2, a2, -1 >> +clz t0, t0 >> +addi t0, t0, 24 - __riscv_xlen >> +sb t0, -1(a0) >> +bgtza2, 1b >> + >> +ret >> +endfunc >> — > >Got build failure with clang 14: > >:6:21: warning: unknown option, expected 'push', 'pop', 'rvc', >'norvc', 'relax' or 'norelax' >.option arch, +zbb >^ >src/libavcodec/riscv/ac3dsp_rvb.S:24:1: note: while in macro instantiation >func ff_extract_exponents_rvb, zbb >^ >src/libavcodec/riscv/ac3dsp_rvb.S:30:9: error: instruction requires the >following: 'Zbb' (Basic Bit-Manipulation) >max t0, t0, t1 >^ >src/libavcodec/riscv/ac3dsp_rvb.S:32:9: error: instruction requires the >following: 'Zbb' (Basic Bit-Manipulation) >clz t0, t0 >^ >make: *** [/home/quink/work/ffmpeg/ffbuild/common.mak:93: >libavcodec/riscv/ac3dsp_rvb.o] Error 1 >make: *** Waiting for unfinished jobs >:6:21: warning: unknown option, expected 'push', 'pop', 'rvc', >'norvc', 'relax' or 'norelax' >.option arch, +f >^ >src/libavcodec/riscv/audiodsp_rvf.S:23:1: note: while in macro instantiation >func ff_vector_clipf_rvf, f >^ > >Someone says clang 14 has Zbb extensions support. I don’t know what’s going on. It's not practical to support such a broken assembler as LLVM built-in's until they get their act together. You can add tests in FFmpeg configure but that's just going to turn all optimisations off. You could also disable the integrated assembler and use binutils, but then you'll hit the limitation of FFmpeg's configure whereby it tests the inline assembler rather than the outline one. So really you're better off with GCC. 
RISC-V support on LLVM is pretty sad, TBH. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] lavc/ac3: add R-V Zbb extract_exponents
Le 3 décembre 2023 19:50:18 GMT+02:00, Zhao Zhili a écrit : > > >> On Oct 3, 2023, at 00:47, Rémi Denis-Courmont wrote: >> >> >> diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S >> new file mode 100644 >> index 00..48f8bb101e >> --- /dev/null >> +++ b/libavcodec/riscv/ac3dsp_rvb.S >> >> +func ff_extract_exponents_rvb, zbb >> +1: >> +lw t0, (a1) >> +addi a0, a0, 1 >> +neg t1, t0 >> +addi a1, a1, 4 >> +max t0, t0, t1 >> +addi a2, a2, -1 >> +clz t0, t0 >> +addi t0, t0, 24 - __riscv_xlen >> +sb t0, -1(a0) >> +bgtza2, 1b >> + >> +ret >> +endfunc >> — > >Got build failure with clang 14: > >:6:21: warning: unknown option, expected 'push', 'pop', 'rvc', >'norvc', 'relax' or 'norelax' >.option arch, +zbb >^ >src/libavcodec/riscv/ac3dsp_rvb.S:24:1: note: while in macro instantiation >func ff_extract_exponents_rvb, zbb >^ >src/libavcodec/riscv/ac3dsp_rvb.S:30:9: error: instruction requires the >following: 'Zbb' (Basic Bit-Manipulation) >max t0, t0, t1 >^ >src/libavcodec/riscv/ac3dsp_rvb.S:32:9: error: instruction requires the >following: 'Zbb' (Basic Bit-Manipulation) >clz t0, t0 >^ >make: *** [/home/quink/work/ffmpeg/ffbuild/common.mak:93: >libavcodec/riscv/ac3dsp_rvb.o] Error 1 >make: *** Waiting for unfinished jobs >:6:21: warning: unknown option, expected 'push', 'pop', 'rvc', >'norvc', 'relax' or 'norelax' >.option arch, +f >^ >src/libavcodec/riscv/audiodsp_rvf.S:23:1: note: while in macro instantiation >func ff_vector_clipf_rvf, f >^ > >Someone says clang 14 has Zbb extensions support. I don’t know what’s going on. Forgot the explanation: the problem seems to be the arch option, not the Zbb extension. I think you can get around it with -no-integrated-as, but then... what I said in the other mail. Br, ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v2 7/7] avcodec: add AV_CODEC_FLAG_CLEAR
Hi, Le 8 décembre 2023 00:47:13 GMT+02:00, Marton Balint a écrit : > > >On Thu, 7 Dec 2023, Anton Khirnov wrote: > >> Quoting Ronald S. Bultje (2023-12-07 02:44:36) >>> Hi, >>> >>> On Wed, Dec 6, 2023 at 3:23 AM Marton Balint wrote: >>> >>> > Signed-off-by: Marton Balint >>> > --- >>> > doc/APIchanges | 3 +++ >>> > doc/codecs.texi| 14 ++ >>> > libavcodec/avcodec.h | 4 >>> > libavcodec/decode.c| 6 ++ >>> > libavcodec/options_table.h | 1 + >>> > libavcodec/version.h | 2 +- >>> > 6 files changed, 29 insertions(+), 1 deletion(-) >>> > >>> > diff --git a/doc/APIchanges b/doc/APIchanges >>> > index 416e2bec5e..f839504a64 100644 >>> > --- a/doc/APIchanges >>> > +++ b/doc/APIchanges >>> > @@ -2,6 +2,9 @@ The last version increases of all libraries were on >>> > 2023-02-09 >>> > >>> > API changes, most recent first: >>> > >>> > +2023-12-xx - xxx - lavc 60.36.100 - avcodec.h >>> > + Add AV_CODEC_FLAG_CLEAR. >>> > + >>> > 2023-12-xx - xxx - lavu 58.33.100 - imgutils.h >>> >Add av_image_fill_color() >>> > >>> > diff --git a/doc/codecs.texi b/doc/codecs.texi >>> > index 5b950b4560..0504a535f2 100644 >>> > --- a/doc/codecs.texi >>> > +++ b/doc/codecs.texi >>> > @@ -76,6 +76,20 @@ Apply interlaced motion estimation. >>> > Use closed gop. >>> > @item output_corrupt >>> > Output even potentially corrupted frames. >>> > +@item clear >>> > +Clear the contents of the video buffer before decoding the next picture >>> > to it. >>> > + >>> > +Usually if only a part of a picture is affected by a decode error then >>> > the >>> > +decoder (if it implements error concealment) tries to hide it by >>> > interpolating >>> > +pixels from neighbouring areas or in some cases from the previous frame. >>> > Even >>> > +without error concealment it is quite likely that the affected area will >>> > +contain pixels from an earlier frame, due to frame pooling. 
>>> > >>> >>> No comment on the patch itself, but wouldn't our users (and the C standard >>> itself) consider it a security issue to return stale >> >> I don't see the security issue in returning previously-returned frame >> data. > >I guess what Ronald means that it is possible that the decoder frame pool >allocates data in heap previously containing sensitive data, and that might >never get overwritten in case of faulty input before passing it to the user. > >The simple fix for that is to clear frame pool buffers on creation? > >I am not sure if it is actually UB to read uninitialzied data from the heap >though. Reading uninitialised data is UB if the type representation is not surjective (e.g. bool, and potentially compound types with padding). Of course there are all sorts of other problems that could indirectly cause UB such as implicitly assuming that an integer fits a certain range and triggering an undefined overflow otherwise. > >Regards, >Marton >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavc/lpc: R-V V apply_welch_window
apply_welch_window_even_c: 617.5 apply_welch_window_even_rvv_f64: 235.0 apply_welch_window_odd_c:709.0 apply_welch_window_odd_rvv_f64: 256.5 --- libavcodec/lpc.c| 4 +- libavcodec/lpc.h| 1 + libavcodec/riscv/Makefile | 2 + libavcodec/riscv/lpc_init.c | 37 libavcodec/riscv/lpc_rvv.S | 88 + 5 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 libavcodec/riscv/lpc_init.c create mode 100644 libavcodec/riscv/lpc_rvv.S diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c index dc6a3060ce..9e2fd0f128 100644 --- a/libavcodec/lpc.c +++ b/libavcodec/lpc.c @@ -320,7 +320,9 @@ av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order, s->lpc_apply_welch_window = lpc_apply_welch_window_c; s->lpc_compute_autocorr = lpc_compute_autocorr_c; -#if ARCH_X86 +#if ARCH_RISCV +ff_lpc_init_riscv(s); +#elif ARCH_X86 ff_lpc_init_x86(s); #endif diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h index 467d0b2830..0200baea5c 100644 --- a/libavcodec/lpc.h +++ b/libavcodec/lpc.h @@ -109,6 +109,7 @@ double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len, */ int ff_lpc_init(LPCContext *s, int blocksize, int max_order, enum FFLPCType lpc_type); +void ff_lpc_init_riscv(LPCContext *s); void ff_lpc_init_x86(LPCContext *s); /** diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index e9825c0856..1d4572fbc5 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -33,6 +33,8 @@ OBJS-$(CONFIG_LLVIDDSP) += riscv/llviddsp_init.o RVV-OBJS-$(CONFIG_LLVIDDSP) += riscv/llviddsp_rvv.o OBJS-$(CONFIG_LLVIDENCDSP) += riscv/llvidencdsp_init.o RVV-OBJS-$(CONFIG_LLVIDENCDSP) += riscv/llvidencdsp_rvv.o +OBJS-$(CONFIG_LPC) += riscv/lpc_init.o +RVV-OBJS-$(CONFIG_LPC) += riscv/lpc_rvv.o OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_init.o RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o diff --git a/libavcodec/riscv/lpc_init.c b/libavcodec/riscv/lpc_init.c new file mode 100644 index 
00..c16e5745f0 --- /dev/null +++ b/libavcodec/riscv/lpc_init.c @@ -0,0 +1,37 @@ +/* + * Copyright © 2022 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/lpc.h" + +void ff_lpc_apply_welch_window_rvv(const int32_t *, ptrdiff_t, double *); + +av_cold void ff_lpc_init_riscv(LPCContext *c) +{ +#if HAVE_RVV && (__riscv_xlen >= 64) +int flags = av_get_cpu_flags(); + +if ((flags & AV_CPU_FLAG_RVV_F64) && (flags & AV_CPU_FLAG_RVB_ADDR)) +c->lpc_apply_welch_window = ff_lpc_apply_welch_window_rvv; +#endif +} diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S new file mode 100644 index 00..2bc729d400 --- /dev/null +++ b/libavcodec/riscv/lpc_rvv.S @@ -0,0 +1,88 @@ +/* + * Copyright © 2023 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/riscv/asm.S" + +#if __riscv_xlen >= 64 +func ff_lpc_apply_welch_window_rvv, zve64d +vsetvli t0, zero, e64, m8, ta, ma +vid.v v0 +addit2, a1, -1 +vfcvt.f.xu.v v0, v0 +li t3, 2 +fcvt.d.l ft2, t2 +srait1, a1, 1 +fcvt.d.l ft3, t3 +li t4,
Re: [FFmpeg-devel] [PATCH] checkasm: add test for dcmul_add
Le 26 novembre 2023 22:54:28 GMT+02:00, flow gg a écrit : >This is a bit confusing for me.. I tried pulling the latest code, and then >used `git am checkasm-test-for-dcmul_add.patch` without any patch >corruption. Did you try with the actual sent email or only with the original patch file? ___ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> >> To unsubscribe, visit link above, or email >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". >> >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le maanantaina 27. marraskuuta 2023, 14.31.18 EET Martin Storsjö a écrit : > This can be useful if doing testing of uncommon CPU extensions by > running tests with QEMU (by configuring with e.g. > "target_exec=qemu-aarch64"), by only running the checkasm tests, > to get a reasonable test coverage without excessive test runtime. For the purpose of testing future or bleeding-edge CPU extensions on an emulator, you would normally want to be able to actually filter those in. That is more of a matter of patching checkasm than FATE. Considering the poor coverage of checkasm, I fear that this just gives the wrong impression, not to say a false sense of security. It feels misleading to encourage or support that paradigm into FATE, in light of that poor coverage. After all, if it's just about running checkasm, anybody can just run `make tests/checkasm/checkasm && tests/checkasm/checkasm`. Either way, this feels like a case of cart before horse. Also FWIW, RV broke due to misaligned accesses and illegal vector types that QEMU tolerated. That is rather an argument against QEMU than against this MR but still. -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le 28 novembre 2023 01:22:14 GMT+02:00, Michael Niedermayer a écrit : >On Mon, Nov 27, 2023 at 05:46:40PM +0200, Rémi Denis-Courmont wrote: >[...] >> Also FWIW, RV broke due to misaligned accesses and illegal vector types that >> QEMU tolerated. That is rather an argument against QEMU than against this MR >> but still. > >has someone reported this to qemu ? >(seems like a bug) It's not a bug. The specification leaves those cases *undefined*. QEMU supports them because they can, and adding sanity checks would just slow stuff down. Also generally QEMU TCG policy seems to be maximize perf and compatibility, not formal correctness. > >thx > >[...] ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] MAINTAINERS: remove myself from FFmpeg
Le torstaina 7. joulukuuta 2023, 10.59.06 EET Nicolas George a écrit : > Jean-Baptiste Kempf (12023-12-07): > > Why? > > Because after twelve years libav has finally managed to take control and > FFmpeg is now essentially dead. The question was for Paul. Even if you take Anton's knee-jerk threats of reverts as LibAV-think, they were but the last straw (Paul wrote as much). You have had heated arguments against Paul in recent times too. You have also argued a lot about exercising your review privileges, which sounds like a very libavish notion to me - a LibAV notion that made it into written-down FFmpeg project rules. As a matter of fact, regardless of who was right or wrong, and whence, I can only _observe_ that Paul did complain specifically about you on the IRC channel. To be fair, he also abundantly abused JB there, although I do not know how much of it was sarcasm vs actual attack. But in any case, by that same logic, you could also be "thank[ed] for your contribution in this". In my opinion, this would be unfair to you, and accordingly, you are being unfair to whomever you designate by "libav". (For the sake of utmost clarity, I am not so vain as to consider myself a part of the former project known as libav with however few contributions I made thereto.) > Thank you for your contribution in this. @CC: Yes, that second sentence can be construed as an ad hominem against Anton. Feel free to ban me. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le 27 novembre 2023 23:55:18 GMT+02:00, "Martin Storsjö" a écrit : >On Mon, 27 Nov 2023, Rémi Denis-Courmont wrote: > >> Le maanantaina 27. marraskuuta 2023, 14.31.18 EET Martin Storsjö a écrit : >>> This can be useful if doing testing of uncommon CPU extensions by >>> running tests with QEMU (by configuring with e.g. >>> "target_exec=qemu-aarch64"), by only running the checkasm tests, >>> to get a reasonable test coverage without excessive test runtime. >> >> For the purpose of testing future or bleeding-edge CPU extensions on >> emulator, you would normally want to be able to actually filter those in. >> That is more of a matter of patching checkasm than FATE. > >Sorry, can you elaborate on what you mean with "filter those in" here? You're running all checkasm tests, not just those that require the emulator. But what's potentially much worse is that you're triggering a whole build, or it's not entirely clear from the description how you'd reuse an existing build. For Armv8, that's just bad. For RV, that's terrible, as we need to run the same checkasm with different emulator configuration (different $QEMU_CPU in the case of QEMU): one per vector length. Armv9 will potentially have the same problem if FFmpeg grows SVE(2) support. > >> Considering the poor coverage of checkasm, I fear that this just gives the >> wrong impression, not to say a false sense of security. It feels misleading >> to encourage or support that paradigm into FATE, in light of that poor >> coverage. Afterall, if it's just about running checkasm, anybody can just >> run `make tests/checkasm/checkasm && tests/checkasm/checkasm`. > >Yes, anybody can run that - but having those results posted continuously >somewhere where other can see them can be valuable as well. > >Anyway, the concrete case I'm considering, is that we've got AArch64 code >merged, that uses the I8MM extensions. We don't have any FATE configuration >that continuously test that. 
Whenever there are patches, I do spin up a cloud >instance that supports this extension and test the patches there, but >inbetween that we're pretty much blind. > >While checkasm's coverage isn't fantastic, for this particular case I'm not >merging any AArch64 code for new extensions unless that code is covered by >checkasm. > >The other AArch64 feature that we do have code for, which also is untested, is >the assembly support for branch protection and pointer authentication. Also >this is testable pretty easily with QEMU. It's of course more interesting to >run the full fate suite, but if we're not looking for bugs in the compiler but >only for bugs in our assembly, then checkasm should cover most of it. > >Yes there's potential for QEMU bugs hiding real issues, but I'd rather have a >regular run of QEMU+checkasm than not have it tested at all. And I'm >volunteering HW+time for testing these cases with QEMU for whatever checkasm >covers, but I'm not volunteering it for full fate runs with QEMU. > >And sure, I can just run such configs privately, as I already do run a bunch >of various regular builds for projects I care about - but as we do have FATE >with the public status page, hooking it up to be reported there would feel >like added value for everybody. > >// Martin >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le torstaina 30. marraskuuta 2023, 17.34.31 EET Martin Storsjö a écrit : > Yeah, I wouldn't reuse an existing build here. For the setup I have in > mind, one build doesn't take too horribly long (either on an old desktop > x86 machine, or a moderate aarch64 server) - so it's not ideal but not a > dealbreaker anyway (while running all of fate with qemu takes one > magnitude longer). Well it's pretty much a deal breaker for Armv9 and RV. I can understand wanting to build on a comfy x86 server, but doing different builds just to change QEMU CPU flags is IMO inept. Sure, we could just build once and run several times checkasm with a separate script, as I already pointed out. But then this patch is completely unnecessary. > For the other setup I intended to test, to test AArch64 PAC and BTI, I > would do a separate build with -mbranch-protection=standard anyway. That does not make much sense to me. PAC and BTI should be enabled by default in compatibility mode (for ARMv8.0-8.2 builds) or noncompatibility mode (for ARMv8.3+ builds). The resulting code should be tested with and without PAC and with and without BTI. Separate builds only might make sense if you want to do something more fancy with PAC, requiring the non-HINT instructions, but then that is beyond "standard" branch protection. For BTI, there are no reasons whatsoever to make separate builds; it's a literal waste of time and energy. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/2] checkasm: test for abs_pow34
Le tiistaina 28. marraskuuta 2023, 18.59.38 EET flow gg a écrit : > Since nobody else commented, I shall note that you should probably split the underlying lavc changes into a separate preliminary patch. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le tiistaina 28. marraskuuta 2023, 16.21.55 EET Michael Niedermayer a écrit : > On Tue, Nov 28, 2023 at 09:27:08AM +0200, Rémi Denis-Courmont wrote: > > Le 28 novembre 2023 01:22:14 GMT+02:00, Michael Niedermayer a écrit : > > >On Mon, Nov 27, 2023 at 05:46:40PM +0200, Rémi Denis-Courmont wrote: > > >[...] > > > > > >> Also FWIW, RV broke due to misaligned accesses and illegal vector types > > >> that QEMU tolerated. That is rather an argument against QEMU than > > >> against this MR but still. > > > > > >has someone reported this to qemu ? > > >(seems like a bug) > > > > It's not a bug. The specification leaves those cases *undefined*. QEMU > > supports them because they can, and adding sanity checks would just slow > > stuff down. > > > > Also generally QEMU TCG policy seems to be maximize perf and > > compatibility, not formal correctness. > I think i read somewhere that recent qemu supposedly checks alignment on arm > more completely. But i couldnt quickly find a official statement about that As of 8.2.0-rc2, it most definitely does not: 8< static inline void gen_check_sp_alignment(DisasContext *s) { /* The AArch64 architecture mandates that (if enabled via PSTATE * or SCTLR bits) there is a check that SP is 16-aligned on every * SP-relative load or store (with an exception generated if it is not). * In line with general QEMU practice regarding misaligned accesses, * we omit these checks for the sake of guest program performance. * This function is provided as a hook so we can more easily add these * checks in future (possibly as a "favour catching guest program bugs * over speed" user selectable option). */ } >8 And this is an actual violation of the specification. In the RISC-V case, QEMU is not even violating the specification, just making a different choice than the only one currently commercially available hardware implementation. > But either way, qemu could emit such code optionally when it is used for > testing. 
Which is one of the things people use qemu for. That would be very true for system mode "soft-MMU" QEMU, but much more questionable for user mode. In any case, I don't make their policies. > So IMHO it would make sense for qemu to detect cases that are undefined > even if for no other reason than to emulate the hw more exactly. I would agree that optional flags would be sensible. But TBH, we don't even yet know how the IPs from other vendors than Alibaba/T-Head will behave. > If this is not done, qemu can be detected and code could refuse or > fail to run -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate.sh: Allow overriding what targets to make for running the tests
Le torstaina 30. marraskuuta 2023, 18.28.39 EET Martin Storsjö a écrit : > On Thu, 30 Nov 2023, Rémi Denis-Courmont wrote: > > Le torstaina 30. marraskuuta 2023, 17.34.31 EET Martin Storsjö a écrit : > >> Yeah, I wouldn't reuse an existing build here. For the setup I have in > >> mind, one build doesn't take too horribly long (either on an old desktop > >> x86 machine, or a moderate aarch64 server) - so it's not ideal but not a > >> dealbreaker anyway (while running all of fate with qemu takes one > >> magnitude longer). > > > > Well it's pretty much a deal breaker for Armv9 and RV. I can understand > > wanting to build on a comfy x86 server, but doing different builds just to > > change QEMU CPU flags is IMO inept. > > Yes. But for doing one single run with QEMU, I don't mind. You can already test it properly as things stand, and reporting is trivial, just not to the FATE website. The question is whether this is worth adding to FATE. In other words, is publishing on the FATE website worth making the tests coverage and/or the build time worse? not to mention confusing the existing website users with weirdly incomplete test results. > Again, for SVE, I'd rather have testing with 1 config (the default, which > is longer vectors than one usually encounters in HW) rather than none at > all. It won't catch every theoretical issue but practically would catch > many things at least. I find that statement very misleading. This is not a question of testing 1 config vs 0. It's a question of testing 1 configuration vs all of them(*), and reporting that one vs reporting all of them elsewhere than FATE.ffmpeg.org. Until/unless somebody does the missing integration. (*) at least those that QEMU supports > Are you volunteering to write FATE integration to run checkasm multiple > times with different QEMU settings, so I can wait for that instead of > having much improved public test coverage right now? 
Of course I will not volunteer, given that the RISE project already has an outstanding RfP which will likely require this done professionally: https://hubs.la/Q029hwpS0 (That does not mean that I would have volunteered otherwise, just that the question is moot as far as I am concerned and for the time being.) > > Sure, we could just build once and run several times checkasm with a > > separate script, as I already pointed out. But then this patch is > > completely unnecessary. > > Indeed, that's trivial to do for a private testing setup. > > >> For the other setup I intended to test, to test AArch64 PAC and BTI, I > >> would do a separate build with -mbranch-protection=standard anyway. > > > > That does not make much sense to me. PAC and BTI should be enabled by > > default in compatibility mode (for ARMv8.0-8.2 builds) or > > noncompatibility mode (for ARMv8.3+ builds). > > Maybe it should - but it currently isn't. That's really up to whoever set up the AArch64 builds to fix their build flags TBH (I believe that the assembler is already sorted). And at least for PAuth, that should be sufficient, as support from the C runtime is not required. > And in order to actually test BTI, one has to link with a sysroot that > also was built with BTI enabled - I currently use a sysroot extracted from > fedora for that. (And my tests for it use -Wl,-z,force-bti.) I can readily believe how much of a PITA that would be to set up. I can also believe that glibc won't allow masking the guarded page bit in mmap()/ mprotect(). That does not mean you need different builds to test each of the 4 possible combinations (or 3 if you ignore the case of BTI without PAC, which does not exist in real hardware). Once you have that build, you can test it with whichever QEMU CPU settings. Surely Fedora, of all distros, is not going to treat Armv8.5-BTI as a distinct arch from AArch64 whilst Arm made sure it was both backward-compatible and runtime-tunable. 
-- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] lavc/vc1dsp: R-V V inv_trans
Le maanantaina 4. joulukuuta 2023, 10.48.56 EET flow gg a écrit : > > Probably missing VLENB checks. > > Changed. > > > You can multiply by 3, 5 or 9 with shift-and-add. By 12 with shift-and-add > > then shift, and by 17 with shift then add. You don't need multiplications. > > Changed. > > > Do you really need to splat? Can't .vx or .wx be used instead? > > Okay, for example in ff_vc1_inv_trans_8x8_dc_rvv > > + vsetvli zero, t0, e8, m2, ta, ma > + vwaddu.vxv4, v0, zero > + vsetvli zero, t0, e16, m4, ta, ma > + vadd.vx v4, v4, t2 > - vsetvli zero, t0, e16, m4, ta, ma > - vmv.v.x v4, t2 > - vsetvli zero, t0, e8, m2, ta, ma > - vwaddu.wvv4, v4, v0 > > But the speed has slowed down slightly on the c910, > I'm not sure if I should modify it. OK, unfortunately, there is no widening addition with wide scalar operand. But you can do zero-extension then addition here. In the end, I doubt that you can reasonably optimise whilst working with a C910-based board. This function deviates too much on non-conformant hardware. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Le sunnuntaina 28. tammikuuta 2024, 5.25.49 EET Michael Niedermayer a écrit : > Please read the following to get a better understanding what STF is about: > (In short it is about maintenance and sustainability, not features) > https://www.sovereigntechfund.de/programs/applications > > As some probably already know, Thilo has worked with STF to work out > many details of this. SPI will handle the financials for FFmpeg. As anybody who's been following FFmpeg-devel knows, people have pointed out SPI seems like a poor choice of vehicle for that sort of commission. I won't repeat the arguments that were already made in the second half of last year. But I will add a few comments... > Everyone willing to benefit from this sponsorship must not be a US sanctioned > entity or in a US sanctioned country. In other words, the choice of a US vehicle is excluding people who are, or fear that they may be, affected by US sanctions. Some active developers are associated with, for example, the Chinese Academy of Science, Huawei Technologies or other Chinese IT R&D entities. This is discriminatory, and thus something that an open-source project should actively seek to *avoid*. German government funding should go to German or at least EU-based entities if only for that reason. In other words, by going through SPI, Thilo is *unnecessarily* bringing ugly politics into an open-source project. (And please don't shoot the messenger here.) > At this point, what we need is a list of Projects so we can submit an > application to STF at or before 12th Feb. (at the 14th they have a meeting > and will review our submission) What STF told us, they need ATM is: The "selection criteria" seem rather restrictive. It seems that critical tasks such as long-term maintenance (Anton) and security fixes (you) are in scope. Though I can only agree with Kieran that SoW is ill-suited for tasks of the sort.
If SPI insists on SoW, which is somewhat understandable from their legal and moral standpoint, then that is another reason why SPI should not, or maybe, cannot, be the vehicle. By stretching the criteria a little, maybe reasonably expected external or normative updates are also in scope, like say implementing optimisations for new ISA extensions or new codec profiles. But implementing entirely new features seems unambiguously excluded, especially if competing with existing open-source projects. Prototypes are also *explicitly* excluded. So for the sake of the argument, reimplementing X264, dav1d or GNU/radio functionality in FFmpeg seems like it would not qualify. I am not a lawyer, but there may be nontrivial legal implications for SPI and the contractees here. Note that I do not mean to argue against the restrictions here. They make perfect sense considering that this funding would ultimately come from the German tax payers. (...) > My suggestion is that we create a Trac WIKI page similar to the ideas > page for GSoC. > On that page everyone can add a project idea. > The requirement is that > 1. it must fit in the goals and mission and all of STF > 2. it must be about FFmpeg (IIUC non coding tasks are ok) IIUC, they are *not* OK, unless they are a dependency of a coding task: | Development is our primary focus, although security audits, conference | attendance, and other community-based events can be included in the | application should they be necessary. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 2/4] lavc/rv34dsp: R-V V rv34_inv_transform_dc
Hi, I think this breaks the build for RV32, and it lacks checks for the vector length. Also fractional multiplier should never be smaller than the ratio of the specified element size to the largest element size used in the function. Here it is largely inconsequential, but for instance "e32, mf4" and "e64, mf2" are invalid. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Subject: [PATCH 3/3] lavc/dnxhdenc: R-V V get_pixels_8x4_sym
Hi, +/* + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS). + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/riscv/asm.S" + +func ff_get_pixels_8x4_sym_rvv, zve64x +vsetivlizero, 8, e8, mf2, ta, ma +vlse64.vv16, (a1), a2 +li t0, 8 * 8 +vsetvli zero, t0, e16, m4, ta, ma +vzext.vf2 v8, v16 +vse16.v v8, (a0) +li a2, 8*2 That's not needed. You can use immediate values. +vsetivlizero, 2, e8, mf8, ta, ma +addia1, a0, 48 +addia0, a0, 32*2 +vle64.v v0, (a1) +vse64.v v0, (a0) +sub a1, a1, a2 +vle64.v v0, (a1) +add a0, a0, a2 +vse64.v v0, (a0) +sub a1, a1, a2 +vle64.v v0, (a1) +add a0, a0, a2 +vse64.v v0, (a0) +sub a1, a1, a2 +vle64.v v0, (a1) +add a0, a0, a2 +vse64.v v0, (a0) You can reorder to avoid immediate data dependencies on the addresses. I expect that it would be faster to make one large load, and then 4 small stores, but that might work only for exactly 128-bit vectors? In any case, you need to check the vector length in init. + +ret +endfunc -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Le maanantaina 29. tammikuuta 2024, 19.27.14 EET Michael Niedermayer a écrit : > Also FFmpeg has been part of Google summer of code for many many years > and also in the past in outreachy. All these projects payed "students" > for work they did. > From a legal point of view, these are probably very similar > > Mysteriously, there was a total absence of similar drama there. > I wonder how it could have been possible to do that for over a decade > with not one instance of drama or problems like here. Google funded GSoC students to work on FFmpeg. And nobody objected against the core idea of STF funding developers to work on FFmpeg. The "drama" is about how and through whom the funding goes. That drama couldn't be had for GSoC because how was however Google decides, and there was no intermediary to go through (money went straight from Google to the students). -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Le maanantaina 29. tammikuuta 2024, 20.11.19 EET Michael Niedermayer a écrit : > > The "drama" is about how and through whom the funding goes. > > ok, elaborate please > > All FFmpeg money has always been handled through SPI or associated entities It was already a bit of a stretch to compare GSoC students with (hypothetical) STF subcontractors. So sorry but I simply don't think that the funding for mentors is comparable at all. In fact, it seems completely normal for the GSoC mentor funding to go via open-source foundations, and other GSoC projects presumably operate the same way. > Its under the control of the community and its transparent You always have the control of the community at the time of review and merge. You can argue all you want that more open is better. What I see is that this more open is already turning into a train wreck (as predicted last year). > And very important what do you propose ? We already went through this in the previous thread last year. This is not going to work in the light of what Jonatas politely calls FFmpeg "governance" challenges. It was already clear that finding agreement within the GA would be at best very difficult and untimely. People (including myself) already suggested to arrange that sort of things via an IT service company (*not* necessarily FFlabs). Or you could even go through a "porting" company in your country if you can't find an existing agreeable company and don't want to register your own. Of course those are not perfect solutions but they seem far less fraught with problems than going through a foundation, especially a US-based foundation. You can review the archives for details. And it certainly does not help that this only became public so late in the process, which is intrinsically suspicious. > Should we reject the maybe 200k € grant we could get from STF now ? Again, nobody objected to getting funding from STF as such. 
> > That drama > > couldn't be had for GSoC because how was however Google decides, and there > > was no intermediary to go through (money went straight from Google to the > > students). > > SPI handles all the GSoC mentor money. > And lets just assume it would handle the students money too, what difference > would that really make ? It would cause similar arguments to this one. And that's if Google would even agree to such a setup (which I guess they wouldn't). What is the point of going through SPI for *this* (as opposed to regular donations)? -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Le torstaina 1. helmikuuta 2024, 19.59.14 EET Anton Khirnov a écrit : > > Why should i suddenly do something different ? > > I did it for 100% free back then > > and here it wouldnt even make sense, closing false positives also > > counts as resolved. Its less work even to get 70USD ;) > > What's with this hurt-feelings tone? You ASKED people to comment on the > proposals, so I asked a question. You can just answer it, no need to get > all emotional about it. I don't stalk you or your commits, why do you > expect me to know that you worked on such issues "long ago"? I don't > even know one can close coverity issues manually. > > What I do know is that I've seen similar initiatives run into this > pathology in the past, hence my question. Yeah, well there are two sides to this issue. The obvious one is that reviewing code takes time and is not exactly the most rewarding job. This is especially true for reviewing dull issues like Coverity's, but it is generally true. The less obvious flip-side is that somebody should also review the handling of Coverity issues, even those that end up marked as "False positive" or "Intentional". This gets even worse if everybody knows that someone else is paid. Then the incentive to review on one's free time gets even lower in my experience. I don't know how to address that paradox generally speaking, but I do think that bug triaging, bug fixing and code review should be paid per hour, not per bug report (and I count Coverity issues as a type of bug reports). This is not just theoretical. I have actually previously worked in an organisation that paid contractors per bug as a unit, and of course people gamed the system to get paid more with little extra work. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [RFC] Vote STF/SPI 2024-02
Le torstaina 1. helmikuuta 2024, 19.45.52 EET Vittorio Giovara a écrit : > The same of course should apply to any other future funding, it must be > either the community (via GA) or a third party setting up the sponsorship. Neither the community nor the GA can forbid people from seeking funding for themselves. I suppose that, in theory, developers could be required to sign an agreement to that effect before they are allowed to submit code for inclusion, but that seems neither practical, nor desirable to me. That is probably not what you meant, but that is what this reads like. Frankly, if Thilo secures the funding, it's between him and the German authorities what they want to spend it on, as long as it remains within the boundaries of applicable laws. If he can come with a project to fund Michael to maintain FFmpeg for a while, FFmpeg will be no worse off. Nobody should claim to represent FFmpeg without any kind of preexisting delegation to do so. If that was done, then that is very morally wrong. But realistically, we cannot enforce that. Some people did it in the past and will continue to do it in the future. It is effectively up to the other parties to perform due diligence and not get fooled - if they even care. STF probably does not care; NAB most certainly does not care. Moreover pretenses of this process being open need to be dropped. It's not open if any and all objections are summarily rejected to put it politely. A short deadline is not an excuse, even if it was unavoidable. (And I remain unconvinced that public discussion could not start earlier than it did.) Ultimately, whatever comes out of this does not get any special exemption from code review standards and TC oversight, but that should be a given. Therefore this funding should much preferably be used toward as uncontroversial tasks as possible: Maintenance is a good example. SDR is a counter-example.
With that long side note, while I agree with most of what you said otherwise, I don't think that there is any merit to excluding Michael from this process, doubly so if there are too few viable proposals. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 2/2] lavc/blockdsp: R-V V clear_blocks
You should probably use an assembler macro to repeat the code. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/4] checkasm/rv34dsp: add rv34_inv_transform_dc test
Le 2 février 2024 01:42:20 GMT+02:00, Michael Niedermayer a écrit : >On Wed, Jan 31, 2024 at 08:00:18PM +0800, flow gg wrote: >> > >> checkasm/Makefile |1 >> checkasm/checkasm.c |3 ++ >> checkasm/checkasm.h |1 >> checkasm/rv34dsp.c | 65 >> >> fate/checkasm.mak |1 >> 5 files changed, 71 insertions(+) >> e7eed6e25de9f313ddb3c0f3066f02f0671d3271 >> 0001-checkasm-rv34dsp-add-rv34_inv_transform_dc-test.patch >> From 46a81051f49f6b4032815d5f123be8ff614033e2 Mon Sep 17 00:00:00 2001 >> From: sunyuechi >> Date: Wed, 31 Jan 2024 19:00:23 +0800 >> Subject: [PATCH 1/4] checkasm/rv34dsp: add rv34_inv_transform_dc test > >seems to fail here Do you mean that the test is wrong or that it exposes a bug in the x86 optimisations (which wouldn't be the first occurrence)? It's painful enough that RVV optimisations need to add checkasm tests for existing code. We can't be expected to fix x86 bugs on top. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Hi, Le 4 février 2024 14:41:15 GMT+01:00, Michael Niedermayer a écrit : >Hi > >As said on IRC, i thought people knew it, but ‘the same person as before’ is >Thilo. > >Ive updated the price design suggestion for the merge task, its 16€ / commit >limited to 50k€ >this comes from looking at pauls fork which has around 500 commits in 2 months >thus >250 commits per month, 12 months, and if we allocate 50k that end with roughly >16€ / commit >if activity stays equal. It's very different if we're talking about librempeg or some other unspecified fork. I could make a fork that removes MMX et al, and claim that I'm merging a fork. >The task has ATM no developer on it. If a developer adds himself, he can >change teh task >and specify what he proposes to merge. > >I am totally perplexed why every dot on every i is such a big thing. That is the whole point of a statement of work. And I agree that it's tedious and possibly outright annoying... Indeed I don't think that a semiformal open-source community with a lot of strong and varied opinions will carry such dotting of all i's very effectively. That has been one of the arguments for delegating this to a contracting IT company rather than to FFmpeg-devel and SPI. >We are doing GSoC for a decade and noone cared about voting about anything in >it. Again, I don't think it's a fair comparison. GSoC rules are a given set by Google. Maintenance is not allowed nor are vague broadly defined tasks. Also the mentor payment is not really a proper compensation, nor is it intended to be. >The difference here is FFmpeg developers are benefiting from the money. That's a pretty major difference. >We send an application and a scope of work. That's exactly why we need to have a precise scope of work to vote on this. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Hi, I don't believe it is appropriate to hold the vote before Derek's question is addressed. We don't really know what we're voting on here. Le 1 février 2024 20:22:14 GMT+01:00, Derek Buitenhuis a écrit : >On 1/31/2024 9:44 PM, Derek Buitenhuis wrote: >> On 1/30/2024 1:48 AM, Michael Niedermayer wrote: >>> https://trac.ffmpeg.org/wiki/SponsoringPrograms/STF/2024 >> >> Not to derail this fine thread, but what forks does the Merge Forks >> project refer to? > >I do not believe this has been answered. > >- Derek > >___ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 0/2] Remove SDL2 output devices
Le 4 février 2024 11:11:12 GMT+01:00, Marton Balint a écrit : >Actually they work here on a linux box with OpenSuse 15.5. So even if they >are broken on some setups, they are not broken everywhere, or not more broken >than they used to be. No. They were always broken in terms of the design, and they are more technically broken than before because the threading rework exposed the design bugs from within fftools. No sane application would use this. If it doesn't even work in fftools, it should be removed. >Also, poper deprecation is needed here, since not only the CLI tools might use >these. Especially since there is no drop-in replacement. First it's not what would be considered an API. The removal shouldn't break source compatibility, so deprecation won't get us anything here. Where would you even put the deprecation guards? And then deprecation only makes sense if it can be fixed. Nobody has come forward with a practical solution to make it work, probably because there is not one, at least on MacOS. >> The 'pipe:' output can be used with a real video player such as mpv, vlc, or >> even ffplay. For cases where the user was an application using the API they >> should supply their own renderer. > >Yeah, but I never liked when people piped uncompressed data... Not everything >that the devices support can be serialized, it is extra CPU, latency of the >receiving app reading from pipe is a question... Those sound like pretty minor problems for something that's purely meant for testing, and well, at least piping works. >I'd be a lot more happy with this if we'd offer some replacement which has no >issues. Maybe a libplacebo based outdev. That's orthogonal, and you're welcome to provide patches. But AFAICT, any video output device would suffer the same problems on the same platforms. You simply can't treat video output as a generic pipeline component, at least on Windows and especially MacOS.
___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 0/2] Remove SDL2 output devices
Le 4 février 2024 10:02:31 GMT+01:00, "J. Dekker" a écrit : >With the addition of threading in ffmpeg.c, the SDL2 devices no longer have the >'main' thread. This means that both the SDL2 and OpenGL output device are >broken >in master. Rather than attempting to fix it, they should be removed instead as >there are better alternatives for debugging or viewing streams. This is as agreed after being discussed in yesterday's technical meeting. So obviously I support this patchset. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] RISC-V vector DSP functions: Motivation for commit 446b009
Hi, Le perjantaina 19. tammikuuta 2024, 17.30.00 EET Michael Platzer via ffmpeg- devel a écrit : > Commit 446b0090cbb66ee614dcf6ca79c78dc8eb7f0e37 by Remi Denis-Courmont has > replaced RISC-V vector loads and stores with negative stride with vrgather > (generalized permutation within vector registers) instructions in order to > reverse the elements in a vector register. The commit message explains that > this change was done, but it does not explain why. It was faster on what was the best approximation of real hardware available at the time, i.e. a Sipeed Lichee Pi4A board. There are no benchmarks in the commit because I don't like to publish benchmarks collected from prototypes. Nevertheless I think the commit message hints enough that anybody could easily guess that it was a performance optimisation, if I'm being honest. This is not exactly surprising: typical hardware can only access so many memory addresses simultaneously (i.e. one or maybe two), so indexed loads and strided loads are bound to be much slower than unit-strided loads. Maybe you have access to special hardware that is able to optimise the special case of strides equal to minus one to reduce the number of memory accesses. But I didn't back then, and as a matter of fact, I still don't. Hardware donations are welcome. > I fail to see what could possibly have motivated this change. > The RISC-V vector loads and stores support negative stride values for use > cases such as this one. [Citation required] > Using vrgather instead replaces the more specific operation with a more > generic one, That is a very subjective and unsubstantiated assertion. This feels a bit hypocritical while you are attacking me for not providing justification. As far as I can tell, neither instruction is specific to reversing vector element order. An actual real-life specific instruction exists on Arm in the form of vector-reverse. I don't know any ISA with load-reverse or store-reverse.
> which is likely to be less performant on most HW architectures. Would you care to define "most architectures"? I only know one commercially available hardware architecture as of today, Kendryte K230 SoC with T-Head C908 CPU, so I can't make much sense of your sentence here. > In addition, it requires to setup an index vector, That is irrelevant since in this loop, the vector bank is not a bottleneck. The loop can run with maximal LMUL either way. And besides, the loop turned out to be faster with a smaller multiplier. > thus raising dynamic instruction count. It adds only one instruction (reverse subtraction) in the main loop, and even that could be optimised away if relevant. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Le 30 janvier 2024 00:43:39 GMT+02:00, Michael Niedermayer a écrit : >Hi > >On Mon, Jan 29, 2024 at 11:01:05PM +0200, Rémi Denis-Courmont wrote: >> Le maanantaina 29. tammikuuta 2024, 20.11.19 EET Michael Niedermayer a écrit >> : >[...] >> > Its under the control of the community and its transparent >> >> You always have the control of the community at the time of review and merge. >> >> You can argue all you want that more open is better. What I see is that this >> more open is already turning into a train wreck (as predicted last year). > >I do have to disagree on this specific point >The people predicting it to be a train wreck are the people who now make it >a train wreck. That's clearly false and defamatory against me. And given that you were the one to ask for feedback and project ideas that also constitutes entrapment. You should step down from the CC IMO because that's very unbecoming of a CC member (as are your attacks against Kieran) In these conditions I maintain that this process is inane and discriminatory. Lastly the FFmpeg community should not be taken hostage in one person's personal feud against FFlabs and/or other companies. (This is purely hypothetical and not an accusation against anyone in particular. If you feel targeted, that's on you.) ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Le 29 janvier 2024 22:15:39 GMT+02:00, Derek Buitenhuis a écrit : >Between this, the unaswered NAB questions, the second vote ridiculousness, the >accidental email to the ML from Thilo where he admits he has purposely not >replied, >etc., Also - Reject FFmpeg project's free invitation to SCaLE because he wouldn't participate, rather than pass it on. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Sovereign Tech Fund
Hi, Le keskiviikkona 31. tammikuuta 2024, 16.10.02 EET Jonatas L. Nogueira via ffmpeg-devel a écrit : > > IMO hasty actions and avoidable drama may cause damage to the project > > What would be a hasty action? I've seen far too much people calling action > over stuff discussed for weeks/months as "hasty" in attempt to stall into > endless discussions, so you might want to clarify. Would you care to clarify which astronomical body do you count weeks and months in? I believe that it is customary to use Earth units when you do not specify. And in this case, the topic was brought to the community just about 0.5 week, or 0.11 month ago. Sarcasm aside, I take that to mean that SPI has been involved with those discussions for months in a private and closed process. Michael asserted that an open inclusive process is better than the usual closed approach whence the funding goes through a company. It looks to me that those SPI discussions were just as opaque and closed, and all the talk of openness is just pretense. It does not help that Michael, and now you too, misrepresent any challenge to SPI proposed *process* as an attempt to reject the idea of STF sponsorship, under the convenient pretext that there is not enough time. This is further aggravated by the context that Michael brought forward the idea of funding developers through SPI 3 months ago (in actual Earth units). From your statement, I have to infer that Thilo, Michael and SPI already knew of the STF plan and concealed that key piece of contextual information back then. In hindsight, it feels hypocritical to me that they were arguing for the SPI path, and against the corporate path, on the basis of openness already then, to be honest. I can only agree with Anton that this looks like an attempt to strongarm the community. This is ostensibly being done to ignore all the objections that were already brought in October and are being brought again now, with the complicity of SPI.
I can't say that this looks well on SPI, but that's just my personal opinion. With all that said, I don't think anybody will attempt to prevent this from happening (if they even can?). But that will take place without the consent of the GA, without any legitimacy on the claims of openness and inclusiveness, and obviously without any form of preclearance from the technical appropriateness of the resulting code contributions. -- レミ・デニ-クールモン http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] RISC-V vector DSP functions: Motivation for commit 446b009
Le tiistaina 23. tammikuuta 2024, 19.34.46 EET Michael Platzer via ffmpeg-devel a écrit : > I agree that the indexed and strided loads and stores are certainly slower > than unit-strided loads and stores. However, the vrgather instruction is > unlikely to be very performant either, unless the vector length is > relatively short. > Particularly, if vector register groups are used via a > length multiplier LMUL of, e.g., 8, then any element in the destination > vector register could be sourced from any element in the 8 source vector > registers (i.e., 1/4 of the vector register file). Gather instructions seem to scale quadratically on existing hardware, which is bad. That's why the FFmpeg code was later modified to use LMUL=1 in that particular case. Now if you want to argue that VLSE is better, then please provide a patch exhibiting better performance on FFmpeg's checkasm on real hardware. Otherwise, this discussion is not much more than he-said-she-said. > By contrast, the performance of strided loads and stores, while certainly > slower than unit-strided loads and stores, likely scales linearly with the > vector length, so on CPUs with large VLEN the original code could very well > run faster than the variant with vrgather, despite the slower strided loads > and stores. Yes, but it's a stretch to expect that accessing memory will be faster than accessing registers, especially when the dataset is typically too large to fit in L1. Furthermore strided loads require adders to compute the accessed address - something VRGATHER (or even VLUXEI) does not need. Some people wish that processor cores would make a special optimised case of minus EEW/8 strides. And sure, that would be nice. But so far that's just wishful thinking. > > > The RISC-V vector loads and stores support negative stride values for > > > use cases such as this one.
> > > > [Citation required] > > The purpose of strided loads and stores is to load/store elements that are > not consecutive in memory, but instead separated by a constant offset. > Additionally, the authors of the specification decided to allow negative > stride values, since they apparently deemed it useful to be able to reverse > the order of those elements. FFmpeg *still* uses strided loads and stores where applicable, typically where the stride is legitimately variable. I cannot find a justification that small constant non-unit strides would be a good idea anywhere though. Just because you can use negative offsets does not mean that this will be optimised for negative-unit offsets. Again, I have only seen some wishful thinking from some developers here and there. I have yet to see a serious quote from a IP vendor or a benchmark that would support this. > > > Using vrgather instead replaces the more specific operation with a > > > more generic one, > > > > > > That is a very subjective and unsubstantiated assertion. This feels a bit > > hypocritical while you are attacking me for not providing justification. > > vrgather is more generic because it can be used for any kind of permutation, > which strided loads and stores cannot. This is not subjective. That would be a fair comparison of vrgather with hypothetical vreverse or vtranspose instructions. But you're comparing apples and oranges here. > > As far as I can tell, neither instruction are specific to reversing vector > > element order. An actual real-life specific instruction exists on Arm in > > the form of vector-reverse. I don't know any ISA with load-reverse or > > store- reverse. > > A load-reverse or store-reverse would just be a special case of strided > load/store. By that logic, a unit-stride load is just a special case of a strided load, and a strided load is just a special case of an indexed load. From an architectural functional standpoint, that is indeed definitely true. 
From a hardware silicon design and microbenchmark standpoint, that is however certainly false. > When writing about the performance of vrgather I primarily had the > scalability issues explained above in mind. It seems that you have already > experienced these, since you found that a larger LMUL reduces the > performance of vrgather. > How would the reverse subtraction be optimized away? I assume that it needs > to be part of the loop since it depends on the VL of the current iteration. VRSUB computes the same vector at all but the last two iterations. All you need to do is make a special case for the tail iterations. Then VRSUB can be run just twice for the whole function, zero times per loop iteration. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavc/llviddsp: R-V V add_bytes
add_bytes_c: 2077.2 add_bytes_rvv_i32: 105.0 --- libavcodec/lossless_videodsp.c | 2 ++ libavcodec/lossless_videodsp.h | 1 + libavcodec/riscv/Makefile| 2 ++ libavcodec/riscv/llviddsp_init.c | 38 libavcodec/riscv/llviddsp_rvv.S | 36 ++ 5 files changed, 79 insertions(+) create mode 100644 libavcodec/riscv/llviddsp_init.c create mode 100644 libavcodec/riscv/llviddsp_rvv.S diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c index 359606981c..876decb1e6 100644 --- a/libavcodec/lossless_videodsp.c +++ b/libavcodec/lossless_videodsp.c @@ -121,6 +121,8 @@ void ff_llviddsp_init(LLVidDSPContext *c) #if ARCH_PPC ff_llviddsp_init_ppc(c); +#elif ARCH_RISCV +ff_llviddsp_init_riscv(c); #elif ARCH_X86 ff_llviddsp_init_x86(c); #endif diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h index da4baa1414..5309ce4be7 100644 --- a/libavcodec/lossless_videodsp.h +++ b/libavcodec/lossless_videodsp.h @@ -40,6 +40,7 @@ typedef struct LLVidDSPContext { } LLVidDSPContext; void ff_llviddsp_init(LLVidDSPContext *llviddsp); +void ff_llviddsp_init_riscv(LLVidDSPContext *llviddsp); void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp); void ff_llviddsp_init_ppc(LLVidDSPContext *llviddsp); diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index d34dc77458..8f2a519827 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -28,6 +28,8 @@ OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o RVV-OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_rvv.o OBJS-$(CONFIG_LLAUDDSP) += riscv/llauddsp_init.o RVV-OBJS-$(CONFIG_LLAUDDSP) += riscv/llauddsp_rvv.o +OBJS-$(CONFIG_LLVIDDSP) += riscv/llviddsp_init.o +RVV-OBJS-$(CONFIG_LLVIDDSP) += riscv/llviddsp_rvv.o OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_init.o RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \ diff --git a/libavcodec/riscv/llviddsp_init.c b/libavcodec/riscv/llviddsp_init.c new file mode 100644 index 00..f042eeab32 --- 
/dev/null +++ b/libavcodec/riscv/llviddsp_init.c @@ -0,0 +1,38 @@ +/* + * Copyright © 2023 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/lossless_videodsp.h" + +void ff_llvid_add_bytes_rvv(uint8_t *, uint8_t *src, ptrdiff_t w); + +av_cold void ff_llviddsp_init_riscv(LLVidDSPContext *c) +{ +#if HAVE_RVV +int flags = av_get_cpu_flags(); + +if (flags & AV_CPU_FLAG_RVV_I32) { +c->add_bytes = ff_llvid_add_bytes_rvv; +} +#endif +} diff --git a/libavcodec/riscv/llviddsp_rvv.S b/libavcodec/riscv/llviddsp_rvv.S new file mode 100644 index 00..a4814837b9 --- /dev/null +++ b/libavcodec/riscv/llviddsp_rvv.S @@ -0,0 +1,36 @@ +/* + * Copyright © 2023 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/riscv/asm.S" + +func ff_llvid_add_bytes_rvv, zve32x +1: +vsetvli t0, a2, e8, m8, ta, ma +vle8.v v0, (a1) +sub a2, a2, t0 +vle8.v v8, (a0) +add a1, t0, a1 +vadd.vv v8, v0, v8 +vse8.v v8, (a0) +add a0, t0, a0 +bneza2, 1b + +ret +endfunc -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ff
Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add
Le keskiviikkona 15. marraskuuta 2023, 10.59.55 EET flow gg a écrit : > Okay, I have updated these issues in the patch. It does not assemble but I can fix it locally. The narrowing shift trickery requires Zve64x, or rather Zve64f in this case. The performance improvement is much better on newer hardware: fcmul_add_c: 4891.2 fcmul_add_rvv_f64: 2399.5 FWIW, VLSEG2E32.V remains slightly worse than with shifting: fcmul_add_c: 4891.2 fcmul_add_rvv_f32: 2877.5 -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] checkasm/flacdsp: add LPC test
--- tests/checkasm/flacdsp.c | 28 1 file changed, 28 insertions(+) diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c index 51a0e0060b..589a3fe834 100644 --- a/tests/checkasm/flacdsp.c +++ b/tests/checkasm/flacdsp.c @@ -54,6 +54,27 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **ne bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8); } +static void check_lpc(FLACDSPContext *c) +{ +int pred_order = (rnd() % 32) + 1; +int qlevel = rnd() % 16; +LOCAL_ALIGNED_16(int32_t, coeffs, [32]); +LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]); +LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]); + +declare_func(void, int32_t *, const int[32], int, int, int); + +for (int i = 0; i < BUF_SIZE; i++) +dst0[i] = rnd(); + +memcpy(dst1, dst0, BUF_SIZE * sizeof (int32_t)); +call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE); +call_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); +if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0) + fail(); +bench_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); +} + void checkasm_check_flacdsp(void) { LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]); @@ -88,4 +109,11 @@ void checkasm_check_flacdsp(void) } report("decorrelate"); + +if (check_func(h.lpc16, "flac_lpc_16")) +check_lpc(); +if (check_func(h.lpc32, "flac_lpc_32")) +check_lpc(); + +report("lpc"); } -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm/flacdsp: add LPC test
Le keskiviikkona 15. marraskuuta 2023, 18.21.34 EET Rémi Denis-Courmont a écrit : > --- > tests/checkasm/flacdsp.c | 28 > 1 file changed, 28 insertions(+) > > diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c > index 51a0e0060b..589a3fe834 100644 > --- a/tests/checkasm/flacdsp.c > +++ b/tests/checkasm/flacdsp.c > @@ -54,6 +54,27 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t > **ref_src, uint8_t **ne bench_new(new_dst, (int32_t **)new_src, channels, > BUF_SIZE / sizeof(int32_t), 8); } > > +static void check_lpc(FLACDSPContext *c) > +{ > +int pred_order = (rnd() % 32) + 1; > +int qlevel = rnd() % 16; > +LOCAL_ALIGNED_16(int32_t, coeffs, [32]); > +LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]); > +LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]); > + > +declare_func(void, int32_t *, const int[32], int, int, int); Hmmph, nevermind, forgot to initialise the coefficients. > + > +for (int i = 0; i < BUF_SIZE; i++) > +dst0[i] = rnd(); > + > +memcpy(dst1, dst0, BUF_SIZE * sizeof (int32_t)); > +call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE); > +call_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); > +if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0) > + fail(); > +bench_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); > +} > + > void checkasm_check_flacdsp(void) > { > LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]); > @@ -88,4 +109,11 @@ void checkasm_check_flacdsp(void) > } > > report("decorrelate"); > + > +if (check_func(h.lpc16, "flac_lpc_16")) > +check_lpc(); > +if (check_func(h.lpc32, "flac_lpc_32")) > +check_lpc(); > + > +report("lpc"); > } -- Rémi Denis-Courmont http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: add LPC test
Le keskiviikkona 15. marraskuuta 2023, 21.14.26 EET James Almer a écrit : > On 11/15/2023 3:02 PM, Rémi Denis-Courmont wrote: > > --- > > > > tests/checkasm/flacdsp.c | 32 > > 1 file changed, 32 insertions(+) > > > > diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c > > index 51a0e0060b..4d69cbe507 100644 > > --- a/tests/checkasm/flacdsp.c > > +++ b/tests/checkasm/flacdsp.c > > @@ -54,6 +54,28 @@ static void check_decorrelate(uint8_t **ref_dst, > > uint8_t **ref_src, uint8_t **ne> > > bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / > > sizeof(int32_t), 8);> > > } > > > > +static void check_lpc(FLACDSPContext *c, int pred_order) > > c is unused. > > > +{ > > +int qlevel = rnd() % 16; > > +LOCAL_ALIGNED_16(int32_t, coeffs, [32]); > > +LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]); > > +LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]); > > + > > +declare_func(void, int32_t *, const int[32], int, int, int); > > + > > +for (int i = 0; i < 32; i++) > > +coeffs[i] = rnd(); > > +for (int i = 0; i < BUF_SIZE; i++) > > +dst0[i] = rnd(); > > + > > +memcpy(dst1, dst0, BUF_SIZE * sizeof (int32_t)); > > +call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE); > > +call_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); > > +if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0) > > + fail(); > > +bench_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); > > Not sure if it matters, but dst1 is already trashed by call_new(). Yeah I know. I could allocate a third buffer. AFAICT, the only parameter that should affect the benchmarks is pred-order (which indeed affects the result on both x86 and RVV). So that the extra code to preserve dst seemed pointless? 
> > > +} > > + > > > > void checkasm_check_flacdsp(void) > > { > > > > LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]); > > > > @@ -72,6 +94,7 @@ void checkasm_check_flacdsp(void) > > > > { AV_SAMPLE_FMT_S16, 16 }, > > { AV_SAMPLE_FMT_S32, 32 }, > > > > }; > > > > +static const signed char pred_orders[] = { 13, 16, 29, 32 }; > > > > FLACDSPContext h; > > int i, j; > > > > @@ -88,4 +111,13 @@ void checkasm_check_flacdsp(void) > > > > } > > > > report("decorrelate"); > > > > + > > +for (int i = 0; i < sizeof (pred_orders); i++) { > > i is already defined. Also, use FF_ARRAY_ELEMS(pred_orders), so it > doesn't depend on char being 1 byte. > > > +if (check_func(h.lpc16, "flac_lpc_16_%d", pred_orders[i])) > > +check_lpc(, pred_orders[i]); > > +if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i])) > > +check_lpc(, pred_orders[i]); > > +} > > + > > +report("lpc"); > > > > } > > LGTM otherwise. > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCHv3] checkasm/flacdsp: add LPC test
--- tests/checkasm/flacdsp.c | 32 1 file changed, 32 insertions(+) diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c index 51a0e0060b..b308237db1 100644 --- a/tests/checkasm/flacdsp.c +++ b/tests/checkasm/flacdsp.c @@ -54,6 +54,28 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **ne bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8); } +static void check_lpc(int pred_order) +{ +int qlevel = rnd() % 16; +LOCAL_ALIGNED_16(int32_t, coeffs, [32]); +LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]); +LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]); + +declare_func(void, int32_t *, const int[32], int, int, int); + +for (int i = 0; i < 32; i++) +coeffs[i] = rnd(); +for (int i = 0; i < BUF_SIZE; i++) +dst0[i] = rnd(); + +memcpy(dst1, dst0, BUF_SIZE * sizeof (int32_t)); +call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE); +call_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); +if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0) + fail(); +bench_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); +} + void checkasm_check_flacdsp(void) { LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]); @@ -72,6 +94,7 @@ void checkasm_check_flacdsp(void) { AV_SAMPLE_FMT_S16, 16 }, { AV_SAMPLE_FMT_S32, 32 }, }; +static const signed char pred_orders[] = { 13, 16, 29, 32 }; FLACDSPContext h; int i, j; @@ -88,4 +111,13 @@ void checkasm_check_flacdsp(void) } report("decorrelate"); + +for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++) +if (check_func(h.lpc16, "flac_lpc_16_%d", pred_orders[i])) +check_lpc(pred_orders[i]); +for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++) +if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i])) +check_lpc(pred_orders[i]); + +report("lpc"); } -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavc/flacdsp: R-V V LPC16 function
In this case, the inner loop computing the scalar product can be reduced to just one multiplication and one sum even with 128-bit vectors. The result is a lot simpler, but also brings more modest performance gains: flac_lpc_16_13_c: 15241.0 flac_lpc_16_13_rvv_i32: 11230.0 flac_lpc_16_16_c: 17884.0 flac_lpc_16_16_rvv_i32: 12125.7 flac_lpc_16_29_c: 27847.7 flac_lpc_16_29_rvv_i32: 10494.0 flac_lpc_16_32_c: 30051.5 flac_lpc_16_32_rvv_i32: 10355.0 --- libavcodec/riscv/flacdsp_init.c | 17 - libavcodec/riscv/flacdsp_rvv.S | 23 +++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c index f60f98ea31..6cfb50ead8 100644 --- a/libavcodec/riscv/flacdsp_init.c +++ b/libavcodec/riscv/flacdsp_init.c @@ -25,6 +25,8 @@ #include "libavutil/riscv/cpu.h" #include "libavcodec/flacdsp.h" +void ff_flac_lpc16_rvv(int32_t *decoded, const int coeffs[32], + int pred_order, int qlevel, int len); void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32], int pred_order, int qlevel, int len); void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32], @@ -61,16 +63,20 @@ void ff_flac_decorrelate_ms_32_rvv(uint8_t **out, int32_t **in, av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, int channels) { -#if HAVE_RVV && (__riscv_xlen >= 64) +#if HAVE_RVV int flags = av_get_cpu_flags(); if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) { int vlenb = ff_get_rv_vlenb(); -if (vlenb == 16) -c->lpc32 = ff_flac_lpc32_rvv; -else if (vlenb > 16) -c->lpc32 = ff_flac_lpc32_rvv_simple; +if (vlenb >= 16) { +c->lpc16 = ff_flac_lpc16_rvv; +# if (__riscv_xlen >= 64) +if (vlenb > 16) +c->lpc32 = ff_flac_lpc32_rvv_simple; +else +c->lpc32 = ff_flac_lpc32_rvv; +} switch (fmt) { case AV_SAMPLE_FMT_S16: @@ -111,6 +117,7 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv; c->decorrelate[3] = 
ff_flac_decorrelate_ms_32_rvv; break; +# endif } } #endif diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S index b1724f5500..2a0b50f7a9 100644 --- a/libavcodec/riscv/flacdsp_rvv.S +++ b/libavcodec/riscv/flacdsp_rvv.S @@ -20,6 +20,29 @@ #include "libavutil/riscv/asm.S" +func ff_flac_lpc16_rvv, zve32x +vsetvli zero, a2, e32, m8, ta, ma +vle32.v v8, (a1) +sub a4, a4, a2 +vle32.v v16, (a0) +sh2add a0, a2, a0 +vmv.s.x v0, zero +1: +vmul.vv v24, v8, v16 +lw t0, (a0) +vredsum.vs v24, v24, v0 +addia4, a4, -1 +vmv.x.s t1, v24 +sra t1, t1, a3 +add t0, t0, t1 +vslide1down.vx v16, v16, t0 +sw t0, (a0) +addia0, a0, 4 +bneza4, 1b + +ret +endfunc + #if (__riscv_xlen == 64) func ff_flac_lpc32_rvv, zve32x addit2, a2, -16 -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] riscv: fix builds without Zbb support
--- libavutil/riscv/asm.S | 5 + 1 file changed, 5 insertions(+) diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S index 6ca74f263a..0a9e2e0d3f 100644 --- a/libavutil/riscv/asm.S +++ b/libavutil/riscv/asm.S @@ -92,6 +92,11 @@ shnadd 3, \rd, \rs1, \rs2 .endm #endif +#if !defined (__riscv_zbb) +.macro min rd, rs1, rs2 +.insn r OP, 4, 5, \rd, \rs1, \rs2 +.endm +#endif /* Convenience macro to load a Vector type (vtype) as immediate */ .macro lvtypei rd, e, m=m1, tp=tu, mp=mu -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: add LPC test
--- tests/checkasm/flacdsp.c | 32 1 file changed, 32 insertions(+) diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c index 51a0e0060b..4d69cbe507 100644 --- a/tests/checkasm/flacdsp.c +++ b/tests/checkasm/flacdsp.c @@ -54,6 +54,28 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **ne bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8); } +static void check_lpc(FLACDSPContext *c, int pred_order) +{ +int qlevel = rnd() % 16; +LOCAL_ALIGNED_16(int32_t, coeffs, [32]); +LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]); +LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]); + +declare_func(void, int32_t *, const int[32], int, int, int); + +for (int i = 0; i < 32; i++) +coeffs[i] = rnd(); +for (int i = 0; i < BUF_SIZE; i++) +dst0[i] = rnd(); + +memcpy(dst1, dst0, BUF_SIZE * sizeof (int32_t)); +call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE); +call_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); +if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0) + fail(); +bench_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE); +} + void checkasm_check_flacdsp(void) { LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]); @@ -72,6 +94,7 @@ void checkasm_check_flacdsp(void) { AV_SAMPLE_FMT_S16, 16 }, { AV_SAMPLE_FMT_S32, 32 }, }; +static const signed char pred_orders[] = { 13, 16, 29, 32 }; FLACDSPContext h; int i, j; @@ -88,4 +111,13 @@ void checkasm_check_flacdsp(void) } report("decorrelate"); + +for (int i = 0; i < sizeof (pred_orders); i++) { +if (check_func(h.lpc16, "flac_lpc_16_%d", pred_orders[i])) +check_lpc(, pred_orders[i]); +if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i])) +check_lpc(, pred_orders[i]); +} + +report("lpc"); } -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: R-V V LPC32
The entire set of 32 coefficients and corresponding past 32 samples can fit in a single vector (with LMUL=8) exactly, but... since widening doubles the needed vector sizes, we still end up too short with 128-bit vectors. This adds a very simple version for future 256+-bit hardware, and for pred_orders values up to 16, and a bit more involved loop for 128-bit hardware with pred_orders between 17 and 32. With 128-bit hardware, the benchmarks look like this: flac_lpc_32_13_c: 30152.0 flac_lpc_32_13_rvv_i32: 10244.7 flac_lpc_32_16_c: 37314.2 flac_lpc_32_16_rvv_i32: 10126.2 flac_lpc_32_29_c: 61910.0 flac_lpc_32_29_rvv_i32: 14495.2 flac_lpc_32_32_c: 68204.0 flac_lpc_32_32_rvv_i32: 13273.7 --- libavcodec/riscv/flacdsp_init.c | 12 +++ libavcodec/riscv/flacdsp_rvv.S | 57 + 2 files changed, 69 insertions(+) diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c index 73d431cb77..f60f98ea31 100644 --- a/libavcodec/riscv/flacdsp_init.c +++ b/libavcodec/riscv/flacdsp_init.c @@ -22,8 +22,13 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" +#include "libavutil/riscv/cpu.h" #include "libavcodec/flacdsp.h" +void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32], + int pred_order, int qlevel, int len); +void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32], + int pred_order, int qlevel, int len); void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in, int channels, int len, int shift); void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in, @@ -60,6 +65,13 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, int flags = av_get_cpu_flags(); if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) { +int vlenb = ff_get_rv_vlenb(); + +if (vlenb == 16) +c->lpc32 = ff_flac_lpc32_rvv; +else if (vlenb > 16) +c->lpc32 = ff_flac_lpc32_rvv_simple; + switch (fmt) { case AV_SAMPLE_FMT_S16: switch (channels) { diff --git a/libavcodec/riscv/flacdsp_rvv.S 
b/libavcodec/riscv/flacdsp_rvv.S index 12b456f7da..b1724f5500 100644 --- a/libavcodec/riscv/flacdsp_rvv.S +++ b/libavcodec/riscv/flacdsp_rvv.S @@ -21,6 +21,63 @@ #include "libavutil/riscv/asm.S" #if (__riscv_xlen == 64) +func ff_flac_lpc32_rvv, zve32x +addit2, a2, -16 +ble t2, zero, ff_flac_lpc32_rvv_simple +vsetivli zero, 1, e64, m1, ta, ma +vmv.s.x v0, zero +vsetvli zero, a2, e32, m8, ta, ma +vle32.v v8, (a1) +sub a4, a4, a2 +vle32.v v16, (a0) +sh2add a0, a2, a0 +1: +vsetvli zero, a2, e32, m4, ta, ma +vwmul.vv v24, v8, v16 +vsetvli zero, t2, e32, m4, tu, ma +vwmacc.vv v24, v12, v20 +vsetvli zero, a2, e64, m8, ta, ma +vredsum.vs v24, v24, v0 +lw t0, (a0) +addia4, a4, -1 +vmv.x.s t1, v24 +vsetvli zero, a2, e32, m8, ta, ma +sra t1, t1, a3 +add t0, t0, t1 +vslide1down.vx v16, v16, t0 +sw t0, (a0) +addia0, a0, 4 +bneza4, 1b + +ret +endfunc + +func ff_flac_lpc32_rvv_simple, zve32x +vsetivli zero, 1, e64, m1, ta, ma +vmv.s.x v0, zero +vsetvli zero, a2, e32, m4, ta, ma +vle32.v v8, (a1) +sub a4, a4, a2 +vle32.v v16, (a0) +sh2add a0, a2, a0 +1: +vwmul.vv v24, v8, v16 +vsetvli zero, zero, e64, m8, ta, ma +vredsum.vs v24, v24, v0 +lw t0, (a0) +addia4, a4, -1 +vmv.x.s t1, v24 +vsetvli zero, zero, e32, m4, ta, ma +sra t1, t1, a3 +add t0, t0, t1 +vslide1down.vx v16, v16, t0 +sw t0, (a0) +addia0, a0, 4 +bneza4, 1b + +ret +endfunc + func ff_flac_decorrelate_indep2_16_rvv, zve32x ld a0, (a0) ld a2, 8(a1) -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] riscv: indent code
This reindents code to prepare for the next changeset. No functional changes. --- libavutil/riscv/cpu.c | 28 +++- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c index 460d3e9f91..984293aef0 100644 --- a/libavutil/riscv/cpu.c +++ b/libavutil/riscv/cpu.c @@ -32,21 +32,23 @@ int ff_get_cpu_flags_riscv(void) { int ret = 0; #if HAVE_GETAUXVAL -const unsigned long hwcap = getauxval(AT_HWCAP); +{ +const unsigned long hwcap = getauxval(AT_HWCAP); -if (hwcap & HWCAP_RV('I')) -ret |= AV_CPU_FLAG_RVI; -if (hwcap & HWCAP_RV('F')) -ret |= AV_CPU_FLAG_RVF; -if (hwcap & HWCAP_RV('D')) -ret |= AV_CPU_FLAG_RVD; -if (hwcap & HWCAP_RV('B')) -ret |= AV_CPU_FLAG_RVB_ADDR | AV_CPU_FLAG_RVB_BASIC; +if (hwcap & HWCAP_RV('I')) +ret |= AV_CPU_FLAG_RVI; +if (hwcap & HWCAP_RV('F')) +ret |= AV_CPU_FLAG_RVF; +if (hwcap & HWCAP_RV('D')) +ret |= AV_CPU_FLAG_RVD; +if (hwcap & HWCAP_RV('B')) +ret |= AV_CPU_FLAG_RVB_ADDR | AV_CPU_FLAG_RVB_BASIC; -/* The V extension implies all Zve* functional subsets */ -if (hwcap & HWCAP_RV('V')) -ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64 - | AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64; +/* The V extension implies all Zve* functional subsets */ +if (hwcap & HWCAP_RV('V')) + ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64 + | AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64; +} #endif #ifdef __riscv_i -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] riscv: add hwprobe() for CPU detection
This adds the Linux-specific system call to detect CPU features. Unlike the auxiliary vector, this supports extensions other than single-lettered ones. (The API is kind of a mess though.) At the moment, we need this to detect Zba and Zbb at run-time. --- configure | 5 + libavutil/riscv/cpu.c | 43 +++ 2 files changed, 48 insertions(+) diff --git a/configure b/configure index 6be849fc08..a6039c1476 100755 --- a/configure +++ b/configure @@ -2202,6 +2202,7 @@ HAVE_LIST_PUB=" HEADERS_LIST=" arpa_inet_h +asm_hwprobe_h asm_types_h cdio_paranoia_h cdio_paranoia_paranoia_h @@ -2227,6 +2228,7 @@ HEADERS_LIST=" opencv2_core_core_c_h OpenGL_gl3_h poll_h +sys_hwprobe_h sys_param_h sys_resource_h sys_select_h @@ -5410,6 +5412,9 @@ elif enabled ppc; then elif enabled riscv; then +check_headers asm/hwprobe.h +check_headers sys/hwprobe.h + if test_cpp_condition stddef.h "__riscv_zbb"; then enable fast_clz fi diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c index 984293aef0..23e49767c2 100644 --- a/libavutil/riscv/cpu.c +++ b/libavutil/riscv/cpu.c @@ -18,8 +18,10 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#define _GNU_SOURCE #include "libavutil/cpu.h" #include "libavutil/cpu_internal.h" +#include "libavutil/macros.h" #include "libavutil/log.h" #include "config.h" @@ -27,10 +29,51 @@ #include #define HWCAP_RV(letter) (1ul << ((letter) - 'A')) #endif +#if defined (HAVE_SYS_HWPROBE_H) +#include +#elif defined (HAVE_ASM_HWPROBE_H) +#include +#include +#include + +static int __riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags) +{ +return syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_count, cpus, + flags); +} +#endif int ff_get_cpu_flags_riscv(void) { int ret = 0; +#if defined (HAVE_SYS_HWPROBE_H) || defined (HAVE_ASM_HWPROBE_H) +struct riscv_hwprobe pairs[] = { +{ RISCV_HWPROBE_KEY_BASE_BEHAVIOR, 0 }, +{ RISCV_HWPROBE_KEY_IMA_EXT_0, 0 }, +}; + +if 
(__riscv_hwprobe(pairs, FF_ARRAY_ELEMS(pairs), 0, NULL, 0) == 0) { +if (pairs[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) +ret |= AV_CPU_FLAG_RVI; +if (pairs[1].value & RISCV_HWPROBE_IMA_FD) +ret |= AV_CPU_FLAG_RVF | AV_CPU_FLAG_RVD; +# ifdef RISCV_HWPROBE_IMA_V +if (pairs[1].value & RISCV_HWPROBE_IMA_V) +ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64 + | AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64; +# endif +# ifdef RISCV_HWPROBE_EXT_ZBA +if (pairs[1].value & RISCV_HWPROBE_EXT_ZBA) +ret |= AV_CPU_FLAG_RVB_ADDR; +# endif +# ifdef RISCV_HWPROBE_EXT_ZBB +if (pairs[1].value & RISCV_HWPROBE_EXT_ZBB) +ret |= AV_CPU_FLAG_RVB_BASIC; +# endif +} else +#endif #if HAVE_GETAUXVAL { const unsigned long hwcap = getauxval(AT_HWCAP); -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavu/fixed_dsp: R-V V fmul_window_scaled
vector_fmul_window_scaled_fixed_c: 4393.7 vector_fmul_window_scaled_fixed_rvv_i64: 1642.7 --- libavutil/riscv/fixed_dsp_init.c | 7 - libavutil/riscv/fixed_dsp_rvv.S | 48 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/libavutil/riscv/fixed_dsp_init.c b/libavutil/riscv/fixed_dsp_init.c index 6469b45374..cd318af486 100644 --- a/libavutil/riscv/fixed_dsp_init.c +++ b/libavutil/riscv/fixed_dsp_init.c @@ -25,6 +25,9 @@ #include "libavutil/cpu.h" #include "libavutil/fixed_dsp.h" +void ff_vector_fmul_window_scaled_rvv(int16_t *dst, const int32_t *src0, + const int32_t *src1, const int32_t *win, + int len, uint8_t bits); void ff_vector_fmul_window_fixed_rvv(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len); @@ -43,8 +46,10 @@ av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp) int flags = av_get_cpu_flags(); if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) { -if (flags & AV_CPU_FLAG_RVV_I64) +if (flags & AV_CPU_FLAG_RVV_I64) { +fdsp->vector_fmul_window_scaled = ff_vector_fmul_window_scaled_rvv; fdsp->vector_fmul_window = ff_vector_fmul_window_fixed_rvv; +} fdsp->vector_fmul = ff_vector_fmul_fixed_rvv; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_fixed_rvv; diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S index 68de6d7e1b..6bac5813b8 100644 --- a/libavutil/riscv/fixed_dsp_rvv.S +++ b/libavutil/riscv/fixed_dsp_rvv.S @@ -20,6 +20,54 @@ #include "asm.S" +func ff_vector_fmul_window_scaled_rvv, zve64x +csrwi vxrm, 0 +vsetvli t0, zero, e16, m1, ta, ma +sh2add a2, a4, a2 +vid.v v0 +sh3add t3, a4, a3 +vadd.vi v0, v0, 1 +sh2add t0, a4, a0 +1: +vsetvli t2, a4, e16, m1, ta, ma +sllit4, t2, 2 +sllit1, t2, 1 +vrsub.vx v2, v0, t2 +sub t3, t3, t4 +vsetvli zero, zero, e32, m2, ta, ma +sub a2, a2, t4 +vle32.v v8, (t3) +sub t0, t0, t1 +vle32.v v4, (a2) +sub a4, a4, t2 +vrgatherei16.vv v28, v8, v2 +vle32.v v16, (a1) +add a1, a1, t4 +vrgatherei16.vv v20, v4, v2 +vle32.v v24, (a3) 
+add a3, a3, t4 +vwmul.vv v12, v16, v28 +vwmul.vv v8, v16, v24 +// vwnmsac.vv does _not_ exist so multiply & subtract separately +vwmul.vv v4, v20, v24 +vwmacc.vv v8, v20, v28 +vsetvli zero, zero, e64, m4, ta, ma +vsub.vv v12, v12, v4 +vsetvli zero, zero, e32, m2, ta, ma +vnclip.wi v16, v8, 31 +vnclip.wi v20, v12, 31 +vsetvli zero, zero, e16, m1, ta, ma +vnclip.wx v8, v16, a5 +vnclip.wx v12, v20, a5 +vrgatherei16.vv v16, v8, v2 +vse16.v v12, (a0) +add a0, a0, t1 +vse16.v v16, (t0) +bneza4, 1b + +ret +endfunc + func ff_vector_fmul_window_fixed_rvv, zve64x csrwi vxrm, 0 vsetvli t0, zero, e16, m1, ta, ma -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavc/g722dsp: optimise R-V V apply_qmf
This stores the constant coefficients deinterleaved, so that they can be loaded directly with NF=0. Unfortunately, we cannot optimise loading the input, due to insufficient memory alignment (not 32-bit). Before: g722_apply_qmf_c: 82.5 g722_apply_qmf_rvv_i32: 78.2 After: g722_apply_qmf_c: 82.5 g722_apply_qmf_rvv_i32: 65.2 --- libavcodec/riscv/g722dsp_rvv.S | 24 +--- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/libavcodec/riscv/g722dsp_rvv.S b/libavcodec/riscv/g722dsp_rvv.S index 350be8dc1f..981d5cecd8 100644 --- a/libavcodec/riscv/g722dsp_rvv.S +++ b/libavcodec/riscv/g722dsp_rvv.S @@ -24,7 +24,9 @@ func ff_g722_apply_qmf_rvv, zve32x lla t0, qmf_coeffs vsetivlizero, 12, e16, m2, ta, ma vlseg2e16.v v28, (a0) -vlseg2e16.v v24, (t0) +addit1, t0, 12 * 2 +vle16.v v24, (t0) +vle16.v v26, (t1) vwmul.vvv16, v28, v24 vwmul.vvv20, v30, v26 vsetivlizero, 12, e32, m4, ta, ma @@ -41,26 +43,26 @@ endfunc const qmf_coeffs, align=2 .short 3 .short -11 -.short -11 -.short53 .short12 -.short -156 .short32 -.short 362 .short -210 -.short -805 .short 951 .short 3876 -.short 3876 -.short 951 .short -805 -.short -210 .short 362 -.short32 .short -156 -.short12 .short53 .short -11 .short -11 +.short53 +.short -156 +.short 362 +.short -805 +.short 3876 +.short 951 +.short -210 +.short32 +.short12 +.short -11 .short 3 endconst -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] lavu/fixed_dsp: optimise R-V V fmul_reverse
Gathers are (unsurprisingly) a notable exception to the rule that R-V V gets faster with larger group multipliers. So roll the function to speed it up. Before: vector_fmul_reverse_fixed_c: 2840.7 vector_fmul_reverse_fixed_rvv_i32: 2430.2 After: vector_fmul_reverse_fixed_c: 2841.0 vector_fmul_reverse_fixed_rvv_i32: 962.2 It might be possible to further optimise the function by moving the reverse-subtract out of the loop and adding ad-hoc tail handling. --- libavutil/riscv/fixed_dsp_rvv.S | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S index 2bece88685..46bb591352 100644 --- a/libavutil/riscv/fixed_dsp_rvv.S +++ b/libavutil/riscv/fixed_dsp_rvv.S @@ -127,16 +127,17 @@ endfunc func ff_vector_fmul_reverse_fixed_rvv, zve32x csrwi vxrm, 0 -vsetvli t0, zero, e16, m4, ta, ma +// e16/m4 and e32/m8 are possible but slow the gathers down. +vsetvli t0, zero, e16, m1, ta, ma sh2add a2, a3, a2 vid.v v0 vadd.vi v0, v0, 1 1: -vsetvli t0, a3, e16, m4, ta, ma +vsetvli t0, a3, e16, m1, ta, ma sllit1, t0, 2 vrsub.vx v4, v0, t0 // v4[i] = [VL-1, VL-2... 1, 0] sub a2, a2, t1 -vsetvli zero, zero, e32, m8, ta, ma +vsetvli zero, zero, e32, m2, ta, ma vle32.v v8, (a2) sub a3, a3, t0 vle32.v v16, (a1) -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] lavu/float_dsp: optimise R-V V fmul_reverse & fmul_window
Roll the loop to avoid slow gathers. Before: vector_fmul_reverse_c: 1561.7 vector_fmul_reverse_rvv_f32: 2410.2 vector_fmul_window_c:2068.2 vector_fmul_window_rvv_f32: 1879.5 After: vector_fmul_reverse_c: 1561.7 vector_fmul_reverse_rvv_f32: 916.2 vector_fmul_window_c:2068.2 vector_fmul_window_rvv_f32: 1202.5 --- libavutil/riscv/float_dsp_rvv.S | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index a2f9488249..ce5b6823d4 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -75,18 +75,19 @@ endfunc func ff_vector_fmul_window_rvv, zve32f // a0: dst, a1: src0, a2: src1, a3: window, a4: length -vsetvlit0, zero, e16, m2, ta, ma +// e16/m2 and e32/m4 are possible but slower due to gather. +vsetvlit0, zero, e16, m1, ta, ma sh2add a2, a4, a2 vid.v v0 sh3add t3, a4, a3 vadd.viv0, v0, 1 sh3add t0, a4, a0 1: -vsetvlit2, a4, e16, m2, ta, ma +vsetvlit2, a4, e16, m1, ta, ma slli t4, t2, 2 vrsub.vx v2, v0, t2 subt3, t3, t4 -vsetvlizero, zero, e32, m4, ta, ma +vsetvlizero, zero, e32, m2, ta, ma suba2, a2, t4 vle32.vv8, (t3) subt0, t0, t4 @@ -133,6 +134,7 @@ endfunc // TODO factor vrsub, separate last iteration? // (a0) = (a1) * reverse(a2) [0..a3-1] func ff_vector_fmul_reverse_rvv, zve32f +// e16/m4 and e32/m8 are possible but slower due to gather. vsetvli t0, zero, e16, m4, ta, ma sh2add a2, a3, a2 vid.vv0 -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavc/llvidencdsp: add R-V V diff_bytes
diff_bytes_c: 163.0 diff_bytes_rvv_i32: 52.7 --- libavcodec/lossless_videoencdsp.c | 4 ++- libavcodec/lossless_videoencdsp.h | 1 + libavcodec/riscv/Makefile | 2 ++ libavcodec/riscv/llvidencdsp_init.c | 39 + libavcodec/riscv/llvidencdsp_rvv.S | 37 +++ 5 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 libavcodec/riscv/llvidencdsp_init.c create mode 100644 libavcodec/riscv/llvidencdsp_rvv.S diff --git a/libavcodec/lossless_videoencdsp.c b/libavcodec/lossless_videoencdsp.c index b4130ebc7b..e2dc99e201 100644 --- a/libavcodec/lossless_videoencdsp.c +++ b/libavcodec/lossless_videoencdsp.c @@ -94,7 +94,9 @@ av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c) c->sub_median_pred = sub_median_pred_c; c->sub_left_predict = sub_left_predict_c; -#if ARCH_X86 +#if ARCH_RISCV +ff_llvidencdsp_init_riscv(c); +#elif ARCH_X86 ff_llvidencdsp_init_x86(c); #endif } diff --git a/libavcodec/lossless_videoencdsp.h b/libavcodec/lossless_videoencdsp.h index f2c2878485..07fff584af 100644 --- a/libavcodec/lossless_videoencdsp.h +++ b/libavcodec/lossless_videoencdsp.h @@ -40,6 +40,7 @@ typedef struct LLVidEncDSPContext { } LLVidEncDSPContext; void ff_llvidencdsp_init(LLVidEncDSPContext *c); +void ff_llvidencdsp_init_riscv(LLVidEncDSPContext *c); void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c); #endif /* AVCODEC_LOSSLESS_VIDEOENCDSP_H */ diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index 8f2a519827..2d0e6c19c8 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -30,6 +30,8 @@ OBJS-$(CONFIG_LLAUDDSP) += riscv/llauddsp_init.o RVV-OBJS-$(CONFIG_LLAUDDSP) += riscv/llauddsp_rvv.o OBJS-$(CONFIG_LLVIDDSP) += riscv/llviddsp_init.o RVV-OBJS-$(CONFIG_LLVIDDSP) += riscv/llviddsp_rvv.o +OBJS-$(CONFIG_LLVIDENCDSP) += riscv/llvidencdsp_init.o +RVV-OBJS-$(CONFIG_LLVIDENCDSP) += riscv/llvidencdsp_rvv.o OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_init.o RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o OBJS-$(CONFIG_PIXBLOCKDSP) += 
riscv/pixblockdsp_init.o \ diff --git a/libavcodec/riscv/llvidencdsp_init.c b/libavcodec/riscv/llvidencdsp_init.c new file mode 100644 index 00..e35406dc41 --- /dev/null +++ b/libavcodec/riscv/llvidencdsp_init.c @@ -0,0 +1,39 @@ +/* + * Copyright © 2023 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/lossless_videoencdsp.h" + +void ff_llvidenc_diff_bytes_rvv(uint8_t *dst, const uint8_t *src1, +const uint8_t *src2, intptr_t w); + +av_cold void ff_llvidencdsp_init_riscv(LLVidEncDSPContext *c) +{ +#if HAVE_RVV +int flags = av_get_cpu_flags(); + +if (flags & AV_CPU_FLAG_RVV_I32) { +c->diff_bytes = ff_llvidenc_diff_bytes_rvv; +} +#endif +} diff --git a/libavcodec/riscv/llvidencdsp_rvv.S b/libavcodec/riscv/llvidencdsp_rvv.S new file mode 100644 index 00..0342165127 --- /dev/null +++ b/libavcodec/riscv/llvidencdsp_rvv.S @@ -0,0 +1,37 @@ +/* + * Copyright © 2023 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/riscv/asm.S" + +func ff_llvidenc_diff_bytes_rvv, zve32x +1: +vsetvli t0, a3, e8, m8, ta, ma +vle8.v v0, (a1) +sub a3, a3, t0 +vle8.v v8, (a2) +add
[FFmpeg-devel] [PATCH] lavc/aacpsdsp: use LMUL=2 and amortise strides
The input is laid out in 16 segments, of which 13 actually need to be loaded. There are no really efficient ways to deal with this: 1) If we load 8 segments with unit stride, then narrow to 16 segments with right shifts, we can only use one half-size vector per segment, or just 2 elements per vector (EMUL=1/2). This ends up unsurprisingly about as fast as the C code. 2) The current approach is to load with strides. We keep that approach, but improve it using three 4-segmented loads instead of 12 single-segment loads. This divides the number of distinct loaded addresses by 4. 3) A potential third approach would be to avoid segmentation altogether and splat the scalar coefficient into vectors. Then we can use a unit-stride and maximum EMUL. But the downside then is that we have to multiply the 3 (of 16) unused segments with zero as part of the multiply-accumulate operations. In addition, we also reuse vectors mid-loop so as to increase the EMUL from 1 to 2, which also improves performance a little bit. Overall the gains are quite small with the device under test, as it does not deal with segmented loads very well. But at least the code is tidier, and should enjoy bigger speed-ups on better hardware implementations. 
Before: ps_hybrid_analysis_c: 1819.2 ps_hybrid_analysis_rvv_f32: 1037.0 (before) ps_hybrid_analysis_rvv_f32: 990.0 (after) --- libavcodec/riscv/aacpsdsp_rvv.S | 61 +++-- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S index 1dc426e01c..f46b35fe91 100644 --- a/libavcodec/riscv/aacpsdsp_rvv.S +++ b/libavcodec/riscv/aacpsdsp_rvv.S @@ -85,63 +85,42 @@ NOHWD fsw fs\n, (4 * \n)(sp) flw fs4, (4 * ((6 * 2) + 0))(a1) flw fs5, (4 * ((6 * 2) + 1))(a1) -adda2, a2, 6 * 2 * 4 // point to filter[i][6][0] +add t2, a2, 6 * 2 * 4 // point to filter[i][6][0] li t4, 8 * 2 * 4 // filter byte stride slli a3, a3, 3 // output byte stride 1: .macro filter, vs0, vs1, fo0, fo1, fo2, fo3 vfmacc.vf v8, \fo0, \vs0 -vfmacc.vf v9, \fo2, \vs0 +vfmacc.vf v10, \fo2, \vs0 vfnmsac.vf v8, \fo1, \vs1 -vfmacc.vf v9, \fo3, \vs1 +vfmacc.vf v10, \fo3, \vs1 .endm -vsetvlit0, a4, e32, m1, ta, ma +vsetvlit0, a4, e32, m2, ta, ma /* * The filter (a2) has 16 segments, of which 13 need to be extracted. * R-V V supports only up to 8 segments, so unrolling is unavoidable. 
*/ -addi t1, a2, -48 -vlse32.v v22, (a2), t4 -addi t2, a2, -44 -vlse32.v v16, (t1), t4 -addi t1, a2, -40 -vfmul.vf v8, v22, fs4 -vlse32.v v24, (t2), t4 -addi t2, a2, -36 -vfmul.vf v9, v22, fs5 -vlse32.v v17, (t1), t4 -addi t1, a2, -32 -vlse32.v v25, (t2), t4 -addi t2, a2, -28 -filter v16, v24, ft0, ft1, ft2, ft3 -vlse32.v v18, (t1), t4 -addi t1, a2, -24 -vlse32.v v26, (t2), t4 -addi t2, a2, -20 -filter v17, v25, ft4, ft5, ft6, ft7 -vlse32.v v19, (t1), t4 -addi t1, a2, -16 -vlse32.v v27, (t2), t4 -addi t2, a2, -12 -filter v18, v26, ft8, ft9, ft10, ft11 -vlse32.v v20, (t1), t4 -addi t1, a2, -8 vlse32.v v28, (t2), t4 -addi t2, a2, -4 -filter v19, v27, fa0, fa1, fa2, fa3 -vlse32.v v21, (t1), t4 +addi t1, a2, 16 +vfmul.vf v8, v28, fs4 +vlsseg4e32.v v16, (a2), t4 +vfmul.vf v10, v28, fs5 +filter v16, v18, ft0, ft1, ft2, ft3 +vlsseg4e32.v v24, (t1), t4 +filter v20, v22, ft4, ft5, ft6, ft7 +addi t1, a2, 32 +filter v24, v26, ft8, ft9, ft10, ft11 +vlsseg4e32.v v16, (t1), t4 suba4, a4, t0 -vlse32.v v29, (t2), t4 +filter v28, v30, fa0, fa1, fa2, fa3 slli t1, t0, 3 + 1 + 2 // ctz(8 * 2 * 4) -adda2, a2, t1 -filter v20, v28, fa4, fa5, fa6, fa7 -filter v21, v29, fs0, fs1, fs2, fs3 - -addt2, a0, 4 -vsse32.v v8, (a0), a3 +filter v16, v18, fa4, fa5, fa6, fa7 mult0, t0, a3 -vsse32.v v9, (t2), a3 +filter v20, v22, fs0, fs1, fs2, fs3 +adda2, a2, t1 +addt2, t2, t1 +vssseg2e32.v v8, (a0), a3 adda0, a0, t0 bnez a4, 1b -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email
[FFmpeg-devel] [PATCH] riscv: set fast half-precision conversion
This is only supported at compilation time. If Zfhmin is supported, then conversions are fast, which is what the flag is used for. At this time, run-time detection is not possible, as it is not supported by Linux. But even if it were, the current FFmpeg approach seems unable to deal with it (same problem as on x86, really). --- configure | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure b/configure index d6e4a1e7df..6be849fc08 100755 --- a/configure +++ b/configure @@ -5413,6 +5413,9 @@ elif enabled riscv; then if test_cpp_condition stddef.h "__riscv_zbb"; then enable fast_clz fi +if test_cpp_condition stddef.h "__riscv_zfhmin"; then +enable fast_float16 +fi elif enabled sparc; then -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] checkasm/riscv: use t0 as alternative link register
The unprivileged ISA specification says that either RA or T0 should be used for this purpose. Other registers may confuse the return address prediction stack. --- tests/checkasm/riscv/checkasm.S | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index 73ca85f344..b902ab1043 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -123,10 +123,10 @@ func checkasm_get_wrapper, v /* Call the tested function */ la.tls.ie t0, checked_func -add t0, tp, t0 -ld t1, (t0) -sd zero, (t0) -jalrt1 +add t1, tp, t0 +ld t0, (t1) +sd zero, (t1) +jalrt0 /* Check special register values */ la.tls.ie t0, saved_regs -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] checkasm: add helper to report a fatal signal
--- tests/checkasm/checkasm.c | 15 +++ tests/checkasm/checkasm.h | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 708119e7c6..c67cf58922 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -23,10 +23,8 @@ #include "config.h" #include "config_components.h" -#if CONFIG_LINUX_PERF -# ifndef _GNU_SOURCE -# define _GNU_SOURCE // for syscall (performance monitoring API) -# endif +#ifndef _GNU_SOURCE +# define _GNU_SOURCE // for syscall (performance monitoring API), strsignal() #endif #include @@ -863,6 +861,15 @@ void checkasm_fail_func(const char *msg, ...) } } +void checkasm_fail_signal(int signum) +{ +#ifdef __GLIBC__ +checkasm_fail_func("fatal signal %d: %s", signum, strsignal(signum)); +#else +checkasm_fail_func("fatal signal %d", signum); +#endif +} + /* Get the benchmark context of the current function */ CheckasmPerf *checkasm_get_perf_context(void) { diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index cfea868ff1..8a1df43ab6 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -102,6 +102,7 @@ struct CheckasmPerf; void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3); int checkasm_bench_func(void); void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); +void checkasm_fail_signal(int signum); struct CheckasmPerf *checkasm_get_perf_context(void); void checkasm_report(const char *name, ...) av_printf_format(1, 2); -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] checkasm/riscv: report an error upon SIGILL
Terminating the whole checkasm process is not very helpful. This will report if an illegal instruction occurs while executing a tested function. This is a common occurrence whilst developing RISC-V assembler, due to the compatibility check between vector configuration and instruction being done at run-time. --- tests/checkasm/checkasm.c | 9 + tests/checkasm/checkasm.h | 11 +-- tests/checkasm/riscv/checkasm.S | 12 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index c67cf58922..a15e801caf 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -27,6 +27,7 @@ # define _GNU_SOURCE // for syscall (performance monitoring API), strsignal() #endif +#include #include #include #include @@ -734,6 +735,14 @@ int main(int argc, char *argv[]) if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) checkasm_checked_call = checkasm_checked_call_vfp; #endif +#if ARCH_RISCV +struct sigaction act = { +.sa_handler = checkasm_handle_signal, +.sa_flags = 0, +}; + +sigaction(SIGILL, , NULL); +#endif if (!tests[0].func || !cpus[0].flag) { fprintf(stderr, "checkasm: no tests to perform\n"); diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 8a1df43ab6..61734a8dbb 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -23,6 +23,7 @@ #ifndef TESTS_CHECKASM_CHECKASM_H #define TESTS_CHECKASM_CHECKASM_H +#include #include #include "config.h" @@ -211,14 +212,20 @@ void checkasm_checked_call(void *func, ...); checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\ 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0)) #elif ARCH_RISCV -void checkasm_set_function(void *); +void checkasm_set_function(void *, sigjmp_buf); void *checkasm_get_wrapper(void); +void checkasm_handle_signal(int signum); #if (__riscv_xlen == 64) && defined (__riscv_d) #define declare_new(ret, ...) 
\ +int checked_call_signum = 0; \ +sigjmp_buf checked_call_jb; \ ret (*checked_call)(__VA_ARGS__) = checkasm_get_wrapper(); #define call_new(...) \ -(checkasm_set_function(func_new), checked_call(__VA_ARGS__)) +(checkasm_set_function(func_new, checked_call_jb), \ + (checked_call_signum = sigsetjmp(checked_call_jb, 1)) == 0 \ +? checked_call(__VA_ARGS__) \ + : (checkasm_fail_signal(checked_call_signum), 0)) #endif #else #define declare_new(ret, ...) diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index b902ab1043..30d3f3d8bb 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -41,6 +41,7 @@ endconst checked_func: .quad 0 +.quad 0 saved_regs: /* Space to spill RA, SP, GP, TP, S0-S11 and FS0-FS11 */ @@ -52,6 +53,7 @@ func checkasm_set_function la.tls.ie t0, checked_func add t0, tp, t0 sd a0, (t0) +sd a1, 8(t0) ret endfunc @@ -175,4 +177,14 @@ func checkasm_get_wrapper, v callcheckasm_fail_func j 4b endfunc + +func checkasm_handle_signal +mv a1, a0 +la.tls.ie a0, checked_func +add a0, tp, a0 +ld a0, 8(a0) +beqza0, 8f +tailsiglongjmp +8: tailabort /* No jump buffer to go to */ +endfunc #endif -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] checkasm/riscv: use t0 as alternative link register
Le torstaina 16. marraskuuta 2023, 18.04.51 EET Rémi Denis-Courmont a écrit : > The unprivileged ISA specification says that either RA or T0 should be > used for this purpose. Other registers may confuse the return address > prediction stack. Need more sleep. This is true for the link register (the destination operand), not the branch target (the source operand). Please ignore. -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] checkasm/flacdsp: fix ls/rs/ms tests
decorrelate_ls, _rs and _ms are decorrelate[1], [2] and [3] respectively. The code ended up testing indep ([0]) twice, ms never, and misnaming the other two. --- tests/checkasm/flacdsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c index ef93df8c81..51a0e0060b 100644 --- a/tests/checkasm/flacdsp.c +++ b/tests/checkasm/flacdsp.c @@ -78,7 +78,7 @@ void checkasm_check_flacdsp(void) for (i = 0; i < 2; i++) { ff_flacdsp_init(, fmts[i].fmt, 2); for (j = 0; j < 3; j++) -if (check_func(h.decorrelate[j], "flac_decorrelate_%s_%d", names[j], fmts[i].bits)) +if (check_func(h.decorrelate[j + 1], "flac_decorrelate_%s_%d", names[j], fmts[i].bits)) check_decorrelate(_dst, ref_src, _dst, new_src, 2, fmts[i].bits); for (j = 2; j <= MAX_CHANNELS; j += 2) { ff_flacdsp_init(, fmts[i].fmt, j); -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V packed decorrelate_{l, r}s
flac_decorrelate_ms_16_c: 457.2 flac_decorrelate_ms_16_rvv_i32: 203.0 flac_decorrelate_ms_32_c: 457.2 flac_decorrelate_ms_32_rvv_i32: 203.5 flac_decorrelate_rs_16_c: 456.2 flac_decorrelate_rs_16_rvv_i32: 207.0 flac_decorrelate_rs_32_c: 456.2 flac_decorrelate_rs_32_rvv_i32: 210.5 --- libavcodec/flacdsp.c| 2 + libavcodec/flacdsp.h| 1 + libavcodec/riscv/Makefile | 2 + libavcodec/riscv/flacdsp_init.c | 55 libavcodec/riscv/flacdsp_rvv.S | 113 5 files changed, 173 insertions(+) create mode 100644 libavcodec/riscv/flacdsp_init.c create mode 100644 libavcodec/riscv/flacdsp_rvv.S diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c index 42e231db53..71b4ac44aa 100644 --- a/libavcodec/flacdsp.c +++ b/libavcodec/flacdsp.c @@ -121,6 +121,8 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int cha #if ARCH_ARM ff_flacdsp_init_arm(c, fmt, channels); +#elif ARCH_RISCV +ff_flacdsp_init_riscv(c, fmt, channels); #elif ARCH_X86 ff_flacdsp_init_x86(c, fmt, channels); #endif diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h index 9f8ed38b66..15149c026e 100644 --- a/libavcodec/flacdsp.h +++ b/libavcodec/flacdsp.h @@ -38,6 +38,7 @@ typedef struct FLACDSPContext { void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); +void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); #endif /* AVCODEC_FLACDSP_H */ diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index 57c1708dbb..d34dc77458 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -12,6 +12,8 @@ OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_init.o \ RVV-OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_rvv.o OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_init.o RVV-OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_rvv.o +OBJS-$(CONFIG_FLAC_DECODER) += 
riscv/flacdsp_init.o +RVV-OBJS-$(CONFIG_FLAC_DECODER) += riscv/flacdsp_rvv.o OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_init.o RVV-OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_rvv.o OBJS-$(CONFIG_G722DSP) += riscv/g722dsp_init.o diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c new file mode 100644 index 00..a3415d6d55 --- /dev/null +++ b/libavcodec/riscv/flacdsp_init.c @@ -0,0 +1,55 @@ +/* + * Copyright © 2023 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/flacdsp.h" + +void ff_flac_decorrelate_ls_16_rvv(uint8_t **out, int32_t **in, + int channels, int len, int shift); +void ff_flac_decorrelate_rs_16_rvv(uint8_t **out, int32_t **in, + int channels, int len, int shift); +void ff_flac_decorrelate_ls_32_rvv(uint8_t **out, int32_t **in, + int channels, int len, int shift); +void ff_flac_decorrelate_rs_32_rvv(uint8_t **out, int32_t **in, + int channels, int len, int shift); + +av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, + int channels) +{ +#if HAVE_RVV +int flags = av_get_cpu_flags(); + +if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) { +switch (fmt) { +case AV_SAMPLE_FMT_S16: +c->decorrelate[1] = ff_flac_decorrelate_ls_16_rvv; +c->decorrelate[2] = ff_flac_decorrelate_rs_16_rvv; +break; +case AV_SAMPLE_FMT_S32: +c->decorrelate[1] = ff_flac_decorrelate_ls_32_rvv; +c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv; +break; +} +} +#endif +} diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S new file mode 100644 index 00..c70ad8fcb0 --- /dev/null +++ b/libavcodec/riscv/flacdsp_
[FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: R-V V decorrelate_ms packed
flac_decorrelate_ms_16_c: 585.5 flac_decorrelate_ms_16_rvv_i32: 263.0 flac_decorrelate_ms_32_c: 584.7 flac_decorrelate_ms_32_rvv_i32: 250.0 --- libavcodec/riscv/flacdsp_init.c | 6 libavcodec/riscv/flacdsp_rvv.S | 49 + 2 files changed, 55 insertions(+) diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c index a3415d6d55..0e7be25d98 100644 --- a/libavcodec/riscv/flacdsp_init.c +++ b/libavcodec/riscv/flacdsp_init.c @@ -28,10 +28,14 @@ void ff_flac_decorrelate_ls_16_rvv(uint8_t **out, int32_t **in, int channels, int len, int shift); void ff_flac_decorrelate_rs_16_rvv(uint8_t **out, int32_t **in, int channels, int len, int shift); +void ff_flac_decorrelate_ms_16_rvv(uint8_t **out, int32_t **in, + int channels, int len, int shift); void ff_flac_decorrelate_ls_32_rvv(uint8_t **out, int32_t **in, int channels, int len, int shift); void ff_flac_decorrelate_rs_32_rvv(uint8_t **out, int32_t **in, int channels, int len, int shift); +void ff_flac_decorrelate_ms_32_rvv(uint8_t **out, int32_t **in, + int channels, int len, int shift); av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, int channels) @@ -44,10 +48,12 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, case AV_SAMPLE_FMT_S16: c->decorrelate[1] = ff_flac_decorrelate_ls_16_rvv; c->decorrelate[2] = ff_flac_decorrelate_rs_16_rvv; +c->decorrelate[3] = ff_flac_decorrelate_ms_16_rvv; break; case AV_SAMPLE_FMT_S32: c->decorrelate[1] = ff_flac_decorrelate_ls_32_rvv; c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv; +c->decorrelate[3] = ff_flac_decorrelate_ms_32_rvv; break; } } diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S index c70ad8fcb0..616565ed7e 100644 --- a/libavcodec/riscv/flacdsp_rvv.S +++ b/libavcodec/riscv/flacdsp_rvv.S @@ -69,6 +69,32 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x ret endfunc +func ff_flac_decorrelate_ms_16_rvv, zve32x +ld a0, (a0) +ld a2, 8(a1) +ld a1, (a1) +1: +vsetvli t0, a3, e32, 
m8, ta, ma +vle32.v v8, (a2) +sub a3, a3, t0 +vle32.v v0, (a1) +sh2add a1, t0, a1 +vsra.vi v16, v8, 1 +sh2add a2, t0, a2 +vsub.vv v24, v0, v16 +vadd.vv v16, v24, v8 +vsll.vx v8, v24, a4 +vsll.vx v0, v16, a4 +vsetvli zero, zero, e16, m4, ta, ma +vncvt.x.x.w v0, v0 +vncvt.x.x.w v4, v8 +vsseg2e16.v v0, (a0) +sh2add a0, t0, a0 +bneza3, 1b + +ret +endfunc + func ff_flac_decorrelate_ls_32_rvv, zve32x ld a0, (a0) ld a2, 8(a1) @@ -110,4 +136,27 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x ret endfunc + +func ff_flac_decorrelate_ms_32_rvv, zve32x +ld a0, (a0) +ld a2, 8(a1) +ld a1, (a1) +1: +vsetvli t0, a3, e32, m4, ta, ma +vle32.v v4, (a2) +sub a3, a3, t0 +vle32.v v0, (a1) +sh2add a1, t0, a1 +vsra.vi v8, v4, 1 +sh2add a2, t0, a2 +vsub.vv v12, v0, v8 +vadd.vv v8, v12, v4 +vsll.vx v4, v12, a4 +vsll.vx v0, v8, a4 +vsseg2e32.v v0, (a0) +sh3add a0, t0, a0 +bneza3, 1b + +ret +endfunc #endif -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".