Re: [FFmpeg-devel] [PATCH] avcodec: use av_mod_intp2() where possible

2015-04-21 Thread Michael Niedermayer
On Tue, Apr 21, 2015 at 06:51:52PM -0300, James Almer wrote:
 On 21/04/15 6:38 PM, Michael Niedermayer wrote:
  On Tue, Apr 21, 2015 at 05:45:25PM -0300, James Almer wrote:
  Signed-off-by: James Almer jamr...@gmail.com
  ---
   libavcodec/acelp_vectors.c |  8 +++-
   libavcodec/adpcm.c |  2 +-
   libavcodec/alacenc.c   |  3 +--
   libavcodec/atrac3plus.c|  4 ++--
   libavcodec/dnxhdenc.c  |  2 +-
   libavcodec/dvenc.c |  2 +-
   libavcodec/ffv1.h  |  2 +-
   libavcodec/ffv1dec.c   |  3 +--
   libavcodec/g726.c  |  2 +-
   libavcodec/g729dec.c   |  2 +-
   libavcodec/golomb.h|  2 +-
   libavcodec/h264.c  |  5 ++---
   libavcodec/h264_refs.c |  2 +-
   libavcodec/hevc.c  | 26 --
   libavcodec/hevc_cabac.c|  8 
   libavcodec/hevc_filter.c   | 15 ++-
   libavcodec/hevc_mvs.c  |  4 ++--
   libavcodec/hevc_ps.c   |  4 ++--
   libavcodec/hevc_refs.c |  5 ++---
   libavcodec/hevcpred_template.c |  4 ++--
   libavcodec/mpeg12enc.c |  8 
   libavcodec/opus.h  |  2 +-
   libavcodec/opus_celt.c |  9 -
   libavcodec/proresenc_kostya.c  |  6 ++
   libavcodec/put_bits.h  |  2 +-
   libavcodec/vorbisdec.c |  5 ++---
   26 files changed, 61 insertions(+), 76 deletions(-)
 
  diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
  index 86851a3..7aef8c7 100644
  --- a/libavcodec/acelp_vectors.c
  +++ b/libavcodec/acelp_vectors.c
  @@ -133,12 +133,11 @@ void ff_acelp_fc_pulse_per_track(
   int pulse_count,
   int bits)
   {
  -int mask = (1  bits) - 1;
   int i;
   
   for(i=0; ipulse_count; i++)
   {
  -fc_v[i + tab1[pulse_indexes  mask]] +=
  +fc_v[i + tab1[av_mod_uintp2(pulse_indexes, bits)]] +=
   (pulse_signs  1) ? 8191 : -8192; // +/-1 in (2.13)
   
   pulse_indexes = bits;
  @@ -154,13 +153,12 @@ void ff_decode_10_pulses_35bits(const int16_t 
  *fixed_index,
   int half_pulse_count, int bits)
   {
   int i;
  -int mask = (1  bits) - 1;
   
   fixed_sparse-no_repeat_mask = 0;
   fixed_sparse-n = 2 * half_pulse_count;
   for (i = 0; i  half_pulse_count; i++) {
  -const int pos1   = gray_decode[fixed_index[2*i+1]  mask] + i;
  -const int pos2   = gray_decode[fixed_index[2*i  ]  mask] + i;
  +const int pos1   = gray_decode[av_mod_uintp2(fixed_index[2*i+1], 
  bits)] + i;
  +const int pos2   = gray_decode[av_mod_uintp2(fixed_index[2*i  ], 
  bits)] + i;
   const float sign = (fixed_index[2*i+1]  (1  bits)) ? -1.0 : 
  1.0;
   fixed_sparse-x[2*i+1] = pos1;
   fixed_sparse-x[2*i  ] = pos2;
  [...]
  diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
  index 5081397..bfc4d71 100644
  --- a/libavcodec/ffv1.h
  +++ b/libavcodec/ffv1.h
  @@ -143,7 +143,7 @@ static av_always_inline int fold(int diff, int bits)
   diff = (int8_t)diff;
   else {
   diff +=  1  (bits  - 1);
  -diff = (1   bits) - 1;
  +diff  = av_mod_uintp2(diff, bits);
   diff -=  1  (bits  - 1);
   }
   
  
  iam not sure some of these changes are are a good idea
  for example above the mask has to be calculated anyway what can be
  gained from av_mod_uintp2() use here ?
  i think this should be bechmarked when its in performance critical code
 
 1  (bits  - 1) is not the same as (1   bits) - 1. The latter is not 
 going to be
 calculated if the arch optimized version of av_mod_uintp2 is used.

yep, i should not review patches when i am in a (IRC) meeting

before the patch fold looks like this: (for the non 8bit case)
addl100(%rsp), %ecx
andl104(%rsp), %ecx
subl100(%rsp), %ecx
.L869:
afterwards it looks like this:

addl104(%rsp), %ecx
andl64(%rsp), %ecx
subl104(%rsp), %ecx
.L869:

so it seems ok


 
 The one file where i don't know if there's going to be any gain is in one of 
 the opus_celt.c
 changes, where the mask needs to be calculated even if arch optimized 
 av_mod_uintp2 is used.
 That one can be removed, but every other change is pretty straightforward.

ok, then iam fine with the patch

Thanks

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: use av_mod_intp2() where possible

2015-04-21 Thread Michael Niedermayer
On Tue, Apr 21, 2015 at 05:45:25PM -0300, James Almer wrote:
 Signed-off-by: James Almer jamr...@gmail.com
 ---
  libavcodec/acelp_vectors.c |  8 +++-
  libavcodec/adpcm.c |  2 +-
  libavcodec/alacenc.c   |  3 +--
  libavcodec/atrac3plus.c|  4 ++--
  libavcodec/dnxhdenc.c  |  2 +-
  libavcodec/dvenc.c |  2 +-
  libavcodec/ffv1.h  |  2 +-
  libavcodec/ffv1dec.c   |  3 +--
  libavcodec/g726.c  |  2 +-
  libavcodec/g729dec.c   |  2 +-
  libavcodec/golomb.h|  2 +-
  libavcodec/h264.c  |  5 ++---
  libavcodec/h264_refs.c |  2 +-
  libavcodec/hevc.c  | 26 --
  libavcodec/hevc_cabac.c|  8 
  libavcodec/hevc_filter.c   | 15 ++-
  libavcodec/hevc_mvs.c  |  4 ++--
  libavcodec/hevc_ps.c   |  4 ++--
  libavcodec/hevc_refs.c |  5 ++---
  libavcodec/hevcpred_template.c |  4 ++--
  libavcodec/mpeg12enc.c |  8 
  libavcodec/opus.h  |  2 +-
  libavcodec/opus_celt.c |  9 -
  libavcodec/proresenc_kostya.c  |  6 ++
  libavcodec/put_bits.h  |  2 +-
  libavcodec/vorbisdec.c |  5 ++---
  26 files changed, 61 insertions(+), 76 deletions(-)
 
 diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
 index 86851a3..7aef8c7 100644
 --- a/libavcodec/acelp_vectors.c
 +++ b/libavcodec/acelp_vectors.c
 @@ -133,12 +133,11 @@ void ff_acelp_fc_pulse_per_track(
  int pulse_count,
  int bits)
  {
 -int mask = (1  bits) - 1;
  int i;
  
  for(i=0; ipulse_count; i++)
  {
 -fc_v[i + tab1[pulse_indexes  mask]] +=
 +fc_v[i + tab1[av_mod_uintp2(pulse_indexes, bits)]] +=
  (pulse_signs  1) ? 8191 : -8192; // +/-1 in (2.13)
  
  pulse_indexes = bits;
 @@ -154,13 +153,12 @@ void ff_decode_10_pulses_35bits(const int16_t 
 *fixed_index,
  int half_pulse_count, int bits)
  {
  int i;
 -int mask = (1  bits) - 1;
  
  fixed_sparse-no_repeat_mask = 0;
  fixed_sparse-n = 2 * half_pulse_count;
  for (i = 0; i  half_pulse_count; i++) {
 -const int pos1   = gray_decode[fixed_index[2*i+1]  mask] + i;
 -const int pos2   = gray_decode[fixed_index[2*i  ]  mask] + i;
 +const int pos1   = gray_decode[av_mod_uintp2(fixed_index[2*i+1], 
 bits)] + i;
 +const int pos2   = gray_decode[av_mod_uintp2(fixed_index[2*i  ], 
 bits)] + i;
  const float sign = (fixed_index[2*i+1]  (1  bits)) ? -1.0 : 1.0;
  fixed_sparse-x[2*i+1] = pos1;
  fixed_sparse-x[2*i  ] = pos2;
[...]
 diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
 index 5081397..bfc4d71 100644
 --- a/libavcodec/ffv1.h
 +++ b/libavcodec/ffv1.h
 @@ -143,7 +143,7 @@ static av_always_inline int fold(int diff, int bits)
  diff = (int8_t)diff;
  else {
  diff +=  1  (bits  - 1);
 -diff = (1   bits) - 1;
 +diff  = av_mod_uintp2(diff, bits);
  diff -=  1  (bits  - 1);
  }
  

iam not sure some of these changes are are a good idea
for example above the mask has to be calculated anyway what can be
gained from av_mod_uintp2() use here ?
i think this should be bechmarked when its in performance critical code

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Awnsering whenever a program halts or runs forever is
On a turing machine, in general impossible (turings halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: use av_mod_intp2() where possible

2015-04-21 Thread James Almer
On 21/04/15 6:38 PM, Michael Niedermayer wrote:
 On Tue, Apr 21, 2015 at 05:45:25PM -0300, James Almer wrote:
 Signed-off-by: James Almer jamr...@gmail.com
 ---
  libavcodec/acelp_vectors.c |  8 +++-
  libavcodec/adpcm.c |  2 +-
  libavcodec/alacenc.c   |  3 +--
  libavcodec/atrac3plus.c|  4 ++--
  libavcodec/dnxhdenc.c  |  2 +-
  libavcodec/dvenc.c |  2 +-
  libavcodec/ffv1.h  |  2 +-
  libavcodec/ffv1dec.c   |  3 +--
  libavcodec/g726.c  |  2 +-
  libavcodec/g729dec.c   |  2 +-
  libavcodec/golomb.h|  2 +-
  libavcodec/h264.c  |  5 ++---
  libavcodec/h264_refs.c |  2 +-
  libavcodec/hevc.c  | 26 --
  libavcodec/hevc_cabac.c|  8 
  libavcodec/hevc_filter.c   | 15 ++-
  libavcodec/hevc_mvs.c  |  4 ++--
  libavcodec/hevc_ps.c   |  4 ++--
  libavcodec/hevc_refs.c |  5 ++---
  libavcodec/hevcpred_template.c |  4 ++--
  libavcodec/mpeg12enc.c |  8 
  libavcodec/opus.h  |  2 +-
  libavcodec/opus_celt.c |  9 -
  libavcodec/proresenc_kostya.c  |  6 ++
  libavcodec/put_bits.h  |  2 +-
  libavcodec/vorbisdec.c |  5 ++---
  26 files changed, 61 insertions(+), 76 deletions(-)

 diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
 index 86851a3..7aef8c7 100644
 --- a/libavcodec/acelp_vectors.c
 +++ b/libavcodec/acelp_vectors.c
 @@ -133,12 +133,11 @@ void ff_acelp_fc_pulse_per_track(
  int pulse_count,
  int bits)
  {
 -int mask = (1  bits) - 1;
  int i;
  
  for(i=0; ipulse_count; i++)
  {
 -fc_v[i + tab1[pulse_indexes  mask]] +=
 +fc_v[i + tab1[av_mod_uintp2(pulse_indexes, bits)]] +=
  (pulse_signs  1) ? 8191 : -8192; // +/-1 in (2.13)
  
  pulse_indexes = bits;
 @@ -154,13 +153,12 @@ void ff_decode_10_pulses_35bits(const int16_t 
 *fixed_index,
  int half_pulse_count, int bits)
  {
  int i;
 -int mask = (1  bits) - 1;
  
  fixed_sparse-no_repeat_mask = 0;
  fixed_sparse-n = 2 * half_pulse_count;
  for (i = 0; i  half_pulse_count; i++) {
 -const int pos1   = gray_decode[fixed_index[2*i+1]  mask] + i;
 -const int pos2   = gray_decode[fixed_index[2*i  ]  mask] + i;
 +const int pos1   = gray_decode[av_mod_uintp2(fixed_index[2*i+1], 
 bits)] + i;
 +const int pos2   = gray_decode[av_mod_uintp2(fixed_index[2*i  ], 
 bits)] + i;
  const float sign = (fixed_index[2*i+1]  (1  bits)) ? -1.0 : 1.0;
  fixed_sparse-x[2*i+1] = pos1;
  fixed_sparse-x[2*i  ] = pos2;
 [...]
 diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
 index 5081397..bfc4d71 100644
 --- a/libavcodec/ffv1.h
 +++ b/libavcodec/ffv1.h
 @@ -143,7 +143,7 @@ static av_always_inline int fold(int diff, int bits)
  diff = (int8_t)diff;
  else {
  diff +=  1  (bits  - 1);
 -diff = (1   bits) - 1;
 +diff  = av_mod_uintp2(diff, bits);
  diff -=  1  (bits  - 1);
  }
  
 
 iam not sure some of these changes are are a good idea
 for example above the mask has to be calculated anyway what can be
 gained from av_mod_uintp2() use here ?
 i think this should be bechmarked when its in performance critical code

1  (bits  - 1) is not the same as (1   bits) - 1. The latter is not 
going to be
calculated if the arch optimized version of av_mod_uintp2 is used.

The one file where i don't know if there's going to be any gain is in one of 
the opus_celt.c
changes, where the mask needs to be calculated even if arch optimized 
av_mod_uintp2 is used.
That one can be removed, but every other change is pretty straightforward.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: use av_mod_intp2() where possible

2015-04-21 Thread James Almer
On 21/04/15 7:49 PM, James Almer wrote:
 On 21/04/15 7:12 PM, Michael Niedermayer wrote:
 On Tue, Apr 21, 2015 at 06:51:52PM -0300, James Almer wrote:
 On 21/04/15 6:38 PM, Michael Niedermayer wrote:
 On Tue, Apr 21, 2015 at 05:45:25PM -0300, James Almer wrote:
 Signed-off-by: James Almer jamr...@gmail.com
 diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
 index 5081397..bfc4d71 100644
 --- a/libavcodec/ffv1.h
 +++ b/libavcodec/ffv1.h
 @@ -143,7 +143,7 @@ static av_always_inline int fold(int diff, int bits)
  diff = (int8_t)diff;
  else {
  diff +=  1  (bits  - 1);
 -diff = (1   bits) - 1;
 +diff  = av_mod_uintp2(diff, bits);
  diff -=  1  (bits  - 1);
  }
  

 iam not sure some of these changes are are a good idea
 for example above the mask has to be calculated anyway what can be
 gained from av_mod_uintp2() use here ?
 i think this should be bechmarked when its in performance critical code

 1  (bits  - 1) is not the same as (1   bits) - 1. The latter is not 
 going to be
 calculated if the arch optimized version of av_mod_uintp2 is used.

 yep, i should not review patches when i am in a (IRC) meeting

 before the patch fold looks like this: (for the non 8bit case)
 addl100(%rsp), %ecx
 andl104(%rsp), %ecx
 subl100(%rsp), %ecx
 .L869:
 afterwards it looks like this:

 addl104(%rsp), %ecx
 andl64(%rsp), %ecx
 subl104(%rsp), %ecx
 .L869:

 so it seems ok



 The one file where i don't know if there's going to be any gain is in one 
 of the opus_celt.c
 changes, where the mask needs to be calculated even if arch optimized 
 av_mod_uintp2 is used.
 That one can be removed, but every other change is pretty straightforward.

 ok, then iam fine with the patch

 Thanks
 
 I'm going to apply the changes that are straightforward (single line changes 
 that didn't use
 mask variables prior to this patch, or that created one but then used it 
 once). av_mod_uintp2
 should generate the same assembly with the non bmi2 optimized version.

Done and pushed, thanks.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: use av_mod_intp2() where possible

2015-04-21 Thread Michael Niedermayer
On Tue, Apr 21, 2015 at 11:38:32PM +0200, Michael Niedermayer wrote:
 On Tue, Apr 21, 2015 at 05:45:25PM -0300, James Almer wrote:
  Signed-off-by: James Almer jamr...@gmail.com
  ---
   libavcodec/acelp_vectors.c |  8 +++-
   libavcodec/adpcm.c |  2 +-
   libavcodec/alacenc.c   |  3 +--
   libavcodec/atrac3plus.c|  4 ++--
   libavcodec/dnxhdenc.c  |  2 +-
   libavcodec/dvenc.c |  2 +-
   libavcodec/ffv1.h  |  2 +-
   libavcodec/ffv1dec.c   |  3 +--
   libavcodec/g726.c  |  2 +-
   libavcodec/g729dec.c   |  2 +-
   libavcodec/golomb.h|  2 +-
   libavcodec/h264.c  |  5 ++---
   libavcodec/h264_refs.c |  2 +-
   libavcodec/hevc.c  | 26 --
   libavcodec/hevc_cabac.c|  8 
   libavcodec/hevc_filter.c   | 15 ++-
   libavcodec/hevc_mvs.c  |  4 ++--
   libavcodec/hevc_ps.c   |  4 ++--
   libavcodec/hevc_refs.c |  5 ++---
   libavcodec/hevcpred_template.c |  4 ++--
   libavcodec/mpeg12enc.c |  8 
   libavcodec/opus.h  |  2 +-
   libavcodec/opus_celt.c |  9 -
   libavcodec/proresenc_kostya.c  |  6 ++
   libavcodec/put_bits.h  |  2 +-
   libavcodec/vorbisdec.c |  5 ++---
   26 files changed, 61 insertions(+), 76 deletions(-)
  
  diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
  index 86851a3..7aef8c7 100644
  --- a/libavcodec/acelp_vectors.c
  +++ b/libavcodec/acelp_vectors.c
  @@ -133,12 +133,11 @@ void ff_acelp_fc_pulse_per_track(
   int pulse_count,
   int bits)
   {
  -int mask = (1  bits) - 1;
   int i;
   
   for(i=0; ipulse_count; i++)
   {
  -fc_v[i + tab1[pulse_indexes  mask]] +=
  +fc_v[i + tab1[av_mod_uintp2(pulse_indexes, bits)]] +=
   (pulse_signs  1) ? 8191 : -8192; // +/-1 in (2.13)
   
   pulse_indexes = bits;
  @@ -154,13 +153,12 @@ void ff_decode_10_pulses_35bits(const int16_t 
  *fixed_index,
   int half_pulse_count, int bits)
   {
   int i;
  -int mask = (1  bits) - 1;
   
   fixed_sparse-no_repeat_mask = 0;
   fixed_sparse-n = 2 * half_pulse_count;
   for (i = 0; i  half_pulse_count; i++) {
  -const int pos1   = gray_decode[fixed_index[2*i+1]  mask] + i;
  -const int pos2   = gray_decode[fixed_index[2*i  ]  mask] + i;
  +const int pos1   = gray_decode[av_mod_uintp2(fixed_index[2*i+1], 
  bits)] + i;
  +const int pos2   = gray_decode[av_mod_uintp2(fixed_index[2*i  ], 
  bits)] + i;
   const float sign = (fixed_index[2*i+1]  (1  bits)) ? -1.0 : 1.0;
   fixed_sparse-x[2*i+1] = pos1;
   fixed_sparse-x[2*i  ] = pos2;
 [...]
  diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
  index 5081397..bfc4d71 100644
  --- a/libavcodec/ffv1.h
  +++ b/libavcodec/ffv1.h
  @@ -143,7 +143,7 @@ static av_always_inline int fold(int diff, int bits)
   diff = (int8_t)diff;
   else {
   diff +=  1  (bits  - 1);
  -diff = (1   bits) - 1;
  +diff  = av_mod_uintp2(diff, bits);
   diff -=  1  (bits  - 1);
   }
   
 
 iam not sure some of these changes are are a good idea
 for example above the mask has to be calculated anyway what can be
 gained from av_mod_uintp2() use here ?
 i think this should be bechmarked when its in performance critical code

but the changes look ok when it can be ensured theres no speed loss
or the code changed is not speed relevant

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No great genius has ever existed without some touch of madness. -- Aristotle


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel