On Mon, Jul 6, 2009 at 3:45 AM, Michael Niedermayer<[email protected]> wrote:
> On Sun, Jul 05, 2009 at 12:02:27PM +0000, Jai Menon wrote:
>> On Sun, Jun 28, 2009 at 12:06 PM, Michael Niedermayer<[email protected]> 
>> wrote:
>> > On Sat, Jun 27, 2009 at 08:25:43PM +0000, Jai Menon wrote:
>> >> On Thu, Jun 25, 2009 at 9:51 PM, Michael Niedermayer<[email protected]> 
>> >> wrote:
>> >> > On Wed, Jun 24, 2009 at 05:59:19PM +0000, Jai Menon wrote:
>> >> >> On Wed, Jun 24, 2009 at 3:58 PM, Michael Niedermayer<[email protected]> 
>> >> >> wrote:
>> >> >> > On Wed, Jun 24, 2009 at 01:42:08PM +0000, Jai Menon wrote:
>> >> >> >> On Wed, Jun 24, 2009 at 1:27 PM, Michael 
>> >> >> >> Niedermayer<[email protected]> wrote:
>> >> >> >> > On Sun, Jun 21, 2009 at 04:35:20PM +0000, Jai Menon wrote:
>> >> >> > [...]
>> >> >> >> > [...]
>> >> >> >> >> @@ -806,6 +815,26 @@
>> >> >> >> >>
>> >> >> >> >>          line += s->picture.linesize[0];
>> >> >> >> >>      }
>> >> >> >> >> +    } else {
>> >> >> >> >> +        for (; y < tile->comp[0].coord[1][1] - 
>> >> >> >> >> s->image_offset_y; y++) {
>> >> >> >> >> +            uint16_t *dst;
>> >> >> >> >> +            x = tile->comp[0].coord[0][0] - s->image_offset_x;
>> >> >> >> >> +            dst = line + x * s->ncomponents * 2;
>> >> >> >> >> +            for (; x < tile->comp[0].coord[0][1] - 
>> >> >> >> >> s->image_offset_x; x++) {
>> >> >> >> >> +                for (compno = 0; compno < s->ncomponents; 
>> >> >> >> >> compno++) {
>> >> >> >> >
>> >> >> >> >> +                    *src[compno] = av_rescale(*src[compno], (1 
>> >> >> >> >> << 16) - 1,
>> >> >> >> >> +                                              (1 << 
>> >> >> >> >> s->cbps[compno]) - 1);
>> >> >> >> >
>> >> >> >> > av_rescale is too slow
>> >> >> >>
>> >> >> >> So just (*src[compno]/((1 << s->cbps[compno]) - 1)) * ((1 << 16) - 
>> >> >> >> 1) ?
>> >> >> >
>> >> >> > * is slow
>> >> >> > / is slower
>> >> >> >
>> >> >> > "src" << C
>> >> >> > it should be
>> >> >>
>> >> >> <possibly dumb question ahead>
>> >> >>
>> >> >> I understand that * and / are slower but how can I achieve the same
>> >> >> effect with a single <<?
>> >> >
>> >> > well, not the same but close enough IMHO
>> >> > src<<C
>> >> > or
>> >> > (src<<C) + (src>>(16-C))
>> >> > should be close enough, my point was mainly that av_rescale() is too 
>> >> > slow
>> >> > to be done per pixel and anything else is better
>> >>
>> >> Okay, modified patch attached.
>> >
>> > [...]
>> >> @@ -806,6 +815,22 @@
>> >>
>> >>          line += s->picture.linesize[0];
>> >>      }
>> >> +    } else {
>> >> +        for (; y < tile->comp[0].coord[1][1] - s->image_offset_y; y++) {
>> >> +            uint16_t *dst;
>> >> +            x = tile->comp[0].coord[0][0] - s->image_offset_x;
>> >> +            dst = line + x * s->ncomponents * 2;
>> >> +            for (; x < tile->comp[0].coord[0][1] - s->image_offset_x; 
>> >> x++) {
>> >> +                for (compno = 0; compno < s->ncomponents; compno++) {
>> >> +                    *src[compno] = *src[compno] << (16 - 
>> >> s->cbps[compno]);
>> >> +                    *src[compno] += 1 << 15;
>> >> +                    *src[compno] = av_clip(*src[compno], 0, (1 << 16) - 
>> >> 1);
>> >> +                    *dst++ = *src[compno]++;
>> >
>> > I don't think using *src[compno] as a temporary is a good choice
>>
>> You mean *src[compno] should be copied to dst and all operations
>> should be done on dst?
>> The current approach seemed correct because this is a part of level shifting.
>> Or did I misunderstand?
>
> int val= src << ...
> val += ...
> val = av_clip(...)
> *dst++= val;
>
> it's easy for the compiler to put val in a register; doing it with src is not,
> because it would have to prove that src is not read after it

Ah, thanks for the explanation. Modified patch attached. I guess the same
change should be made for the other case as well.

-- 
Regards,

Jai
Index: j2kdec.c
===================================================================
--- j2kdec.c	(revision 4434)
+++ j2kdec.c	(working copy)
@@ -55,6 +55,7 @@
     uint8_t sgnd[4]; ///< if a component is signed
     uint8_t properties[4];
 
+    int precision;
     int ncomponents;
     int tile_width, tile_height; ///< tile size
     int numXtiles, numYtiles;
@@ -225,6 +226,7 @@
     for (i = 0; i < s->ncomponents; i++){ // Ssiz_i XRsiz_i, YRsiz_i
         uint8_t x = bytestream_get_byte(&s->buf);
         s->cbps[i] = (x & 0x7f) + 1;
+        s->precision = FFMAX(s->cbps[i], s->precision);
         s->sgnd[i] = (x & 0x80) == 1;
         if (bytestream_get_byte(&s->buf) != 1)
             return -1;
@@ -251,8 +253,14 @@
     s->avctx->height = s->height - s->image_offset_y;
 
     switch(s->ncomponents){
-        case 1: s->avctx->pix_fmt = PIX_FMT_GRAY8; break;
-        case 3: s->avctx->pix_fmt = PIX_FMT_RGB24; break;
+        case 1: if (s->precision > 8) {
+                    s->avctx->pix_fmt    = PIX_FMT_GRAY16;
+                } else s->avctx->pix_fmt = PIX_FMT_GRAY8;
+                break;
+        case 3: if (s->precision > 8) {
+                    s->avctx->pix_fmt    = PIX_FMT_RGB48;
+                } else s->avctx->pix_fmt = PIX_FMT_RGB24;
+                break;
         case 4: s->avctx->pix_fmt = PIX_FMT_BGRA; break;
     }
 
@@ -788,6 +796,7 @@
     if (s->avctx->pix_fmt == PIX_FMT_BGRA) // RGBA -> BGRA
         FFSWAP(int *, src[0], src[2]);
 
+    if (s->precision <= 8) {
     for (; y < tile->comp[0].coord[1][1] - s->image_offset_y; y++){
         uint8_t *dst;
 
@@ -806,6 +815,23 @@
 
         line += s->picture.linesize[0];
     }
+    } else {
+        for (; y < tile->comp[0].coord[1][1] - s->image_offset_y; y++) {
+            uint16_t *dst;
+            x = tile->comp[0].coord[0][0] - s->image_offset_x;
+            dst = line + x * s->ncomponents * 2;
+            for (; x < tile->comp[0].coord[0][1] - s->image_offset_x; x++) {
+                for (compno = 0; compno < s->ncomponents; compno++) {
+                    int32_t val;
+                    val = *src[compno]++ << (16 - s->cbps[compno]);
+                    val += 1 << 15;
+                    val = av_clip(val, 0, (1 << 16) - 1);
+                    *dst++ = val;
+                }
+            }
+            line += s->picture.linesize[0];
+        }
+    }
     return 0;
 }
 
_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

Reply via email to