Marc,
On 6/6/2011 9:20 AM, Marc-André Moreau wrote:
I read more about SSE, and then about NEON, which is the equivalent for
ARM.
My first impression is damn, how could I not see this before? This
thing looks very well suited not only for acceleration of RemoteFX
decoding, but there's a chance that more GDI operations could be
accelerated with it than the current implementation in xfreerdp. Color
conversion also appears to be possible with it. If someone wants to
work on something like this, let me know.
I started working on adding SSE/SSE2 decoding support to the RemoteFX
library.
I think there are several questions that still need to be answered on
how to best wire this up, but please review the attached .patch file to
see what I have working so far. This .patch file is based off of your
recent changes in the awakecoding/FreeRDP branch.
As a starting point, I moved the YCbCr to RGB conversion code out of
rfx_decode_rgb and into a separate function. I then added an SSE
'optimized' version of it. Also included is a file with the disassembly
of the rfx_decode.o file that clearly shows the difference between the 2
functions.
One note... I had to use a ./configure CFLAGS="-O2 -msse2" command to
get this code to compile (the -O2 isn't actually needed, but cleans up
the assembled code). I think we would need to find a better way of
automatically handling this. Maybe a --with-sse flag that can be passed
to ./configure, with #ifdef lines around the SSE code? Help with how to
set this up would be appreciated.
Then there are questions about structure. Should we break out SSE
optimizations into their own files and/or libraries, or leave them
alongside their non-SSE cousins?
Lastly, is there a good way to test if and how much better these
optimizations actually are? I started messing around with gprof, sprof,
and oprofile, but I can't seem to get debug info out of the
libfreerdp-rfx static library. gprof works, but only records info on
the xfreerdp application and not on static libraries. I can't seem to
get sprof or oprofile working either. Maybe it is just the way I was
using them, but is there a better/easier way to profile this library?
Or... maybe we could set up a unit test with known RFX data that can be
run through a number of iterations and then time it?
Any other thoughts?
-Steve
diff --git a/X11/xf_decode.c b/X11/xf_decode.c
index 092aced..acd40c8 100644
--- a/X11/xf_decode.c
+++ b/X11/xf_decode.c
@@ -87,6 +87,8 @@ xf_decode_frame(xfInfo * xfi, int x, int y, uint8 * bitmapData, uint32 bitmapDat
}
rfx_message_free(xfi->rfx_context, message);
+ XSetClipMask(xfi->display, xfi->gc, None);
+
break;
default:
diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h
index a5a9ed8..346a26f 100644
--- a/include/freerdp/rfx.h
+++ b/include/freerdp/rfx.h
@@ -137,14 +137,20 @@ struct _RFX_CONTEXT
RFX_POOL* pool; /* memory pool */
- uint32 y_buffer[4096]; /* 4096 = 64x64 */
- uint32 cr_buffer[4096]; /* 4096 = 64x64 */
- uint32 cb_buffer[4096]; /* 4096 = 64x64 */
-
+ uint32 y_r_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+ uint32 cb_g_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+ uint32 cr_b_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+
+ uint32* y_r_buffer;
+ uint32* cb_g_buffer;
+ uint32* cr_b_buffer;
+
uint32 idwt_buffer_8[256]; /* sub-band width 8 */
uint32 idwt_buffer_16[1024]; /* sub-band width 16 */
uint32 idwt_buffer_32[4096]; /* sub-band width 32 */
uint32* idwt_buffers[5]; /* sub-band buffer array */
+
+ void (* decode_YCbCr_to_RGB)(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
};
typedef struct _RFX_CONTEXT RFX_CONTEXT;
diff --git a/libfreerdp-rfx/librfx.c b/libfreerdp-rfx/librfx.c
index a08cdfc..572f1e5 100644
--- a/libfreerdp-rfx/librfx.c
+++ b/libfreerdp-rfx/librfx.c
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <inttypes.h>
#include <freerdp/rfx.h>
#include <freerdp/types/base.h>
#include <freerdp/utils/stream.h>
@@ -39,10 +40,26 @@ rfx_context_new(void)
context->pool = rfx_pool_new();
+ // align buffers to 16 byte boundary (needed for SSE/SSE2 instructions)
+ context->y_r_buffer = (uint32 *)(((uintptr_t)context->y_r_mem + 16) & ~ 0x0F);
+ context->cb_g_buffer = (uint32 *)(((uintptr_t)context->cb_g_mem + 16) & ~ 0x0F);
+ context->cr_b_buffer = (uint32 *)(((uintptr_t)context->cr_b_mem + 16) & ~ 0x0F);
+
context->idwt_buffers[1] = (uint32*) context->idwt_buffer_8;
context->idwt_buffers[2] = (uint32*) context->idwt_buffer_16;
context->idwt_buffers[4] = (uint32*) context->idwt_buffer_32;
+
+
+ //blah
+ context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB;
+
+ // TODO: how to best tell if sse/sse2 is available and desired?
+ if (1)
+ {
+ context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_sse;
+ }
+
return context;
}
diff --git a/libfreerdp-rfx/rfx_decode.c b/libfreerdp-rfx/rfx_decode.c
index ff51f69..e61e126 100644
--- a/libfreerdp-rfx/rfx_decode.c
+++ b/libfreerdp-rfx/rfx_decode.c
@@ -24,11 +24,108 @@
#include "rfx_differential.h"
#include "rfx_quantization.h"
#include "rfx_dwt.h"
-
#include "rfx_decode.h"
+#include <inttypes.h>
+#include "xmmintrin.h"
+#include "emmintrin.h"
+
#define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
+/* TODO: move these sse helpers to a separate include file */
+
+static __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_between_ps (__m128 val, __m128 min, __m128 max)
+{
+ __m128 ret;
+ ret = _mm_max_ps(val, min);
+ return _mm_min_ps(ret, max);
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi32_and_store (__m128i * loc, __m128 val)
+{
+ __m128i tmp;
+ tmp = _mm_cvtps_epi32(val);
+ _mm_stream_si128(loc, tmp);
+}
+
+void
+rfx_decode_YCbCr_to_RGB_sse(uint32 * y_r_buffer, uint32 * cb_g_buffer, uint32 * cr_b_buffer)
+{
+ __m128 y_add = _mm_set_ps1(128.0f);
+ __m128 r_cr_t = _mm_set_ps1(1.403f);
+ __m128 g_cb_t = _mm_set_ps1(-0.344f);
+ __m128 g_cr_t = _mm_set_ps1(-0.714f);
+ __m128 b_cb_t = _mm_set_ps1(1.77f);
+
+ __m128 min = _mm_set_ps1(0.0f);
+ __m128 max = _mm_set_ps1(255.0f);
+
+ __m128 y, cb, cr;
+ __m128 r, g, b, tmp;
+
+ __m128i * y_r_buf = (__m128i*) y_r_buffer;
+ __m128i * cb_g_buf = (__m128i*) cb_g_buffer;
+ __m128i * cr_b_buf = (__m128i*) cr_b_buffer;
+
+ int i;
+ for (i = 0; i < (4096 / 4); i++)
+ {
+ y = _mm_cvtepi32_ps(*y_r_buf);
+ cb = _mm_cvtepi32_ps(*cb_g_buf);
+ cr = _mm_cvtepi32_ps(*cr_b_buf);
+
+ // y = y + 128
+ y = _mm_add_ps(y, y_add);
+
+ // r = between(y + (cr * 1.403), 0, 255)
+ r = _mm_mul_ps(cr, r_cr_t);
+ r = _mm_add_ps(r, y);
+ r = _mm_between_ps(r, min, max);
+ _mm_cvtps_epi32_and_store(y_r_buf, r);
+
+ // g = between(y + (cb * -0.344) + (cr * -0.714), 0, 255)
+ g = _mm_mul_ps(cb, g_cb_t);
+ tmp = _mm_mul_ps(cr, g_cr_t);
+ g = _mm_add_ps(g, tmp);
+ g = _mm_add_ps(g, y);
+ g = _mm_between_ps(g, min, max);
+ _mm_cvtps_epi32_and_store(cb_g_buf, g);
+
+ // b = between(y + (cb * 1.77), 0, 255)
+ b = _mm_mul_ps(cb, b_cb_t);
+ b = _mm_add_ps(b, y);
+ b = _mm_between_ps(b, min, max);
+ _mm_cvtps_epi32_and_store(cr_b_buf, b);
+
+ y_r_buf++;
+ cb_g_buf++;
+ cr_b_buf++;
+ }
+}
+
+void
+rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf)
+{
+ int y, cb, cr;
+ int r, g, b;
+
+ int i;
+ for (i = 0; i < 4096; i++)
+ {
+ y = y_r_buf[i] + 128;
+ cb = cb_g_buf[i];
+ cr = cr_b_buf[i];
+ r = (y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5));
+ y_r_buf[i] = MINMAX(r, 0, 255);
+ g = (y - ((cb >> 2) + (cb >> 4) + (cb >> 5)) - ((cr >> 1) + (cr >> 3) + (cr >> 4) + (cr >> 5)));
+ cb_g_buf[i] = MINMAX(g, 0, 255);
+ b = (y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6));
+ cr_b_buf[i] = MINMAX(b, 0, 255);
+ }
+}
+
static void
rfx_decode_component(RFX_CONTEXT * context, const uint32 * quantization_values, int half,
const uint8 * data, int size, uint32 * buffer)
@@ -67,23 +164,17 @@ rfx_decode_rgb(RFX_CONTEXT * context,
int y, cb, cr;
dst = rgb_buffer;
- rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_buffer);
- rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_buffer);
- rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_buffer);
+ rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_r_buffer);
+ rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_g_buffer);
+ rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_b_buffer);
+
+ context->decode_YCbCr_to_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
for (i = 0; i < 4096; i++)
{
- y = context->y_buffer[i] + 128;
- cb = context->cb_buffer[i];
- cr = context->cr_buffer[i];
-
- r = (y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5));
- r = MINMAX(r, 0, 255);
- g = (y - ((cb >> 2) + (cb >> 4) + (cb >> 5)) - ((cr >> 1) + (cr >> 3) + (cr >> 4) + (cr >> 5)));
- g = MINMAX(g, 0, 255);
- b = (y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6));
- b = MINMAX(b, 0, 255);
-
+ r = context->y_r_buffer[i];
+ g = context->cb_g_buffer[i];
+ b = context->cr_b_buffer[i];
switch (context->pixel_format)
{
case RFX_PIXEL_FORMAT_BGRA:
@@ -112,6 +203,5 @@ rfx_decode_rgb(RFX_CONTEXT * context,
break;
}
}
-
return rgb_buffer;
}
diff --git a/libfreerdp-rfx/rfx_decode.h b/libfreerdp-rfx/rfx_decode.h
index afcc965..02585a2 100644
--- a/libfreerdp-rfx/rfx_decode.h
+++ b/libfreerdp-rfx/rfx_decode.h
@@ -22,6 +22,12 @@
#include <freerdp/rfx.h>
+void
+rfx_decode_YCbCr_to_RGB_sse(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
+
+void
+rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
+
unsigned char *
rfx_decode_rgb(RFX_CONTEXT * context,
const uint8 * y_data, int y_size, const uint32 * y_quants,
libfreerdp-rfx/libfreerdp_rfx_la-rfx_decode.o: file format elf32-i386
Disassembly of section .text:
00000000 <rfx_decode_YCbCr_to_RGB>:
0: 55 push %ebp
1: 31 c0 xor %eax,%eax
3: 89 e5 mov %esp,%ebp
5: 57 push %edi
6: 56 push %esi
7: 53 push %ebx
8: 83 ec 10 sub $0x10,%esp
b: 90 nop
c: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
10: 8b 55 08 mov 0x8(%ebp),%edx
13: 8b 4d 0c mov 0xc(%ebp),%ecx
16: 8b 75 10 mov 0x10(%ebp),%esi
19: 8b 1c 82 mov (%edx,%eax,4),%ebx
1c: 8b 14 81 mov (%ecx,%eax,4),%edx
1f: 8b 0c 86 mov (%esi,%eax,4),%ecx
22: 83 eb 80 sub $0xffffff80,%ebx
25: 89 cf mov %ecx,%edi
27: 89 ce mov %ecx,%esi
29: c1 ff 03 sar $0x3,%edi
2c: 89 7d e4 mov %edi,-0x1c(%ebp)
2f: 89 cf mov %ecx,%edi
31: c1 ff 02 sar $0x2,%edi
34: 8d 3c 3b lea (%ebx,%edi,1),%edi
37: 01 cf add %ecx,%edi
39: 03 7d e4 add -0x1c(%ebp),%edi
3c: c1 fe 05 sar $0x5,%esi
3f: 89 75 ec mov %esi,-0x14(%ebp)
42: 89 7d f0 mov %edi,-0x10(%ebp)
45: 8b 75 f0 mov -0x10(%ebp),%esi
48: 31 ff xor %edi,%edi
4a: 03 75 ec add -0x14(%ebp),%esi
4d: 89 75 f0 mov %esi,-0x10(%ebp)
50: 78 0d js 5f <rfx_decode_YCbCr_to_RGB+0x5f>
52: 81 fe ff 00 00 00 cmp $0xff,%esi
58: 66 bf ff 00 mov $0xff,%di
5c: 0f 4e fe cmovle %esi,%edi
5f: 8b 75 08 mov 0x8(%ebp),%esi
62: 89 3c 86 mov %edi,(%esi,%eax,4)
65: 89 de mov %ebx,%esi
67: 89 d7 mov %edx,%edi
69: 2b 75 e4 sub -0x1c(%ebp),%esi
6c: c1 ff 02 sar $0x2,%edi
6f: 89 7d e8 mov %edi,-0x18(%ebp)
72: 29 fe sub %edi,%esi
74: 89 cf mov %ecx,%edi
76: d1 ff sar %edi
78: 29 fe sub %edi,%esi
7a: 89 d7 mov %edx,%edi
7c: c1 f9 04 sar $0x4,%ecx
7f: 29 ce sub %ecx,%esi
81: 89 d1 mov %edx,%ecx
83: c1 f9 05 sar $0x5,%ecx
86: 29 ce sub %ecx,%esi
88: 31 c9 xor %ecx,%ecx
8a: 2b 75 ec sub -0x14(%ebp),%esi
8d: c1 ff 04 sar $0x4,%edi
90: 29 fe sub %edi,%esi
92: 78 0b js 9f <rfx_decode_YCbCr_to_RGB+0x9f>
94: 81 fe ff 00 00 00 cmp $0xff,%esi
9a: b1 ff mov $0xff,%cl
9c: 0f 4e ce cmovle %esi,%ecx
9f: 8b 75 0c mov 0xc(%ebp),%esi
a2: 03 5d e8 add -0x18(%ebp),%ebx
a5: 89 0c 86 mov %ecx,(%esi,%eax,4)
a8: 89 d1 mov %edx,%ecx
aa: 01 d3 add %edx,%ebx
ac: d1 f9 sar %ecx
ae: 01 cb add %ecx,%ebx
b0: 31 c9 xor %ecx,%ecx
b2: c1 fa 06 sar $0x6,%edx
b5: 01 da add %ebx,%edx
b7: 78 0b js c4 <rfx_decode_YCbCr_to_RGB+0xc4>
b9: 81 fa ff 00 00 00 cmp $0xff,%edx
bf: b1 ff mov $0xff,%cl
c1: 0f 4e ca cmovle %edx,%ecx
c4: 8b 7d 10 mov 0x10(%ebp),%edi
c7: 89 0c 87 mov %ecx,(%edi,%eax,4)
ca: 83 c0 01 add $0x1,%eax
cd: 3d 00 10 00 00 cmp $0x1000,%eax
d2: 0f 85 38 ff ff ff jne 10 <rfx_decode_YCbCr_to_RGB+0x10>
d8: 83 c4 10 add $0x10,%esp
db: 5b pop %ebx
dc: 5e pop %esi
dd: 5f pop %edi
de: 5d pop %ebp
df: c3 ret
000000e0 <rfx_decode_YCbCr_to_RGB_sse>:
e0: 55 push %ebp
e1: 0f 57 db xorps %xmm3,%xmm3
e4: 89 e5 mov %esp,%ebp
e6: 8b 45 08 mov 0x8(%ebp),%eax
e9: 8b 4d 0c mov 0xc(%ebp),%ecx
ec: 8b 55 10 mov 0x10(%ebp),%edx
ef: 0f 28 3d 00 00 00 00 movaps 0x0,%xmm7
f6: 53 push %ebx
f7: 0f 28 35 10 00 00 00 movaps 0x10,%xmm6
fe: 8d 98 00 40 00 00 lea 0x4000(%eax),%ebx
104: 0f 28 15 50 00 00 00 movaps 0x50,%xmm2
10b: 90 nop
10c: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
110: 0f 5b 2a cvtdq2ps (%edx),%xmm5
113: 0f 28 c5 movaps %xmm5,%xmm0
116: 0f 5b 08 cvtdq2ps (%eax),%xmm1
119: 0f 58 cf addps %xmm7,%xmm1
11c: 0f 5b 21 cvtdq2ps (%ecx),%xmm4
11f: 0f 59 c6 mulps %xmm6,%xmm0
122: 0f 59 2d 30 00 00 00 mulps 0x30,%xmm5
129: 0f 58 c1 addps %xmm1,%xmm0
12c: 0f 5f c3 maxps %xmm3,%xmm0
12f: 0f 5d c2 minps %xmm2,%xmm0
132: 66 0f 5b c0 cvtps2dq %xmm0,%xmm0
136: 66 0f e7 00 movntdq %xmm0,(%eax)
13a: 0f 28 05 20 00 00 00 movaps 0x20,%xmm0
141: 83 c0 10 add $0x10,%eax
144: 0f 59 c4 mulps %xmm4,%xmm0
147: 0f 58 c5 addps %xmm5,%xmm0
14a: 0f 58 c1 addps %xmm1,%xmm0
14d: 0f 5f c3 maxps %xmm3,%xmm0
150: 0f 5d c2 minps %xmm2,%xmm0
153: 66 0f 5b c0 cvtps2dq %xmm0,%xmm0
157: 66 0f e7 01 movntdq %xmm0,(%ecx)
15b: 0f 28 05 40 00 00 00 movaps 0x40,%xmm0
162: 83 c1 10 add $0x10,%ecx
165: 0f 59 c4 mulps %xmm4,%xmm0
168: 0f 58 c1 addps %xmm1,%xmm0
16b: 0f 5f c3 maxps %xmm3,%xmm0
16e: 0f 5d c2 minps %xmm2,%xmm0
171: 66 0f 5b c0 cvtps2dq %xmm0,%xmm0
175: 66 0f e7 02 movntdq %xmm0,(%edx)
179: 83 c2 10 add $0x10,%edx
17c: 39 d8 cmp %ebx,%eax
17e: 75 90 jne 110 <rfx_decode_YCbCr_to_RGB_sse+0x30>
180: 5b pop %ebx
181: 5d pop %ebp
182: c3 ret
183: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
189: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi
00000190 <T.68>:
190: 55 push %ebp
191: 89 e5 mov %esp,%ebp
193: 57 push %edi
194: 56 push %esi
195: 89 d6 mov %edx,%esi
197: 53 push %ebx
198: 89 c3 mov %eax,%ebx
19a: 83 ec 4c sub $0x4c,%esp
19d: 8b 45 08 mov 0x8(%ebp),%eax
1a0: 8b 7d 0c mov 0xc(%ebp),%edi
1a3: 89 4c 24 04 mov %ecx,0x4(%esp)
1a7: c7 44 24 10 00 10 00 movl $0x1000,0x10(%esp)
1ae: 00
1af: 89 44 24 08 mov %eax,0x8(%esp)
1b3: 8b 43 08 mov 0x8(%ebx),%eax
1b6: 89 7c 24 0c mov %edi,0xc(%esp)
1ba: 89 04 24 mov %eax,(%esp)
1bd: e8 fc ff ff ff call 1be <T.68+0x2e>
1c2: 8d 8f 00 3f 00 00 lea 0x3f00(%edi),%ecx
1c8: 89 0c 24 mov %ecx,(%esp)
1cb: 89 4d e4 mov %ecx,-0x1c(%ebp)
1ce: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp)
1d5: 00
1d6: e8 fc ff ff ff call 1d7 <T.68+0x47>
1db: 8b 46 20 mov 0x20(%esi),%eax
1de: 89 3c 24 mov %edi,(%esp)
1e1: c7 44 24 04 00 04 00 movl $0x400,0x4(%esp)
1e8: 00
1e9: 89 44 24 08 mov %eax,0x8(%esp)
1ed: e8 fc ff ff ff call 1ee <T.68+0x5e>
1f2: 8b 46 1c mov 0x1c(%esi),%eax
1f5: c7 44 24 04 00 04 00 movl $0x400,0x4(%esp)
1fc: 00
1fd: 89 44 24 08 mov %eax,0x8(%esp)
201: 8d 87 00 10 00 00 lea 0x1000(%edi),%eax
207: 89 04 24 mov %eax,(%esp)
20a: e8 fc ff ff ff call 20b <T.68+0x7b>
20f: 8b 46 24 mov 0x24(%esi),%eax
212: c7 44 24 04 00 04 00 movl $0x400,0x4(%esp)
219: 00
21a: 89 44 24 08 mov %eax,0x8(%esp)
21e: 8d 87 00 20 00 00 lea 0x2000(%edi),%eax
224: 89 04 24 mov %eax,(%esp)
227: e8 fc ff ff ff call 228 <T.68+0x98>
22c: 8b 46 14 mov 0x14(%esi),%eax
22f: c7 44 24 04 00 01 00 movl $0x100,0x4(%esp)
236: 00
237: 89 44 24 08 mov %eax,0x8(%esp)
23b: 8d 87 00 30 00 00 lea 0x3000(%edi),%eax
241: 89 45 d4 mov %eax,-0x2c(%ebp)
244: 89 04 24 mov %eax,(%esp)
247: e8 fc ff ff ff call 248 <T.68+0xb8>
24c: 8b 56 10 mov 0x10(%esi),%edx
24f: c7 44 24 04 00 01 00 movl $0x100,0x4(%esp)
256: 00
257: 89 54 24 08 mov %edx,0x8(%esp)
25b: 8d 97 00 34 00 00 lea 0x3400(%edi),%edx
261: 89 14 24 mov %edx,(%esp)
264: e8 fc ff ff ff call 265 <T.68+0xd5>
269: 8b 56 18 mov 0x18(%esi),%edx
26c: c7 44 24 04 00 01 00 movl $0x100,0x4(%esp)
273: 00
274: 89 54 24 08 mov %edx,0x8(%esp)
278: 8d 97 00 38 00 00 lea 0x3800(%edi),%edx
27e: 89 14 24 mov %edx,(%esp)
281: e8 fc ff ff ff call 282 <T.68+0xf2>
286: 8b 56 08 mov 0x8(%esi),%edx
289: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp)
290: 00
291: 89 54 24 08 mov %edx,0x8(%esp)
295: 8d 97 00 3c 00 00 lea 0x3c00(%edi),%edx
29b: 89 14 24 mov %edx,(%esp)
29e: 89 55 e0 mov %edx,-0x20(%ebp)
2a1: e8 fc ff ff ff call 2a2 <T.68+0x112>
2a6: 8b 46 04 mov 0x4(%esi),%eax
2a9: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp)
2b0: 00
2b1: 89 44 24 08 mov %eax,0x8(%esp)
2b5: 8d 87 00 3d 00 00 lea 0x3d00(%edi),%eax
2bb: 89 04 24 mov %eax,(%esp)
2be: e8 fc ff ff ff call 2bf <T.68+0x12f>
2c3: 8b 46 0c mov 0xc(%esi),%eax
2c6: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp)
2cd: 00
2ce: 89 44 24 08 mov %eax,0x8(%esp)
2d2: 8d 87 70 3c 00 00 lea 0x3c70(%edi),%eax
2d8: 89 04 24 mov %eax,(%esp)
2db: e8 fc ff ff ff call 2dc <T.68+0x14c>
2e0: 8b 4d e4 mov -0x1c(%ebp),%ecx
2e3: 8b 36 mov (%esi),%esi
2e5: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp)
2ec: 00
2ed: 89 0c 24 mov %ecx,(%esp)
2f0: 89 74 24 08 mov %esi,0x8(%esp)
2f4: e8 fc ff ff ff call 2f5 <T.68+0x165>
2f9: 8b 55 e0 mov -0x20(%ebp),%edx
2fc: 89 1c 24 mov %ebx,(%esp)
2ff: c7 44 24 08 08 00 00 movl $0x8,0x8(%esp)
306: 00
307: 89 54 24 04 mov %edx,0x4(%esp)
30b: e8 fc ff ff ff call 30c <T.68+0x17c>
310: 8b 45 d4 mov -0x2c(%ebp),%eax
313: 89 1c 24 mov %ebx,(%esp)
316: c7 44 24 08 10 00 00 movl $0x10,0x8(%esp)
31d: 00
31e: 89 44 24 04 mov %eax,0x4(%esp)
322: e8 fc ff ff ff call 323 <T.68+0x193>
327: 89 7c 24 04 mov %edi,0x4(%esp)
32b: 89 1c 24 mov %ebx,(%esp)
32e: c7 44 24 08 20 00 00 movl $0x20,0x8(%esp)
335: 00
336: e8 fc ff ff ff call 337 <T.68+0x1a7>
33b: 83 c4 4c add $0x4c,%esp
33e: 5b pop %ebx
33f: 5e pop %esi
340: 5f pop %edi
341: 5d pop %ebp
342: c3 ret
343: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
349: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi
00000350 <rfx_decode_rgb>:
350: 55 push %ebp
351: 89 e5 mov %esp,%ebp
353: 57 push %edi
354: 56 push %esi
355: 53 push %ebx
356: 83 ec 2c sub $0x2c,%esp
359: 8b 5d 08 mov 0x8(%ebp),%ebx
35c: 8b 4d 0c mov 0xc(%ebp),%ecx
35f: 8b 55 14 mov 0x14(%ebp),%edx
362: 8b 83 58 c0 00 00 mov 0xc058(%ebx),%eax
368: 89 44 24 04 mov %eax,0x4(%esp)
36c: 8b 45 10 mov 0x10(%ebp),%eax
36f: 89 04 24 mov %eax,(%esp)
372: 89 d8 mov %ebx,%eax
374: e8 17 fe ff ff call 190 <T.68>
379: 8b 83 5c c0 00 00 mov 0xc05c(%ebx),%eax
37f: 8b 4d 18 mov 0x18(%ebp),%ecx
382: 8b 55 20 mov 0x20(%ebp),%edx
385: 89 44 24 04 mov %eax,0x4(%esp)
389: 8b 45 1c mov 0x1c(%ebp),%eax
38c: 89 04 24 mov %eax,(%esp)
38f: 89 d8 mov %ebx,%eax
391: e8 fa fd ff ff call 190 <T.68>
396: 8b 83 60 c0 00 00 mov 0xc060(%ebx),%eax
39c: 8b 55 2c mov 0x2c(%ebp),%edx
39f: 8b 4d 24 mov 0x24(%ebp),%ecx
3a2: 89 44 24 04 mov %eax,0x4(%esp)
3a6: 8b 45 28 mov 0x28(%ebp),%eax
3a9: 89 04 24 mov %eax,(%esp)
3ac: 89 d8 mov %ebx,%eax
3ae: e8 dd fd ff ff call 190 <T.68>
3b3: 8b 83 60 c0 00 00 mov 0xc060(%ebx),%eax
3b9: 89 44 24 08 mov %eax,0x8(%esp)
3bd: 8b 83 5c c0 00 00 mov 0xc05c(%ebx),%eax
3c3: 89 44 24 04 mov %eax,0x4(%esp)
3c7: 8b 83 58 c0 00 00 mov 0xc058(%ebx),%eax
3cd: 89 04 24 mov %eax,(%esp)
3d0: ff 93 78 14 01 00 call *0x11478(%ebx)
3d6: 8b 55 30 mov 0x30(%ebp),%edx
3d9: 31 c0 xor %eax,%eax
3db: eb 24 jmp 401 <rfx_decode_rgb+0xb1>
3dd: 8d 76 00 lea 0x0(%esi),%esi
3e0: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx
3e4: c6 42 03 ff movb $0xff,0x3(%edx)
3e8: 88 0a mov %cl,(%edx)
3ea: 89 f1 mov %esi,%ecx
3ec: 88 4a 01 mov %cl,0x1(%edx)
3ef: 89 f9 mov %edi,%ecx
3f1: 88 4a 02 mov %cl,0x2(%edx)
3f4: 83 c2 04 add $0x4,%edx
3f7: 83 c0 04 add $0x4,%eax
3fa: 3d 00 40 00 00 cmp $0x4000,%eax
3ff: 74 56 je 457 <rfx_decode_rgb+0x107>
401: 8b 8b 58 c0 00 00 mov 0xc058(%ebx),%ecx
407: 8b 3c 01 mov (%ecx,%eax,1),%edi
40a: 8b 8b 5c c0 00 00 mov 0xc05c(%ebx),%ecx
410: 8b 34 01 mov (%ecx,%eax,1),%esi
413: 8b 8b 60 c0 00 00 mov 0xc060(%ebx),%ecx
419: 8b 0c 01 mov (%ecx,%eax,1),%ecx
41c: 89 4d e4 mov %ecx,-0x1c(%ebp)
41f: 8b 4b 18 mov 0x18(%ebx),%ecx
422: 83 f9 01 cmp $0x1,%ecx
425: 74 59 je 480 <rfx_decode_rgb+0x130>
427: 72 b7 jb 3e0 <rfx_decode_rgb+0x90>
429: 83 f9 02 cmp $0x2,%ecx
42c: 74 3a je 468 <rfx_decode_rgb+0x118>
42e: 83 f9 03 cmp $0x3,%ecx
431: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
438: 75 bd jne 3f7 <rfx_decode_rgb+0xa7>
43a: 89 f9 mov %edi,%ecx
43c: 83 c0 04 add $0x4,%eax
43f: 88 0a mov %cl,(%edx)
441: 89 f1 mov %esi,%ecx
443: 88 4a 01 mov %cl,0x1(%edx)
446: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx
44a: 88 4a 02 mov %cl,0x2(%edx)
44d: 83 c2 03 add $0x3,%edx
450: 3d 00 40 00 00 cmp $0x4000,%eax
455: 75 aa jne 401 <rfx_decode_rgb+0xb1>
457: 8b 45 30 mov 0x30(%ebp),%eax
45a: 83 c4 2c add $0x2c,%esp
45d: 5b pop %ebx
45e: 5e pop %esi
45f: 5f pop %edi
460: 5d pop %ebp
461: c3 ret
462: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
468: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx
46c: 88 0a mov %cl,(%edx)
46e: 89 f1 mov %esi,%ecx
470: 88 4a 01 mov %cl,0x1(%edx)
473: 89 f9 mov %edi,%ecx
475: 88 4a 02 mov %cl,0x2(%edx)
478: 83 c2 03 add $0x3,%edx
47b: e9 77 ff ff ff jmp 3f7 <rfx_decode_rgb+0xa7>
480: 89 f9 mov %edi,%ecx
482: 88 0a mov %cl,(%edx)
484: 89 f1 mov %esi,%ecx
486: 88 4a 01 mov %cl,0x1(%edx)
489: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx
48d: c6 42 03 ff movb $0xff,0x3(%edx)
491: 88 4a 02 mov %cl,0x2(%edx)
494: 83 c2 04 add $0x4,%edx
497: e9 5b ff ff ff jmp 3f7 <rfx_decode_rgb+0xa7>
Disassembly of section .rodata.cst16:
00000000 <.rodata.cst16>:
0: 00 00 add %al,(%eax)
2: 00 43 00 add %al,0x0(%ebx)
5: 00 00 add %al,(%eax)
7: 43 inc %ebx
8: 00 00 add %al,(%eax)
a: 00 43 00 add %al,0x0(%ebx)
d: 00 00 add %al,(%eax)
f: 43 inc %ebx
10: 81 95 b3 3f 81 95 b3 adcl $0x95813fb3,-0x6a7ec04d(%ebp)
17: 3f 81 95
1a: b3 3f mov $0x3f,%bl
1c: 81 95 b3 3f c5 20 b0 adcl $0x20c5beb0,0x20c53fb3(%ebp)
23: be c5 20
26: b0 be mov $0xbe,%al
28: c5 20 lds (%eax),%esp
2a: b0 be mov $0xbe,%al
2c: c5 20 lds (%eax),%esp
2e: b0 be mov $0xbe,%al
30: b4 c8 mov $0xc8,%ah
32: 36 ss
33: bf b4 c8 36 bf mov $0xbf36c8b4,%edi
38: b4 c8 mov $0xc8,%ah
3a: 36 ss
3b: bf b4 c8 36 bf mov $0xbf36c8b4,%edi
40: 5c pop %esp
41: 8f (bad)
42: e2 3f loop 83 <rfx_decode_YCbCr_to_RGB+0x83>
44: 5c pop %esp
45: 8f (bad)
46: e2 3f loop 87 <rfx_decode_YCbCr_to_RGB+0x87>
48: 5c pop %esp
49: 8f (bad)
4a: e2 3f loop 8b <rfx_decode_YCbCr_to_RGB+0x8b>
4c: 5c pop %esp
4d: 8f (bad)
4e: e2 3f loop 8f <rfx_decode_YCbCr_to_RGB+0x8f>
50: 00 00 add %al,(%eax)
52: 7f 43 jg 97 <rfx_decode_YCbCr_to_RGB+0x97>
54: 00 00 add %al,(%eax)
56: 7f 43 jg 9b <rfx_decode_YCbCr_to_RGB+0x9b>
58: 00 00 add %al,(%eax)
5a: 7f 43 jg 9f <rfx_decode_YCbCr_to_RGB+0x9f>
5c: 00 00 add %al,(%eax)
5e: 7f 43 jg a3 <rfx_decode_YCbCr_to_RGB+0xa3>
Disassembly of section .comment:
00000000 <.comment>:
0: 00 47 43 add %al,0x43(%edi)
3: 43 inc %ebx
4: 3a 20 cmp (%eax),%ah
6: 28 55 62 sub %dl,0x62(%ebp)
9: 75 6e jne 79 <rfx_decode_YCbCr_to_RGB+0x79>
b: 74 75 je 82 <rfx_decode_YCbCr_to_RGB+0x82>
d: 20 34 2e and %dh,(%esi,%ebp,1)
10: 34 2e xor $0x2e,%al
12: 33 2d 34 75 62 75 xor 0x75627534,%ebp
18: 6e outsb %ds:(%esi),(%dx)
19: 74 75 je 90 <rfx_decode_YCbCr_to_RGB+0x90>
1b: 35 29 20 34 2e xor $0x2e342029,%eax
20: 34 2e xor $0x2e,%al
22: 33 00 xor (%eax),%eax
------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Freerdp-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freerdp-devel