Hi Aurelien, On 28.09.2015 14:48, Aurelien Jarno wrote: > On 2015-09-27 23:43, Andreas Cadhalpun wrote: >> A slightly larger test case for mips is compiling ffmpeg... > > That is what I did to test whether the failure is due to the above changes. > ffmpeg builds fine with gcc version 5.2.1-17 and -march=mips2 > -mtune=mips32 (instead of the new default -march=mips32r2), and passes > the testsuite. I'll see if I can isolate a smaller testcase so that we > can understand the problem.
Attached is a testcase that fails on mips, but works fine on mipsel. One more peculiarity I noticed is that adding 'c->A = 1;' to an unused code path inverts the behavior: it then works with -fexpensive-optimizations, but fails with -fno-expensive-optimizations. Best regards, Andreas
/*
 * Driver for the miscompilation test case.
 *
 * Calls init_tables() from the shared library under test and checks the
 * value it stores in Context.out.  Prints "working" and exits with 0 when
 * the value is the expected 0x12a15, otherwise prints "broken" and exits
 * with 1.
 */
#include <inttypes.h>
#include <stdio.h>

/* Must stay layout-compatible with the definition used by the library. */
typedef struct Context {
    int64_t out;
    int64_t A;
    int64_t B;
    int C;
} Context;

/* Provided by libworking.so / libbroken.so. */
void init_tables(Context *c, int *table, int brightness, int contrast, int saturation);

int main(void)
{
    Context c = { 0 };
    int table[3] = {104597, 132201, 25675};

    init_tables(&c, table, 0, 65536, 65536);

    /* PRIX64 expects an unsigned 64-bit argument, hence the cast. */
    fprintf(stderr, "TEST: 0x%"PRIX64"\n", (uint64_t)c.out);

    if (c.out == 0x12a15) {
        printf("working\n");
        return 0;
    }

    printf("broken\n");
    return 1;
}
# Builds test.c twice as a shared library -- once with
# -fno-expensive-optimizations (libworking.so) and once without
# (libbroken.so) -- links main.c against each, and runs both binaries.

CFLAGS += -fPIC -O2
LDFLAGS += -shared

.PHONY: all clean

# "|| true" keeps make going when a test binary exits with a non-zero status.
all: working broken
	LD_LIBRARY_PATH=. ./working || true
	LD_LIBRARY_PATH=. ./broken || true

working.o: test.c
	$(CC) $(CFLAGS) -fno-expensive-optimizations -c -o working.o test.c

broken.o: test.c
	$(CC) $(CFLAGS) -c -o broken.o test.c

libworking.so: working.o
	$(CC) $(LDFLAGS) -o libworking.so working.o

libbroken.so: broken.o
	$(CC) $(LDFLAGS) -o libbroken.so broken.o

working: main.c libworking.so
	$(CC) -o working main.c -L. -lworking

broken: main.c libbroken.so
	$(CC) -o broken main.c -L. -lbroken

clean:
	rm -f working.o libworking.so working broken.o libbroken.so broken
/*
 * Reduced test case for a miscompilation: the value stored in Context.out
 * by init_tables() differs depending on whether this file is built with or
 * without -fexpensive-optimizations.
 *
 * NOTE: the result depends on the exact shape of this code.  Do not
 * "clean up" or reorder statements here; doing so may hide the problem
 * this file exists to demonstrate.
 */
#include <inttypes.h>

typedef struct Context {
    int64_t out;    /* value checked by the test driver */
    int64_t A;
    int64_t B;
    int C;          /* selects the switch case in init_tables() */
} Context;

/*
 * Clamp a to the range 0..255.
 * Values already in range are returned unchanged.  For out-of-range values
 * the result is the low byte of (-a) >> 31, i.e. 0 for negative a and 255
 * for a > 255 on implementations where >> on a negative int is an
 * arithmetic shift.
 */
static uint8_t clip_uint8_c(int a)
{
    if (a&(~0xFF)) return (-a)>>31;
    else return a;
}

/*
 * Fill table[0..767] with pointers derived from y_tab:
 *   table[i] = y_tab - elemsize * (inc >> 9)
 *                    + elemsize * ((clip_uint8_c(i - 256) * inc) >> 16)
 * The pointers are only computed here, never dereferenced.
 */
static void fill_table(uint8_t* table[256 + 2*256], const int elemsize, const int64_t inc, void *y_tab)
{
    int i;
    uint8_t *y_table = y_tab;

    y_table -= elemsize * (inc >> 9);

    for (i = 0; i < 256 + 2*256; i++) {
        int64_t cb = clip_uint8_c(i-256)*inc;
        table[i] = y_table + elemsize * (cb >> 16);
    }
}

/*
 * Integer counterpart of fill_table(): stores offsets instead of pointers,
 *   table[i] = elemsize * (-(inc >> 9) + ((clip_uint8_c(i - 256) * inc) >> 16))
 */
static void fill_gv_table(int table[256 + 2*256], const int elemsize, const int64_t inc)
{
    int i;
    int off = -(inc >> 9);

    for (i = 0; i < 256 + 2*256; i++) {
        int64_t cb = clip_uint8_c(i-256)*inc;
        table[i] = elemsize * (off + (cb >> 16));
    }
}

/*
 * Compute (f + 0x8000) >> 16 and saturate it: results below -0x7FFF yield
 * 0x8000, results above 0x7FFF yield 0x7FFF, anything else is returned
 * converted to uint16_t.
 */
static uint16_t roundToInt16(int64_t f)
{
    int r = (f + (1 << 15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    else if (r > 0x7FFF)
        return 0x7FFF;
    else
        return r;
}

/* Output tables and scratch globals written by init_tables(). */
uint8_t yuvTable[1024];
int table_gV[256 + 2*256];
uint8_t *table_rV[256 + 2*256];
uint8_t *table_gU[256 + 2*256];
uint8_t *table_bU[256 + 2*256];
int64_t loc1;
int64_t loc2;
int64_t loc3;
int loc4;

/*
 * Derive fixed-point coefficients from table[0..2], brightness, contrast
 * and saturation, store results in *c and in the globals above, and fill
 * the lookup tables when c->C is 0 or 1.
 *
 * c->out receives ((1 << 16) * 255 / 219 * contrast) >> 16 when brightness
 * is 0, and ((1 << 16) * contrast) >> 16 otherwise.  With brightness 0 and
 * contrast 65536 that is 0x12a15, the value the test driver checks for.
 */
__attribute__((cold)) void init_tables(Context *c, int *table, int brightness, int contrast, int saturation)
{
    int i;
    const int bpp = c->C;
    const int yoffs = brightness ? 384 : 326;

    int64_t crv = table[0];
    int64_t cbu = table[1];
    int64_t cgu = -table[0];
    int64_t cgv = -table[2];
    int64_t cy = 1 << 16;
    int64_t TEST = 1 << 16;   /* mirrors the computation of cy; ends up in c->out */
    int64_t oy = 0;
    int64_t yb = 0;

    if (!brightness) {
        cy = (cy * 255) / 219;
        TEST = (TEST * 255) / 219;
        oy = 16 << 16;
    } else {
        crv = (crv * 224) / 255;
        cbu = (cbu * 224) / 255;
        cgu = (cgu * 224) / 255;
        cgv = (cgv * 224) / 255;
    }

    cy = (cy * contrast) >> 16;
//    c->out = TEST;
    TEST = (TEST * contrast) >> 16;
    c->out = TEST;
    crv = (crv * contrast * saturation) >> 32;
    cbu = (cbu * contrast * saturation) >> 32;
    cgu = (cgu * contrast * saturation) >> 32;
    cgv = (cgv * contrast * saturation) >> 32;
    oy -= 256 * brightness;

    c->A = 1;
    c->B = roundToInt16(cy * 8192) + roundToInt16(crv * 8192);
    loc1 = roundToInt16(cgv * 8192);
    loc2 = roundToInt16(cgu * 8192);
    loc3 = roundToInt16(oy * 8);
    loc4 = roundToInt16(cy << 13) + roundToInt16(oy) + roundToInt16(crv << 13) + roundToInt16(cgv << 13) + roundToInt16(cgu << 13) + roundToInt16(cbu << 13);

    /* Normalise the chroma coefficients by cy, guarding against cy < 1. */
    crv = ((crv << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    cbu = ((cbu << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    cgu = ((cgu << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    cgv = ((cgv << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));

    switch (bpp) {
    case 0:
        yb = -(384 << 16) - oy;
        for (i = 0; i < 1024; i++) {
            yuvTable[i] = clip_uint8_c(yb >> 16);
            yb += cy;
        }
        fill_table(table_rV, 1, crv, yuvTable);
        fill_table(table_gU, 1, cgu, yuvTable);
        fill_table(table_bU, 1, cbu, yuvTable);
        fill_gv_table(table_gV, 1, cgv);
        break;
    case 1:
        yb = -(384 << 16) - oy;
        for (i = 0; i < 1024; i++) {
            yuvTable[i] = clip_uint8_c(yb >> 16);
            yb += cy;
        }
//        c->A = 1; // uncomment to change broken/working
        fill_table(table_rV, 1, crv, yuvTable);
        fill_table(table_gU, 1, cgu, yuvTable);
        fill_table(table_bU, 1, cbu, yuvTable + yoffs);
        fill_gv_table(table_gV, 1, cgv);
        break;
    }
}