On Monday, 2018-07-23 19:52:01 -0400, Marek Olšák wrote: > From: Marek Olšák <marek.ol...@amd.com> > > --- > src/mesa/Makefile.sources | 2 + > src/mesa/main/formats.c | 42 + > src/mesa/main/formats.h | 3 + > src/mesa/main/texcompress_astc.cpp | 1871 ++++++++++++++++++++++++++++ > src/mesa/main/texcompress_astc.h | 47 + > src/util/half_float.c | 59 + > src/util/half_float.h | 5 + > 7 files changed, 2029 insertions(+) > create mode 100644 src/mesa/main/texcompress_astc.cpp > create mode 100644 src/mesa/main/texcompress_astc.h > > diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources > index 63f3734c322..ae8934e2830 100644 > --- a/src/mesa/Makefile.sources > +++ b/src/mesa/Makefile.sources > @@ -207,20 +207,22 @@ MAIN_FILES = \ > main/shader_query.cpp \ > main/shared.c \ > main/shared.h \ > main/state.c \ > main/state.h \ > main/stencil.c \ > main/stencil.h \ > main/syncobj.c \ > main/syncobj.h \ > main/texcompress.c \ > + main/texcompress_astc.cpp \ > + main/texcompress_astc.h \ > main/texcompress_bptc.c \ > main/texcompress_bptc.h \ > main/texcompress_bptc_tmp.h \ > main/texcompress_cpal.c \ > main/texcompress_cpal.h \ > main/texcompress_etc.c \ > main/texcompress_etc.h \ > main/texcompress_etc_tmp.h \ > main/texcompress_fxt1.c \ > main/texcompress_fxt1.h \
And to avoid breaking meson :) ----8<---- diff --git a/src/mesa/meson.build b/src/mesa/meson.build index 8fb7db215c8f39212c07..29633691ee776089bc35 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -254,6 +254,8 @@ files_libmesa_common = files( 'main/syncobj.c', 'main/syncobj.h', 'main/texcompress.c', + 'main/texcompress_astc.cpp', + 'main/texcompress_astc.h', 'main/texcompress_bptc.c', 'main/texcompress_bptc.h', 'main/texcompress_cpal.c', ---->8---- > diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c > index fdb53afd570..d4cd5d2182c 100644 > --- a/src/mesa/main/formats.c > +++ b/src/mesa/main/formats.c > @@ -620,20 +620,62 @@ _mesa_is_format_etc2(mesa_format format) > case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: > case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: > case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: > return GL_TRUE; > default: > return GL_FALSE; > } > } > > > +/** > + * Return TRUE if format is an ASTC 2D compressed format. > + */ > +bool > +_mesa_is_format_astc_2d(mesa_format format) > +{ > + switch (format) { > + case MESA_FORMAT_RGBA_ASTC_4x4: > + case MESA_FORMAT_RGBA_ASTC_5x4: > + case MESA_FORMAT_RGBA_ASTC_5x5: > + case MESA_FORMAT_RGBA_ASTC_6x5: > + case MESA_FORMAT_RGBA_ASTC_6x6: > + case MESA_FORMAT_RGBA_ASTC_8x5: > + case MESA_FORMAT_RGBA_ASTC_8x6: > + case MESA_FORMAT_RGBA_ASTC_8x8: > + case MESA_FORMAT_RGBA_ASTC_10x5: > + case MESA_FORMAT_RGBA_ASTC_10x6: > + case MESA_FORMAT_RGBA_ASTC_10x8: > + case MESA_FORMAT_RGBA_ASTC_10x10: > + case MESA_FORMAT_RGBA_ASTC_12x10: > + case MESA_FORMAT_RGBA_ASTC_12x12: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5: > + case 
MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10: > + case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12: > + return true; > + default: > + return false; > + } > +} > + > + > /** > * If the given format is a compressed format, return a corresponding > * uncompressed format. > */ > mesa_format > _mesa_get_uncompressed_format(mesa_format format) > { > switch (format) { > case MESA_FORMAT_RGB_FXT1: > return MESA_FORMAT_BGR_UNORM8; > diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h > index 2afa886782b..335e4de9955 100644 > --- a/src/mesa/main/formats.h > +++ b/src/mesa/main/formats.h > @@ -714,20 +714,23 @@ _mesa_is_format_unsigned(mesa_format format); > > extern GLboolean > _mesa_is_format_signed(mesa_format format); > > extern GLboolean > _mesa_is_format_integer(mesa_format format); > > extern bool > _mesa_is_format_etc2(mesa_format format); > > +bool > +_mesa_is_format_astc_2d(mesa_format format); > + > GLenum > _mesa_is_format_color_format(mesa_format format); > > extern GLenum > _mesa_get_format_color_encoding(mesa_format format); > > extern GLuint > _mesa_format_image_size(mesa_format format, GLsizei width, > GLsizei height, GLsizei depth); > > diff --git a/src/mesa/main/texcompress_astc.cpp > b/src/mesa/main/texcompress_astc.cpp > new file mode 100644 > index 00000000000..996e8ea28d6 > --- /dev/null > +++ b/src/mesa/main/texcompress_astc.cpp > @@ -0,0 +1,1871 @@ > +/* > + * Copyright 2015 Philip Taylor <phi...@zaynar.co.uk> > + * Copyright 2018 Advanced Micro Devices, Inc. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > + * DEALINGS IN THE SOFTWARE. > + */ > + > +/** > + * \file texcompress_astc.cpp > + * > + * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just > + * ASTC 2D LDR. > + * > + * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC > + * library written by Philip Taylor. I added sRGB support and adjusted it for > + * Mesa.
- Marek > + */ > + > +#include "texcompress_astc.h" > +#include "macros.h" > +#include "util/half_float.h" > +#include <stdio.h> > + > +static bool VERBOSE_DECODE = false; > +static bool VERBOSE_WRITE = false; > + > +static inline uint8_t > +uint16_div_64k_to_half_to_unorm8(uint16_t v) > +{ > + return _mesa_half_to_unorm8(_mesa_uint16_div_64k_to_half(v)); > +} > + > +class decode_error > +{ > +public: > + enum type { > + ok, > + unsupported_hdr_void_extent, > + reserved_block_mode_1, > + reserved_block_mode_2, > + dual_plane_and_too_many_partitions, > + invalid_range_in_void_extent, > + weight_grid_exceeds_block_size, > + invalid_colour_endpoints_size, > + invalid_colour_endpoints_count, > + invalid_weight_bits, > + invalid_num_weights, > + }; > +}; > + > + > +struct cem_range { > + uint8_t max; > + uint8_t t, q, b; > +}; > + > +/* Based on the Color Unquantization Parameters table, > + * plus the bit-only representations, sorted by increasing size > + */ > +static cem_range cem_ranges[] = { > + { 5, 1, 0, 1 }, > + { 7, 0, 0, 3 }, > + { 9, 0, 1, 1 }, > + { 11, 1, 0, 2 }, > + { 15, 0, 0, 4 }, > + { 19, 0, 1, 2 }, > + { 23, 1, 0, 3 }, > + { 31, 0, 0, 5 }, > + { 39, 0, 1, 3 }, > + { 47, 1, 0, 4 }, > + { 63, 0, 0, 6 }, > + { 79, 0, 1, 4 }, > + { 95, 1, 0, 5 }, > + { 127, 0, 0, 7 }, > + { 159, 0, 1, 5 }, > + { 191, 1, 0, 6 }, > + { 255, 0, 0, 8 }, > +}; > + > +#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) ) > +#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) ) > +#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | > (d) ) > +#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | > ((d) << 1) | (e) ) > + > +/** > + * Unpack 5n+8 bits from 'in' into 5 output values. > + * If n <= 4 then T should be uint32_t, else it must be uint64_t. 
> + */ > +template <typename T> > +static void unpack_trit_block(int n, T in, uint8_t *out) > +{ > + assert(n <= 6); /* else output will overflow uint8_t */ > + > + uint8_t T0 = (in >> (n)) & 0b1; > + uint8_t T1 = (in >> (n+1)) & 0b1; > + uint8_t T2 = (in >> (2*n+2)) & 0b1; > + uint8_t T3 = (in >> (2*n+3)) & 0b1; > + uint8_t T4 = (in >> (3*n+4)) & 0b1; > + uint8_t T5 = (in >> (4*n+5)) & 0b1; > + uint8_t T6 = (in >> (4*n+6)) & 0b1; > + uint8_t T7 = (in >> (5*n+7)) & 0b1; > + uint8_t mmask = (1 << n) - 1; > + uint8_t m0 = (in >> (0)) & mmask; > + uint8_t m1 = (in >> (n+2)) & mmask; > + uint8_t m2 = (in >> (2*n+4)) & mmask; > + uint8_t m3 = (in >> (3*n+5)) & mmask; > + uint8_t m4 = (in >> (4*n+7)) & mmask; > + > + uint8_t C; > + uint8_t t4, t3, t2, t1, t0; > + if (CAT_BITS_3(T4, T3, T2) == 0b111) { > + C = CAT_BITS_5(T7, T6, T5, T1, T0); > + t4 = t3 = 2; > + } else { > + C = CAT_BITS_5(T4, T3, T2, T1, T0); > + if (CAT_BITS_2(T6, T5) == 0b11) { > + t4 = 2; > + t3 = T7; > + } else { > + t4 = T7; > + t3 = CAT_BITS_2(T6, T5); > + } > + } > + > + if ((C & 0b11) == 0b11) { > + t2 = 2; > + t1 = (C >> 4) & 0b1; > + uint8_t C3 = (C >> 3) & 0b1; > + uint8_t C2 = (C >> 2) & 0b1; > + t0 = (C3 << 1) | (C2 & ~C3); > + } else if (((C >> 2) & 0b11) == 0b11) { > + t2 = 2; > + t1 = 2; > + t0 = C & 0b11; > + } else { > + t2 = (C >> 4) & 0b1; > + t1 = (C >> 2) & 0b11; > + uint8_t C1 = (C >> 1) & 0b1; > + uint8_t C0 = (C >> 0) & 0b1; > + t0 = (C1 << 1) | (C0 & ~C1); > + } > + > + out[0] = (t0 << n) | m0; > + out[1] = (t1 << n) | m1; > + out[2] = (t2 << n) | m2; > + out[3] = (t3 << n) | m3; > + out[4] = (t4 << n) | m4; > +} > + > +/** > + * Unpack 3n+7 bits from 'in' into 3 output values > + */ > +static void unpack_quint_block(int n, uint32_t in, uint8_t *out) > +{ > + assert(n <= 5); /* else output will overflow uint8_t */ > + > + uint8_t Q0 = (in >> (n)) & 0b1; > + uint8_t Q1 = (in >> (n+1)) & 0b1; > + uint8_t Q2 = (in >> (n+2)) & 0b1; > + uint8_t Q3 = (in >> (2*n+3)) & 0b1; > + uint8_t 
Q4 = (in >> (2*n+4)) & 0b1; > + uint8_t Q5 = (in >> (3*n+5)) & 0b1; > + uint8_t Q6 = (in >> (3*n+6)) & 0b1; > + uint8_t mmask = (1 << n) - 1; > + uint8_t m0 = (in >> (0)) & mmask; > + uint8_t m1 = (in >> (n+3)) & mmask; > + uint8_t m2 = (in >> (2*n+5)) & mmask; > + > + uint8_t C; > + uint8_t q2, q1, q0; > + if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 0b0011) { > + q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0); > + q1 = 4; > + q0 = 4; > + } else { > + if (CAT_BITS_2(Q2, Q1) == 0b11) { > + q2 = 4; > + C = CAT_BITS_5(Q4, Q3, 0b1 & ~Q6, 0b1 & ~Q5, Q0); > + } else { > + q2 = CAT_BITS_2(Q6, Q5); > + C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0); > + } > + if ((C & 0b111) == 0b101) { > + q1 = 4; > + q0 = (C >> 3) & 0b11; > + } else { > + q1 = (C >> 3) & 0b11; > + q0 = C & 0b111; > + } > + } > + out[0] = (q0 << n) | m0; > + out[1] = (q1 << n) | m1; > + out[2] = (q2 << n) | m2; > +} > + > + > +struct uint8x4_t > +{ > + uint8_t v[4]; > + > + uint8x4_t() { } > + > + uint8x4_t(int a, int b, int c, int d) > + { > + assert(0 <= a && a <= 255); > + assert(0 <= b && b <= 255); > + assert(0 <= c && c <= 255); > + assert(0 <= d && d <= 255); > + v[0] = a; > + v[1] = b; > + v[2] = c; > + v[3] = d; > + } > + > + static uint8x4_t clamped(int a, int b, int c, int d) > + { > + uint8x4_t r; > + r.v[0] = MAX2(0, MIN2(255, a)); > + r.v[1] = MAX2(0, MIN2(255, b)); > + r.v[2] = MAX2(0, MIN2(255, c)); > + r.v[3] = MAX2(0, MIN2(255, d)); > + return r; > + } > +}; > + > +static uint8x4_t blue_contract(int r, int g, int b, int a) > +{ > + return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a); > +} > + > +static uint8x4_t blue_contract_clamped(int r, int g, int b, int a) > +{ > + return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a); > +} > + > +static void bit_transfer_signed(int &a, int &b) > +{ > + b >>= 1; > + b |= a & 0x80; > + a >>= 1; > + a &= 0x3f; > + if (a & 0x20) > + a -= 0x40; > +} > + > +static uint32_t hash52(uint32_t p) > +{ > + p ^= p >> 15; > + p -= p << 17; > + p += p << 7; > + p += p << 4; > + p ^= p >> 5; > + 
p += p << 16; > + p ^= p >> 7; > + p ^= p >> 3; > + p ^= p << 6; > + p ^= p >> 17; > + return p; > +} > + > +static int select_partition(int seed, int x, int y, int z, int > partitioncount, > + int small_block) > +{ > + if (small_block) { > + x <<= 1; > + y <<= 1; > + z <<= 1; > + } > + seed += (partitioncount - 1) * 1024; > + uint32_t rnum = hash52(seed); > + uint8_t seed1 = rnum & 0xF; > + uint8_t seed2 = (rnum >> 4) & 0xF; > + uint8_t seed3 = (rnum >> 8) & 0xF; > + uint8_t seed4 = (rnum >> 12) & 0xF; > + uint8_t seed5 = (rnum >> 16) & 0xF; > + uint8_t seed6 = (rnum >> 20) & 0xF; > + uint8_t seed7 = (rnum >> 24) & 0xF; > + uint8_t seed8 = (rnum >> 28) & 0xF; > + uint8_t seed9 = (rnum >> 18) & 0xF; > + uint8_t seed10 = (rnum >> 22) & 0xF; > + uint8_t seed11 = (rnum >> 26) & 0xF; > + uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; > + > + seed1 *= seed1; > + seed2 *= seed2; > + seed3 *= seed3; > + seed4 *= seed4; > + seed5 *= seed5; > + seed6 *= seed6; > + seed7 *= seed7; > + seed8 *= seed8; > + seed9 *= seed9; > + seed10 *= seed10; > + seed11 *= seed11; > + seed12 *= seed12; > + > + int sh1, sh2, sh3; > + if (seed & 1) { > + sh1 = (seed & 2 ? 4 : 5); > + sh2 = (partitioncount == 3 ? 6 : 5); > + } else { > + sh1 = (partitioncount == 3 ? 6 : 5); > + sh2 = (seed & 2 ? 4 : 5); > + } > + sh3 = (seed & 0x10) ? 
sh1 : sh2; > + > + seed1 >>= sh1; > + seed2 >>= sh2; > + seed3 >>= sh1; > + seed4 >>= sh2; > + seed5 >>= sh1; > + seed6 >>= sh2; > + seed7 >>= sh1; > + seed8 >>= sh2; > + seed9 >>= sh3; > + seed10 >>= sh3; > + seed11 >>= sh3; > + seed12 >>= sh3; > + > + int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); > + int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); > + int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); > + int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); > + > + a &= 0x3F; > + b &= 0x3F; > + c &= 0x3F; > + d &= 0x3F; > + > + if (partitioncount < 4) > + d = 0; > + if (partitioncount < 3) > + c = 0; > + > + if (a >= b && a >= c && a >= d) > + return 0; > + else if (b >= c && b >= d) > + return 1; > + else if (c >= d) > + return 2; > + else > + return 3; > +} > + > + > +struct InputBitVector > +{ > + uint32_t data[4]; > + > + void printf_bits(int offset, int count, const char *fmt = "", ...) > + { > + char out[129]; > + memset(out, '.', 128); > + out[128] = '\0'; > + int idx = offset; > + for (int i = 0; i < count; ++i) { > + out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? 
'1' : '0'; > + ++idx; > + } > + printf("%s ", out); > + va_list ap; > + va_start(ap, fmt); > + vprintf(fmt, ap); > + va_end(ap); > + printf("\n"); > + } > + > + uint32_t get_bits(int offset, int count) > + { > + assert(count >= 0 && count < 32); > + > + uint32_t out = 0; > + if (offset < 32) > + out |= data[0] >> offset; > + > + if (0 < offset && offset <= 32) > + out |= data[1] << (32 - offset); > + if (32 < offset && offset < 64) > + out |= data[1] >> (offset - 32); > + > + if (32 < offset && offset <= 64) > + out |= data[2] << (64 - offset); > + if (64 < offset && offset < 96) > + out |= data[2] >> (offset - 64); > + > + if (64 < offset && offset <= 96) > + out |= data[3] << (96 - offset); > + if (96 < offset && offset < 128) > + out |= data[3] >> (offset - 96); > + > + out &= (1 << count) - 1; > + return out; > + } > + > + uint64_t get_bits64(int offset, int count) > + { > + assert(count >= 0 && count < 64); > + > + uint64_t out = 0; > + if (offset < 32) > + out |= data[0] >> offset; > + > + if (offset <= 32) > + out |= (uint64_t)data[1] << (32 - offset); > + if (32 < offset && offset < 64) > + out |= data[1] >> (offset - 32); > + > + if (0 < offset && offset <= 64) > + out |= (uint64_t)data[2] << (64 - offset); > + if (64 < offset && offset < 96) > + out |= data[2] >> (offset - 64); > + > + if (32 < offset && offset <= 96) > + out |= (uint64_t)data[3] << (96 - offset); > + if (96 < offset && offset < 128) > + out |= data[3] >> (offset - 96); > + > + out &= ((uint64_t)1 << count) - 1; > + return out; > + } > + > + uint32_t get_bits_rev(int offset, int count) > + { > + assert(offset >= count); > + uint32_t tmp = get_bits(offset - count, count); > + uint32_t out = 0; > + for (int i = 0; i < count; ++i) > + out |= ((tmp >> i) & 1) << (count - 1 - i); > + return out; > + } > +}; > + > +struct OutputBitVector > +{ > + uint32_t data[4]; > + int offset; > + > + OutputBitVector() > + : offset(0) > + { > + memset(data, 0, sizeof(data)); > + } > + > + void 
append(uint32_t value, int size) > + { > + if (VERBOSE_WRITE) > + printf("append offset=%d size=%d values=0x%x\n", offset, size, > value); > + > + assert(offset + size <= 128); > + > + assert(size <= 32); > + if (size < 32) > + assert((value >> size) == 0); > + > + while (size) { > + int c = MIN2(size, 32 - (offset & 31)); > + data[offset >> 5] |= (value << (offset & 31)); > + offset += c; > + size -= c; > + value >>= c; > + } > + } > + > + void append64(uint64_t value, int size) > + { > + if (VERBOSE_WRITE) > + printf("append offset=%d size=%d values=0x%llx\n", offset, size, > (unsigned long long)value); > + > + assert(offset + size <= 128); > + > + assert(size <= 64); > + if (size < 64) > + assert((value >> size) == 0); > + > + while (size) { > + int c = MIN2(size, 32 - (offset & 31)); > + data[offset >> 5] |= (value << (offset & 31)); > + offset += c; > + size -= c; > + value >>= c; > + } > + } > + > + void append(OutputBitVector &v, int size) > + { > + if (VERBOSE_WRITE) > + printf("append vector offset=%d size=%d\n", offset, size); > + > + assert(offset + size <= 128); > + int i = 0; > + while (size >= 32) { > + append(v.data[i++], 32); > + size -= 32; > + } > + if (size > 0) > + append(v.data[i] & ((1 << size) - 1), size); > + } > + > + void append_end(OutputBitVector &v, int size) > + { > + for (int i = 0; i < size; ++i) > + data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 > - i) & 31); > + } > + > + /* Insert the given number of '1' bits. (We could use 0s instead, but 1s > are > + * more likely to flush out bugs where we accidentally read undefined > bits.) 
> + */ > + void skip(int size) > + { > + if (VERBOSE_WRITE) > + printf("skip offset=%d size=%d\n", offset, size); > + > + assert(offset + size <= 128); > + while (size >= 32) { > + append(0xffffffff, 32); > + size -= 32; > + } > + if (size > 0) > + append(0xffffffff >> (32 - size), size); > + } > +}; > + > + > +class Decoder > +{ > +public: > + Decoder(int block_w, int block_h, int block_d, bool srgb, bool > output_unorm8) > + : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb), > + output_unorm8(output_unorm8) {} > + > + decode_error::type decode(const uint8_t *in, uint16_t *output) const; > + > + int block_w, block_h, block_d; > + bool srgb, output_unorm8; > +}; > + > +struct Block > +{ > + bool is_error; > + bool bogus_colour_endpoints; > + bool bogus_weights; > + > + int high_prec; > + int dual_plane; > + int colour_component_selector; > + int wt_range; > + int wt_w, wt_h, wt_d; > + int num_parts; > + int partition_index; > + > + bool is_void_extent; > + int void_extent_d; > + int void_extent_min_s; > + int void_extent_max_s; > + int void_extent_min_t; > + int void_extent_max_t; > + uint16_t void_extent_colour_r; > + uint16_t void_extent_colour_g; > + uint16_t void_extent_colour_b; > + uint16_t void_extent_colour_a; > + > + bool is_multi_cem; > + int num_extra_cem_bits; > + int colour_endpoint_data_offset; > + int extra_cem_bits; > + int cem_base_class; > + int cems[4]; > + > + int num_cem_values; > + > + /* Calculated by unpack_weights(): */ > + uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for > overflows in trit parsing */ > + > + /* Calculated by unquantise_weights(): */ > + uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill > interpolation */ > + > + /* Calculated by unpack_colour_endpoints(): */ > + uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding > for overflows in trit parsing */ > + > + /* Calculated by unquantise_colour_endpoints(): */ > + uint8_t colour_endpoints[18]; > + > + 
/* Calculated by calculate_from_weights(): */ > + int wt_trits; > + int wt_quints; > + int wt_bits; > + int wt_max; > + int num_weights; > + int weight_bits; > + > + /* Calculated by calculate_remaining_bits(): */ > + int remaining_bits; > + > + /* Calculated by calculate_colour_endpoints_size(): */ > + int colour_endpoint_bits; > + int ce_max; > + int ce_trits; > + int ce_quints; > + int ce_bits; > + > + /* Calculated by compute_infill_weights(); */ > + uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */ > + > + /* Calculated by decode_colour_endpoints(); */ > + uint8x4_t endpoints_decoded[2][4]; > + > + void calculate_from_weights(); > + void calculate_remaining_bits(); > + decode_error::type calculate_colour_endpoints_size(); > + > + void unquantise_weights(); > + void unquantise_colour_endpoints(); > + > + decode_error::type decode(const Decoder &decoder, InputBitVector in); > + > + decode_error::type decode_block_mode(InputBitVector in); > + decode_error::type decode_void_extent(InputBitVector in); > + void decode_cem(InputBitVector in); > + void unpack_colour_endpoints(InputBitVector in); > + void decode_colour_endpoints(); > + void unpack_weights(InputBitVector in); > + void compute_infill_weights(int block_w, int block_h, int block_d); > + > + void write_decoded(const Decoder &decoder, uint16_t *output); > +}; > + > + > +decode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const > +{ > + Block blk; > + InputBitVector in_vec; > + memcpy(&in_vec.data, in, 16); > + decode_error::type err = blk.decode(*this, in_vec); > + if (err == decode_error::ok) { > + blk.write_decoded(*this, output); > + } else { > + /* Fill output with the error colour */ > + for (int i = 0; i < block_w * block_h * block_d; ++i) { > + if (output_unorm8) { > + output[i*4+0] = 0xff; > + output[i*4+1] = 0; > + output[i*4+2] = 0xff; > + output[i*4+3] = 0xff; > + } else { > + assert(!srgb); /* srgb must use unorm8 */ > + > + output[i*4+0] = FP16_ONE; > + 
output[i*4+1] = FP16_ZERO; > + output[i*4+2] = FP16_ONE; > + output[i*4+3] = FP16_ONE; > + } > + } > + } > + return err; > +} > + > + > +decode_error::type Block::decode_void_extent(InputBitVector block) > +{ > + /* TODO: 3D */ > + > + is_void_extent = true; > + void_extent_d = block.get_bits(9, 1); > + void_extent_min_s = block.get_bits(12, 13); > + void_extent_max_s = block.get_bits(25, 13); > + void_extent_min_t = block.get_bits(38, 13); > + void_extent_max_t = block.get_bits(51, 13); > + void_extent_colour_r = block.get_bits(64, 16); > + void_extent_colour_g = block.get_bits(80, 16); > + void_extent_colour_b = block.get_bits(96, 16); > + void_extent_colour_a = block.get_bits(112, 16); > + > + /* TODO: maybe we should do something useful with the extent coordinates? > */ > + > + if (void_extent_d) { > + return decode_error::unsupported_hdr_void_extent; > + } > + > + if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff > + && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) { > + > + /* No extents */ > + > + } else { > + > + /* Check for illegal encoding */ > + if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= > void_extent_max_t) { > + return decode_error::invalid_range_in_void_extent; > + } > + } > + > + return decode_error::ok; > +} > + > +decode_error::type Block::decode_block_mode(InputBitVector in) > +{ > + dual_plane = in.get_bits(10, 1); > + high_prec = in.get_bits(9, 1); > + > + if (in.get_bits(0, 2) != 0x0) { > + wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1); > + int a = in.get_bits(5, 2); > + int b = in.get_bits(7, 2); > + switch (in.get_bits(2, 2)) { > + case 0x0: > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DHBBAAR00RR"); > + wt_w = b + 4; > + wt_h = a + 2; > + break; > + case 0x1: > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DHBBAAR01RR"); > + wt_w = b + 8; > + wt_h = a + 2; > + break; > + case 0x2: > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DHBBAAR10RR"); > + wt_w = a + 2; > + wt_h 
= b + 8; > + break; > + case 0x3: > + if ((b & 0x2) == 0) { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DH0BAAR11RR"); > + wt_w = a + 2; > + wt_h = b + 6; > + } else { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DH1BAAR11RR"); > + wt_w = (b & 0x1) + 2; > + wt_h = a + 2; > + } > + break; > + } > + } else { > + if (in.get_bits(6, 3) == 0x7) { > + if (in.get_bits(0, 9) == 0x1fc) { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "xx111111100 (void extent)"); > + return decode_void_extent(in); > + } else { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "xx111xxxx00"); > + return decode_error::reserved_block_mode_1; > + } > + } > + if (in.get_bits(0, 4) == 0x0) { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "xxxxxxx0000"); > + return decode_error::reserved_block_mode_2; > + } > + > + wt_range = in.get_bits(1, 3) | in.get_bits(4, 1); > + int a = in.get_bits(5, 2); > + int b; > + > + switch (in.get_bits(7, 2)) { > + case 0b00: > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DH00AARRR00"); > + wt_w = 12; > + wt_h = a + 2; > + break; > + case 0b01: > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DH01AARRR00"); > + wt_w = a + 2; > + wt_h = 12; > + break; > + case 0b11: > + if (in.get_bits(5, 1) == 0) { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DH1100RRR00"); > + wt_w = 6; > + wt_h = 10; > + } else { > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "DH1101RRR00"); > + wt_w = 10; > + wt_h = 6; > + } > + break; > + case 0b10: > + if (VERBOSE_DECODE) > + in.printf_bits(0, 11, "BB10AARRR00"); > + b = in.get_bits(9, 2); > + wt_w = a + 6; > + wt_h = b + 6; > + dual_plane = 0; > + high_prec = 0; > + break; > + } > + } > + return decode_error::ok; > +} > + > +void Block::decode_cem(InputBitVector in) > +{ > + cems[0] = cems[1] = cems[2] = cems[3] = -1; > + > + num_extra_cem_bits = 0; > + extra_cem_bits = 0; > + > + if (num_parts > 1) { > + > + partition_index = in.get_bits(13, 10); > + if (VERBOSE_DECODE) > + in.printf_bits(13, 10, "partition ID 
(%d)", partition_index); > + > + uint32_t cem = in.get_bits(23, 6); > + > + if ((cem & 0b11) == 0b00) { > + cem >>= 2; > + cem_base_class = cem >> 2; > + is_multi_cem = false; > + > + for (int i = 0; i < num_parts; ++i) > + cems[i] = cem; > + > + if (VERBOSE_DECODE) > + in.printf_bits(23, 6, "CEM (single, %d)", cem); > + } else { > + > + cem_base_class = (cem & 0b11) - 1; > + is_multi_cem = true; > + > + if (VERBOSE_DECODE) > + in.printf_bits(23, 6, "CEM (multi, base class %d)", > cem_base_class); > + > + int offset = 128 - weight_bits; > + > + if (num_parts == 2) { > + if (VERBOSE_DECODE) { > + in.printf_bits(25, 4, "M0M0 C1 C0"); > + in.printf_bits(offset - 2, 2, "M1M1"); > + } > + > + uint32_t c0 = in.get_bits(25, 1); > + uint32_t c1 = in.get_bits(26, 1); > + > + extra_cem_bits = c0 + c1; > + > + num_extra_cem_bits = 2; > + > + uint32_t m0 = in.get_bits(27, 2); > + uint32_t m1 = in.get_bits(offset - 2, 2); > + > + cems[0] = ((cem_base_class + c0) << 2) | m0; > + cems[1] = ((cem_base_class + c1) << 2) | m1; > + > + } else if (num_parts == 3) { > + if (VERBOSE_DECODE) { > + in.printf_bits(25, 4, "M0 C2 C1 C0"); > + in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0"); > + } > + > + uint32_t c0 = in.get_bits(25, 1); > + uint32_t c1 = in.get_bits(26, 1); > + uint32_t c2 = in.get_bits(27, 1); > + > + extra_cem_bits = c0 + c1 + c2; > + > + num_extra_cem_bits = 5; > + > + uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - > weight_bits - 5, 1) << 1); > + uint32_t m1 = in.get_bits(offset - 4, 2); > + uint32_t m2 = in.get_bits(offset - 2, 2); > + > + cems[0] = ((cem_base_class + c0) << 2) | m0; > + cems[1] = ((cem_base_class + c1) << 2) | m1; > + cems[2] = ((cem_base_class + c2) << 2) | m2; > + > + } else if (num_parts == 4) { > + if (VERBOSE_DECODE) { > + in.printf_bits(25, 4, "C3 C2 C1 C0"); > + in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0"); > + } > + > + uint32_t c0 = in.get_bits(25, 1); > + uint32_t c1 = in.get_bits(26, 1); > + uint32_t c2 = in.get_bits(27, 1); > 
+ uint32_t c3 = in.get_bits(28, 1); > + > + extra_cem_bits = c0 + c1 + c2 + c3; > + > + num_extra_cem_bits = 8; > + > + uint32_t m0 = in.get_bits(offset - 8, 2); > + uint32_t m1 = in.get_bits(offset - 6, 2); > + uint32_t m2 = in.get_bits(offset - 4, 2); > + uint32_t m3 = in.get_bits(offset - 2, 2); > + > + cems[0] = ((cem_base_class + c0) << 2) | m0; > + cems[1] = ((cem_base_class + c1) << 2) | m1; > + cems[2] = ((cem_base_class + c2) << 2) | m2; > + cems[3] = ((cem_base_class + c3) << 2) | m3; > + } else { > + unreachable(""); > + } > + } > + > + colour_endpoint_data_offset = 29; > + > + } else { > + uint32_t cem = in.get_bits(13, 4); > + > + cem_base_class = cem >> 2; > + is_multi_cem = false; > + > + cems[0] = cem; > + > + partition_index = -1; > + > + if (VERBOSE_DECODE) > + in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class); > + > + colour_endpoint_data_offset = 17; > + } > +} > + > +void Block::unpack_colour_endpoints(InputBitVector in) > +{ > + if (ce_trits) { > + int offset = colour_endpoint_data_offset; > + int bits_left = colour_endpoint_bits; > + for (int i = 0; i < num_cem_values; i += 5) { > + int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5); > + /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have > to use uint64_t */ > + uint64_t raw = in.get_bits64(offset, bits_to_read); > + unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]); > + > + if (VERBOSE_DECODE) > + in.printf_bits(offset, bits_to_read, > + "trits [%d,%d,%d,%d,%d]", > + colour_endpoints_quant[i+0], > colour_endpoints_quant[i+1], > + colour_endpoints_quant[i+2], colour_endpoints_quant[i+3], > + colour_endpoints_quant[i+4]); > + > + offset += 8 + ce_bits * 5; > + bits_left -= 8 + ce_bits * 5; > + } > + } else if (ce_quints) { > + int offset = colour_endpoint_data_offset; > + int bits_left = colour_endpoint_bits; > + for (int i = 0; i < num_cem_values; i += 3) { > + int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3); > + /* If ce_quints then ce_bits <= 
5, so bits_to_read <= 22 and we can > use uint32_t */ > + uint32_t raw = in.get_bits(offset, bits_to_read); > + unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]); > + > + if (VERBOSE_DECODE) > + in.printf_bits(offset, bits_to_read, > + "quints [%d,%d,%d]", > + colour_endpoints_quant[i], > colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]); > + > + offset += 7 + ce_bits * 3; > + bits_left -= 7 + ce_bits * 3; > + } > + } else { > + assert((colour_endpoint_bits % ce_bits) == 0); > + int offset = colour_endpoint_data_offset; > + for (int i = 0; i < num_cem_values; i++) { > + colour_endpoints_quant[i] = in.get_bits(offset, ce_bits); > + > + if (VERBOSE_DECODE) > + in.printf_bits(offset, ce_bits, "bits [%d]", > colour_endpoints_quant[i]); > + > + offset += ce_bits; > + } > + } > +} > + > +void Block::decode_colour_endpoints() > +{ > + int cem_values_idx = 0; > + for (int part = 0; part < num_parts; ++part) { > + uint8_t *v = &colour_endpoints[cem_values_idx]; > + int v0 = v[0]; > + int v1 = v[1]; > + int v2 = v[2]; > + int v3 = v[3]; > + int v4 = v[4]; > + int v5 = v[5]; > + int v6 = v[6]; > + int v7 = v[7]; > + cem_values_idx += ((cems[part] >> 2) + 1) * 2; > + > + uint8x4_t e0, e1; > + int s0, s1, L0, L1; > + > + switch (cems[part]) > + { > + case 0: > + e0 = uint8x4_t(v0, v0, v0, 0xff); > + e1 = uint8x4_t(v1, v1, v1, 0xff); > + break; > + case 1: > + L0 = (v0 >> 2) | (v1 & 0xc0); > + L1 = L0 + (v1 & 0x3f); > + if (L1 > 0xff) > + L1 = 0xff; > + e0 = uint8x4_t(L0, L0, L0, 0xff); > + e1 = uint8x4_t(L1, L1, L1, 0xff); > + break; > + case 4: > + e0 = uint8x4_t(v0, v0, v0, v2); > + e1 = uint8x4_t(v1, v1, v1, v3); > + break; > + case 5: > + bit_transfer_signed(v1, v0); > + bit_transfer_signed(v3, v2); > + e0 = uint8x4_t(v0, v0, v0, v2); > + e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3); > + break; > + case 6: > + e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff); > + e1 = uint8x4_t(v0, v1, v2, 0xff); > + break; > + case 8: > + s0 = v0 + v2 + 
v4; > + s1 = v1 + v3 + v5; > + if (s1 >= s0) { > + e0 = uint8x4_t(v0, v2, v4, 0xff); > + e1 = uint8x4_t(v1, v3, v5, 0xff); > + } else { > + e0 = blue_contract(v1, v3, v5, 0xff); > + e1 = blue_contract(v0, v2, v4, 0xff); > + } > + break; > + case 9: > + bit_transfer_signed(v1, v0); > + bit_transfer_signed(v3, v2); > + bit_transfer_signed(v5, v4); > + if (v1 + v3 + v5 >= 0) { > + e0 = uint8x4_t(v0, v2, v4, 0xff); > + e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff); > + } else { > + e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff); > + e1 = blue_contract(v0, v2, v4, 0xff); > + } > + break; > + case 10: > + e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4); > + e1 = uint8x4_t(v0, v1, v2, v5); > + break; > + case 12: > + s0 = v0 + v2 + v4; > + s1 = v1 + v3 + v5; > + if (s1 >= s0) { > + e0 = uint8x4_t(v0, v2, v4, v6); > + e1 = uint8x4_t(v1, v3, v5, v7); > + } else { > + e0 = blue_contract(v1, v3, v5, v7); > + e1 = blue_contract(v0, v2, v4, v6); > + } > + break; > + case 13: > + bit_transfer_signed(v1, v0); > + bit_transfer_signed(v3, v2); > + bit_transfer_signed(v5, v4); > + bit_transfer_signed(v7, v6); > + if (v1 + v3 + v5 >= 0) { > + e0 = uint8x4_t(v0, v2, v4, v6); > + e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7); > + } else { > + e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7); > + e1 = blue_contract(v0, v2, v4, v6); > + } > + break; > + default: > + /* HDR endpoints not supported; return error colour */ > + e0 = uint8x4_t(255, 0, 255, 255); > + e1 = uint8x4_t(255, 0, 255, 255); > + break; > + } > + > + endpoints_decoded[0][part] = e0; > + endpoints_decoded[1][part] = e1; > + > + if (VERBOSE_DECODE) { > + printf("cems[%d]=%d v=[", part, cems[part]); > + for (int i = 0; i < (cems[part] >> 2) + 1; ++i) { > + if (i) > + printf(", "); > + printf("%3d", v[i]); > + } > + printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n", > + e0.v[0], e0.v[1], e0.v[2], e0.v[3], > + e1.v[0], e1.v[1], e1.v[2], e1.v[3]); > + } > + } > +} > + > +void 
Block::unpack_weights(InputBitVector in) > +{ > + if (wt_trits) { > + int offset = 128; > + int bits_left = weight_bits; > + for (int i = 0; i < num_weights; i += 5) { > + int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits); > + /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can > use uint32_t */ > + uint32_t raw = in.get_bits_rev(offset, bits_to_read); > + unpack_trit_block(wt_bits, raw, &weights_quant[i]); > + > + if (VERBOSE_DECODE) > + in.printf_bits(offset - bits_to_read, bits_to_read, "weight > trits [%d,%d,%d,%d,%d]", > + weights_quant[i+0], weights_quant[i+1], > + weights_quant[i+2], weights_quant[i+3], > + weights_quant[i+4]); > + > + offset -= 8 + wt_bits * 5; > + bits_left -= 8 + wt_bits * 5; > + } > + > + } else if (wt_quints) { > + > + int offset = 128; > + int bits_left = weight_bits; > + for (int i = 0; i < num_weights; i += 3) { > + int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits); > + /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can > use uint32_t */ > + uint32_t raw = in.get_bits_rev(offset, bits_to_read); > + unpack_quint_block(wt_bits, raw, &weights_quant[i]); > + > + if (VERBOSE_DECODE) > + in.printf_bits(offset - bits_to_read, bits_to_read, "weight > quints [%d,%d,%d]", > + weights_quant[i], weights_quant[i+1], > weights_quant[i+2]); > + > + offset -= 7 + wt_bits * 3; > + bits_left -= 7 + wt_bits * 3; > + } > + > + } else { > + int offset = 128; > + assert((weight_bits % wt_bits) == 0); > + for (int i = 0; i < num_weights; ++i) { > + weights_quant[i] = in.get_bits_rev(offset, wt_bits); > + > + if (VERBOSE_DECODE) > + in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", > weights_quant[i]); > + > + offset -= wt_bits; > + } > + } > +} > + > +void Block::unquantise_weights() > +{ > + assert(num_weights <= (int)ARRAY_SIZE(weights_quant)); > + assert(num_weights <= (int)ARRAY_SIZE(weights)); > + > + memset(weights, 0, sizeof(weights)); > + > + for (int i = 0; i < num_weights; ++i) { > + > + uint8_t v = 
weights_quant[i]; > + uint8_t w; > + > + if (wt_trits) { > + > + if (wt_bits == 0) { > + w = v * 32; > + } else { > + uint8_t A, B, C, D; > + A = (v & 0b1) ? 0b1111111 : 0b0000000; > + switch (wt_bits) { > + case 1: > + B = 0; > + C = 50; > + D = v >> 1; > + break; > + case 2: > + B = (v & 0b10) ? 0b1000101 : 0b0000000; > + C = 23; > + D = v >> 2; > + break; > + case 3: > + B = ((v & 0b110) >> 1) | ((v & 0b110) << 4); > + C = 11; > + D = v >> 3; > + break; > + default: > + unreachable(""); > + } > + uint16_t T = D * C + B; > + T = T ^ A; > + T = (A & 0x20) | (T >> 2); > + assert(T < 64); > + if (T > 32) > + T++; > + w = T; > + } > + > + } else if (wt_quints) { > + > + if (wt_bits == 0) { > + w = v * 16; > + } else { > + uint8_t A, B, C, D; > + A = (v & 0b1) ? 0b1111111 : 0b0000000; > + switch (wt_bits) { > + case 1: > + B = 0; > + C = 28; > + D = v >> 1; > + break; > + case 2: > + B = (v & 0b10) ? 0b1000010 : 0b0000000; > + C = 13; > + D = v >> 2; > + break; > + default: > + unreachable(""); > + } > + uint16_t T = D * C + B; > + T = T ^ A; > + T = (A & 0x20) | (T >> 2); > + assert(T < 64); > + if (T > 32) > + T++; > + w = T; > + } > + weights[i] = w; > + > + } else { > + > + switch (wt_bits) { > + case 1: w = v ? 0b111111 : 0b000000; break; > + case 2: w = v | (v << 2) | (v << 4); break; > + case 3: w = v | (v << 3); break; > + case 4: w = (v >> 2) | (v << 2); break; > + case 5: w = (v >> 4) | (v << 1); break; > + default: unreachable(""); > + } > + assert(w < 64); > + if (w > 32) > + w++; > + } > + weights[i] = w; > + } > +} > + > +void Block::compute_infill_weights(int block_w, int block_h, int block_d) > +{ > + int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1); > + int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1); > + int Dr = block_d <= 1 ? 
0 : (1024 + block_d / 2) / (block_d - 1); > + for (int r = 0; r < block_d; ++r) { > + for (int t = 0; t < block_h; ++t) { > + for (int s = 0; s < block_w; ++s) { > + int cs = Ds * s; > + int ct = Dt * t; > + int cr = Dr * r; > + int gs = (cs * (wt_w - 1) + 32) >> 6; > + int gt = (ct * (wt_h - 1) + 32) >> 6; > + int gr = (cr * (wt_d - 1) + 32) >> 6; > + assert(gs >= 0 && gs <= 176); > + assert(gt >= 0 && gt <= 176); > + assert(gr >= 0 && gr <= 176); > + int js = gs >> 4; > + int fs = gs & 0xf; > + int jt = gt >> 4; > + int ft = gt & 0xf; > + int jr = gr >> 4; > + int fr = gr & 0xf; > + > + /* TODO: 3D */ > + (void)jr; > + (void)fr; > + > + int w11 = (fs * ft + 8) >> 4; > + int w10 = ft - w11; > + int w01 = fs - w11; > + int w00 = 16 - fs - ft + w11; > + > + if (dual_plane) { > + int p00, p01, p10, p11, i0, i1; > + int v0 = js + jt * wt_w; > + p00 = weights[(v0) * 2]; > + p01 = weights[(v0 + 1) * 2]; > + p10 = weights[(v0 + wt_w) * 2]; > + p11 = weights[(v0 + wt_w + 1) * 2]; > + i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; > + p00 = weights[(v0) * 2 + 1]; > + p01 = weights[(v0 + 1) * 2 + 1]; > + p10 = weights[(v0 + wt_w) * 2 + 1]; > + p11 = weights[(v0 + wt_w + 1) * 2 + 1]; > + assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights)); > + i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; > + assert(0 <= i0 && i0 <= 64); > + infill_weights[0][s + t*block_w + r*block_w*block_h] = i0; > + infill_weights[1][s + t*block_w + r*block_w*block_h] = i1; > + } else { > + int p00, p01, p10, p11, i; > + int v0 = js + jt * wt_w; > + p00 = weights[v0]; > + p01 = weights[v0 + 1]; > + p10 = weights[v0 + wt_w]; > + p11 = weights[v0 + wt_w + 1]; > + assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights)); > + i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; > + assert(0 <= i && i <= 64); > + infill_weights[0][s + t*block_w + r*block_w*block_h] = i; > + } > + } > + } > + } > +} > + > +void Block::unquantise_colour_endpoints() > +{ > + assert(num_cem_values <= 
(int)ARRAY_SIZE(colour_endpoints_quant)); > + assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints)); > + > + for (int i = 0; i < num_cem_values; ++i) { > + uint8_t v = colour_endpoints_quant[i]; > + > + if (ce_trits) { > + uint16_t A, B, C, D; > + uint16_t t; > + A = (v & 0b1) ? 0b111111111 : 0b000000000; > + switch (ce_bits) { > + case 1: > + B = 0; > + C = 204; > + D = v >> 1; > + break; > + case 2: > + B = (v & 0b10) ? 0b100010110 : 0b000000000; > + C = 93; > + D = v >> 2; > + break; > + case 3: > + t = ((v >> 1) & 0b11); > + B = t | (t << 2) | (t << 7); > + C = 44; > + D = v >> 3; > + break; > + case 4: > + t = ((v >> 1) & 0b111); > + B = t | (t << 6); > + C = 22; > + D = v >> 4; > + break; > + case 5: > + t = ((v >> 1) & 0b1111); > + B = (t >> 2) | (t << 5); > + C = 11; > + D = v >> 5; > + break; > + case 6: > + B = ((v & 0b111110) << 3) | ((v >> 5) & 0b1); > + C = 5; > + D = v >> 6; > + break; > + default: > + unreachable(""); > + } > + uint16_t T = D * C + B; > + T = T ^ A; > + T = (A & 0x80) | (T >> 2); > + assert(T < 256); > + colour_endpoints[i] = T; > + } else if (ce_quints) { > + uint16_t A, B, C, D; > + uint16_t t; > + A = (v & 0b1) ? 0b111111111 : 0b000000000; > + switch (ce_bits) { > + case 1: > + B = 0; > + C = 113; > + D = v >> 1; > + break; > + case 2: > + B = (v & 0b10) ? 0b100001100 : 0b000000000; > + C = 54; > + D = v >> 2; > + break; > + case 3: > + t = ((v >> 1) & 0b11); > + B = (t >> 1) | (t << 1) | (t << 7); > + C = 26; > + D = v >> 3; > + break; > + case 4: > + t = ((v >> 1) & 0b111); > + B = (t >> 1) | (t << 6); > + C = 13; > + D = v >> 4; > + break; > + case 5: > + t = ((v >> 1) & 0b1111); > + B = (t >> 4) | (t << 5); > + C = 6; > + D = v >> 5; > + break; > + default: > + unreachable(""); > + } > + uint16_t T = D * C + B; > + T = T ^ A; > + T = (A & 0x80) | (T >> 2); > + assert(T < 256); > + colour_endpoints[i] = T; > + } else { > + switch (ce_bits) { > + case 1: v = v ? 
0b11111111 : 0b00000000; break; > + case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break; > + case 3: v = (v << 5) | (v << 2) | (v >> 1); break; > + case 4: v = (v << 4) | v; break; > + case 5: v = (v << 3) | (v >> 2); break; > + case 6: v = (v << 2) | (v >> 4); break; > + case 7: v = (v << 1) | (v >> 6); break; > + case 8: break; > + default: unreachable(""); > + } > + colour_endpoints[i] = v; > + } > + } > +} > + > +decode_error::type Block::decode(const Decoder &decoder, InputBitVector in) > +{ > + decode_error::type err; > + > + is_error = false; > + bogus_colour_endpoints = false; > + bogus_weights = false; > + is_void_extent = false; > + > + wt_d = 1; > + /* TODO: 3D */ > + > + /* TODO: test for all the illegal encodings */ > + > + if (VERBOSE_DECODE) > + in.printf_bits(0, 128); > + > + err = decode_block_mode(in); > + if (err != decode_error::ok) > + return err; > + > + if (is_void_extent) > + return decode_error::ok; > + > + /* TODO: 3D */ > + > + calculate_from_weights(); > + > + if (VERBOSE_DECODE) > + printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d > high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n", > + wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, > wt_max, wt_trits, wt_quints, wt_bits, weight_bits); > + > + if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > > decoder.block_d) > + return decode_error::weight_grid_exceeds_block_size; > + > + num_parts = in.get_bits(11, 2) + 1; > + > + if (VERBOSE_DECODE) > + in.printf_bits(11, 2, "partitions = %d", num_parts); > + > + if (dual_plane && num_parts > 3) > + return decode_error::dual_plane_and_too_many_partitions; > + > + decode_cem(in); > + > + if (VERBOSE_DECODE) > + printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], > cems[2], cems[3], cem_base_class); > + > + int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits; > + num_cem_values = num_cem_pairs * 2; > + > + calculate_remaining_bits(); > + err = 
calculate_colour_endpoints_size(); > + if (err != decode_error::ok) > + return err; > + > + if (VERBOSE_DECODE) > + in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits, > + "endpoint data (%d bits, %d vals, %dt %dq %db)", > + colour_endpoint_bits, num_cem_values, ce_trits, > ce_quints, ce_bits); > + > + unpack_colour_endpoints(in); > + > + if (VERBOSE_DECODE) { > + printf("cem values raw =["); > + for (int i = 0; i < num_cem_values; i++) { > + if (i) > + printf(", "); > + printf("%3d", colour_endpoints_quant[i]); > + } > + printf("]\n"); > + } > + > + if (num_cem_values > 18) > + return decode_error::invalid_colour_endpoints_count; > + > + unquantise_colour_endpoints(); > + > + if (VERBOSE_DECODE) { > + printf("cem values norm=["); > + for (int i = 0; i < num_cem_values; i++) { > + if (i) > + printf(", "); > + printf("%3d", colour_endpoints[i]); > + } > + printf("]\n"); > + } > + > + decode_colour_endpoints(); > + > + if (dual_plane) { > + int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2; > + colour_component_selector = in.get_bits(ccs_offset, 2); > + > + if (VERBOSE_DECODE) > + in.printf_bits(ccs_offset, 2, "colour component selector = %d", > colour_component_selector); > + } else { > + colour_component_selector = 0; > + } > + > + > + if (VERBOSE_DECODE) > + in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", > weight_bits); > + > + if (num_weights > 64) > + return decode_error::invalid_num_weights; > + > + if (weight_bits < 24 || weight_bits > 96) > + return decode_error::invalid_weight_bits; > + > + unpack_weights(in); > + > + unquantise_weights(); > + > + if (VERBOSE_DECODE) { > + printf("weights=["); > + for (int i = 0; i < num_weights; ++i) { > + if (i) > + printf(", "); > + printf("%d", weights[i]); > + } > + printf("]\n"); > + > + for (int plane = 0; plane <= dual_plane; ++plane) { > + printf("weights (plane %d):\n", plane); > + int i = 0; > + (void)i; > + > + for (int r = 0; r < wt_d; ++r) { > + for (int t = 0; t < 
wt_h; ++t) { > + for (int s = 0; s < wt_w; ++s) { > + printf("%3d", weights[i++ * (1 + dual_plane) + plane]); > + } > + printf("\n"); > + } > + if (r < wt_d - 1) > + printf("\n"); > + } > + } > + } > + > + compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d); > + > + if (VERBOSE_DECODE) { > + for (int plane = 0; plane <= dual_plane; ++plane) { > + printf("infilled weights (plane %d):\n", plane); > + int i = 0; > + (void)i; > + > + for (int r = 0; r < decoder.block_d; ++r) { > + for (int t = 0; t < decoder.block_h; ++t) { > + for (int s = 0; s < decoder.block_w; ++s) { > + printf("%3d", infill_weights[plane][i++]); > + } > + printf("\n"); > + } > + if (r < decoder.block_d - 1) > + printf("\n"); > + } > + } > + } > + if (VERBOSE_DECODE) > + printf("\n"); > + > + return decode_error::ok; > +} > + > +void Block::write_decoded(const Decoder &decoder, uint16_t *output) > +{ > + /* sRGB can only be stored as unorm8. */ > + assert(!decoder.srgb || decoder.output_unorm8); > + > + if (is_void_extent) { > + for (int idx = 0; idx < > decoder.block_w*decoder.block_h*decoder.block_d; ++idx) { > + if (decoder.output_unorm8) { > + if (decoder.srgb) { > + output[idx*4+0] = void_extent_colour_r >> 8; > + output[idx*4+1] = void_extent_colour_g >> 8; > + output[idx*4+2] = void_extent_colour_b >> 8; > + } else { > + output[idx*4+0] = > uint16_div_64k_to_half_to_unorm8(void_extent_colour_r); > + output[idx*4+1] = > uint16_div_64k_to_half_to_unorm8(void_extent_colour_g); > + output[idx*4+2] = > uint16_div_64k_to_half_to_unorm8(void_extent_colour_b); > + } > + output[idx*4+3] = > uint16_div_64k_to_half_to_unorm8(void_extent_colour_a); > + } else { > + /* Store the color as FP16. 
*/ > + output[idx*4+0] = > _mesa_uint16_div_64k_to_half(void_extent_colour_r); > + output[idx*4+1] = > _mesa_uint16_div_64k_to_half(void_extent_colour_g); > + output[idx*4+2] = > _mesa_uint16_div_64k_to_half(void_extent_colour_b); > + output[idx*4+3] = > _mesa_uint16_div_64k_to_half(void_extent_colour_a); > + } > + } > + return; > + } > + > + int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < > 31; > + > + int idx = 0; > + for (int z = 0; z < decoder.block_d; ++z) { > + for (int y = 0; y < decoder.block_h; ++y) { > + for (int x = 0; x < decoder.block_w; ++x) { > + > + int partition; > + if (num_parts > 1) { > + partition = select_partition(partition_index, x, y, z, > num_parts, small_block); > + assert(partition < num_parts); > + } else { > + partition = 0; > + } > + > + /* TODO: HDR */ > + > + uint8x4_t e0 = endpoints_decoded[0][partition]; > + uint8x4_t e1 = endpoints_decoded[1][partition]; > + uint16_t c0[4], c1[4]; > + > + /* Expand to 16 bits. */ > + if (decoder.srgb) { > + c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80); > + c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80); > + c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80); > + c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80); > + > + c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80); > + c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80); > + c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80); > + c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80); > + } else { > + c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]); > + c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]); > + c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]); > + c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]); > + > + c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]); > + c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]); > + c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]); > + c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]); > + } > + > + int w[4]; > + if (dual_plane) { > + int w0 = infill_weights[0][idx]; > + int w1 = infill_weights[1][idx]; > + w[0] = w[1] = w[2] = w[3] = w0; > + w[colour_component_selector] 
= w1; > + } else { > + int w0 = infill_weights[0][idx]; > + w[0] = w[1] = w[2] = w[3] = w0; > + } > + > + /* Interpolate to produce UNORM16, applying weights. */ > + uint16_t c[4] = { > + (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6), > + (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6), > + (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6), > + (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6), > + }; > + > + if (decoder.output_unorm8) { > + if (decoder.srgb) { > + output[idx*4+0] = c[0] >> 8; > + output[idx*4+1] = c[1] >> 8; > + output[idx*4+2] = c[2] >> 8; > + } else { > + output[idx*4+0] = c[0] == 65535 ? 0xff : > uint16_div_64k_to_half_to_unorm8(c[0]); > + output[idx*4+1] = c[1] == 65535 ? 0xff : > uint16_div_64k_to_half_to_unorm8(c[1]); > + output[idx*4+2] = c[2] == 65535 ? 0xff : > uint16_div_64k_to_half_to_unorm8(c[2]); > + } > + output[idx*4+3] = c[3] == 65535 ? 0xff : > uint16_div_64k_to_half_to_unorm8(c[3]); > + } else { > + /* Store the color as FP16. */ > + output[idx*4+0] = c[0] == 65535 ? FP16_ONE : > _mesa_uint16_div_64k_to_half(c[0]); > + output[idx*4+1] = c[1] == 65535 ? FP16_ONE : > _mesa_uint16_div_64k_to_half(c[1]); > + output[idx*4+2] = c[2] == 65535 ? FP16_ONE : > _mesa_uint16_div_64k_to_half(c[2]); > + output[idx*4+3] = c[3] == 65535 ? 
FP16_ONE : > _mesa_uint16_div_64k_to_half(c[3]); > + } > + > + idx++; > + } > + } > + } > +} > + > +void Block::calculate_from_weights() > +{ > + wt_trits = 0; > + wt_quints = 0; > + wt_bits = 0; > + switch (high_prec) { > + case 0: > + switch (wt_range) { > + case 0x2: wt_max = 1; wt_bits = 1; break; > + case 0x3: wt_max = 2; wt_trits = 1; break; > + case 0x4: wt_max = 3; wt_bits = 2; break; > + case 0x5: wt_max = 4; wt_quints = 1; break; > + case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break; > + case 0x7: wt_max = 7; wt_bits = 3; break; > + default: abort(); > + } > + break; > + case 1: > + switch (wt_range) { > + case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break; > + case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break; > + case 0x4: wt_max = 15; wt_bits = 4; break; > + case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break; > + case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break; > + case 0x7: wt_max = 31; wt_bits = 5; break; > + default: abort(); > + } > + break; > + } > + > + assert(wt_trits || wt_quints || wt_bits); > + > + num_weights = wt_w * wt_h * wt_d; > + > + if (dual_plane) > + num_weights *= 2; > + > + weight_bits = > + (num_weights * 8 * wt_trits + 4) / 5 > + + (num_weights * 7 * wt_quints + 2) / 3 > + + num_weights * wt_bits; > +} > + > +void Block::calculate_remaining_bits() > +{ > + int config_bits; > + if (num_parts > 1) { > + if (!is_multi_cem) > + config_bits = 29; > + else > + config_bits = 25 + 3 * num_parts; > + } else { > + config_bits = 17; > + } > + > + if (dual_plane) > + config_bits += 2; > + > + remaining_bits = 128 - config_bits - weight_bits; > +} > + > +decode_error::type Block::calculate_colour_endpoints_size() > +{ > + /* Specified as illegal */ > + if (remaining_bits < (13 * num_cem_values + 4) / 5) { > + colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0; > + return decode_error::invalid_colour_endpoints_size; > + } > + > + /* Find the largest cem_ranges that fits within remaining_bits */ > + 
for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) { > + int cem_bits; > + cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5 > + + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3 > + + num_cem_values * cem_ranges[i].b; > + > + if (cem_bits <= remaining_bits) > + { > + colour_endpoint_bits = cem_bits; > + ce_max = cem_ranges[i].max; > + ce_trits = cem_ranges[i].t; > + ce_quints = cem_ranges[i].q; > + ce_bits = cem_ranges[i].b; > + return decode_error::ok; > + } > + } > + > + assert(0); > + return decode_error::invalid_colour_endpoints_size; > +} > + > +/** > + * Decode ASTC 2D LDR texture data. > + * > + * \param src_width in pixels > + * \param src_height in pixels > + * \param dst_stride in bytes > + */ > +extern "C" void > +_mesa_unpack_astc_2d_ldr(uint8_t *dst_row, > + unsigned dst_stride, > + const uint8_t *src_row, > + unsigned src_stride, > + unsigned src_width, > + unsigned src_height, > + mesa_format format) > +{ > + assert(_mesa_is_format_astc_2d(format)); > + bool srgb = _mesa_get_format_color_encoding(format) == GL_SRGB; > + > + unsigned blk_w, blk_h; > + _mesa_get_format_block_size(format, &blk_w, &blk_h); > + > + const unsigned block_size = 16; > + unsigned x_blocks = (src_width + blk_w - 1) / blk_w; > + unsigned y_blocks = (src_height + blk_h - 1) / blk_h; > + > + Decoder dec(blk_w, blk_h, 1, srgb, true); > + > + for (unsigned y = 0; y < y_blocks; ++y) { > + for (unsigned x = 0; x < x_blocks; ++x) { > + /* Same size as the largest block. */ > + uint16_t block_out[12 * 12 * 4]; > + > + dec.decode(src_row + x * block_size, block_out); > + > + /* This can be smaller with NPOT dimensions. 
*/ > + unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w); > + unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h); > + > + for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) { > + for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) { > + uint8_t *dst = dst_row + sub_y * dst_stride + > + (x * blk_w + sub_x) * 4; > + const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4]; > + > + dst[0] = src[0]; > + dst[1] = src[1]; > + dst[2] = src[2]; > + dst[3] = src[3]; > + } > + } > + } > + src_row += src_stride; > + dst_row += dst_stride * blk_h; > + } > +} > diff --git a/src/mesa/main/texcompress_astc.h > b/src/mesa/main/texcompress_astc.h > new file mode 100644 > index 00000000000..9f9c5281dda > --- /dev/null > +++ b/src/mesa/main/texcompress_astc.h > @@ -0,0 +1,47 @@ > +/* > + * Copyright 2018 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > + * DEALINGS IN THE SOFTWARE. > + */ > + > +#ifndef TEXCOMPRESS_ASTC_H > +#define TEXCOMPRESS_ASTC_H > + > +#include <inttypes.h> > +#include "texcompress.h" > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +void > +_mesa_unpack_astc_2d_ldr(uint8_t *dst_row, > + unsigned dst_stride, > + const uint8_t *src_row, > + unsigned src_stride, > + unsigned src_width, > + unsigned src_height, > + mesa_format format); > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif > diff --git a/src/util/half_float.c b/src/util/half_float.c > index 4df64c2ccf9..588f050d924 100644 > --- a/src/util/half_float.c > +++ b/src/util/half_float.c > @@ -1,14 +1,16 @@ > /* > * Mesa 3-D graphics library > * > * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. > + * Copyright 2015 Philip Taylor <phi...@zaynar.co.uk> > + * Copyright 2018 Advanced Micro Devices, Inc. > * > * Permission is hereby granted, free of charge, to any person obtaining a > * copy of this software and associated documentation files (the "Software"), > * to deal in the Software without restriction, including without limitation > * the rights to use, copy, modify, merge, publish, distribute, sublicense, > * and/or sell copies of the Software, and to permit persons to whom the > * Software is furnished to do so, subject to the following conditions: > * > * The above copyright notice and this permission notice shall be included > * in all copies or substantial portions of the Software. > @@ -168,10 +170,67 @@ _mesa_half_to_float(uint16_t val) > else { > /* regular */ > flt_e = e + 112; > flt_m = m << 13; > } > > fi.i = (flt_s << 31) | (flt_e << 23) | flt_m; > result = fi.f; > return result; > } > + > +/** > + * Convert 0.0 to 0x00, 1.0 to 0xff. 
> + * Values outside the range [0.0, 1.0] will give undefined results. > + */ > +uint8_t _mesa_half_to_unorm8(uint16_t val) > +{ > + const int m = val & 0x3ff; > + const int e = (val >> 10) & 0x1f; > + const int s = (val >> 15) & 0x1; > + > + /* v = round_to_nearest(1.mmmmmmmmmm * 2^(e-15) * 255) > + * = round_to_nearest((1.mmmmmmmmmm * 255) * 2^(e-15)) > + * = round_to_nearest((1mmmmmmmmmm * 255) * 2^(e-25)) > + * = round_to_zero((1mmmmmmmmmm * 255) * 2^(e-25) + 0.5) > + * = round_to_zero(((1mmmmmmmmmm * 255) * 2^(e-24) + 1) / 2) > + * > + * This happens to give the correct answer for zero/subnormals too > + */ > + assert(s == 0 && val <= FP16_ONE); /* check 0 <= this <= 1 */ > + /* (implies e <= 15, which means the bit-shifts below are safe) */ > + > + uint32_t v = ((1 << 10) | m) * 255; > + v = ((v >> (24 - e)) + 1) >> 1; > + return v; > +} > + > +/** > + * Takes a uint16_t, divides by 65536, converts the infinite-precision > + * result to fp16 with round-to-zero. Used by the ASTC decoder. > + */ > +uint16_t _mesa_uint16_div_64k_to_half(uint16_t v) > +{ > + /* Zero or subnormal. Set the mantissa to (v << 8) and return. 
*/ > + if (v < 4) > + return v << 8; > + > + /* Count the leading 0s in the uint16_t */ > + int n = __builtin_clz(v) - (sizeof(unsigned int) - sizeof(uint16_t)) * 8; > + > + /* Shift the mantissa up so bit 16 is the hidden 1 bit, > + * mask it off, then shift back down to 10 bits > + */ > + int m = ( ((uint32_t)v << (n + 1)) & 0xffff ) >> 6; > + > + /* (0{n} 1 X{15-n}) * 2^-16 > + * = 1.X * 2^(15-n-16) > + * = 1.X * 2^(14-n - 15) > + * which is the FP16 form with e = 14 - n > + */ > + int e = 14 - n; > + > + assert(e >= 1 && e <= 30); > + assert(m >= 0 && m < 0x400); > + > + return (e << 10) | m; > +} > diff --git a/src/util/half_float.h b/src/util/half_float.h > index b3bc3f687ad..01557424735 100644 > --- a/src/util/half_float.h > +++ b/src/util/half_float.h > @@ -25,22 +25,27 @@ > #ifndef _HALF_FLOAT_H_ > #define _HALF_FLOAT_H_ > > #include <stdbool.h> > #include <stdint.h> > > #ifdef __cplusplus > extern "C" { > #endif > > +#define FP16_ONE 0x3C00 > +#define FP16_ZERO 0 > + > uint16_t _mesa_float_to_half(float val); > float _mesa_half_to_float(uint16_t val); > +uint8_t _mesa_half_to_unorm8(uint16_t v); > +uint16_t _mesa_uint16_div_64k_to_half(uint16_t v); > > static inline bool > _mesa_half_is_negative(uint16_t h) > { > return !!(h & 0x8000); > } > > > #ifdef __cplusplus > } /* extern C */ > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev