A big limitation of the current direct memcpy routine is that it only recognises a couple of (admittedly common) colour format/type combinations, and cannot do any inline conversion. If we let intel_get_memcpy() choose the copy function from the mesa_formats on both sides and tell it the direction of the transfer (upload or download), we can start accepting a few mixed transfers and be less picky overall. --- src/mesa/drivers/dri/i965/intel_pixel_read.c | 20 +-- src/mesa/drivers/dri/i965/intel_tex_image.c | 16 +-- src/mesa/drivers/dri/i965/intel_tex_subimage.c | 14 +- src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 186 +++++++++++++++++-------- src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 17 ++- 5 files changed, 162 insertions(+), 91 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index cd4fbab097..eb3166ca82 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -84,17 +84,13 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, /* The miptree's buffer. */ struct brw_bo *bo; - uint32_t cpp; - mem_copy_fn mem_copy = NULL; + mem_copy_fn mem_copy; /* This fastpath is restricted to specific renderbuffer types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support * more types. */ - if (!brw->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || - pixels == NULL || - _mesa_is_bufferobj(pack->BufferObj) || + if (!brw->has_llc || pixels == NULL || _mesa_is_bufferobj(pack->BufferObj) || pack->Alignment > 4 || pack->SkipPixels > 0 || pack->SkipRows > 0 || @@ -115,15 +111,8 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, if (rb->NumSamples > 1) return false; - /* We can't handle copying from RGBX or BGRX because the tiled_memcpy - * function doesn't set the last channel to 1. Note this checks BaseFormat - * rather than TexFormat in case the RGBX format is being simulated with an - * RGBA format. 
- */ - if (rb->_BaseFormat == GL_RGB) - return false; - - if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp)) + mem_copy = intel_get_memcpy(rb->Format, format, type, INTEL_DOWNLOAD); + if (mem_copy == NULL) return false; if (!irb->mt || @@ -196,6 +185,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, pack->Alignment, pack->RowLength, pack->SkipPixels, pack->SkipRows); + uint32_t cpp = _mesa_get_format_bytes(rb->Format); tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index beed1609bd..770a9a78a8 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -387,8 +387,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, /* The miptree's buffer. */ struct brw_bo *bo; - uint32_t cpp; - mem_copy_fn mem_copy = NULL; + mem_copy_fn mem_copy; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -400,7 +399,6 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, * we need tests. */ if (!brw->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || !(texImage->TexObject->Target == GL_TEXTURE_2D || texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) || pixels == NULL || @@ -414,15 +412,8 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, packing->Invert) return false; - /* We can't handle copying from RGBX or BGRX because the tiled_memcpy - * function doesn't set the last channel to 1. Note this checks BaseFormat - * rather than TexFormat in case the RGBX format is being simulated with an - * RGBA format. 
- */ - if (texImage->_BaseFormat == GL_RGB) - return false; - - if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp)) + mem_copy = intel_get_memcpy(texImage->TexFormat, format, type, INTEL_DOWNLOAD); + if (mem_copy == NULL) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ @@ -486,6 +477,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, xoffset += level_x; yoffset += level_y; + uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat); tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index a34203087f..d6fcc93b98 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -27,6 +27,7 @@ #include "main/image.h" #include "main/macros.h" #include "main/mtypes.h" +#include "main/glformats.h" #include "main/pbo.h" #include "main/texobj.h" #include "main/texstore.h" @@ -86,8 +87,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, /* The miptree's buffer. */ struct brw_bo *bo; - uint32_t cpp; - mem_copy_fn mem_copy = NULL; + mem_copy_fn mem_copy; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -98,8 +98,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, * with _mesa_image_row_stride. However, before removing the restrictions * we need tests. 
*/ - if (!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || - !(texImage->TexObject->Target == GL_TEXTURE_2D || + if (!(texImage->TexObject->Target == GL_TEXTURE_2D || texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) || pixels == NULL || _mesa_is_bufferobj(packing->BufferObj) || @@ -116,7 +115,11 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, if (ctx->_ImageTransferState) return false; - if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp)) + if (format == GL_COLOR_INDEX) + return false; + + mem_copy = intel_get_memcpy(texImage->TexFormat, format, type, INTEL_UPLOAD); + if (mem_copy == NULL) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ @@ -193,6 +196,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, xoffset += level_x; yoffset += level_y; + uint32_t cpp = _mesa_get_format_bytes(texImage->TexFormat); linear_to_tiled( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 53a5679691..65dd950c08 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -33,6 +33,11 @@ #include "util/macros.h" +#include "main/glheader.h" +#include "main/enums.h" +#include "main/mtypes.h" +#include "main/glformats.h" + #include "brw_context.h" #include "intel_tiled_memcpy.h" @@ -213,6 +218,37 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) return dst; } +static inline void * +rgbx8(void *dst, const void *src, size_t bytes) +{ + uint8_t *d = dst; + uint8_t const *s = src; + + while (bytes >= 4) { + d[0] = s[2]; + d[1] = s[1]; + d[2] = s[0]; + d[3] = 0xff; + d += 4; + s += 4; + bytes -= 4; + } + return dst; +} + +static inline void * +bgrx8(void *dst, const void *src, size_t bytes) +{ + uint32_t *d = dst; + uint32_t const *s = src; + + while (bytes >= 4) { + *d++ = *s++ | 
0xff000000; + bytes -= 4; + } + return dst; +} + /** * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). * These ranges are in bytes, i.e. pixels * bytes-per-pixel. @@ -473,7 +509,9 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, src_pitch, swizzle_bit, + mem_copy, memcpy); } else { if (mem_copy == memcpy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, @@ -484,10 +522,11 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + mem_copy, mem_copy); } - linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -516,7 +555,9 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, src_pitch, swizzle_bit, + mem_copy, mem_copy); } else { if (mem_copy == memcpy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, @@ -526,10 +567,11 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, src_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_dst); else - unreachable("not reached"); + return linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + mem_copy, mem_copy); } - linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -558,7 +600,9 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, 
uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } else { if (mem_copy == memcpy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, @@ -568,10 +612,11 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } - xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -600,7 +645,9 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } else { if (mem_copy == memcpy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, @@ -610,10 +657,11 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); else - unreachable("not reached"); + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + mem_copy, mem_copy); } - ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); + unreachable("not reached"); } /** @@ -812,51 +860,75 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, * \param[in] tiledFormat The format of the tiled image * \param[in] format The GL format of the client data * \param[in] type The GL type of the client data - * \param[out] mem_copy Will be set to one of either the standard - * library's memcpy or a different copy 
function - * that performs an RGBA to BGRA conversion - * \param[out] cpp Number of bytes per channel * - * \return true if the format and type combination are valid + * \return the mem_copy_fn if the format and type combination are valid */ -bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp) +mem_copy_fn intel_get_memcpy(mesa_format tiledFormat, + GLenum format, GLenum type, + enum intel_memcpy_direction direction) { - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return false; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - *mem_copy = memcpy; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - *mem_copy = memcpy; - } else if (format == GL_RGBA) { - *mem_copy = rgba8_copy; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can - * use the same function. 
- */ - *mem_copy = rgba8_copy; - } else if (format == GL_RGBA) { - *mem_copy = memcpy; - } + mesa_format user_format; + mem_copy_fn fn = NULL; + + if (type == GL_BITMAP) + return NULL; + + /* Stencil tiling is a lie, though we could do similar manual detiling */ + switch ((int)tiledFormat) { + case MESA_FORMAT_S_UINT8: + case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: + case MESA_FORMAT_Z24_UNORM_X8_UINT: + case MESA_FORMAT_Z24_UNORM_S8_UINT: + return NULL; } - if (!(*mem_copy)) - return false; + if (_mesa_is_format_compressed(tiledFormat)) + return NULL; + + user_format = _mesa_format_from_format_and_type(format, type); + if (_mesa_format_is_mesa_array_format(user_format)) + user_format = _mesa_format_from_array_format(user_format); + + if (user_format == tiledFormat) { + /* Prevent any implicit conversions */ + if (_mesa_unpack_format_to_base_format(format) != + _mesa_get_format_base_format(tiledFormat)) + fn = memcpy; + } else switch ((int)tiledFormat) { + case MESA_FORMAT_B8G8R8A8_UNORM: + if (user_format == MESA_FORMAT_R8G8B8A8_UNORM) + fn = rgba8_copy; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgbx8 : memcpy; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = direction == INTEL_UPLOAD ? bgrx8 : memcpy; + break; + case MESA_FORMAT_B8G8R8X8_UNORM: + if (user_format == MESA_FORMAT_B8G8R8A8_UNORM) + fn = direction == INTEL_UPLOAD ? memcpy : bgrx8; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgba8_copy : bgrx8; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = rgba8_copy; + break; + + case MESA_FORMAT_R8G8B8A8_UNORM: + if (user_format == MESA_FORMAT_B8G8R8A8_UNORM) + fn = rgba8_copy; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgbx8 : memcpy; + else if (user_format == MESA_FORMAT_R8G8B8X8_UNORM) + fn = direction == INTEL_UPLOAD ? 
bgrx8 : memcpy; + break; + case MESA_FORMAT_R8G8B8X8_UNORM: + if (user_format == MESA_FORMAT_R8G8B8A8_UNORM) + fn = direction == INTEL_UPLOAD ? memcpy : bgrx8; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = direction == INTEL_UPLOAD ? rgba8_copy : bgrx8; + else if (user_format == MESA_FORMAT_B8G8R8X8_UNORM) + fn = rgba8_copy; + break; + } - return true; + return fn; } diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h index 62ec8847fb..e9c43920a1 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h @@ -55,7 +55,20 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, enum isl_tiling tiling, mem_copy_fn mem_copy); -bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp); +/* Tells intel_get_memcpy() whether the memcpy() is + * + * - an upload to the GPU with an aligned destination and a potentially + * unaligned source; or + * - a download from the GPU with an aligned source and a potentially + * unaligned destination. + */ +enum intel_memcpy_direction { + INTEL_UPLOAD, + INTEL_DOWNLOAD +}; + +mem_copy_fn intel_get_memcpy(mesa_format tiledFormat, + GLenum format, GLenum type, + enum intel_memcpy_direction direction); #endif /* INTEL_TILED_MEMCPY */ -- 2.13.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev