On Mon, Sep 24, 2018 at 4:20 AM Tapani Pälli <tapani.pa...@intel.com> wrote: > > From: Scott D Phillips <scott.d.phill...@intel.com> > > Rename the (un)map_gtt functions to (un)map_map (map by > returning a map) and add new functions (un)map_tiled_memcpy that > return a shadow buffer populated with the intel_tiled_memcpy > functions. > > Tiling/detiling with the cpu will be the only way to handle Yf/Ys > tiling, when support is added for those formats. > > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson) > > v3: Add units to parameter names of tile_extents (Nanley Chery) > Use _mesa_align_malloc for the shadow copy (Nanley) > Continue using gtt maps on gen4 (Nanley) > > v4: Use streaming_load_memcpy when detiling > > v5: (edited by Ken) Move map_tiled_memcpy above map_movntdqa, so it > takes precedence. Add intel_miptree_access_raw, needed after > rebasing on commit b499b85b0f2cc0c82b7c9af91502c2814fdc8e67. > > v6: refactor to changes done for sse41 separation (Tapani) > > Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> (v5) > Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> (v5) > > Signed-off-by: Tapani Pälli <tapani.pa...@intel.com> > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 110 +++++++++++++++++- > 1 file changed, 106 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 36681352ba7..4c2cee8ebba 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -31,6 +31,8 @@ > #include "intel_image.h" > #include "intel_mipmap_tree.h" > #include "intel_tex.h" > +#include "intel_tiled_memcpy.h" > +#include "intel_tiled_memcpy_sse41.h" > #include "intel_blit.h" > #include "intel_fbo.h" > > @@ -2998,7 +3000,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt) > } > > static void > -intel_miptree_unmap_gtt(struct brw_context *brw, > +intel_miptree_unmap_map(struct 
brw_context *brw, > struct intel_mipmap_tree *mt, > struct intel_miptree_map *map, > unsigned int level, unsigned int slice) > @@ -3007,7 +3009,7 @@ intel_miptree_unmap_gtt(struct brw_context *brw, > } > > static void > -intel_miptree_map_gtt(struct brw_context *brw, > +intel_miptree_map_map(struct brw_context *brw, > struct intel_mipmap_tree *mt, > struct intel_miptree_map *map, > unsigned int level, unsigned int slice) > @@ -3055,7 +3057,7 @@ intel_miptree_map_gtt(struct brw_context *brw, > mt, _mesa_get_format_name(mt->format), > x, y, map->ptr, map->stride); > > - map->unmap = intel_miptree_unmap_gtt; > + map->unmap = intel_miptree_unmap_map; > } > > static void > @@ -3087,6 +3089,101 @@ intel_miptree_unmap_blit(struct brw_context *brw, > intel_miptree_release(&map->linear_mt); > } > > +/* Compute extent parameters for use with tiled_memcpy functions. > + * xs are in units of bytes and ys are in units of strides. > + */ > +static inline void > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, > + unsigned int level, unsigned int slice, unsigned int *x1_B, > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) > +{ > + unsigned int block_width, block_height; > + unsigned int x0_el, y0_el; > + > + _mesa_get_format_block_size(mt->format, &block_width, &block_height); > + > + assert(map->x % block_width == 0); > + assert(map->y % block_height == 0); > + > + intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); > + *x1_B = (map->x / block_width + x0_el) * mt->cpp; > + *y1_el = map->y / block_height + y0_el; > + *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; > + *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; > +} > + > +static void > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, > + unsigned int slice) > +{ > + if (map->mode & GL_MAP_WRITE_BIT) { > + unsigned int x1, x2, y1, 
y2; > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > + > + char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > + dst += mt->offset; > + > + linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch, > + map->stride, brw->has_swizzling, mt->surf.tiling, > + INTEL_COPY_MEMCPY); > + > + intel_miptree_unmap_raw(mt); > + } > + _mesa_align_free(map->buffer); > + map->buffer = map->ptr = NULL; > +} > + > +static void > +intel_miptree_map_tiled_memcpy(struct brw_context *brw, > + struct intel_mipmap_tree *mt, > + struct intel_miptree_map *map, > + unsigned int level, unsigned int slice) > +{ > + intel_miptree_access_raw(brw, mt, level, slice, > + map->mode & GL_MAP_WRITE_BIT); > + > + unsigned int x1, x2, y1, y2; > + tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); > + map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16); > + > + /* The tiling and detiling functions require that the linear buffer > + * has proper 16-byte alignment (that is, its `x0` is 16-byte > + * aligned). Here we over-allocate the linear buffer by enough > + * bytes to get the proper alignment. > + */ > + map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), > 16); > + map->ptr = (char *)map->buffer + (x1 & 0xf); > + assert(map->buffer); > + > + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { > + char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); > + src += mt->offset; > + > + const tiled_to_linear_fn ttl_func = > +#if defined(USE_SSE41) > + cpu_has_sse4_1 ? tiled_to_linear_sse41 : > +#endif > + tiled_to_linear; > + > + const mem_copy_fn_type copy_type = > +#if defined(USE_SSE41) > + cpu_has_sse4_1 ? INTEL_COPY_STREAMING_LOAD : > +#endif > + INTEL_COPY_MEMCPY;
I find this bit weird -- two identical blocks of code, one picking the SSE4 vs non-SSE4 function and the other picking INTEL_COPY_STREAMING_LOAD vs INTEL_COPY_MEMCPY, both based on the same condition. Reviewing patches 1 and 2, I expected this mem_copy_fn_type to be used to select the variant of the function to call. That would be nice to do, but it's fine as a follow-up cleanup. All three patches are Reviewed-by: Matt Turner <matts...@gmail.com> > + > + ttl_func(x1, x2, y1, y2, map->ptr, src, map->stride, > + mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling, > + copy_type); _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev