On 2019-02-04 1:47 p.m., Liu, Leo wrote: > On 2/1/19 11:28 AM, Zhu, James wrote: >> Add compute shader to support video compositor render. >> >> Signed-off-by: James Zhu <james....@amd.com> >> --- >> src/gallium/auxiliary/Makefile.sources | 2 + >> src/gallium/auxiliary/meson.build | 2 + >> src/gallium/auxiliary/vl/vl_compositor_cs.c | 414 >> ++++++++++++++++++++++++++++ >> src/gallium/auxiliary/vl/vl_compositor_cs.h | 56 ++++ >> 4 files changed, 474 insertions(+) >> create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.c >> create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.h >> >> diff --git a/src/gallium/auxiliary/Makefile.sources >> b/src/gallium/auxiliary/Makefile.sources >> index 50e8808..df000f6 100644 >> --- a/src/gallium/auxiliary/Makefile.sources >> +++ b/src/gallium/auxiliary/Makefile.sources >> @@ -348,6 +348,8 @@ VL_SOURCES := \ >> vl/vl_bicubic_filter.h \ >> vl/vl_compositor.c \ >> vl/vl_compositor.h \ >> + vl/vl_compositor_cs.c \ >> + vl/vl_compositor_cs.h \ >> vl/vl_csc.c \ >> vl/vl_csc.h \ >> vl/vl_decoder.c \ >> diff --git a/src/gallium/auxiliary/meson.build >> b/src/gallium/auxiliary/meson.build >> index 57f7e69..74e4b48 100644 >> --- a/src/gallium/auxiliary/meson.build >> +++ b/src/gallium/auxiliary/meson.build >> @@ -445,6 +445,8 @@ files_libgalliumvl = files( >> 'vl/vl_bicubic_filter.h', >> 'vl/vl_compositor.c', >> 'vl/vl_compositor.h', >> + 'vl/vl_compositor_cs.c', >> + 'vl/vl_compositor_cs.h', >> 'vl/vl_csc.c', >> 'vl/vl_csc.h', >> 'vl/vl_decoder.c', >> diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c >> b/src/gallium/auxiliary/vl/vl_compositor_cs.c >> new file mode 100644 >> index 0000000..3cd1a76 >> --- /dev/null >> +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c >> @@ -0,0 +1,414 @@ >> +/************************************************************************** >> + * >> + * Copyright 2019 Advanced Micro Devices, Inc. >> + * All Rights Reserved. 
>> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the >> + * "Software"), to deal in the Software without restriction, including >> + * without limitation the rights to use, copy, modify, merge, publish, >> + * distribute, sub license, and/or sell copies of the Software, and to >> + * permit persons to whom the Software is furnished to do so, subject to >> + * the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the >> + * next paragraph) shall be included in all copies or substantial portions >> + * of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. >> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR >> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, >> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE >> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
>> + * >> + * Authors: James Zhu <james.zhu@amd.com> >> + * >> + **************************************************************************/ >> + >> +#include <assert.h> >> + >> +#include "tgsi/tgsi_text.h" >> +#include "vl_compositor_cs.h" >> + >> +struct cs_viewport { >> + float scale_x; >> + float scale_y; >> + int translate_x; >> + int translate_y; >> + struct u_rect area; >> +}; >> + >> +char *compute_shader_video_buffer = >> + "COMP\n" >> + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" >> + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" >> + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" >> + >> + "DCL SV[0], THREAD_ID\n" >> + "DCL SV[1], BLOCK_ID\n" >> + >> + "DCL CONST[0..5]\n" >> + "DCL SVIEW[0..2], RECT, FLOAT\n" >> + "DCL SAMP[0..2]\n" >> + >> + "DCL IMAGE[0], 2D, WR\n" >> + "DCL TEMP[0..7]\n" >> + >> + "IMM[0] UINT32 { 8, 8, 1, 0}\n" >> + "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" >> + >> + "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" >> + >> + /* Drawn area check */ >> + "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" >> + "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" >> + >> + "UIF TEMP[1]\n" >> + /* Translate */ >> + "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" >> + "U2F TEMP[2], TEMP[2]\n" >> + "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n" >> + >> + /* Scale */ >> + "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" >> + "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n" >> + >> + /* Fetch texels */ >> + "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n" >> + "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n" >> + "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n" >> + >> + "MOV TEMP[4].w, IMM[1].xxxx\n" >> + >> + /* Color Space Conversion */ >> + "DP4 TEMP[7].x, CONST[0], TEMP[4]\n" >> + "DP4 TEMP[7].y, CONST[1], TEMP[4]\n" >> + "DP4 TEMP[7].z, CONST[2], TEMP[4]\n" >> + >> + "MOV TEMP[5].w, TEMP[4].zzzz\n" >> + "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n" >> + "SGT TEMP[5].w, 
TEMP[5], CONST[3].yyyy\n" >> + >> + "MAX TEMP[7].w, TEMP[5], TEMP[6]\n" >> + >> + "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n" >> + "ENDIF\n" >> + >> + "END\n"; >> + >> +char *compute_shader_weave = >> + "COMP\n" >> + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" >> + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" >> + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" >> + >> + "DCL SV[0], THREAD_ID\n" >> + "DCL SV[1], BLOCK_ID\n" >> + >> + "DCL CONST[0..5]\n" >> + "DCL SVIEW[0..2], RECT, FLOAT\n" >> + "DCL SAMP[0..2]\n" >> + >> + "DCL IMAGE[0], 2D, WR\n" >> + "DCL TEMP[0..9]\n" >> + >> + "IMM[0] UINT32 { 8, 8, 1, 0}\n" >> + "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" >> + "IMM[2] UINT32 { 1, 2, 4, 0}\n" >> + >> + "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" >> + >> + /* Drawn area check */ >> + "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" >> + "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" >> + >> + "UIF TEMP[1]\n" >> + "MOV TEMP[2], TEMP[0]\n" >> + /* Translate */ >> + "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n" >> + >> + /* Texture layer */ >> + "UMOD TEMP[2].z, TEMP[2].yyyy, IMM[2].yyyy\n" >> + "UMOD TEMP[3].z, TEMP[2].yyyy, IMM[2].zzzz\n" >> + "USHR TEMP[3].z, TEMP[3].zzzz, IMM[2].xxxx\n" >> + >> + "USHR TEMP[2].y, TEMP[2], IMM[2].xxxx\n" >> + "USHR TEMP[3].xy, TEMP[2], IMM[2].xxxx\n" >> + >> + "U2F TEMP[4], TEMP[2]\n" >> + "U2F TEMP[5], TEMP[3]\n" >> + >> + /* Scale */ >> + "DIV TEMP[4], TEMP[4], CONST[3].zwzw\n" >> + "DIV TEMP[5], TEMP[5], CONST[3].zwzw\n" >> + >> + /* Fetch texels */ >> + "TEX_LZ TEMP[6].x, TEMP[4], SAMP[0], RECT\n" >> + "TEX_LZ TEMP[6].y, TEMP[5], SAMP[1], RECT\n" >> + "TEX_LZ TEMP[6].z, TEMP[5], SAMP[2], RECT\n" >> + >> + "MOV TEMP[6].w, IMM[1].xxxx\n" >> + >> + /* Color Space Conversion */ >> + "DP4 TEMP[9].x, CONST[0], TEMP[6]\n" >> + "DP4 TEMP[9].y, CONST[1], TEMP[6]\n" >> + "DP4 TEMP[9].z, CONST[2], TEMP[6]\n" >> + >> + "MOV 
TEMP[7].w, TEMP[6].zzzz\n" >> + "SLE TEMP[8].w, TEMP[7], CONST[3].xxxx\n" >> + "SGT TEMP[7].w, TEMP[7], CONST[3].yyyy\n" >> + >> + "MAX TEMP[9].w, TEMP[7], TEMP[8]\n" >> + >> + "STORE IMAGE[0], TEMP[0], TEMP[9], 2D\n" >> + "ENDIF\n" >> + >> + "END\n"; >> + >> +char *compute_shader_sub_pic = >> + "COMP\n" >> + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" >> + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" >> + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" >> + >> + "DCL SV[0], THREAD_ID\n" >> + "DCL SV[1], BLOCK_ID\n" >> + >> + "DCL CONST[0..5]\n" >> + "DCL SVIEW[0..2], RECT, FLOAT\n" >> + "DCL SAMP[0..2]\n" >> + >> + "DCL IMAGE[0], 2D, WR\n" >> + "DCL TEMP[0..3]\n" >> + >> + "IMM[0] UINT32 { 8, 8, 1, 0}\n" >> + "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" >> + >> + "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" >> + >> + /* Drawn area check */ >> + "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" >> + "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" >> + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" >> + >> + "UIF TEMP[1]\n" >> + /* Translate */ >> + "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" >> + "U2F TEMP[2], TEMP[2]\n" >> + >> + /* Scale */ >> + "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" >> + >> + /* Fetch texels */ >> + "TEX_LZ TEMP[3].x, TEMP[2], SAMP[0], RECT\n" >> + >> + "STORE IMAGE[0], TEMP[0], TEMP[3].xxxx, 2D\n" >> + "ENDIF\n" >> + >> + "END\n"; >> + >> +static void >> +cs_launch(struct vl_compositor *c, >> + struct vl_compositor_state *s, >> + void *cs) >> +{ >> + struct pipe_context *ctx = c->pipe; >> + >> + /* Bind the image */ >> + struct pipe_image_view image = {}; >> + image.resource = c->fb_state.cbufs[0]->texture; >> + image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE; >> + image.format = c->fb_state.cbufs[0]->texture->format; >> + >> + ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, &image); >> + >> + /* Bind compute shader */ >> + ctx->bind_compute_state(ctx, 
cs); >> + >> + /* Dispatch compute */ >> + struct pipe_grid_info info = {}; >> + info.block[0] = 8; >> + info.block[1] = 8; >> + info.block[2] = 1; >> + info.grid[0] = DIV_ROUND_UP(c->fb_state.width, info.block[0]); >> + info.grid[1] = DIV_ROUND_UP(c->fb_state.height, info.block[1]); >> + info.grid[2] = 1; >> + >> + ctx->launch_grid(ctx, &info); >> +} >> + >> +static inline struct u_rect >> +cs_calc_drawn_area(struct vl_compositor_state *s, >> + struct vl_compositor_layer *layer) >> +{ >> + struct vertex2f tl, br; >> + struct u_rect result; >> + >> + assert(s && layer); >> + >> + tl = layer->dst.tl; >> + br = layer->dst.br; >> + >> + /* Scale */ >> + result.x0 = tl.x * layer->viewport.scale[0] + >> layer->viewport.translate[0]; >> + result.y0 = tl.y * layer->viewport.scale[1] + >> layer->viewport.translate[1]; >> + result.x1 = br.x * layer->viewport.scale[0] + >> layer->viewport.translate[0]; >> + result.y1 = br.y * layer->viewport.scale[1] + >> layer->viewport.translate[1]; >> + >> + /* Clip */ >> + result.x0 = MAX2(result.x0, s->scissor.minx); >> + result.y0 = MAX2(result.y0, s->scissor.miny); >> + result.x1 = MIN2(result.x1, s->scissor.maxx); >> + result.y1 = MIN2(result.y1, s->scissor.maxy); >> + return result; >> +} >> + >> +static bool >> +cs_set_viewport(struct vl_compositor_state *s, >> + struct cs_viewport *drawn) >> +{ >> + struct pipe_transfer *buf_transfer; >> + >> + assert(s && drawn); >> + >> + void *ptr = pipe_buffer_map(s->pipe, s->csc_matrix, >> + PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE, >> + &buf_transfer); >> + >> + if (!ptr) >> + return false; >> + >> + float *ptr_float = (float *)ptr; >> + ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2; >> + *ptr_float++ = drawn->scale_x; >> + *ptr_float++ = drawn->scale_y; >> + >> + int *ptr_int = (int *)ptr_float; >> + *ptr_int++ = drawn->area.x0; >> + *ptr_int++ = drawn->area.y0; >> + *ptr_int++ = drawn->area.x1; >> + *ptr_int++ = drawn->area.y1; >> + *ptr_int++ = drawn->translate_x; >> + *ptr_int = 
drawn->translate_y; >> + >> + pipe_buffer_unmap(s->pipe, buf_transfer); >> + >> + return true; >> +} >> + >> +static void >> +cs_draw_layers(struct vl_compositor *c, >> + struct vl_compositor_state *s, >> + struct u_rect *dirty) >> +{ >> + unsigned i; >> + static struct cs_viewport old_drawn; >> + >> + assert(c); >> + >> + for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) { >> + if (s->used_layers & (1 << i)) { >> + struct vl_compositor_layer *layer = &s->layers[i]; >> + struct pipe_sampler_view **samplers = &layer->sampler_views[0]; >> + unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : >> 3; >> + struct cs_viewport drawn; >> + >> + drawn.area = cs_calc_drawn_area(s, layer); >> + drawn.scale_x = layer->viewport.scale[0] / >> + (float)layer->sampler_views[0]->texture->width0; >> + drawn.scale_y = drawn.scale_x; >> + drawn.translate_x = (int)layer->viewport.translate[0]; >> + drawn.translate_y = (int)layer->viewport.translate[1]; >> + >> + if (memcmp(&drawn, &old_drawn, sizeof(struct cs_viewport))) { >> + cs_set_viewport(s, &drawn); >> + old_drawn = drawn; >> + pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, >> + s->csc_matrix); >> + } >> + >> + c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0, >> + num_sampler_views, layer->samplers); >> + c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, >> + num_sampler_views, samplers); >> + >> + if (num_sampler_views == 3) >> + cs_launch(c, s, layer->cs); >> + else if (num_sampler_views == 1) >> + cs_launch(c, s, c->cs_sub_pic); > What is the counterpart of cs_sub_pic in the gfx implementation? Will this > get built, given that cs_sub_pic is only added to the header file in the next patch? Every > patch in the series has to build on its own when applied incrementally.
The gfx counterpart is the create_frag_shader_rgba shader, which renders these sub-pictures. I am rewriting this part. James > > > Leo > > >> + else >> + assert(!"Not support yet!"); >> + >> + if (dirty) { >> + struct u_rect drawn = cs_calc_drawn_area(s, layer); >> + dirty->x0 = MIN2(drawn.x0, dirty->x0); >> + dirty->y0 = MIN2(drawn.y0, dirty->y0); >> + dirty->x1 = MAX2(drawn.x1, dirty->x1); >> + dirty->y1 = MAX2(drawn.y1, dirty->y1); >> + } >> + } >> + } >> +} >> + >> +void * >> +vl_compositor_cs_create_shader(struct vl_compositor *c, >> + const char *compute_shader_text) >> +{ >> + assert(c && compute_shader_text); >> + >> + struct tgsi_token tokens[1024]; >> + if (!tgsi_text_translate(compute_shader_text, tokens, >> ARRAY_SIZE(tokens))) { >> + assert(0); >> + return NULL; >> + } >> + >> + struct pipe_compute_state state = {}; >> + state.ir_type = PIPE_SHADER_IR_TGSI; >> + state.prog = tokens; >> + >> + /* create compute shader */ >> + return c->pipe->create_compute_state(c->pipe, &state); >> +} >> + >> +void >> +vl_compositor_cs_render(struct vl_compositor_state *s, >> + struct vl_compositor *c, >> + struct pipe_surface *dst_surface, >> + struct u_rect *dirty_area, >> + bool clear_dirty) >> +{ >> + assert(c && s); >> + assert(dst_surface); >> + >> + c->fb_state.width = dst_surface->width; >> + c->fb_state.height = dst_surface->height; >> + c->fb_state.cbufs[0] = dst_surface; >> + >> + if (!s->scissor_valid) { >> + s->scissor.minx = 0; >> + s->scissor.miny = 0; >> + s->scissor.maxx = dst_surface->width; >> + s->scissor.maxy = dst_surface->height; >> + } >> + >> + if (clear_dirty && dirty_area && >> + (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < >> dirty_area->y1)) { >> + >> + c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color, >> + 0, 0, dst_surface->width, dst_surface->height, >> false); >> + dirty_area->x0 = dirty_area->y0 = MAX_DIRTY; >> + dirty_area->x1 = dirty_area->y1 = MIN_DIRTY; >> + } >> + >> + cs_draw_layers(c, s, dirty_area); >> +} >> diff --git 
a/src/gallium/auxiliary/vl/vl_compositor_cs.h >> b/src/gallium/auxiliary/vl/vl_compositor_cs.h >> new file mode 100644 >> index 0000000..a3f61dc >> --- /dev/null >> +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.h >> @@ -0,0 +1,56 @@ >> +/************************************************************************** >> + * >> + * Copyright 2019 Advanced Micro Devices, Inc. >> + * All Rights Reserved. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the >> + * "Software"), to deal in the Software without restriction, including >> + * without limitation the rights to use, copy, modify, merge, publish, >> + * distribute, sub license, and/or sell copies of the Software, and to >> + * permit persons to whom the Software is furnished to do so, subject to >> + * the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the >> + * next paragraph) shall be included in all copies or substantial portions >> + * of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. >> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR >> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, >> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE >> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
>> + * >> + * Authors: James Zhu <james.zhu@amd.com> >> + * >> + **************************************************************************/ >> + >> +#ifndef vl_compositor_cs_h >> +#define vl_compositor_cs_h >> + >> +#include "vl_compositor.h" >> + >> +char *compute_shader_video_buffer; >> +char *compute_shader_weave; >> +char *compute_shader_sub_pic; >> + >> +/** >> + * create compute shader >> + */ >> +void * >> +vl_compositor_cs_create_shader(struct vl_compositor *c, >> + const char *compute_shader_text); >> + >> +/** >> + * render the layers to the frontbuffer with compute shader >> + */ >> +void >> +vl_compositor_cs_render(struct vl_compositor_state *s, >> + struct vl_compositor *c, >> + struct pipe_surface *dst_surface, >> + struct u_rect *dirty_area, >> + bool clear_dirty); >> + >> +#endif /* vl_compositor_cs_h */ _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev