There are a few differences between Mali T860 (Panfrost's primary reference target) and the older Midgard generations (T600/T700):
- Miscellaneous different magic numbers. It's not clear what these numbers mean on either the old or new configurations yet. - Errata fixes. T800 is the final Midgard generation and presumably the least buggy. Older Midgard has some extra hardware errata we have to workaround. - SFBD vs MFBD split. Essentially, older Midgard use a Single FrameBuffer Descriptor (SFBD), which corresponds to single render-target rendering. Newer Midgard (T760+) use a Multiple FrameBuffer Descriptor (MFBD), allowing multiple RTs. On ES 2.0, these descriptors serve the same function, but we implement both, depending on the version of the hardware. - CPU bitness. 32-bit systems generally use 32-bit GPU descriptors, and vice versa for 64-bit. Our target T760 systems are 32-bit whereas our target T860 systems are 64-bit. More work is needed in this area. This patch fixes support in these areas for supporting older Midgard hardware. It is tested on Mali T760 and Mali T860. Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io> --- .../drivers/panfrost/include/panfrost-job.h | 21 +- src/gallium/drivers/panfrost/meson.build | 1 + src/gallium/drivers/panfrost/pan_assemble.c | 4 +- src/gallium/drivers/panfrost/pan_blending.c | 4 +- src/gallium/drivers/panfrost/pan_context.c | 541 ++++++++++-------- src/gallium/drivers/panfrost/pan_context.h | 31 +- 6 files changed, 340 insertions(+), 262 deletions(-) diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h index dbb5486bfa4..3b51fcfddef 100644 --- a/src/gallium/drivers/panfrost/include/panfrost-job.h +++ b/src/gallium/drivers/panfrost/include/panfrost-job.h @@ -30,8 +30,6 @@ #include <stdint.h> #include <panfrost-misc.h> -#define T8XX - #define MALI_SHORT_PTR_BITS (sizeof(uintptr_t)*8) #define MALI_FBD_HIERARCHY_WEIGHTS 8 @@ -120,7 +118,7 @@ enum mali_alt_func { #define MALI_HAS_MSAA (1 << 0) #define MALI_CAN_DISCARD (1 << 5) -/* Applies on T6XX, specifying that programmable 
blending is in use */ +/* Applies on SFBD systems, specifying that programmable blending is in use */ #define MALI_HAS_BLEND_SHADER (1 << 6) /* func is mali_func */ @@ -404,7 +402,7 @@ enum mali_format { #define MALI_NO_ALPHA_TO_COVERAGE (1 << 10) struct mali_blend_meta { -#ifdef T8XX +#ifndef BIFROST /* Base value of 0x200. * OR with 0x1 for blending (anything other than REPLACE). * OR with 0x2 for programmable blending @@ -995,7 +993,7 @@ struct mali_vertex_tiler_postfix { mali_ptr framebuffer; #ifdef __LP64__ -#ifndef T8XX +#ifdef BIFROST /* most likely padding to make this a multiple of 64 bytes */ u64 zero7; #endif @@ -1003,29 +1001,26 @@ struct mali_vertex_tiler_postfix { } __attribute__((packed)); struct midgard_payload_vertex_tiler { -#ifdef T6XX +#ifndef __LP64__ union midgard_primitive_size primitive_size; #endif struct mali_vertex_tiler_prefix prefix; -#ifdef T6XX +#ifndef __LP64__ u32 zero3; #endif + u32 gl_enables; // 0x5 /* Offset for first vertex in buffer */ u32 draw_start; -#ifdef T6XX - u32 zero5; -#else - u64 zero5; -#endif + uintptr_t zero5; struct mali_vertex_tiler_postfix postfix; -#ifdef T8XX +#ifdef __LP64__ union midgard_primitive_size primitive_size; #endif } __attribute__((packed)); diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 5e799eae119..79c1639a3e1 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -63,6 +63,7 @@ nondrm_overlay_check = run_command('ls', overlay) has_nondrm_overlay = nondrm_overlay_check.returncode() == 0 if has_nondrm_overlay + subdir('nondrm/include') files_panfrost += files('nondrm/pan_nondrm.c') inc_panfrost += include_directories('nondrm/include') compile_args_panfrost += '-DPAN_NONDRM_OVERLAY' diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 44136acc18a..4cbbecce0f7 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ 
b/src/gallium/drivers/panfrost/pan_assemble.c @@ -96,8 +96,8 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m if (type == JOB_TYPE_VERTEX) meta->varying_count += 1; - /* gl_FragCoord does -not- eat an extra spot; it will be included in our count if we need it */ - + /* Note: gl_FragCoord does -not- eat an extra spot; it will be included + * in our count if we need it */ meta->midgard1.unknown2 = 8; /* XXX */ diff --git a/src/gallium/drivers/panfrost/pan_blending.c b/src/gallium/drivers/panfrost/pan_blending.c index 058fb6bda84..cecdd780ce1 100644 --- a/src/gallium/drivers/panfrost/pan_blending.c +++ b/src/gallium/drivers/panfrost/pan_blending.c @@ -296,7 +296,7 @@ panfrost_make_fixed_blend_part(unsigned func, unsigned src_factor, unsigned dst_ * fixed-function operation breaks down. */ static bool -panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pipe_blend_color *blend_color, float *out) +panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pipe_blend_color *blend_color, void *out) { /* Color components used */ bool cc[4] = { false }; @@ -335,7 +335,7 @@ panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pip /* We have the constant -- success! */ - *out = constant; + memcpy(out, &constant, sizeof(float)); return true; } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 565e6541b6c..44c0ea3e8f5 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -58,22 +58,35 @@ static int performance_counter_number = 0; /* TODO: Sample size, etc */ +/* True for t6XX, false for t8xx. TODO: Run-time settable for automatic + * hardware configuration. 
*/ + +static bool is_t6xx = false; + +/* If set, we'll require the use of single render-target framebuffer + * descriptors (SFBD), for older hardware -- specifically, <T760 hardware, If + * false, we'll use the MFBD no matter what. New hardware -does- retain support + * for SFBD, and in theory we could flip between them on a per-RT basis, but + * there's no real advantage to doing so */ + +static bool require_sfbd = false; + static void panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) { SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); -#ifdef SFBD - SET_BIT(ctx->fragment_fbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled); -#else - SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled); + if (require_sfbd) { + SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled); + } else { + SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled); - SET_BIT(ctx->fragment_fbd.unk1, (1 << 4) | (1 << 1), enabled); + SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled); - /* XXX */ - ctx->fragment_fbd.rt_count_2 = enabled ? 4 : 1; -#endif + /* XXX */ + ctx->fragment_mfbd.rt_count_2 = enabled ? 
4 : 1; + } } /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically @@ -85,7 +98,11 @@ panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) static void panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) { -#ifdef MFBD + if (require_sfbd) { + printf("AFBC not supported yet on SFBD\n"); + assert(0); + } + struct pipe_context *gallium = (struct pipe_context *) ctx; struct panfrost_screen *screen = pan_screen(gallium->screen); /* AFBC metadata is 16 bytes per tile */ @@ -109,10 +126,6 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1); rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu; -#else - printf("AFBC not supported yet on SFBD\n"); - assert(0); -#endif } static void @@ -144,6 +157,11 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) if (!rsrc->bo->has_afbc) continue; + if (require_sfbd) { + fprintf(stderr, "Color AFBC not supported on SFBD\n"); + assert(0); + } + /* Enable AFBC for the render target */ ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu; ctx->fragment_rts[0].afbc.stride = 0; @@ -163,7 +181,12 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture; if (rsrc->bo->has_afbc) { - ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; + if (require_sfbd) { + fprintf(stderr, "Depth AFBC not supported on SFBD\n"); + assert(0); + } + + ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA; ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu; ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0; @@ -175,13 +198,18 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. 
Unclear where the 0x20 is from */ - ctx->fragment_fbd.unk3 |= 0x400; + ctx->fragment_mfbd.unk3 |= 0x400; } } /* For the special case of a depth-only FBO, we need to attach a dummy render target */ if (ctx->pipe_framebuffer.nr_cbufs == 0) { + if (require_sfbd) { + fprintf(stderr, "Depth-only FBO not supported on SFBD\n"); + assert(0); + } + ctx->fragment_rts[0].format = 0x80008000; ctx->fragment_rts[0].framebuffer = 0; ctx->fragment_rts[0].framebuffer_stride = 0; @@ -190,7 +218,6 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) /* Framebuffer descriptor */ -#ifdef SFBD static void panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h) { @@ -204,26 +231,30 @@ panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, i fb->resolution_check = ((w + h) / 3) << 4; } -#endif -static PANFROST_FRAMEBUFFER -panfrost_emit_fbd(struct panfrost_context *ctx) +static struct mali_single_framebuffer +panfrost_emit_sfbd(struct panfrost_context *ctx) { -#ifdef SFBD struct mali_single_framebuffer framebuffer = { .unknown2 = 0x1f, .format = 0x30000000, .clear_flags = 0x1000, .unknown_address_0 = ctx->scratchpad.gpu, - .unknown_address_1 = ctx->scratchpad.gpu + 0x6000, - .unknown_address_2 = ctx->scratchpad.gpu + 0x6200, + .unknown_address_1 = ctx->misc_0.gpu, + .unknown_address_2 = ctx->misc_0.gpu + 40960, .tiler_flags = 0xf0, .tiler_heap_free = ctx->tiler_heap.gpu, .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, }; panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); -#else + + return framebuffer; +} + +static struct bifrost_framebuffer +panfrost_emit_mfbd(struct panfrost_context *ctx) +{ struct bifrost_framebuffer framebuffer = { .tiler_meta = 0xf00000c600, @@ -249,8 +280,6 @@ panfrost_emit_fbd(struct panfrost_context *ctx) .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, }; -#endif - return framebuffer; } @@ -297,33 +326,33 @@ 
panfrost_new_frag_framebuffer(struct panfrost_context *ctx) stride = -stride; } -#ifdef SFBD - struct mali_single_framebuffer fb = panfrost_emit_fbd(ctx); - - fb.framebuffer = framebuffer; - fb.stride = stride; + if (require_sfbd) { + struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx); - fb.format = 0xb84e0281; /* RGB32, no MSAA */ -#else - struct bifrost_framebuffer fb = panfrost_emit_fbd(ctx); + fb.framebuffer = framebuffer; + fb.stride = stride; - /* XXX: MRT case */ - fb.rt_count_2 = 1; - fb.unk3 = 0x100; + fb.format = 0xb84e0281; /* RGB32, no MSAA */ + memcpy(&ctx->fragment_sfbd, &fb, sizeof(fb)); + } else { + struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx); - struct bifrost_render_target rt = { - .unk1 = 0x4000000, - .format = 0x860a8899, /* RGBA32, no MSAA */ - .framebuffer = framebuffer, - .framebuffer_stride = (stride / 16) & 0xfffffff, - }; + /* XXX: MRT case */ + fb.rt_count_2 = 1; + fb.unk3 = 0x100; - memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt)); + struct bifrost_render_target rt = { + .unk1 = 0x4000000, + .format = 0x860a8899, /* RGBA32, no MSAA */ + .framebuffer = framebuffer, + .framebuffer_stride = (stride / 16) & 0xfffffff, + }; - memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra)); -#endif + memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt)); - memcpy(&ctx->fragment_fbd, &fb, sizeof(fb)); + memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra)); + memcpy(&ctx->fragment_mfbd, &fb, sizeof(fb)); + } } /* Maps float 0.0-1.0 to int 0x00-0xFF */ @@ -334,58 +363,77 @@ normalised_float_to_u8(float f) } static void -panfrost_clear( - struct pipe_context *pipe, - unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil) +panfrost_clear_sfbd(struct panfrost_context *ctx, + bool clear_color, + bool clear_depth, + bool clear_stencil, + uint32_t packed_color, + double depth, unsigned stencil + ) { - struct panfrost_context *ctx = pan_context(pipe); + struct mali_single_framebuffer *sfbd = 
&ctx->fragment_sfbd; - if (!color) { - printf("Warning: clear color null?\n"); - return; + if (clear_color) { + sfbd->clear_color_1 = packed_color; + sfbd->clear_color_2 = packed_color; + sfbd->clear_color_3 = packed_color; + sfbd->clear_color_4 = packed_color; } - /* Save settings for FBO switch */ - ctx->last_clear.buffers = buffers; - ctx->last_clear.color = color; - ctx->last_clear.depth = depth; - ctx->last_clear.depth = depth; + if (clear_depth) { + sfbd->clear_depth_1 = depth; + sfbd->clear_depth_2 = depth; + sfbd->clear_depth_3 = depth; + sfbd->clear_depth_4 = depth; + } - bool clear_color = buffers & PIPE_CLEAR_COLOR; - bool clear_depth = buffers & PIPE_CLEAR_DEPTH; - bool clear_stencil = buffers & PIPE_CLEAR_STENCIL; + if (clear_stencil) { + sfbd->clear_stencil = stencil; + } - /* Remember that we've done something */ - ctx->frame_cleared = true; + /* Setup buffers */ - /* Alpha clear only meaningful without alpha channel */ - bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format); - float clear_alpha = has_alpha ? color->f[3] : 1.0f; + if (clear_depth) { + sfbd->depth_buffer = ctx->depth_stencil_buffer.gpu; + sfbd->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; + } - uint32_t packed_color = - (normalised_float_to_u8(clear_alpha) << 24) | - (normalised_float_to_u8(color->f[2]) << 16) | - (normalised_float_to_u8(color->f[1]) << 8) | - (normalised_float_to_u8(color->f[0]) << 0); + if (clear_stencil) { + sfbd->stencil_buffer = ctx->depth_stencil_buffer.gpu; + sfbd->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; + } -#ifdef MFBD - struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0]; -#else - struct mali_single_framebuffer *buffer_color = &ctx->fragment_fbd; -#endif + /* Set flags based on what has been cleared, for the SFBD case */ + /* XXX: What do these flags mean? 
*/ + int clear_flags = 0x101100; -#ifdef MFBD - struct bifrost_framebuffer *buffer_ds = &ctx->fragment_fbd; -#else - struct mali_single_framebuffer *buffer_ds = buffer_color; -#endif + if (clear_color && clear_depth && clear_stencil) { + /* On a tiler like this, it's fastest to clear all three buffers at once */ - if (clear_color) { - /* Fields duplicated 4x for unknown reasons. Same in Utgard, - * too, which is doubly weird. */ + clear_flags |= MALI_CLEAR_FAST; + } else { + clear_flags |= MALI_CLEAR_SLOW; + + if (clear_stencil) + clear_flags |= MALI_CLEAR_SLOW_STENCIL; + } + + sfbd->clear_flags = clear_flags; +} +static void +panfrost_clear_mfbd(struct panfrost_context *ctx, + bool clear_color, + bool clear_depth, + bool clear_stencil, + uint32_t packed_color, + double depth, unsigned stencil + ) +{ + struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0]; + struct bifrost_framebuffer *buffer_ds = &ctx->fragment_mfbd; + + if (clear_color) { buffer_color->clear_color_1 = packed_color; buffer_color->clear_color_2 = packed_color; buffer_color->clear_color_3 = packed_color; @@ -393,72 +441,71 @@ panfrost_clear( } if (clear_depth) { -#ifdef SFBD - buffer_ds->clear_depth_1 = depth; - buffer_ds->clear_depth_2 = depth; - buffer_ds->clear_depth_3 = depth; - buffer_ds->clear_depth_4 = depth; -#else buffer_ds->clear_depth = depth; -#endif } if (clear_stencil) { buffer_ds->clear_stencil = stencil; } - /* Setup buffers depending on MFBD/SFBD */ - -#ifdef MFBD - if (clear_depth || clear_stencil) { /* Setup combined 24/8 depth/stencil */ - ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; + ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA; //ctx->fragment_extra.unk = /*0x405*/0x404; ctx->fragment_extra.unk = 0x405; ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu; ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4; } +} -#else +static void +panfrost_clear( + struct pipe_context *pipe, + unsigned buffers, + const union pipe_color_union 
*color, + double depth, unsigned stencil) +{ + struct panfrost_context *ctx = pan_context(pipe); - if (clear_depth) { - buffer_ds->depth_buffer = ctx->depth_stencil_buffer.gpu; - buffer_ds->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; + if (!color) { + printf("Warning: clear color null?\n"); + return; } - if (clear_stencil) { - buffer_ds->stencil_buffer = ctx->depth_stencil_buffer.gpu; - buffer_ds->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; - } + /* Save settings for FBO switch */ + ctx->last_clear.buffers = buffers; + ctx->last_clear.color = color; + ctx->last_clear.depth = depth; + ctx->last_clear.depth = depth; -#endif + bool clear_color = buffers & PIPE_CLEAR_COLOR; + bool clear_depth = buffers & PIPE_CLEAR_DEPTH; + bool clear_stencil = buffers & PIPE_CLEAR_STENCIL; -#ifdef SFBD - /* Set flags based on what has been cleared, for the SFBD case */ - /* XXX: What do these flags mean? */ - int clear_flags = 0x101100; + /* Remember that we've done something */ + ctx->frame_cleared = true; - if (clear_color && clear_depth && clear_stencil) { - /* On a tiler like this, it's fastest to clear all three buffers at once */ + /* Alpha clear only meaningful without alpha channel */ + bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format); + float clear_alpha = has_alpha ? 
color->f[3] : 1.0f; - clear_flags |= MALI_CLEAR_FAST; - } else { - clear_flags |= MALI_CLEAR_SLOW; + uint32_t packed_color = + (normalised_float_to_u8(clear_alpha) << 24) | + (normalised_float_to_u8(color->f[2]) << 16) | + (normalised_float_to_u8(color->f[1]) << 8) | + (normalised_float_to_u8(color->f[0]) << 0); - if (clear_stencil) - clear_flags |= MALI_CLEAR_SLOW_STENCIL; + if (require_sfbd) { + panfrost_clear_sfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil); + } else { + panfrost_clear_mfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil); } - - fbd->clear_flags = clear_flags; -#endif } -static void -panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) +static mali_ptr +panfrost_attach_vt_mfbd(struct panfrost_context *ctx) { -#ifdef MFBD - /* MFBD needs a sequential semi-render target upload, but this is, is beyond me for now */ + /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */ struct bifrost_render_target rts_list[] = { { .chunknown = { @@ -470,18 +517,31 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) }; /* Allocate memory for the three components */ - int size = 1024 + sizeof(ctx->vt_framebuffer) + sizeof(rts_list); + int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list); struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); /* Opaque 1024-block */ rts_list[0].chunknown.pointer = transfer.gpu; - mali_ptr framebuffer = (transfer.gpu + 1024) | PANFROST_DEFAULT_FBD; - memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer, sizeof(ctx->vt_framebuffer)); - memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer), rts_list, sizeof(rts_list)); -#else - mali_ptr framebuffer = panfrost_upload_transient(ctx, &ctx->vt_framebuffer, sizeof(ctx->vt_framebuffer)) | PANFROST_DEFAULT_FBD; -#endif + memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd)); + memcpy(transfer.cpu 
+ 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list)); + + return (transfer.gpu + 1024) | MALI_MFBD; +} + +static mali_ptr +panfrost_attach_vt_sfbd(struct panfrost_context *ctx) +{ + return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD; +} + +static void +panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) +{ + mali_ptr framebuffer = require_sfbd ? + panfrost_attach_vt_sfbd(ctx) : + panfrost_attach_vt_mfbd(ctx); + ctx->payload_vertex.postfix.framebuffer = framebuffer; ctx->payload_tiler.postfix.framebuffer = framebuffer; } @@ -528,7 +588,11 @@ panfrost_invalidate_frame(struct panfrost_context *ctx) if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) ctx->cmdstream_i = 0; - ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + if (require_sfbd) + ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); + else + ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); + panfrost_new_frag_framebuffer(ctx); /* Reset varyings allocated */ @@ -563,7 +627,7 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx) .workgroups_x_shift_2 = 0x2, .workgroups_x_shift_3 = 0x5, }, - .gl_enables = 0x6 + .gl_enables = 0x4 | (is_t6xx ? 
0 : 0x2), }; memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); @@ -751,14 +815,14 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) struct mali_shader_meta shader = { .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000), - .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010 /*| MALI_CAN_DISCARD*/, -#ifdef T8XX + .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010, .unknown2_4 = MALI_NO_MSAA | 0x4e0, -#else - .unknown2_4 = MALI_NO_MSAA | 0x4f0, -#endif }; + if (is_t6xx) { + shader.unknown2_4 |= 0x10; + } + struct pipe_stencil_state default_stencil = { .enabled = 0, .func = PIPE_FUNC_ALWAYS, @@ -801,14 +865,6 @@ panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_e #endif }; - /* XXX: What is this? */ -#ifdef T6XX - - if (is_tiler) - job.unknown_flags = ctx->draw_count ? 64 : 1; - -#endif - /* Only non-elided tiler jobs have dependencies which are known at this point */ if (is_tiler && !is_elided_tiler) { @@ -873,12 +929,16 @@ panfrost_fragment_job(struct panfrost_context *ctx) if (ctx->pipe_framebuffer.nr_cbufs == 1) { struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; - int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0); if (rsrc->bo->has_checksum) { - //ctx->fragment_fbd.unk3 |= 0xa00000; - //ctx->fragment_fbd.unk3 = 0xa02100; - ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; + if (require_sfbd) { + fprintf(stderr, "Checksumming not supported on SFBD\n"); + assert(0); + } + + int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0); + + ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA; ctx->fragment_extra.unk |= 0x420; ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride; ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0; @@ -888,22 +948,29 @@ panfrost_fragment_job(struct panfrost_context *ctx) /* The frame is complete and therefore the framebuffer descriptor is * ready for linkage and upload 
*/ - size_t sz = sizeof(ctx->fragment_fbd) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1; + size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1); struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz); off_t offset = 0; - memcpy(fbd_t.cpu, &ctx->fragment_fbd, sizeof(ctx->fragment_fbd)); - offset += sizeof(ctx->fragment_fbd); + if (require_sfbd) { + /* Upload just the SFBD all at once */ + memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd)); + offset += sizeof(ctx->fragment_sfbd); + } else { + /* Upload the MFBD header */ + memcpy(fbd_t.cpu, &ctx->fragment_mfbd, sizeof(ctx->fragment_mfbd)); + offset += sizeof(ctx->fragment_mfbd); + + /* Upload extra framebuffer info if necessary */ + if (ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) { + memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra)); + offset += sizeof(struct bifrost_fb_extra); + } - /* Upload extra framebuffer info if necessary */ - if (ctx->fragment_fbd.unk3 & MALI_MFBD_EXTRA) { - memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra)); - offset += sizeof(struct bifrost_fb_extra); + /* Upload (single) render target */ + memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1); } - /* Upload (single) render target */ - memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1); - /* Generate the fragment (frame) job */ struct mali_job_descriptor_header header = { @@ -917,9 +984,16 @@ panfrost_fragment_job(struct panfrost_context *ctx) struct mali_payload_fragment payload = { .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0), .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height), - .framebuffer = fbd_t.gpu | PANFROST_DEFAULT_FBD | (ctx->fragment_fbd.unk3 
& MALI_MFBD_EXTRA ? 2 : 0), + .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD), }; + if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) { + /* Signal that there is an extra portion of the framebuffer + * descriptor */ + + payload.framebuffer |= 2; + } + /* Normally, there should be no padding. However, fragment jobs are * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes * of zero padding in between. */ @@ -1114,10 +1188,10 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1]; /* CAN_DISCARD should be set if the fragment shader possibly - * contains a 'discard' instruction, or maybe other - * circumstances. It is likely this is related to optimizations - * related to forward-pixel kill, as per "Mali Performance 3: - * Is EGL_BUFFER_PRESERVED a good thing?" by Peter Harris + * contains a 'discard' instruction. It is likely this is + * related to optimizations related to forward-pixel kill, as + * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good + * thing?" by Peter Harris */ if (variant->can_discard) { @@ -1127,8 +1201,30 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->fragment_shader_core.midgard1.unknown1 = 0x4200; } - if (ctx->blend->has_blend_shader) - ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader; + /* Check if we're using the default blend descriptor (fast path) */ + + bool no_blending = + !ctx->blend->has_blend_shader && + (ctx->blend->equation.rgb_mode == 0x122) && + (ctx->blend->equation.alpha_mode == 0x122) && + (ctx->blend->equation.color_mask == 0xf); + + if (require_sfbd) { + /* When only a single render target platform is used, the blend + * information is inside the shader meta itself. 
We + * additionally need to signal CAN_DISCARD for nontrivial blend + * modes (so we're able to read back the destination buffer) */ + + if (ctx->blend->has_blend_shader) { + ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader; + } else { + memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation)); + } + + if (!no_blending) { + ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; + } + } size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta); struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); @@ -1136,51 +1232,46 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; -#ifdef T8XX - /* Additional blend descriptor tacked on for newer systems */ + if (!require_sfbd) { + /* Additional blend descriptor tacked on for jobs using MFBD */ - unsigned blend_count = 0; + unsigned blend_count = 0; - if (ctx->blend->has_blend_shader) { - /* For a blend shader, the bottom nibble corresponds to - * the number of work registers used, which signals the - * -existence- of a blend shader */ + if (ctx->blend->has_blend_shader) { + /* For a blend shader, the bottom nibble corresponds to + * the number of work registers used, which signals the + * -existence- of a blend shader */ - assert(ctx->blend->blend_work_count >= 2); - blend_count |= MIN2(ctx->blend->blend_work_count, 3); - } else { - /* Otherwise, the bottom bit simply specifies if - * blending (anything other than REPLACE) is enabled */ + assert(ctx->blend->blend_work_count >= 2); + blend_count |= MIN2(ctx->blend->blend_work_count, 3); + } else { + /* Otherwise, the bottom bit simply specifies if + * blending (anything other than REPLACE) is enabled */ - /* XXX: Less ugly way to do this? 
*/ - bool no_blending = - (ctx->blend->equation.rgb_mode == 0x122) && - (ctx->blend->equation.alpha_mode == 0x122) && - (ctx->blend->equation.color_mask == 0xf); - if (!no_blending) - blend_count |= 0x1; - } + if (!no_blending) + blend_count |= 0x1; + } - /* Second blend equation is always a simple replace */ + /* Second blend equation is always a simple replace */ - uint64_t replace_magic = 0xf0122122; - struct mali_blend_equation replace_mode; - memcpy(&replace_mode, &replace_magic, sizeof(replace_mode)); + uint64_t replace_magic = 0xf0122122; + struct mali_blend_equation replace_mode; + memcpy(&replace_mode, &replace_magic, sizeof(replace_mode)); - struct mali_blend_meta blend_meta[] = { - { - .unk1 = 0x200 | blend_count, - .blend_equation_1 = ctx->blend->equation, - .blend_equation_2 = replace_mode - }, - }; + struct mali_blend_meta blend_meta[] = { + { + .unk1 = 0x200 | blend_count, + .blend_equation_1 = ctx->blend->equation, + .blend_equation_2 = replace_mode + }, + }; - if (ctx->blend->has_blend_shader) - memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader)); + if (ctx->blend->has_blend_shader) + memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader)); - memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta)); -#endif + memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta)); + } } if (ctx->dirty & PAN_DIRTY_VERTEX) { @@ -1231,12 +1322,13 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels; if (!rsrc->bo->is_mipmap) { -#ifdef T6XX - /* HW ERRATA, not needed after T6XX */ - ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; + if (is_t6xx) { + /* HW ERRATA, not needed after t6XX */ + ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; + + ctx->sampler_views[t][i]->hw.unknown3A = 1; + } - 
ctx->sampler_views[t][i]->hw.unknown3A = 1; -#endif ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0; } @@ -1245,9 +1337,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Restore */ ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; -#ifdef T6XX - ctx->sampler_views[t][i]->hw.unknown3A = 0; -#endif + if (is_t6xx) { + ctx->sampler_views[t][i]->hw.unknown3A = 0; + } } mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); @@ -1391,7 +1483,7 @@ panfrost_link_jobs(struct panfrost_context *ctx) for (int i = 0; i < ctx->vertex_job_count; ++i) { bool isLast = (i + 1) == ctx->vertex_job_count; - panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0]: ctx->vertex_jobs[i + 1]); + panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]); } /* T -> T/null */ @@ -1715,11 +1807,7 @@ panfrost_create_rasterizer_state( so->base = *cso; /* Bitmask, unknown meaning of the start value */ -#ifdef T8XX - so->tiler_gl_enables = 0x7; -#else - so->tiler_gl_enables = 0x105; -#endif + so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7; so->tiler_gl_enables |= MALI_FRONT_FACE( cso->front_ccw ? MALI_CCW : MALI_CW); @@ -2198,8 +2286,8 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs; ctx->pipe_framebuffer.samples = fb->samples; ctx->pipe_framebuffer.layers = fb->layers; - ctx->pipe_framebuffer.width = fb->width; - ctx->pipe_framebuffer.height = fb->height; + ctx->pipe_framebuffer.width = ALIGN(fb->width, 16); + ctx->pipe_framebuffer.height = ALIGN(fb->height, 16); for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { struct pipe_surface *cb = i < fb->nr_cbufs ? 
fb->cbufs[i] : NULL; @@ -2218,7 +2306,11 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (!cb) continue; - ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + if (require_sfbd) + ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); + else + ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); + panfrost_attach_vt_framebuffer(ctx); panfrost_new_frag_framebuffer(ctx); panfrost_set_scissor(ctx); @@ -2249,7 +2341,11 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (zb) { /* FBO has depth */ - ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + if (require_sfbd) + ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); + else + ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); + panfrost_attach_vt_framebuffer(ctx); panfrost_new_frag_framebuffer(ctx); panfrost_set_scissor(ctx); @@ -2670,9 +2766,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) /* Prepare for render! */ - /* TODO: XXX */ - ctx->vt_framebuffer = panfrost_emit_fbd(ctx); - panfrost_emit_vertex_payload(ctx); panfrost_emit_tiler_payload(ctx); panfrost_invalidate_frame(ctx); diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 89f821318e1..48cce72a303 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -25,8 +25,6 @@ #ifndef __BUILDER_H__ #define __BUILDER_H__ -#define MFBD - #define _LARGEFILE64_SOURCE 1 #define CACHE_LINE_SIZE 1024 /* TODO */ #include <sys/mman.h> @@ -45,15 +43,6 @@ /* Forward declare to avoid extra header dep */ struct prim_convert_context; -/* TODO: Handle on newer hardware */ -#ifdef MFBD -#define PANFROST_DEFAULT_FBD (MALI_MFBD) -#define PANFROST_FRAMEBUFFER struct bifrost_framebuffer -#else -#define PANFROST_DEFAULT_FBD (MALI_SFBD) -#define PANFROST_FRAMEBUFFER struct mali_single_framebuffer -#endif - #define MAX_DRAW_CALLS 4096 #define MAX_VARYINGS 4096 @@ -140,15 +129,14 @@ struct panfrost_context { * most obvious is the 
fragment framebuffer descriptor, which carries * e.g. clearing information */ -#ifdef SFBD - struct mali_single_framebuffer fragment_fbd; -#else - struct bifrost_framebuffer fragment_fbd; - - struct bifrost_fb_extra fragment_extra; - - struct bifrost_render_target fragment_rts[4]; -#endif + union { + struct mali_single_framebuffer fragment_sfbd; + struct { + struct bifrost_framebuffer fragment_mfbd; + struct bifrost_fb_extra fragment_extra; + struct bifrost_render_target fragment_rts[4]; + }; + }; /* Each draw has corresponding vertex and tiler payloads */ struct midgard_payload_vertex_tiler payload_vertex; @@ -190,7 +178,8 @@ struct panfrost_context { unsigned varying_height; struct mali_viewport *viewport; - PANFROST_FRAMEBUFFER vt_framebuffer; + struct mali_single_framebuffer vt_framebuffer_sfbd; + struct bifrost_framebuffer vt_framebuffer_mfbd; /* TODO: Multiple uniform buffers (index =/= 0), finer updates? */ -- 2.20.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev