On Wed, 4 Feb 2026, Chad Jablonski wrote:
Implement flushing the 128-bit HOST_DATA accumulator to VRAM to enable
text rendering in X. Currently supports only the monochrome
foreground/background datatype.
The flush is broken up into two steps. First, the monochrome bits are
expanded to the destination color depth. Then the expanded pixels are
sent to ati_2d_do_blt one scanline at a time; ati_2d_do_blt then clips
and performs the blit.
Signed-off-by: Chad Jablonski <[email protected]>
---
hw/display/ati.c | 4 +--
hw/display/ati_2d.c | 84 +++++++++++++++++++++++++++++++++++++++++++
hw/display/ati_int.h | 3 ++
hw/display/ati_regs.h | 4 +++
4 files changed, 93 insertions(+), 2 deletions(-)
diff --git a/hw/display/ati.c b/hw/display/ati.c
index fd717044b0..514935e510 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -1045,13 +1045,13 @@ static void ati_mm_write(void *opaque, hwaddr addr,
case HOST_DATA7:
s->host_data.acc[s->host_data.next++] = data;
if (s->host_data.next >= 4) {
- qemu_log_mask(LOG_UNIMP, "HOST_DATA flush not yet implemented\n");
+ ati_flush_host_data(s);
s->host_data.next = 0;
}
break;
case HOST_DATA_LAST:
s->host_data.acc[s->host_data.next] = data;
- qemu_log_mask(LOG_UNIMP, "HOST_DATA flush not yet implemented\n");
+ ati_flush_host_data(s);
ati_host_data_reset(&s->host_data);
break;
default:
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 53f00e07ec..fc18ed4384 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -282,6 +282,11 @@ static void ati_2d_do_blt(ATI2DCtx *ctx, uint8_t
use_pixman)
void ati_2d_blt(ATIVGAState *s)
{
ATI2DCtx ctx;
+ uint32_t src_source = s->regs.dp_mix & DP_SRC_SOURCE;
+ if (src_source == DP_SRC_HOST || src_source == DP_SRC_HOST_BYTEALIGN) {
+ /* HOST_DATA sources are handled by ati_flush_host_data() */
+ return;
+ }
setup_2d_blt_ctx(s, &ctx);
if (ctx.src.x > 0x3fff || ctx.src.y > 0x3fff
|| ctx.src_bits >= ctx.vram_end || ctx.src_bits + ctx.src.x
@@ -292,3 +297,82 @@ void ati_2d_blt(ATIVGAState *s)
ati_2d_do_blt(&ctx, s->use_pixman);
ati_set_dirty(&s->vga, &ctx);
}
+
+void ati_flush_host_data(ATIVGAState *s)
+{
+ ATI2DCtx ctx, chunk;
+ uint32_t fg = s->regs.dp_src_frgd_clr;
+ uint32_t bg = s->regs.dp_src_bkgd_clr;
+ unsigned bypp, row, col, idx;
+ uint8_t pix_buf[ATI_HOST_DATA_ACC_BITS * sizeof(uint32_t)];
+ uint32_t byte_pix_order = s->regs.dp_datatype & DP_BYTE_PIX_ORDER;
+ uint32_t src_source = s->regs.dp_mix & DP_SRC_SOURCE;
+ uint32_t src_datatype = s->regs.dp_datatype & DP_SRC_DATATYPE;
+
+ if (src_source != DP_SRC_HOST) {
+ qemu_log_mask(LOG_UNIMP, "host_data_blt: only DP_SRC_HOST
supported\n");
+ return;
+ }
Instead of logging what is supported, it's more useful to log the value
that is unsupported, e.g. "unsupported src_source %x". That value is what
we want to look at when we see this message; what already works is of no
interest any more now that it's implemented.
+ if (src_datatype != SRC_MONO_FRGD_BKGD) {
+ qemu_log_mask(LOG_UNIMP,
+ "host_data_blt: only SRC_MONO_FRGD_BKGD supported\n");
+ return;
+ }
+
+ setup_2d_blt_ctx(s, &ctx);
+
+ if (!ctx.left_to_right || !ctx.top_to_bottom) {
+ qemu_log_mask(LOG_UNIMP, "host_data_blt: only L->R, T->B supported\n");
+ return;
+ }
+
+ bypp = ctx.bpp / 8;
+
+ /* Expand monochrome bits to color pixels */
+ idx = 0;
+ for (int word = 0; word < 4; word++) {
+ for (int byte = 0; byte < 4; byte++) {
+ uint8_t byte_val = s->host_data.acc[word] >> (byte * 8);
+ for (int i = 0; i < 8; i++) {
+ int bit = byte_pix_order ? i : (7 - i);
+ bool is_fg = extract8(byte_val, bit, 1);
I don't like the extract/deposit functions, especially in code that should
be fast, because they have a useless assert that adds overhead. This is
just a single-bit mask shifted at each step, so can you write it as that,
e.g. byte_val & BIT(byte_pix_order ? i : 7 - i)? Or even use two separate
loops depending on byte_pix_order to avoid having the if within the loop:
one starting with mask = 1 and doing mask <<= 1 at each step, the other
starting with mask = 0x80 (bit 7) and doing mask >>= 1
+ uint32_t color = is_fg ? fg : bg;
(although we already have this condition here so separate loops may not
save much).
+ stn_he_p(&pix_buf[idx * bypp], bypp, color);
+ idx += 1;
+ }
+ }
+ }
Supporting other src_datatypes is easy. SRC_COLOR just writes the value
unchanged to the frame buffer, and for SRC_MONO_FRGD we could do the same
as for SRC_MONO_FRGD_BKGD (we do that in hw/display/sm501.c:803). With
that, MorphOS is actually usable, which is better than missing text. For
this, the color expansion code could be moved to a separate function and
called from here when needed.
If you don't want to fix these here, I can send patches on top later, as
this at least works for Xorg.
Please cc the next version to maintainers (Gerd Hoffmann
<[email protected]> and [email protected]) too as they are the
ones who can merge this.
Regards,
BALATON Zoltan
+
+ /* Copy and then modify blit ctx for use in a chunked blit */
+ chunk = ctx;
+ chunk.src_bits = pix_buf;
+ chunk.src.y = 0;
+ chunk.src_stride = ATI_HOST_DATA_ACC_BITS * bypp;
+
+ /* Blit one scanline chunk at a time */
+ row = s->host_data.row;
+ col = s->host_data.col;
+ idx = 0;
+ while (idx < ATI_HOST_DATA_ACC_BITS && row < ctx.dst.height) {
+ unsigned pix_in_scanline = MIN(ATI_HOST_DATA_ACC_BITS - idx,
+ ctx.dst.width - col);
+ chunk.src.x = idx;
+ /* Build a rect for this scanline chunk */
+ chunk.dst.x = ctx.dst.x + col;
+ chunk.dst.y = ctx.dst.y + row;
+ chunk.dst.width = pix_in_scanline;
+ chunk.dst.height = 1;
+ ati_2d_do_blt(&chunk, s->use_pixman);
+ ati_set_dirty(&s->vga, &chunk);
+ idx += pix_in_scanline;
+ col += pix_in_scanline;
+ if (col >= ctx.dst.width) {
+ col = 0;
+ row += 1;
+ }
+ }
+
+ /* Track state of the overall blit for use by the next flush */
+ s->host_data.row = row;
+ s->host_data.col = col;
+}
diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
index ef474e366e..32d1e628d3 100644
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@@ -33,6 +33,8 @@
#define ATI_RAGE128_LINEAR_APER_SIZE (64 * MiB)
#define ATI_R100_LINEAR_APER_SIZE (128 * MiB)
+#define ATI_HOST_DATA_ACC_BITS 128
+
#define TYPE_ATI_VGA "ati-vga"
OBJECT_DECLARE_SIMPLE_TYPE(ATIVGAState, ATI_VGA)
@@ -125,5 +127,6 @@ struct ATIVGAState {
const char *ati_reg_name(int num);
void ati_2d_blt(ATIVGAState *s);
+void ati_flush_host_data(ATIVGAState *s);
#endif /* ATI_INT_H */
diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h
index 48f15e9b1d..9d08d3e956 100644
--- a/hw/display/ati_regs.h
+++ b/hw/display/ati_regs.h
@@ -397,7 +397,11 @@
#define DST_32BPP 0x00000006
#define DP_DST_DATATYPE 0x0000000f
#define DP_BRUSH_DATATYPE 0x00000f00
+#define SRC_MONO_FRGD_BKGD 0x00000000
+#define SRC_MONO_FRGD 0x00010000
+#define SRC_COLOR 0x00020000
#define DP_SRC_DATATYPE 0x00030000
+#define DP_BYTE_PIX_ORDER 0x40000000
#define BRUSH_SOLIDCOLOR 0x00000d00