On Fri, 27 Feb 2026, Chad Jablonski wrote:
Writing to any of the HOST_DATA0-7 registers pushes the written data
into a 128-bit accumulator. When the accumulator is full a flush is
So this is no longer true as you increased the accumulator to 256? Then
the commit message needs to be updated too. I wonder if Mach64 had 64 bit
ops which was then increased to 128 in Rage128. Then is it further
increased in Rage128Pro and Radeon (in which case can it increase further
in later chips?), or we have double the bits so host data writes can keep
writing values to the other half while the 2D engine uses the already
written half. Does it matter if we don't emulate a concurent 2D engine
yet? If not maybe we can keep only 128 bits for now to simplify this and
only implement it when we make 2D engine run concurrently in the future?
triggered to copy it to the framebuffer. A final write to HOST_DATA_LAST
will also initiate a flush. The flush itself is left for the next patch.
Unaligned HOST_DATA* writes result in, from what I can tell, undefined
behavior on real hardware. A well-behaved driver shouldn't be doing this
anyway. For that reason they are not handled here at all.
Signed-off-by: Chad Jablonski <[email protected]>
Reviewed-by: BALATON Zoltan <[email protected]>
You should only keep R-b tags if the patch was not changed or only
trivially changed otherwise it needs to be reviewed again so you should
drop previous R-b tag.
---
hw/display/ati.c | 32 ++++++++++++++++++++++++++++++++
hw/display/ati_dbg.c | 9 +++++++++
hw/display/ati_int.h | 11 +++++++++++
hw/display/ati_regs.h | 9 +++++++++
4 files changed, 61 insertions(+)
diff --git a/hw/display/ati.c b/hw/display/ati.c
index 6cf243bcf9..3757b8426e 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -1024,6 +1024,33 @@ static void ati_mm_write(void *opaque, hwaddr addr,
case SRC_SC_BOTTOM:
s->regs.src_sc_bottom = data & 0x3fff;
break;
+ case HOST_DATA0:
+ case HOST_DATA1:
+ case HOST_DATA2:
+ case HOST_DATA3:
+ case HOST_DATA4:
+ case HOST_DATA5:
+ case HOST_DATA6:
+ case HOST_DATA7:
+ if (!s->host_data.active) {
+ break;
+ }
+ s->host_data.acc[s->host_data.next] = data;
+ if (s->host_data.next % ATI_HOST_DATA_FLUSH_WORDS ==
+ ATI_HOST_DATA_FLUSH_WORDS - 1) {
+ qemu_log_mask(LOG_UNIMP, "HOST_DATA flush not yet implemented\n");
+ }
+ s->host_data.next = (s->host_data.next + 1) %
+ ARRAY_SIZE(s->host_data.acc);
Indents are off here and in the previous if above.
+ break;
+ case HOST_DATA_LAST:
+ if (!s->host_data.active) {
+ break;
+ }
+ s->host_data.acc[s->host_data.next] = data;
+ qemu_log_mask(LOG_UNIMP,
+ "HOST_DATA finish flush not yet implemented\n");
+ break;
default:
break;
}
@@ -1129,6 +1156,11 @@ static void ati_vga_reset(DeviceState *dev)
/* reset vga */
vga_common_reset(&s->vga);
s->mode = VGA_MODE;
+
+ s->host_data.active = false;
+ s->host_data.next = 0;
+ s->host_data.row = 0;
+ s->host_data.col = 0;
}
static void ati_vga_exit(PCIDevice *dev)
diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c
index 3ffa7f35df..5c799d540a 100644
--- a/hw/display/ati_dbg.c
+++ b/hw/display/ati_dbg.c
@@ -252,6 +252,15 @@ static struct ati_regdesc ati_reg_names[] = {
{"MC_SRC1_CNTL", 0x19D8},
{"TEX_CNTL", 0x1800},
{"RAGE128_MPP_TB_CONFIG", 0x01c0},
+ {"HOST_DATA0", 0x17c0},
+ {"HOST_DATA1", 0x17c4},
+ {"HOST_DATA2", 0x17c8},
+ {"HOST_DATA3", 0x17cc},
+ {"HOST_DATA4", 0x17d0},
+ {"HOST_DATA5", 0x17d4},
+ {"HOST_DATA6", 0x17d8},
+ {"HOST_DATA7", 0x17dc},
+ {"HOST_DATA_LAST", 0x17e0},
{NULL, -1}
};
diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
index 98f57ca5fa..b285b6eba5 100644
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@@ -32,6 +32,8 @@
#define ATI_RAGE128_LINEAR_APER_SIZE (64 * MiB)
#define ATI_R100_LINEAR_APER_SIZE (128 * MiB)
+#define ATI_HOST_DATA_FLUSH_BITS 128
+#define ATI_HOST_DATA_FLUSH_WORDS (ATI_HOST_DATA_FLUSH_BITS / 32)
I think these may not be the best constants as you still do a lot of
calculation with them so maybe it could be simplified but I don't know
how. If there's no visible difference in output with normal usage only in
corned cases that only happened during testing then maybe it's too early
to try to implement this and we could go back to the previous 128 bit
accumulator unless you found some real world usage where this would
matter. Otherwise I think this would only matter with a concurent 2D
engine where host data writes could continue in the other half of the 256
bit register while the 2D engine does the operation on the already written
half. So if we get the same graphical result with only storing 128 bits
for now we could do that and rething this when we can run the 2D engine
concurrently.
Regards,
BALATON Zoltan
#define TYPE_ATI_VGA "ati-vga"
OBJECT_DECLARE_SIMPLE_TYPE(ATIVGAState, ATI_VGA)
@@ -95,6 +97,14 @@ typedef struct ATIVGARegs {
uint32_t default_tile;
} ATIVGARegs;
+typedef struct ATIHostDataState {
+ bool active;
+ uint32_t row;
+ uint32_t col;
+ uint32_t next;
+ uint32_t acc[ATI_HOST_DATA_FLUSH_WORDS * 2];
+} ATIHostDataState;
+
struct ATIVGAState {
PCIDevice dev;
VGACommonState vga;
@@ -112,6 +122,7 @@ struct ATIVGAState {
MemoryRegion io;
MemoryRegion mm;
ATIVGARegs regs;
+ ATIHostDataState host_data;
};
const char *ati_reg_name(int num);
diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h
index 3999edb9b7..48f15e9b1d 100644
--- a/hw/display/ati_regs.h
+++ b/hw/display/ati_regs.h
@@ -252,6 +252,15 @@
#define DP_T12_CNTL 0x178c
#define DST_BRES_T1_LNTH 0x1790
#define DST_BRES_T2_LNTH 0x1794
+#define HOST_DATA0 0x17c0
+#define HOST_DATA1 0x17c4
+#define HOST_DATA2 0x17c8
+#define HOST_DATA3 0x17cc
+#define HOST_DATA4 0x17d0
+#define HOST_DATA5 0x17d4
+#define HOST_DATA6 0x17d8
+#define HOST_DATA7 0x17dc
+#define HOST_DATA_LAST 0x17e0
#define SCALE_SRC_HEIGHT_WIDTH 0x1994
#define SCALE_OFFSET_0 0x1998
#define SCALE_PITCH 0x199c