While drivers use bus mastering modes, PIO is the simplest place to start.
This implements the PM4_FIFO_DATA_EVEN/ODD registers. Writing to these
registers in sequence places packets into the CCE FIFO directly, without
the need for a ring buffer. This enables testing of the CCE packet
processing itself. Ring buffer registers will follow in a future patch.

Type-0 and Type-1 packets write to registers. Type-2 packets are NOPs.
Type-3 packet headers are parsed, but their data is only logged for now.

Hardware testing and poking at the microcode suggest that Type-0/1/2
packets may be implemented in hardware and not in the microcode. Type-3,
however, definitely depends on the microcode.

Signed-off-by: Chad Jablonski <[email protected]>
---
 hw/display/ati.c        |  10 +++
 hw/display/ati_cce.c    | 156 ++++++++++++++++++++++++++++++++++++++++
 hw/display/ati_cce.h    |  56 +++++++++++++++
 hw/display/meson.build  |   2 +-
 hw/display/trace-events |   9 +++
 5 files changed, 232 insertions(+), 1 deletion(-)
 create mode 100644 hw/display/ati_cce.c

diff --git a/hw/display/ati.c b/hw/display/ati.c
index 82450c0331..e7ba202bbd 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -1117,6 +1117,16 @@ void ati_reg_write(ATIVGAState *s, hwaddr addr,
         s->cce.freerun = data & PM4_MICRO_FREERUN;
         break;
     }
+    case PM4_FIFO_DATA_EVEN:
+        /* fall through */
+    case PM4_FIFO_DATA_ODD:
+        /*
+         * Real hardware does seem to behave differently when the even/odd
+         * sequence is not strictly adhered to but it's difficult to determine
+         * exactly what is happening. So for now we treat them the same.
+         */
+        ati_cce_receive_data(s, data);
+        break;
     default:
         break;
     }
diff --git a/hw/display/ati_cce.c b/hw/display/ati_cce.c
new file mode 100644
index 0000000000..62a88a54df
--- /dev/null
+++ b/hw/display/ati_cce.c
@@ -0,0 +1,156 @@
+/*
+ * QEMU ATI SVGA emulation
+ * CCE engine functions
+ *
+ * Copyright (c) 2025 Chad Jablonski
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "ati_regs.h"
+#include "ati_int.h"
+#include "trace.h"
+
+/*
+ * Return how many data dwords the packet described by @p still expects.
+ *
+ * Type-0 and Type-3 packets expect the count taken from their header,
+ * Type-1 packets always expect two dwords and Type-2 packets expect none.
+ * A result of zero means the next dword received is a new packet header.
+ */
+static inline uint32_t
+ati_cce_data_packets_remaining(const ATIPM4PacketState *p)
+{
+    uint32_t expected;
+
+    switch (p->type) {
+    case ATI_CCE_TYPE0:
+        expected = p->t0.count;
+        break;
+    case ATI_CCE_TYPE1:
+        expected = 2;
+        break;
+    case ATI_CCE_TYPE3:
+        expected = p->t3.count;
+        break;
+    case ATI_CCE_TYPE2:
+    default:
+        /* Type-2 has no data; other values cannot occur (2-bit field) */
+        return 0;
+    }
+    return expected - p->dwords_processed;
+}
+
+/*
+ * Decode a packet header dword into @p and restart the data dword counter.
+ *
+ * The packet type is taken from the top two bits of @header. The per-type
+ * fields are stored already converted: register fields become byte offsets
+ * (word offset << 2) and count fields become real counts (field value + 1).
+ * Type-2 packets carry no header state.
+ */
+static void
+ati_cce_parse_packet_header(ATIPM4PacketState *p, uint32_t header)
+{
+    uint32_t field;
+
+    p->dwords_processed = 0;
+    p->type = (header & ATI_CCE_TYPE_MASK) >> ATI_CCE_TYPE_SHIFT;
+
+    switch (p->type) {
+    case ATI_CCE_TYPE0:
+        /* base_reg is stored as a word offset, keep it as a byte offset */
+        field = (header & ATI_CCE_TYPE0_BASE_REG_MASK) >>
+                ATI_CCE_TYPE0_BASE_REG_SHIFT;
+        p->t0.base_reg = field << 2;
+        /* count is stored as n-1, keep the actual count */
+        field = (header & ATI_CCE_TYPE0_COUNT_MASK) >>
+                ATI_CCE_TYPE0_COUNT_SHIFT;
+        p->t0.count = field + 1;
+        p->t0.one_reg_wr = header & ATI_CCE_TYPE0_ONE_REG_WR;
+        trace_ati_cce_packet_type0(p->t0.base_reg, p->t0.count,
+                                   p->t0.one_reg_wr);
+        break;
+    case ATI_CCE_TYPE1:
+        /* reg0 and reg1 are stored as word offsets, keep byte offsets */
+        field = (header & ATI_CCE_TYPE1_REG0_MASK) >>
+                ATI_CCE_TYPE1_REG0_SHIFT;
+        p->t1.reg0 = field << 2;
+        field = (header & ATI_CCE_TYPE1_REG1_MASK) >>
+                ATI_CCE_TYPE1_REG1_SHIFT;
+        p->t1.reg1 = field << 2;
+        trace_ati_cce_packet_type1(p->t1.reg0, p->t1.reg1);
+        break;
+    case ATI_CCE_TYPE2:
+        /* Type-2 is a no-op, there is nothing to decode */
+        trace_ati_cce_packet_type2();
+        break;
+    case ATI_CCE_TYPE3:
+        p->t3.opcode = (header & ATI_CCE_TYPE3_OPCODE_MASK) >>
+                       ATI_CCE_TYPE3_OPCODE_SHIFT;
+        /* count is stored as n-1, keep the actual count */
+        field = (header & ATI_CCE_TYPE3_COUNT_MASK) >>
+                ATI_CCE_TYPE3_COUNT_SHIFT;
+        p->t3.count = field + 1;
+        trace_ati_cce_packet_type3(p->t3.opcode, p->t3.count);
+        break;
+    default:
+        /* This should never happen, type is 2-bits wide */
+        break;
+    }
+}
+
+/*
+ * Handle one data dword of a Type-0 packet by writing it to its target
+ * register.
+ *
+ * The target is base_reg for every dword when one_reg_wr is set, otherwise
+ * base_reg plus four bytes for each dword already processed. The caller
+ * advances dwords_processed after this function returns.
+ */
+static void
+ati_cce_process_type0_data(ATIVGAState *s, uint32_t data)
+{
+    ATIPM4PacketState *p = &s->cce.cur_packet;
+    uint32_t offset = p->t0.one_reg_wr ? 0 :
+                      (p->dwords_processed * sizeof(uint32_t));
+    uint32_t reg = p->t0.base_reg + offset;
+
+    trace_ati_cce_packet_type0_data(p->dwords_processed, reg, data);
+    /*
+     * A write to PM4_FIFO_DATA_EVEN/ODD makes ati_reg_write() call back
+     * into ati_cce_receive_data() before dwords_processed has advanced, so
+     * the same dword would be sent to the same register again without end.
+     * Drop such writes rather than recurse until the host stack overflows.
+     * NOTE(review): what real hardware does in this case is unknown.
+     */
+    if (reg == PM4_FIFO_DATA_EVEN || reg == PM4_FIFO_DATA_ODD) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "ati_cce: Type-0 packet write to PM4 FIFO data "
+                      "register 0x%x ignored\n", reg);
+        return;
+    }
+    ati_reg_write(s, reg, data, sizeof(uint32_t));
+}
+
+/*
+ * Handle one data dword of a Type-1 packet by writing it to its target
+ * register: reg0 for the first dword, reg1 for the second. The caller
+ * advances dwords_processed after this function returns.
+ */
+static void
+ati_cce_process_type1_data(ATIVGAState *s, uint32_t data)
+{
+    ATIPM4PacketState *p = &s->cce.cur_packet;
+    uint32_t reg = p->dwords_processed == 0 ? p->t1.reg0 : p->t1.reg1;
+
+    trace_ati_cce_packet_type1_data(p->dwords_processed, reg, data);
+    /*
+     * A write to PM4_FIFO_DATA_EVEN/ODD makes ati_reg_write() call back
+     * into ati_cce_receive_data() before dwords_processed has advanced, so
+     * the same dword would be sent to the same register again without end.
+     * Drop such writes rather than recurse until the host stack overflows.
+     * NOTE(review): what real hardware does in this case is unknown.
+     */
+    if (reg == PM4_FIFO_DATA_EVEN || reg == PM4_FIFO_DATA_ODD) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "ati_cce: Type-1 packet write to PM4 FIFO data "
+                      "register 0x%x ignored\n", reg);
+        return;
+    }
+    ati_reg_write(s, reg, data, sizeof(uint32_t));
+}
+
+/*
+ * Handle one data dword of a Type-3 packet. Nothing is executed yet: the
+ * dword is only reported through the unimplemented log and a trace event.
+ */
+static void
+ati_cce_process_type3_data(ATIVGAState *s, uint32_t data)
+{
+    const ATIPM4PacketState *pkt = &s->cce.cur_packet;
+
+    qemu_log_mask(LOG_UNIMP, "Type-3 CCE packets not yet implemented\n");
+    trace_ati_cce_packet_type3_data(pkt->dwords_processed, pkt->t3.opcode,
+                                    data);
+}
+
+/*
+ * Route one data dword of the current packet to the handler for its type
+ * and then count the dword as processed.
+ *
+ * Type-2 packets have no data, so for them (and for an out-of-range type)
+ * nothing is done and nothing is counted.
+ */
+static void
+ati_cce_process_packet_data(ATIVGAState *s, uint32_t data)
+{
+    ATIPM4PacketState *pkt = &s->cce.cur_packet;
+
+    if (pkt->type == ATI_CCE_TYPE0) {
+        ati_cce_process_type0_data(s, data);
+    } else if (pkt->type == ATI_CCE_TYPE1) {
+        ati_cce_process_type1_data(s, data);
+    } else if (pkt->type == ATI_CCE_TYPE3) {
+        ati_cce_process_type3_data(s, data);
+    } else {
+        /*
+         * Type-2 packets have no data, we should never end up here. Any
+         * other value should never happen, type is 2-bits wide.
+         */
+        return;
+    }
+    pkt->dwords_processed += 1;
+}
+
+/*
+ * Feed one dword into the CCE packet processor.
+ *
+ * While the current packet still expects data the dword is handled as
+ * packet data; otherwise it is decoded as the header of a new packet.
+ */
+void
+ati_cce_receive_data(ATIVGAState *s, uint32_t data)
+{
+    ATIPM4PacketState *pkt = &s->cce.cur_packet;
+
+    if (ati_cce_data_packets_remaining(pkt) != 0) {
+        /* Still inside a packet: this dword is data */
+        ati_cce_process_packet_data(s, data);
+    } else {
+        /* We're ready to start processing a new packet header */
+        ati_cce_parse_packet_header(pkt, data);
+    }
+}
diff --git a/hw/display/ati_cce.h b/hw/display/ati_cce.h
index a6a9aa87c4..b6ad21f47e 100644
--- a/hw/display/ati_cce.h
+++ b/hw/display/ati_cce.h
@@ -13,6 +13,60 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 
+typedef struct ATIVGAState ATIVGAState;
+
+#define ATI_CCE_TYPE_MASK            0xc0000000 /* header bits 31:30 */
+#define ATI_CCE_TYPE_SHIFT           30
+
+#define ATI_CCE_TYPE0                0
+#define ATI_CCE_TYPE0_BASE_REG_MASK  0x00007fff /* register word offset */
+#define ATI_CCE_TYPE0_BASE_REG_SHIFT 0
+#define ATI_CCE_TYPE0_ONE_REG_WR     0x00008000 /* all data goes to base_reg */
+#define ATI_CCE_TYPE0_COUNT_MASK     0x3fff0000 /* data dword count minus 1 */
+#define ATI_CCE_TYPE0_COUNT_SHIFT    16
+
+#define ATI_CCE_TYPE1                1
+#define ATI_CCE_TYPE1_REG0_MASK      0x000007ff /* first register word offset */
+#define ATI_CCE_TYPE1_REG0_SHIFT     0
+#define ATI_CCE_TYPE1_REG1_MASK      0x003ff800 /* second register word offset */
+#define ATI_CCE_TYPE1_REG1_SHIFT     11
+
+#define ATI_CCE_TYPE2                2 /* no-op packet, header only */
+
+#define ATI_CCE_TYPE3                3
+#define ATI_CCE_TYPE3_OPCODE_MASK    0x0000ff00
+#define ATI_CCE_TYPE3_OPCODE_SHIFT   8
+#define ATI_CCE_TYPE3_COUNT_MASK     0x3fff0000 /* data dword count minus 1 */
+#define ATI_CCE_TYPE3_COUNT_SHIFT    16
+
+typedef struct ATIPM4Type0Header { /* decoded Type-0 packet header */
+    uint32_t base_reg; /* first target register, as a byte offset */
+    uint16_t count; /* number of data dwords (header field + 1) */
+    bool one_reg_wr; /* when set, every data dword is written to base_reg */
+} ATIPM4Type0Header;
+
+typedef struct ATIPM4Type1Header { /* decoded Type-1 packet header */
+    uint32_t reg0; /* byte offset of the register for the first data dword */
+    uint32_t reg1; /* byte offset of the register for the second data dword */
+} ATIPM4Type1Header;
+
+/* Type-2 headers are a no-op and have no state */
+
+typedef struct ATIPM4Type3Header { /* decoded Type-3 packet header */
+    uint8_t opcode; /* header bits 15:8 */
+    uint16_t count; /* number of data dwords (header field + 1) */
+} ATIPM4Type3Header;
+
+typedef struct ATIPM4PacketState { /* packet currently being received */
+    uint8_t type; /* one of ATI_CCE_TYPE0..ATI_CCE_TYPE3 */
+    uint16_t dwords_processed; /* data dwords consumed since the header */
+    union { /* decoded header fields; the valid member depends on type */
+        ATIPM4Type0Header t0;
+        ATIPM4Type1Header t1;
+        ATIPM4Type3Header t3;
+    };
+} ATIPM4PacketState;
+
 typedef struct ATIPM4MicrocodeState {
     uint8_t addr;
     uint8_t raddr;
@@ -23,10 +77,12 @@ typedef struct ATICCEState {
     ATIPM4MicrocodeState microcode;
     /* MicroCntl */
     bool freerun;
+    ATIPM4PacketState cur_packet;
     /* BufferCntl */
     uint32_t buffer_size_l2qw;
     bool no_update;
     uint8_t buffer_mode;
 } ATICCEState;
 
+void ati_cce_receive_data(ATIVGAState *s, uint32_t data);
 #endif /* ATI_CCE_H */
diff --git a/hw/display/meson.build b/hw/display/meson.build
index 90e6c041bd..136d014746 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -59,7 +59,7 @@ system_ss.add(when: 'CONFIG_XLNX_DISPLAYPORT', if_true: 
files('xlnx_dp.c'))
 
 system_ss.add(when: 'CONFIG_ARTIST', if_true: files('artist.c'))
 
-system_ss.add(when: 'CONFIG_ATI_VGA', if_true: [files('ati.c', 'ati_2d.c', 
'ati_dbg.c'), pixman])
+system_ss.add(when: 'CONFIG_ATI_VGA', if_true: [files('ati.c', 'ati_2d.c', 
'ati_dbg.c', 'ati_cce.c'), pixman])
 
 system_ss.add(when: [pvg, 'CONFIG_MAC_PVG_PCI'],     if_true: 
[files('apple-gfx.m', 'apple-gfx-pci.m')])
 system_ss.add(when: [pvg, 'CONFIG_MAC_PVG_MMIO'],    if_true: 
[files('apple-gfx.m', 'apple-gfx-mmio.m')])
diff --git a/hw/display/trace-events b/hw/display/trace-events
index e323a82cff..d3c7ca1467 100644
--- a/hw/display/trace-events
+++ b/hw/display/trace-events
@@ -147,6 +147,15 @@ sii9022_switch_mode(const char *mode) "mode: %s"
 ati_mm_read(unsigned int size, uint64_t addr, const char *name, uint64_t val) 
"%u 0x%"PRIx64 " %s -> 0x%"PRIx64
 ati_mm_write(unsigned int size, uint64_t addr, const char *name, uint64_t val) 
"%u 0x%"PRIx64 " %s <- 0x%"PRIx64
 
+# ati_cce.c
+ati_cce_packet_type0(uint32_t base_reg, uint32_t count, bool one_reg_wr) 
"base_reg=0x%x count=%u one_reg_wr=%u"
+ati_cce_packet_type0_data(uint32_t data_idx, uint32_t reg, uint32_t data) 
"data_idx=%u reg=0x%x data=0x%x"
+ati_cce_packet_type1(uint32_t reg0, uint32_t reg1) "reg0=0x%x reg1=0x%x"
+ati_cce_packet_type1_data(uint32_t data_idx, uint32_t reg, uint32_t data) 
"data_idx=%u reg=0x%x data=0x%x"
+ati_cce_packet_type2(void) ""
+ati_cce_packet_type3(uint8_t opcode, uint32_t count) "opcode=0x%x count=%u"
+ati_cce_packet_type3_data(uint32_t data_idx, uint8_t opcode, uint32_t data) 
"data_idx=%u opcode=0x%x data=0x%x"
+
 # artist.c
 artist_reg_read(unsigned int size, uint64_t addr, const char *name, uint64_t 
val) "%u 0x%"PRIx64 "%s -> 0x%08"PRIx64
 artist_reg_write(unsigned int size, uint64_t addr, const char *name, uint64_t 
val) "%u 0x%"PRIx64 "%s <- 0x%08"PRIx64
-- 
2.51.2


Reply via email to