On Wed, 1 Dec 2021 at 15:40, Francisco Iglesias <francisco.igles...@xilinx.com> wrote: > > Add a model of Xilinx Versal's OSPI flash memory controller. > > Signed-off-by: Francisco Iglesias <francisco.igles...@xilinx.com>
> +#define SZ_512MBIT (512 * 1024 * 1024) > +#define SZ_1GBIT (1024 * 1024 * 1024) > +#define SZ_2GBIT (2ULL * SZ_1GBIT) > +#define SZ_4GBIT (4ULL * SZ_1GBIT) > + > +#define IS_IND_DMA_START(op) (op->done_bytes == 0) > +/* > + * Bit field size of R_INDIRECT_WRITE_XFER_CTRL_REG_NUM_IND_OPS_DONE_FLD > + * is 2 bits, which can record max of 3 indac operations. > + */ > +#define IND_OPS_DONE_MAX 3 > + > +typedef enum { > + WREN = 0x6, > +} FlashCMD; > + > +/* Type to avoid cpu endian byte swaps */ > +typedef union { > + uint64_t u64; > + uint8_t u8[8]; > +} OSPIRdData; Don't hand-roll this kind of thing, please. (I'll suggest code below in the places where you use this union.) > +static unsigned int single_cs(XlnxVersalOspi *s) > +{ > + unsigned int field = ARRAY_FIELD_EX32(s->regs, > + CONFIG_REG, PERIPH_CS_LINES_FLD); > + int i; > + > + /* > + * 4'bXXX0 -> 4'b1110 > + * 4'bXX0X -> 4'b1101 > + * 4'bX0XX -> 4'b1011 This chart is ambiguous, because 0b1100 matches both the first two lines, for instance. The code implements 0bXXX0 -> 0b1110 0bXX01 -> 0b1101 0bX011 -> 0b1011 etc > + * 4'b0XXX -> 4'b0111 > + * 4'b1111 -> 4'b1111 > + */ > + for (i = 0; i < 4; i++) { > + if ((field & (1 << i)) == 0) { > + return (~(1 << i)) & 0xF; > + } > + } > + return 0; The comment says that if the input is 0b1111 then the output is also 0b1111, but unless I'm misreading the code we will return 0 in that case. Which is correct ? Assuming you want 0b1111 input to give 0b1111 output, you can do this without the loop, like this: return (field | ~(field + 1)) & 0xf; (which uses a variant on the "isolate the rightmost 0-bit" trick from here: https://emre.me/computer-science/bit-manipulation-tricks/ ) If you do it that way do add a comment, because it's a bit obscure :-) > +static void ospi_rx_fifo_pop_stig_rd_data(XlnxVersalOspi *s) > +{ > + int size = ospi_stig_rd_data_len(s); > + OSPIRdData res = {}; > + int i; > + > + size = MIN(fifo8_num_used(&s->rx_fifo), size); I think I would assert(size <= 8) here; given the size of the data field that ospi_stig_rd_data_len() reads from it must be true, but it's not immediately obvious that this function won't overrun the array so the assert() helps readers. > + for (i = 0; i < size; i++) { > + res.u8[i] = fifo8_pop(&s->rx_fifo); > + } > + > + s->regs[R_FLASH_RD_DATA_LOWER_REG] = res.u64 & 0xFFFFFFFF; > + s->regs[R_FLASH_RD_DATA_UPPER_REG] = (res.u64 >> 32) & 0xFFFFFFFF; This will give different values for these registers depending on whether the host is big-endian or little-endian, because if the bytes come out of the FIFO in the order 00 11 22 33 44 55 66 77 then on a BE host res.u64 is 0x0011223344556677 and so LOWER_REG is 0x44556677, whereas on a LE host res.u64 is 0x7766554433221100 and LOWER_REG is 0x33221100. Instead of the union: uint8_t bytes[8] = {}; // pop into the bytes array s->regs[LOWER_REG] = ldl_le_p(bytes); s->regs[UPPER_REG] = ldl_le_p(bytes + 4); I assume here that the desired behaviour is that if the bytes come out of the FIFO in the order 00 11 22 33 44 55 66 77 then LOWER_REG reads 0x33221100 and UPPER_REG 0x77665544. > + /* Write out tx_fifo in maximum page sz chunks */ > + while (!ospi_ind_op_completed(op) && fifo8_num_used(&s->tx_sram) > 0) { > + next_b = ind_op_next_byte(op); > + end_b = next_b + MIN(fifo8_num_used(&s->tx_sram), pagesz); > + > + /* Dont cross page boundery */ "boundary" > +static void ospi_stig_fill_membank(XlnxVersalOspi *s) > +{ > + int num_rd_bytes = ospi_stig_membank_rd_bytes(s); > + int idx = num_rd_bytes - 8; /* first of last 8 */ > + uint32_t lower = 0; > + uint32_t upper = 0; > + int i; > + > + for (i = 0; i < num_rd_bytes; i++) { > + s->stig_membank[i] = fifo8_pop(&s->rx_fifo); > + } > + > + /* Fill in lower upper regs */ > + for (i = 0; i < 4; i++) { > + lower |= ((uint32_t)s->stig_membank[idx++]) << 8 * i; > + } > + > + for (i = 0; i < 4; i++) { > + upper |= ((uint32_t)s->stig_membank[idx++]) << 8 * i; > + } You can replace these loops with lower = ldl_le_p(s->stig_membank[idx]); upper = ldl_le_p(s->stig_membank[idx + 4]); idx += 8; > + s->regs[R_FLASH_RD_DATA_LOWER_REG] = lower; > + s->regs[R_FLASH_RD_DATA_UPPER_REG] = upper; > +} > + > +static uint64_t ospi_rx_sram_read(XlnxVersalOspi *s, unsigned int size) > +{ > + OSPIRdData ret = {}; > + int i; > + > + if (size < 4 && fifo8_num_used(&s->rx_sram) >= 4) { > + qemu_log_mask(LOG_GUEST_ERROR, > + "OSPI only last read of internal " > + "sram is allowed to be < 32 bits\n"); > + } > + > + size = MIN(fifo8_num_used(&s->rx_sram), size); > + for (i = 0; i < size; i++) { > + ret.u8[i] = fifo8_pop(&s->rx_sram); > + } > + return ret.u64; Similarly to ospi_rx_fifo_pop_stig_rd_data(), you want to read into a local uint8_t bytes[8] (and assert about size), but here because we want a uint64_t rather than two uint32_t we can return ldq_le_p(bytes); > +} > + > +static void ospi_tx_sram_write(XlnxVersalOspi *s, uint64_t value, > + unsigned int size) > +{ > + int i; > + for (i = 0; i < size; i++) { > + fifo8_push(&s->tx_sram, value >> 8 * i); > + } > +} > + > +static uint64_t ospi_do_dac_read(void *opaque, hwaddr addr, unsigned int > size) > +{ > + XlnxVersalOspi *s = XILINX_VERSAL_OSPI(opaque); > + OSPIRdData ret = {}; > + int i; > + > + /* Create first section of read cmd */ > + ospi_tx_fifo_push_rd_op_addr(s, (uint32_t) addr); > + > + /* Enable cs and transmit first part */ > + ospi_dac_cs(s, addr); > + ospi_flush_txfifo(s); > + > + fifo8_reset(&s->rx_fifo); > + > + /* transmit second part (data) */ > + for (i = 0; i < size; ++i) { > + fifo8_push(&s->tx_fifo, 0); > + } > + ospi_flush_txfifo(s); > + > + /* fill in result */ > + size = MIN(fifo8_num_used(&s->rx_fifo), size); > + > + for (i = 0; i < size; i++) { > + ret.u8[i] = fifo8_pop(&s->rx_fifo); > + } > + > + /* done */ > + ospi_disable_cs(s); > + > + return ret.u64; Same as ospi_rx_sram_read(). > +} > +/* Return dev-obj from reg-region created by register_init_block32 */ > +static XlnxVersalOspi *xilinx_ospi_of_mr(void *mr_accessor) > +{ > + RegisterInfoArray *reg_array = mr_accessor; > + Object *dev; > + > + assert(reg_array != NULL); You don't really need to assert() this kind of thing -- if it is NULL then the code is going to crash immediately on the following line anyway. assert()s are most useful for turning "weird misbehaviour" and "something goes wrong a long way away from the point where we could have detected it" bugs into "crash where it's clear what the problem is" bugs. > + > + dev = reg_array->mem.owner; > + assert(dev); > + > + return XILINX_VERSAL_OSPI(dev); > +} This is another device that would benefit from a "QEMU interface" comment describing the properties/gpios/etc. > +#ifndef XILINX_VERSAL_OSPI_H > +#define XILINX_VERSAL_OSPI_H > + > +#include "hw/register.h" > +#include "hw/ssi/ssi.h" > +#include "qemu/fifo32.h" > +#include "hw/dma/dma-ctrl-if.h" > + > +#define TYPE_XILINX_VERSAL_OSPI "xlnx.versal-ospi" > + > +#define XILINX_VERSAL_OSPI(obj) \ > + OBJECT_CHECK(XlnxVersalOspi, (obj), TYPE_XILINX_VERSAL_OSPI) Same comment about macros as the other device. > + > +#define XILINX_VERSAL_OSPI_R_MAX (0xfc / 4 + 1) thanks -- PMM