Apply some optimizations to speed up bitstream loading
(both for full and split periph/core bitstreams):

 * Change the size of the first filesystem read so that all subsequent
   reads are aligned to a specific value (called MAX_FIRST_LOAD_SIZE).
   This value was chosen so that in subsequent reads the FAT filesystem
   driver doesn't have to allocate a temporary buffer in get_contents()
   (assuming 8 KiB clusters).

 * Increase the buffer size used when reading into DDR
   (but not by too much, because large transfers cause a stack overflow
   in the dwmmc driver).

Signed-off-by: Paweł Anikiel <p...@semihalf.com>
Reviewed-by: Simon Glass <s...@chromium.org>
---
 drivers/fpga/socfpga_arria10.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/fpga/socfpga_arria10.c b/drivers/fpga/socfpga_arria10.c
index 798e3a3f90..07bfe3060e 100644
--- a/drivers/fpga/socfpga_arria10.c
+++ b/drivers/fpga/socfpga_arria10.c
@@ -30,6 +30,14 @@
 #define FPGA_TIMEOUT_MSEC      1000  /* timeout in ms */
 #define FPGA_TIMEOUT_CNT       0x1000000
 #define DEFAULT_DDR_LOAD_ADDRESS       0x400
+#define DDR_BUFFER_SIZE                0x100000
+
+/* When reading bitstream from a filesystem, the size of the first read is
+ * changed so that the subsequent reads are aligned to this value. This value
+ * was chosen so that in subsequent reads the fat fs driver doesn't have to
+ * allocate a temporary buffer in get_contents (assuming 8KiB clusters).
+ */
+#define MAX_FIRST_LOAD_SIZE    0x2000
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -526,7 +534,8 @@ static void get_rbf_image_info(struct rbf_info *rbf, u16 *buffer)
 #ifdef CONFIG_FS_LOADER
 static int first_loading_rbf_to_buffer(struct udevice *dev,
                                struct fpga_loadfs_info *fpga_loadfs,
-                               u32 *buffer, size_t *buffer_bsize)
+                               u32 *buffer, size_t *buffer_bsize,
+                               size_t *buffer_bsize_ori)
 {
        u32 *buffer_p = (u32 *)*buffer;
        u32 *loadable = buffer_p;
@@ -674,6 +683,7 @@ static int first_loading_rbf_to_buffer(struct udevice *dev,
                }
 
                buffer_size = rbf_size;
+               *buffer_bsize_ori = DDR_BUFFER_SIZE;
        }
 
        debug("FPGA: External data: offset = 0x%x, size = 0x%x.\n",
@@ -686,11 +696,16 @@ static int first_loading_rbf_to_buffer(struct udevice *dev,
         * chunk by chunk transfer is required due to smaller buffer size
         * compare to bitstream
         */
+
+       if (buffer_size > MAX_FIRST_LOAD_SIZE)
+               buffer_size = MAX_FIRST_LOAD_SIZE;
+
        if (rbf_size <= buffer_size) {
                /* Loading whole bitstream into buffer */
                buffer_size = rbf_size;
                fpga_loadfs->remaining = 0;
        } else {
+               buffer_size -= rbf_offset % buffer_size;
                fpga_loadfs->remaining -= buffer_size;
        }
 
@@ -806,7 +821,8 @@ int socfpga_loadfs(fpga_fs_info *fpga_fsinfo, const void *buf, size_t bsize,
         * function below.
         */
        ret = first_loading_rbf_to_buffer(dev, &fpga_loadfs, &buffer,
-                                          &buffer_sizebytes);
+                                          &buffer_sizebytes,
+                                          &buffer_sizebytes_ori);
        if (ret == 1) {
                printf("FPGA: Skipping configuration ...\n");
                return 0;
-- 
2.36.1.476.g0c4daa206d-goog

Reply via email to