On Mon, Sep 08, 2025 at 10:56:00AM +0530, Ekansh Gupta wrote:
> 
> 
> On 9/2/2025 2:51 PM, Dmitry Baryshkov wrote:
> > On Mon, Sep 01, 2025 at 11:03:36AM +0530, Ekansh Gupta wrote:
> >> For any remote call to DSP, after sending an invocation message,
> >> fastRPC driver waits for glink response and during this time the
> >> CPU can go into low power modes. This adds latency to overall fastrpc
> >> call as CPU wakeup and scheduling latencies are included.  Adding a
> > s/Adding/Add/, see Documentation/process/submitting-patches.rst
> Ack.
> >
> >> polling mode support with which fastRPC driver will poll continuously
> >> on a memory after sending a message to remote subsystem which will
> >> eliminate CPU wakeup and scheduling latencies and reduce fastRPC
> >> overhead.
> > Describe your design decisions: when it is enabled, why, etc.
> Yes, also planning to enable it from userspace in v2 due to power consumption
> concerns.
> >
> >> Signed-off-by: Ekansh Gupta <ekansh.gu...@oss.qualcomm.com>
> >> ---
> >>  drivers/misc/fastrpc.c | 121 ++++++++++++++++++++++++++++++++++++++---
> >>  1 file changed, 114 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> >> index 57e118de6e4a..939a3e3d29e2 100644
> >> --- a/drivers/misc/fastrpc.c
> >> +++ b/drivers/misc/fastrpc.c
> >> @@ -22,6 +22,8 @@
> >>  #include <linux/firmware/qcom/qcom_scm.h>
> >>  #include <uapi/misc/fastrpc.h>
> >>  #include <linux/of_reserved_mem.h>
> >> +#include <linux/compiler.h>
> >> +#include <linux/iopoll.h>
> >>  
> >>  #define ADSP_DOMAIN_ID (0)
> >>  #define MDSP_DOMAIN_ID (1)
> >> @@ -37,6 +39,7 @@
> >>  #define FASTRPC_CTX_MAX (256)
> >>  #define FASTRPC_INIT_HANDLE       1
> >>  #define FASTRPC_DSP_UTILITIES_HANDLE      2
> >> +#define FASTRPC_MAX_STATIC_HANDLE (20)
> > What is this?
> Static handles in FastRPC refer to handles that are statically defined and
> associated with modules in the DSP image at build time, rather than being
> dynamically created or loaded at runtime. These are typically used for
> system-level services or core module.

Is this a constant that has always had this value and will never
change in the future?

> 
> Defined this to limit the polling mode only for user calls.

This needs to be explained somewhere.

> 
> >
> >>  #define FASTRPC_CTXID_MASK (0xFF00)
> >>  #define INIT_FILELEN_MAX (2 * 1024 * 1024)
> >>  #define INIT_FILE_NAMELEN_MAX (128)
> >> @@ -105,6 +108,20 @@
> >>  
> >>  #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
> >>  
> >> +/* Poll response number from remote processor for call completion */
> >> +#define FASTRPC_POLL_RESPONSE (0xdecaf)
> >> +
> >> +/* Polling mode timeout limit */
> >> +#define FASTRPC_POLL_MAX_TIMEOUT_US (10000)
> >> +
> >> +/* Response types supported for RPC calls */
> >> +enum fastrpc_response_flags {
> >> +  /* normal job completion glink response */
> >> +  NORMAL_RESPONSE = 0,
> >> +  /* process updates poll memory instead of glink response */
> >> +  POLL_MODE = 1,
> >> +};
> > bool is_polled;
> >
> > OR
> >
> > unsigned long is_polled : 1;
> >
> >> +
> >>  struct fastrpc_phy_page {
> >>    u64 addr;               /* physical address */
> >>    u64 size;               /* size of contiguous region */
> >> @@ -235,8 +252,14 @@ struct fastrpc_invoke_ctx {
> >>    u32 sc;
> >>    u64 *fdlist;
> >>    u32 *crc;
> >> +  /* Poll memory that DSP updates */
> >> +  u32 *poll;
> >>    u64 ctxid;
> >>    u64 msg_sz;
> >> +  /* work done status flag */
> >> +  bool is_work_done;
> >> +  /* response flags from remote processor */
> >> +  enum fastrpc_response_flags rsp_flags;
> >>    struct kref refcount;
> >>    struct list_head node; /* list of ctxs */
> >>    struct completion work;
> >> @@ -891,7 +914,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
> >>            sizeof(struct fastrpc_invoke_buf) +
> >>            sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
> >>            sizeof(u64) * FASTRPC_MAX_FDLIST +
> >> -          sizeof(u32) * FASTRPC_MAX_CRCLIST;
> >> +          sizeof(u32) * FASTRPC_MAX_CRCLIST +
> >> +          sizeof(u32);
> >>  
> >>    return size;
> >>  }
> >> @@ -987,6 +1011,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> >>    list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
> >>    pages = fastrpc_phy_page_start(list, ctx->nscalars);
> >>    ctx->fdlist = (u64 *)(pages + ctx->nscalars);
> >> +  ctx->crc = (u32 *)(ctx->fdlist + FASTRPC_MAX_FDLIST);
> > Why?
> DSP considers the poll memory to be at the end of the metadata buffer. The
> contents of the metadata are laid out in the order in which they are added
> in fastrpc_get_meta_size.

That's a different "why". We don't support CRC, so why are you adding it here?

> >
> >> +  ctx->poll = (u32 *)(ctx->crc + FASTRPC_MAX_CRCLIST);
> >>    args = (uintptr_t)ctx->buf->virt + metalen;
> >>    rlen = pkt_size - metalen;
> >>    ctx->rpra = rpra;
> >> @@ -1155,6 +1181,83 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
> >>  
> >>  }
> >>  
> >> +static inline u32 fastrpc_poll_op(void *p)
> >> +{
> >> +  struct fastrpc_invoke_ctx *ctx = p;
> >> +
> >> +  dma_rmb();
> >> +  return READ_ONCE(*ctx->poll);
> > Is this enough? Is the write by the DSP side going to invalidate the
> > cache for this memory location? Think about older platforms which
> > usually don't have dma-coherent property in the DSP / FastRPC nodes.
> Yes, DSP will take care of invalidating the cache after writing to this memory.

Will DSP invalidate the cache on the CPU side? On non-dma-coherent
platforms?

> >
> >> +}
> >> +
> >> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx)
> >> +{
> >> +  u32 val;
> >> +  int ret;
> >> +
> >> +  /*
> >> +   * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll
> >> +   * or until another path marks the work done.
> >> +   */
> >> +  ret = read_poll_timeout_atomic(fastrpc_poll_op, val,
> >> +                                 (val == FASTRPC_POLL_RESPONSE) ||
> >> +                                 ctx->is_work_done, 1,
> >> +                                 FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx);
> >> +
> >> +  if (!ret && val == FASTRPC_POLL_RESPONSE) {
> >> +          ctx->is_work_done = true;
> >> +          ctx->retval = 0;
> >> +  }
> >> +
> >> +  if (ret == -ETIMEDOUT)
> >> +          ret = -EIO;
> >> +
> >> +  return ret;
> >> +}
> >> +
> >> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
> >> +                                      u32 kernel)
> >> +{
> >> +  int err = 0;
> >> +
> >> +  if (kernel) {
> >> +          if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> >> +                  err = -ETIMEDOUT;
> >> +  } else {
> >> +          err = wait_for_completion_interruptible(&ctx->work);
> >> +  }
> >> +
> >> +  return err;
> >> +}
> >> +
> >> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
> >> +                                 u32 kernel)
> >> +{
> >> +  int err;
> >> +
> >> +  do {
> >> +          switch (ctx->rsp_flags) {
> >> +          case NORMAL_RESPONSE:
> >> +                  err = fastrpc_wait_for_response(ctx, kernel);
> >> +                  if (err || ctx->is_work_done)
> >> +                          return err;
> >> +                  break;
> >> +          case POLL_MODE:
> >> +                  err = poll_for_remote_response(ctx);
> >> +                  /* If polling timed out, move to normal response mode */
> >> +                  if (err)
> >> +                          ctx->rsp_flags = NORMAL_RESPONSE;
> >> +                  break;
> >> +          default:
> > What kind of response type can it be? Had you checked for the flag
> > being set, you wouldn't have the false possibility of another
> > response type.
> Sorry, I couldn't exactly understand your point here. Are you suggesting that,
> as rsp_flags is set by the driver itself, there isn't a possibility of
> having any unsupported response type?

Yes.

> >
> >> +                  err = -EBADR;
> >> +                  dev_dbg(ctx->fl->sctx->dev,
> >> +                          "unsupported response type:0x%x\n", ctx->rsp_flags);
> >> +                  break;
> >> +          }
> >> +  } while (!ctx->is_work_done);
> >> +
> >> +  return err;
> >> +}
> >> +
> >>  static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
> >>                               u32 handle, u32 sc,
> >>                               struct fastrpc_invoke_args *args)
> >> @@ -1190,16 +1293,19 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
> >>    if (err)
> >>            goto bail;
> >>  
> >> -  if (kernel) {
> >> -          if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> >> -                  err = -ETIMEDOUT;
> >> -  } else {
> >> -          err = wait_for_completion_interruptible(&ctx->work);
> >> -  }
> >> +  if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD)
> >> +          ctx->rsp_flags = POLL_MODE;
> > This definitely needs to be explained.
> Ack.
> 
> Thanks for the review.
> 
> //Ekansh
> >
> >>  
> >> +  err = fastrpc_wait_for_completion(ctx, kernel);
> >>    if (err)
> >>            goto bail;
> >>  
> >> +  if (!ctx->is_work_done) {
> >> +          err = -ETIMEDOUT;
> >> +          dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n",
> >> +                  handle, sc);
> >> +          goto bail;
> >> +  }
> >>    /* make sure that all memory writes by DSP are seen by CPU */
> >>    dma_rmb();
> >>    /* populate all the output buffers with results */
> >> @@ -2462,6 +2568,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
> >>  
> >>    ctx->retval = rsp->retval;
> >>    complete(&ctx->work);
> >> +  ctx->is_work_done = true;
> >>  
> >>    /*
> >>     * The DMA buffer associated with the context cannot be freed in
> >> -- 
> >> 2.34.1
> >>
> 

-- 
With best wishes
Dmitry

Reply via email to