On Tue, Nov 20, 2018 at 05:07:31PM +0530, Sharat Masetty wrote:
> When the userspace tries to read the crashstate dump, the read side
> implementation in the driver currently ascii85 encodes all the binary
> buffers and it does this each time the read system call is called.
> A userspace tool like cat typically does a page by page read and the
> number of read calls depends on the size of the data captured by the
> driver. This is certainly not desirable and does not scale well with
> large captures.
> 
> This patch encodes the buffer only once in the read path. With this there
> is an immediate >10X speed improvement in crashstate save time.
> 
> Signed-off-by: Sharat Masetty <smase...@codeaurora.org>

Reviewed-by: Jordan Crouse <jcro...@codeaurora.org>

I like this a lot.

> ---
> Changes from v1:
>       Addressed comments from Jordan Crouse
> 
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 80 
> ++++++++++++++++++++++++---------
>  drivers/gpu/drm/msm/msm_gpu.h           |  1 +
>  2 files changed, 60 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
> b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index bbf8d3e..7749967 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -441,11 +441,15 @@ void adreno_gpu_state_destroy(struct msm_gpu_state 
> *state)
>  {
>       int i;
> 
> -     for (i = 0; i < ARRAY_SIZE(state->ring); i++)
> +     for (i = 0; i < ARRAY_SIZE(state->ring); i++) {
>               kvfree(state->ring[i].bo.data);
> +             kvfree(state->ring[i].bo.encoded);
> +     }
> 
> -     for (i = 0; state->bos && i < state->nr_bos; i++)
> +     for (i = 0; state->bos && i < state->nr_bos; i++) {
>               kvfree(state->bos[i].data);
> +             kvfree(state->bos[i].encoded);
> +     }
> 
>       kfree(state->bos);
>       kfree(state->comm);
> @@ -472,34 +476,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state)
> 
>  #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
> 
> -static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len)
> +static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
>  {
> -     char out[ASCII85_BUFSZ];
> -     long l, datalen, i;
> +     void *buf;
> +     size_t buf_itr = 0;
> +     long i, l;
> 
> -     if (!ptr || !len)
> -             return;
> +     if (!len)
> +             return NULL;
> +
> +     l = ascii85_encode_len(len);
> 
>       /*
> -      * Only dump the non-zero part of the buffer - rarely will any data
> -      * completely fill the entire allocated size of the buffer
> +      * ascii85 outputs either a 5 byte string or a 1 byte string. So we
> +      * account for the worst case of 5 bytes per dword plus the 1 for '\0'
>        */
> -     for (datalen = 0, i = 0; i < len >> 2; i++) {
> -             if (ptr[i])
> -                     datalen = (i << 2) + 1;
> +     buf = kvmalloc((l * 5) + 1, GFP_KERNEL);
> +     if (!buf)
> +             return NULL;
> +
> +     for (i = 0; i < l; i++) {
> +             ascii85_encode(src[i], buf + buf_itr);
> +
> +             if (src[i] == 0)
> +                     buf_itr += 1;
> +             else
> +                     buf_itr += 5;
>       }
> 
> -     /* Skip printing the object if it is empty */
> -     if (datalen == 0)
> +     return buf;
> +}
> +
> +static void adreno_show_object(struct drm_printer *p,
> +             struct msm_gpu_state_bo *bo)
> +{
> +     if ((!bo->data && !bo->encoded) || !bo->size)
>               return;
> 
> -     l = ascii85_encode_len(datalen);
> +     if (!bo->encoded) {
> +             long datalen, i;
> +             u32 *buf = bo->data;
> +
> +             /*
> +              * Only dump the non-zero part of the buffer - rarely will
> +              * any data completely fill the entire allocated size of
> +              * the buffer.
> +              */
> +             for (datalen = 0, i = 0; i < (bo->size) >> 2; i++) {
> +                     if (buf[i])
> +                             datalen = ((i + 1) << 2);
> +             }
> +
> +             bo->encoded = adreno_gpu_ascii85_encode(buf, datalen);
> +
> +             kvfree(bo->data);
> +             bo->data = NULL;
> +
> +             if (!bo->encoded)
> +                     return;
> +     }
> 
>       drm_puts(p, "    data: !!ascii85 |\n");
>       drm_puts(p, "     ");
> 
> -     for (i = 0; i < l; i++)
> -             drm_puts(p, ascii85_encode(ptr[i], out));
> +     drm_puts(p, bo->encoded);
> 
>       drm_puts(p, "\n");
>  }
> @@ -531,8 +571,7 @@ void adreno_show(struct msm_gpu *gpu, struct 
> msm_gpu_state *state,
>               drm_printf(p, "    wptr: %d\n", state->ring[i].wptr);
>               drm_printf(p, "    size: %d\n", MSM_GPU_RINGBUFFER_SZ);
> 
> -             adreno_show_object(p, state->ring[i].bo.data,
> -                     state->ring[i].bo.size);
> +             adreno_show_object(p, &(state->ring[i].bo));
>       }
> 
>       if (state->bos) {
> @@ -543,8 +582,7 @@ void adreno_show(struct msm_gpu *gpu, struct 
> msm_gpu_state *state,
>                               state->bos[i].iova);
>                       drm_printf(p, "    size: %zd\n", state->bos[i].size);
> 
> -                     adreno_show_object(p, state->bos[i].data,
> -                             state->bos[i].size);
> +                     adreno_show_object(p, &(state->bos[i]));
>               }
>       }
> 
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index a3a6371..737bf45 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -191,6 +191,7 @@ struct msm_gpu_state_bo {
>       u64 iova;
>       size_t size;
>       void *data;
> +     char *encoded;
>  };
> 
>  struct msm_gpu_state {
> --
> 1.9.1
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

Reply via email to