[PATCH v3 4/6] arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling
This patch adds the interconnects property for the gpu node and the opp-peak-kBps property to the opps of the gpu opp table. This should help enable DDR bandwidth scaling dynamically and proportionally to the GPU frequency. Signed-off-by: Sharat Masetty --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 9 + 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 11fc3f24..6ea6f54 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3240,6 +3240,8 @@ qcom,gmu = <&gmu>; + interconnects = <&mem_noc MASTER_GFX3D &mem_noc SLAVE_EBI1>; + zap_shader: zap-shader { memory-region = <&gpu_mem>; }; @@ -3250,36 +3252,43 @@ opp-71000 { opp-hz = /bits/ 64 <71000>; opp-level = ; + opp-peak-kBps = <7216000>; }; opp-67500 { opp-hz = /bits/ 64 <67500>; opp-level = ; + opp-peak-kBps = <7216000>; }; opp-59600 { opp-hz = /bits/ 64 <59600>; opp-level = ; + opp-peak-kBps = <622>; }; opp-52000 { opp-hz = /bits/ 64 <52000>; opp-level = ; + opp-peak-kBps = <622>; }; opp-41400 { opp-hz = /bits/ 64 <41400>; opp-level = ; + opp-peak-kBps = <4068000>; }; opp-34200 { opp-hz = /bits/ 64 <34200>; opp-level = ; + opp-peak-kBps = <2724000>; }; opp-25700 { opp-hz = /bits/ 64 <25700>; opp-level = ; + opp-peak-kBps = <1648000>; }; }; }; -- 2.7.4
[PATCH v3 1/6] dt-bindings: drm/msm/gpu: Document gpu opp table
Update documentation to list the gpu opp table bindings including the newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling. Signed-off-by: Sharat Masetty Acked-by: Rob Herring --- .../devicetree/bindings/display/msm/gpu.txt| 28 ++ 1 file changed, 28 insertions(+) diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt b/Documentation/devicetree/bindings/display/msm/gpu.txt index 70025cb..48bd4ab 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.txt +++ b/Documentation/devicetree/bindings/display/msm/gpu.txt @@ -79,6 +79,34 @@ Example a6xx (with GMU): interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>; + gpu_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-43000 { + opp-hz = /bits/ 64 <43000>; + opp-level = ; + opp-peak-kBps = <5412000>; + }; + + opp-35500 { + opp-hz = /bits/ 64 <35500>; + opp-level = ; + opp-peak-kBps = <3072000>; + }; + + opp-26700 { + opp-hz = /bits/ 64 <26700>; + opp-level = ; + opp-peak-kBps = <3072000>; + }; + + opp-18000 { + opp-hz = /bits/ 64 <18000>; + opp-level = ; + opp-peak-kBps = <1804000>; + }; + }; + qcom,gmu = <&gmu>; zap-shader { -- 2.7.4
[PATCH v3 2/6] drm: msm: a6xx: send opp instead of a frequency
This patch changes the plumbing to send the devfreq recommended opp rather than the frequency. Also consolidate and rearrange the code in a6xx to set the GPU frequency and the icc vote in preparation for the upcoming changes for GPU->DDR scaling votes. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 62 +++ drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 +- drivers/gpu/drm/msm/msm_gpu.c | 3 +- drivers/gpu/drm/msm/msm_gpu.h | 3 +- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 748cd37..2d8124b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -100,17 +100,30 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); } -static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) +void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) { - struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; - struct msm_gpu *gpu = &adreno_gpu->base; - int ret; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 perf_index; + unsigned long gpu_freq; + int ret = 0; + + gpu_freq = dev_pm_opp_get_freq(opp); + + if (gpu_freq == gmu->freq) + return; + + for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++) + if (gpu_freq == gmu->gpu_freqs[perf_index]) + break; + + gmu->current_perf_index = perf_index; gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0); gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING, - ((3 & 0xf) << 28) | index); + ((3 & 0xf) << 28) | perf_index); /* * Send an invalid index as a vote for the bus bandwidth and let the @@ -126,7 +139,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) if (ret) dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret); - 
gmu->freq = gmu->gpu_freqs[index]; + gmu->freq = gmu->gpu_freqs[perf_index]; /* * Eventually we will want to scale the path vote with the frequency but @@ -135,25 +148,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); } -void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq) -{ - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct a6xx_gmu *gmu = &a6xx_gpu->gmu; - u32 perf_index = 0; - - if (freq == gmu->freq) - return; - - for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++) - if (freq == gmu->gpu_freqs[perf_index]) - break; - - gmu->current_perf_index = perf_index; - - __a6xx_gmu_set_freq(gmu, perf_index); -} - unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -708,6 +702,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) a6xx_gmu_rpmh_off(gmu); } +static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) +{ + struct dev_pm_opp *gpu_opp; + unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; + + gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); + if (IS_ERR_OR_NULL(gpu_opp)) + return; + + a6xx_gmu_set_freq(gpu, gpu_opp); + dev_pm_opp_put(gpu_opp); +} + int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -759,8 +766,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK); enable_irq(gmu->hfi_irq); - /* Set the GPU to the current freq */ - __a6xx_gmu_set_freq(gmu, gmu->current_perf_index); + a6xx_gmu_set_initial_freq(gpu, gmu); /* * "enable" the GX power domain which won't actually do anything but it diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 7239b8b..03ba60d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
[PATCH v3 6/6] arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp
Add opp-peak-kBps bindings to the GPU opp table, listing the peak GPU -> DDR bandwidth requirement for each opp level. This will be used to scale the DDR bandwidth along with the GPU frequency dynamically. Signed-off-by: Sharat Masetty Reviewed-by: Matthias Kaehlcke --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 34004ad..7bef42b 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1505,36 +1505,43 @@ opp-8 { opp-hz = /bits/ 64 <8>; opp-level = ; + opp-peak-kBps = <8532000>; }; opp-65000 { opp-hz = /bits/ 64 <65000>; opp-level = ; + opp-peak-kBps = <7216000>; }; opp-56500 { opp-hz = /bits/ 64 <56500>; opp-level = ; + opp-peak-kBps = <5412000>; }; opp-43000 { opp-hz = /bits/ 64 <43000>; opp-level = ; + opp-peak-kBps = <5412000>; }; opp-35500 { opp-hz = /bits/ 64 <35500>; opp-level = ; + opp-peak-kBps = <3072000>; }; opp-26700 { opp-hz = /bits/ 64 <26700>; opp-level = ; + opp-peak-kBps = <3072000>; }; opp-18000 { opp-hz = /bits/ 64 <18000>; opp-level = ; + opp-peak-kBps = <1804000>; }; }; }; -- 2.7.4
[PATCH v3 3/6] drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR
This patch replaces the previously used static DDR vote and uses dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling GPU frequency. Also, since the icc path voting is handled completely in the opp driver, remove the icc_path handle and its usage in the drm driver. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 23 --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 8 drivers/gpu/drm/msm/msm_gpu.h | 2 -- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 2d8124b..1dd8fc5 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) gmu->freq = gmu->gpu_freqs[perf_index]; - /* -* Eventually we will want to scale the path vote with the frequency but -* for now leave it at max so that the performance is nominal. -*/ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + dev_pm_opp_set_bw(&gpu->pdev->dev, opp); } unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) @@ -715,6 +711,19 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) dev_pm_opp_put(gpu_opp); } +static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu) +{ + struct dev_pm_opp *gpu_opp; + unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; + + gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); + if (IS_ERR_OR_NULL(gpu_opp)) + return; + + dev_pm_opp_set_bw(&gpu->pdev->dev, gpu_opp); + dev_pm_opp_put(gpu_opp); +} + int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -739,7 +748,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) } /* Set the bus quota to a reasonable value for boot */ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(3072)); + a6xx_gmu_set_initial_bw(gpu, gmu); /* Enable the GMU interrupt */ gmu_write(gmu, 
REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0); @@ -907,7 +916,7 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) a6xx_gmu_shutdown(gmu); /* Remove the bus vote */ - icc_set_bw(gpu->icc_path, 0, 0); + dev_pm_opp_set_bw(&gpu->pdev->dev, NULL); /* * Make sure the GX domain is off before turning off the GMU (CX) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 2d13694..718c705 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -913,11 +913,6 @@ static int adreno_get_pwrlevels(struct device *dev, DBG("fast_rate=%u, slow_rate=2700", gpu->fast_rate); - /* Check for an interconnect path for the bus */ - gpu->icc_path = of_icc_get(dev, NULL); - if (IS_ERR(gpu->icc_path)) - gpu->icc_path = NULL; - return 0; } @@ -958,13 +953,10 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - struct msm_gpu *gpu = &adreno_gpu->base; unsigned int i; for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) release_firmware(adreno_gpu->fw[i]); - icc_put(gpu->icc_path); - msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index cf0dc6d..c7d74a9 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -112,8 +112,6 @@ struct msm_gpu { struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk; uint32_t fast_rate; - struct icc_path *icc_path; - /* Hang and Inactivity Detection: */ #define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */ -- 2.7.4
[PATCH v3 5/6] arm64: dts: qcom: sc7180: Add interconnects property for GPU
This patch adds the interconnects property to the GPU node. This enables the GPU->DDR path bandwidth voting. Signed-off-by: Sharat Masetty --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index eaede5e..34004ad 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1497,6 +1497,8 @@ operating-points-v2 = <&gpu_opp_table>; qcom,gmu = <&gmu>; + interconnects = <&gem_noc MASTER_GFX3D &mc_virt SLAVE_EBI1>; + gpu_opp_table: opp-table { compatible = "operating-points-v2"; -- 2.7.4
[PATCH v3 0/6] Add support for GPU DDR BW scaling
This is a respin of [1]. Incorporated review feedback and fixed issues observed during testing. Picked up Georgi's series from opp/linux-next [2]; this series also depends on a helper function needed to set and clear the DDR bandwidth vote [3]. Patch 4 in the series adds support for SDM845 as well, but it is not tested yet (WIP); the SC7180 patches are well tested now. [1] https://patchwork.freedesktop.org/series/75291/ [2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/vireshk/pm/+log/opp/linux-next/ [3] https://patchwork.kernel.org/patch/11590563/ Sharat Masetty (6): dt-bindings: drm/msm/gpu: Document gpu opp table drm: msm: a6xx: send opp instead of a frequency drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling arm64: dts: qcom: sc7180: Add interconnects property for GPU arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp .../devicetree/bindings/display/msm/gpu.txt| 28 +++ arch/arm64/boot/dts/qcom/sc7180.dtsi | 9 +++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 9 +++ drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 85 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c| 8 -- drivers/gpu/drm/msm/msm_gpu.c | 3 +- drivers/gpu/drm/msm/msm_gpu.h | 5 +- 8 files changed, 100 insertions(+), 49 deletions(-) -- 2.7.4
Re: [Freedreno] [PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth
On 5/27/2020 9:08 PM, Rob Clark wrote: On Wed, May 27, 2020 at 1:47 AM Sharat Masetty wrote: + more folks On 5/18/2020 9:55 PM, Rob Clark wrote: On Mon, May 18, 2020 at 7:23 AM Jordan Crouse wrote: On Thu, May 14, 2020 at 04:24:18PM +0530, Sharat Masetty wrote: This patches replaces the previously used static DDR vote and uses dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling GPU frequency. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 2d8124b..79433d3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) gmu->freq = gmu->gpu_freqs[perf_index]; - /* - * Eventually we will want to scale the path vote with the frequency but - * for now leave it at max so that the performance is nominal. - */ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + dev_pm_opp_set_bw(&gpu->pdev->dev, opp); } This adds an implicit requirement that all targets need bandwidth settings defined in the OPP or they won't get a bus vote at all. I would prefer that there be an default escape valve but if not you'll need to add bandwidth values for the sdm845 OPP that target doesn't regress. it looks like we could maybe do something like: ret = dev_pm_opp_set_bw(...); if (ret) { dev_warn_once(dev, "no bandwidth settings"); icc_set_bw(...); } ? BR, -R There is a bit of an issue here - Looks like its not possible to two icc handles to the same path. Its causing double enumeration of the paths in the icc core and messing up path votes. With [1] Since opp/core already gets a handle to the icc path as part of table add, drm/msm could do either a) Conditionally enumerate gpu->icc_path handle only when pm/opp core has not got the icc path handle. 
I could use something like [2] to determine if should initialize gpu->icc_path* b) Add peak-opp-configs in 845 dt and mandate all future versions to use this bindings. With this, I can remove gpu->icc_path from msm/drm completely and only rely on opp/core for bw voting. The main thing is that we want to make sure newer dtb always works on an older kernel without regression.. but, hmm.. I guess the interconnects/interconnects-names properties haven't landed yet in sdm845.dtsi? Maybe that lets us go with the simpler approach (b). Looks like we haven't wired up interconnect for 8916 or 8996 either, so probably we can just mandate this for all of them? I checked all three 845, 820 and 8916 and none of them have the interconnect configs for GPU. So, I think we are good here. I'll go with option (b) and re-spin v3. Adding interconnects and opp-peak-kBps configs for previous chips can be taken up as a separate activity. Sharat If we have landed the interconnect dts hookup for gpu somewhere that I'm overlooking, I guess we would have to go with (a) and keep the existing interconnects/interconnects-names properties. BR, -R [1] - https://lore.kernel.org/patchwork/cover/1240687/ [2] - https://patchwork.kernel.org/patch/11527573/ Let me know your thoughts Sharat Jordan unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) -- 2.7.4 -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project ___ Freedreno mailing list freedr...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
Re: [Freedreno] [PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth
+ more folks On 5/18/2020 9:55 PM, Rob Clark wrote: On Mon, May 18, 2020 at 7:23 AM Jordan Crouse wrote: On Thu, May 14, 2020 at 04:24:18PM +0530, Sharat Masetty wrote: This patches replaces the previously used static DDR vote and uses dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling GPU frequency. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 2d8124b..79433d3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) gmu->freq = gmu->gpu_freqs[perf_index]; - /* - * Eventually we will want to scale the path vote with the frequency but - * for now leave it at max so that the performance is nominal. - */ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + dev_pm_opp_set_bw(&gpu->pdev->dev, opp); } This adds an implicit requirement that all targets need bandwidth settings defined in the OPP or they won't get a bus vote at all. I would prefer that there be an default escape valve but if not you'll need to add bandwidth values for the sdm845 OPP that target doesn't regress. it looks like we could maybe do something like: ret = dev_pm_opp_set_bw(...); if (ret) { dev_warn_once(dev, "no bandwidth settings"); icc_set_bw(...); } ? BR, -R There is a bit of an issue here - it looks like it's not possible to have two icc handles to the same path. It's causing double enumeration of the paths in the icc core and messing up path votes. With [1], since opp/core already gets a handle to the icc path as part of table add, drm/msm could do either a) Conditionally enumerate gpu->icc_path handle only when pm/opp core has not got the icc path handle. 
I could use something like [2] to determine if I should initialize gpu->icc_path* b) Add peak-opp-configs in 845 dt and mandate all future versions to use these bindings. With this, I can remove gpu->icc_path from msm/drm completely and only rely on opp/core for bw voting. [1] - https://lore.kernel.org/patchwork/cover/1240687/ [2] - https://patchwork.kernel.org/patch/11527573/ Let me know your thoughts Sharat Jordan unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) -- 2.7.4 -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project ___ Freedreno mailing list freedr...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth
This patch replaces the previously used static DDR vote and uses dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling GPU frequency. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 2d8124b..79433d3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) gmu->freq = gmu->gpu_freqs[perf_index]; - /* -* Eventually we will want to scale the path vote with the frequency but -* for now leave it at max so that the performance is nominal. -*/ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + dev_pm_opp_set_bw(&gpu->pdev->dev, opp); } unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) -- 2.7.4
[PATCH 4/6] drm: msm: a6xx: send opp instead of a frequency
This patch changes the plumbing to send the devfreq recommended opp rather than the frequency. Also consolidate and rearrange the code in a6xx to set the GPU frequency and the icc vote in preparation for the upcoming changes for GPU->DDR scaling votes. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 62 +++ drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 +- drivers/gpu/drm/msm/msm_gpu.c | 3 +- drivers/gpu/drm/msm/msm_gpu.h | 3 +- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 748cd37..2d8124b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -100,17 +100,30 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); } -static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) +void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) { - struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; - struct msm_gpu *gpu = &adreno_gpu->base; - int ret; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 perf_index; + unsigned long gpu_freq; + int ret = 0; + + gpu_freq = dev_pm_opp_get_freq(opp); + + if (gpu_freq == gmu->freq) + return; + + for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++) + if (gpu_freq == gmu->gpu_freqs[perf_index]) + break; + + gmu->current_perf_index = perf_index; gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0); gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING, - ((3 & 0xf) << 28) | index); + ((3 & 0xf) << 28) | perf_index); /* * Send an invalid index as a vote for the bus bandwidth and let the @@ -126,7 +139,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) if (ret) dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret); - 
gmu->freq = gmu->gpu_freqs[index]; + gmu->freq = gmu->gpu_freqs[perf_index]; /* * Eventually we will want to scale the path vote with the frequency but @@ -135,25 +148,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); } -void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq) -{ - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct a6xx_gmu *gmu = &a6xx_gpu->gmu; - u32 perf_index = 0; - - if (freq == gmu->freq) - return; - - for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++) - if (freq == gmu->gpu_freqs[perf_index]) - break; - - gmu->current_perf_index = perf_index; - - __a6xx_gmu_set_freq(gmu, perf_index); -} - unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -708,6 +702,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) a6xx_gmu_rpmh_off(gmu); } +static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) +{ + struct dev_pm_opp *gpu_opp; + unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; + + gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); + if (IS_ERR_OR_NULL(gpu_opp)) + return; + + a6xx_gmu_set_freq(gpu, gpu_opp); + dev_pm_opp_put(gpu_opp); +} + int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -759,8 +766,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK); enable_irq(gmu->hfi_irq); - /* Set the GPU to the current freq */ - __a6xx_gmu_set_freq(gmu, gmu->current_perf_index); + a6xx_gmu_set_initial_freq(gpu, gmu); /* * "enable" the GX power domain which won't actually do anything but it diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 7239b8b..03ba60d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
[PATCH 2/6] arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp
Add opp-peak-kBps bindings to the GPU opp table, listing the peak GPU -> DDR bandwidth requirement for each opp level. This will be used to scale the DDR bandwidth along with the GPU frequency dynamically. Signed-off-by: Sharat Masetty --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 0ce9921..89f7767 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1392,36 +1392,43 @@ opp-8 { opp-hz = /bits/ 64 <8>; opp-level = ; + opp-peak-kBps = <8532000>; }; opp-65000 { opp-hz = /bits/ 64 <65000>; opp-level = ; + opp-peak-kBps = <7216000>; }; opp-56500 { opp-hz = /bits/ 64 <56500>; opp-level = ; + opp-peak-kBps = <5412000>; }; opp-43000 { opp-hz = /bits/ 64 <43000>; opp-level = ; + opp-peak-kBps = <5412000>; }; opp-35500 { opp-hz = /bits/ 64 <35500>; opp-level = ; + opp-peak-kBps = <3072000>; }; opp-26700 { opp-hz = /bits/ 64 <26700>; opp-level = ; + opp-peak-kBps = <3072000>; }; opp-18000 { opp-hz = /bits/ 64 <18000>; opp-level = ; + opp-peak-kBps = <1804000>; }; }; }; -- 2.7.4
[PATCH 0/6] Add support for GPU DDR BW scaling
This is a rework of my previous series [1], but this time based on the bindings from Georgi [2] + a few fixes which look to be fixed in v8 of Georgi's series [3]. The work is based on the chromeOS tip. [1]: https://patchwork.freedesktop.org/series/75291/ [2]: https://lore.kernel.org/patchwork/cover/1230626/ [3]: https://lore.kernel.org/patchwork/cover/1240687/ Sharat Masetty (5): arm64: dts: qcom: sc7180: Add interconnect bindings for GPU arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp drm: msm: a6xx: send opp instead of a frequency drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth dt-bindings: drm/msm/gpu: Document gpu opp table Sibi Sankar (1): OPP: Add and export helper to set bandwidth .../devicetree/bindings/display/msm/gpu.txt| 28 + arch/arm64/boot/dts/qcom/sc7180.dtsi | 9 +++ drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 68 +++--- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 +- drivers/gpu/drm/msm/msm_gpu.c | 3 +- drivers/gpu/drm/msm/msm_gpu.h | 3 +- drivers/opp/core.c | 43 ++ include/linux/pm_opp.h | 6 ++ 8 files changed, 125 insertions(+), 37 deletions(-) -- 2.7.4
[PATCH 6/6] dt-bindings: drm/msm/gpu: Document gpu opp table
Update documentation to list the gpu opp table bindings including the newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling. Signed-off-by: Sharat Masetty --- .../devicetree/bindings/display/msm/gpu.txt| 28 ++ 1 file changed, 28 insertions(+) diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt b/Documentation/devicetree/bindings/display/msm/gpu.txt index 70025cb..48bd4ab 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.txt +++ b/Documentation/devicetree/bindings/display/msm/gpu.txt @@ -79,6 +79,34 @@ Example a6xx (with GMU): interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>; + gpu_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-43000 { + opp-hz = /bits/ 64 <43000>; + opp-level = ; + opp-peak-kBps = <5412000>; + }; + + opp-35500 { + opp-hz = /bits/ 64 <35500>; + opp-level = ; + opp-peak-kBps = <3072000>; + }; + + opp-26700 { + opp-hz = /bits/ 64 <26700>; + opp-level = ; + opp-peak-kBps = <3072000>; + }; + + opp-18000 { + opp-hz = /bits/ 64 <18000>; + opp-level = ; + opp-peak-kBps = <1804000>; + }; + }; + qcom,gmu = <&gmu>; zap-shader { -- 2.7.4
[PATCH 1/6] arm64: dts: qcom: sc7180: Add interconnect bindings for GPU
This patch adds the interconnect bindings to the GPU node. This enables the GPU->DDR path bandwidth voting. Signed-off-by: Sharat Masetty --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index b46ee78..0ce9921 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1384,6 +1384,8 @@ operating-points-v2 = <&gpu_opp_table>; qcom,gmu = <&gmu>; + interconnects = <&gem_noc MASTER_GFX3D &mc_virt SLAVE_EBI1>; + gpu_opp_table: opp-table { compatible = "operating-points-v2"; -- 2.7.4
[PATCH 3/6] OPP: Add and export helper to set bandwidth
From: Sibi Sankar Add and export 'dev_pm_opp_set_bw' to set the bandwidth levels associated with an OPP for a given frequency. Signed-off-by: Sibi Sankar Signed-off-by: Sharat Masetty --- drivers/opp/core.c | 43 +++ include/linux/pm_opp.h | 6 ++ 2 files changed, 49 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index f42b7c4..0f34077 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -828,6 +828,49 @@ static int _set_required_opps(struct device *dev, } /** + * dev_pm_opp_set_bw() - sets bandwidth levels corresponding to an available opp + * @dev: device for which we do this operation + * @opp: opp based on which the bandwidth levels are to be configured + * + * This configures the bandwidth to the levels specified + * by the OPP. + * + * Return: 0 on success or a negative error value. + */ +int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp) +{ + struct opp_table *opp_table; + int ret = -EINVAL; + int i; + + if (IS_ERR_OR_NULL(opp) || !opp->available) { + dev_err(dev, "%s: Invalid parameters\n", __func__); + return -EINVAL; + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "%s: device opp table doesn't exist\n", __func__); + return PTR_ERR(opp_table); + } + + if (opp_table->paths) { + for (i = 0; i < opp_table->path_count; i++) { + ret = icc_set_bw(opp_table->paths[i], +opp->bandwidth[i].avg, +opp->bandwidth[i].peak); + if (ret) + dev_err(dev, "Failed to set bandwidth[%d]: %d\n", + i, ret); + } + } + + dev_pm_opp_put_opp_table(opp_table); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_bw); + +/** * dev_pm_opp_set_rate() - Configure new OPP based on frequency * @dev:device for which we do this operation * @target_freq: frequency to achieve diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 76f8c6b..04f7fda 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -156,6 +156,7 @@ struct dev_pm_opp *dev_pm_opp_xlate_opp(struct opp_table *src_table, struct 
opp_table *dst_table, struct dev_pm_opp *src_opp); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); +int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); void dev_pm_opp_remove_table(struct device *dev); @@ -354,6 +355,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f return -ENOTSUPP; } +static inline int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp) +{ + return -ENOTSUPP; +} + static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask) { return -ENOTSUPP; -- 2.7.4
[PATCH 2/2] dt-bindings: arm-smmu: Add sc7180 compatible string
This patch simply adds a new compatible string for SC7180 platform. Signed-off-by: Sharat Masetty --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6515dbe..986098b 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sc7180-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,smmu-v2 -- 1.9.1
[PATCH 1/2] arm64: dts: qcom: sc7180: Add A618 gpu dt blob
This patch adds the required dt nodes and properties to enable the A618 GPU. Signed-off-by: Sharat Masetty --- * Remove GCC_DDRSS_GPU_AXI_CLK clock reference from gpu smmu node. arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++ 1 file changed, 102 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 4216b57..de9a054 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1373,6 +1373,108 @@ }; }; + gpu: gpu@500 { + compatible = "qcom,adreno-618.0", "qcom,adreno"; + #stream-id-cells = <16>; + reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>, + <0 0x05061000 0 0x800>; + reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc"; + interrupts = ; + iommus = <&adreno_smmu 0>; + operating-points-v2 = <&gpu_opp_table>; + qcom,gmu = <&gmu>; + + gpu_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-8 { + opp-hz = /bits/ 64 <8>; + opp-level = ; + }; + + opp-65000 { + opp-hz = /bits/ 64 <65000>; + opp-level = ; + }; + + opp-56500 { + opp-hz = /bits/ 64 <56500>; + opp-level = ; + }; + + opp-43000 { + opp-hz = /bits/ 64 <43000>; + opp-level = ; + }; + + opp-35500 { + opp-hz = /bits/ 64 <35500>; + opp-level = ; + }; + + opp-26700 { + opp-hz = /bits/ 64 <26700>; + opp-level = ; + }; + + opp-18000 { + opp-hz = /bits/ 64 <18000>; + opp-level = ; + }; + }; + }; + + adreno_smmu: iommu@504 { + compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2"; + reg = <0 0x0504 0 0x1>; + #iommu-cells = <1>; + #global-interrupts = <2>; + interrupts = , + , + , + , + , + , + , + , + , + ; + + clocks = <&gcc GCC_GPU_MEMNOC_GFX_CLK>, + <&gcc GCC_GPU_CFG_AHB_CLK>; + clock-names = "bus", "iface"; + + power-domains = <&gpucc CX_GDSC>; + }; + + gmu: gmu@506a000 { + compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu"; + reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 0x1>, + <0 0x0b49 0 0x1>; + reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq"; + interrupts = , + ; + interrupt-names = "hfi", "gmu"; + clocks = 
<&gpucc GPU_CC_CX_GMU_CLK>, + <&gpucc GPU_CC_CXO_CLK>, + <&gcc GCC_DDRSS_GPU_AXI_CLK>, + <&gcc GCC_GPU_MEMNOC_GFX_CLK>; + clock-names = "gmu", "cxo", "axi", "memnoc"; + power-domains = <&gpucc CX_GDSC>, <&
Re: [Freedreno] [PATCH v2] dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock
On 4/30/2020 11:51 PM, Doug Anderson wrote: Hi, On Thu, Apr 30, 2020 at 11:12 AM Jordan Crouse wrote: On Thu, Apr 30, 2020 at 09:29:47AM +0530, Sharat Masetty wrote: This patch adds a new compatible string for sc7180 and also an additional clock listing needed to power the TBUs and the TCU. Signed-off-by: Sharat Masetty --- v2: Addressed review comments from Doug Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 8 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6515dbe..ba5dba4 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sc7180-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,smmu-v2 @@ -113,16 +114,23 @@ properties: present in such cases. clock-names: +minItems: 2 +maxItems: 3 items: - const: bus - const: iface + - const: mem_iface Hi Sharat - I think there was a bit of confusion due to renaming between downstream and upstream. Currently for the sdm845 and friends we have: clocks = <&gcc GCC_GPU_MEMNOC_GFX_CLK>, <&gcc GCC_GPU_CFG_AHB_CLK>; clock-names = "bus", "iface"; Confusingly these same clocks downstream are "mem_iface_clk" and "iface_clk" respectively. It looks like you are trying to add GCC_DDRSS_GPU_AXI_CLK as "mem_iface" which was formerly "mem_clk" downstream. I'm not sure if the naming change is intentional or you were trying to make upstream and downstream match and didn't realize that they were renamed. I'm not sure if we need DDRSS_GPU_AXI_CLK or not. Empirically it works without it for sdm845 (I don't have a sc7180 to test) but we should probably loop back with either the clock team or the hardware designers to be sure there isn't a corner case that is missing. I agree with Doug that its always best if we don't need to add a clock. 
Thanks Jordan and Doug for the updates. My intention was to add the third clock as listed downstream, but as you said the naming is a bit misleading. From the description of the GCC_DDRSS_GPU_AXI_CLK clock, this is needed for GPU-to-DDR access, and all transactions to the DDR from the GPU go through the SMMU. It is listed in the SMMU dt node because it's needed by the SMMU to perform pagetable walks. I think we may be fine by not listing this clock in the SMMU node because the same clock is listed in both the GMU and also the GPU. I can confirm that on sc7180 the GPU seems to come up just fine without the clock being specified in the iommu node. Definitely would be good to know what's broken and if nothing is broken maybe we can change this patch to just add the sc7180 compatible string and drop the clock. I do note that the GMU already has a reference to the same "GCC_DDRSS_GPU_AXI_CLK" clock. -Doug ___ Freedreno mailing list freedr...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[PATCH v2] dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock
This patch adds a new compatible string for sc7180 and also an additional clock listing needed to power the TBUs and the TCU. Signed-off-by: Sharat Masetty --- v2: Addressed review comments from Doug Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 8 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6515dbe..ba5dba4 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sc7180-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,smmu-v2 @@ -113,16 +114,23 @@ properties: present in such cases. clock-names: +minItems: 2 +maxItems: 3 items: - const: bus - const: iface + - const: mem_iface clocks: +minItems: 2 +maxItems: 3 items: - description: bus clock required for downstream bus access and for the smmu ptw - description: interface clock required to access smmu's registers through the TCU's programming interface. + - description: clock required for the inner working of SMMU TBUs and the + TCU like the pagetable walks and the TLB flushes. power-domains: maxItems: 1 -- 1.9.1
Re: [PATCH] dt-bindings: arm-smmu: Add a new compatible string and a clock
On 4/29/2020 3:57 AM, Doug Anderson wrote: Hi, On Tue, Apr 28, 2020 at 4:39 AM Sharat Masetty wrote: This patch adds a new compatible string for sc7180 and also an additional clock listing needed to power the TBUs and the TCU. Signed-off-by: Sharat Masetty --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 7 +++ 1 file changed, 7 insertions(+) nit: mention sc7180 in subject, like: dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6515dbe..15946ac 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sc7180-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,smmu-v2 @@ -113,16 +114,22 @@ properties: present in such cases. clock-names: +minItems: 2 +maxItems: 3 items: - const: bus - const: iface + - const: mem_iface_clk People usually frown on clock-names ending in "_clk". Just name it "mem_iface". clocks: +minItems: 2 +maxItems: 3 items: - description: bus clock required for downstream bus access and for the smmu ptw - description: interface clock required to access smmu's registers through the TCU's programming interface. + - description: clock required for the SMMU TBUs and the TCU Is this clock only needed for sc7180, or would it be useful if we enabled certain features on existing devices? Please document exactly when someone would provide this clock and when they'd leave it off. ...also: maybe it's obvious to those that understand IOMMUs in depth, but to me I have no idea what your description means and why it's different from the other two clocks. Any way you could punch up your description a little bit? Looking at sdm845 I see that this clock seems to exist but wasn't listed in the IOMMU device tree node. Is that a mistake on sdm845? 
...or is it just fine because the GPU holds the clock? Is there a reason the sdm845 solution and the sc7180 solution shouldn't be the same (AKA we should either add this clock to the sdm845 device tree file or remove it from sc7180)? I went and checked the downstream SDM845 device tree for GPU SMMU and I do see this clock listed on there. I am no expert in SMMU either but my understanding is that this clock is needed for the core working of the SMMU like the pagetable walks, TLB invalidations etc., whereas the other two clocks are required to access SMMU register space from the host. My proposal is to add this clock to SDM845 as well as a follow-up effort so that we can remove the minItems/maxItems properties which I do not like. @Jordan, do you remember why this clock was added to SDM845? Thanks! -Doug
[PATCH] dt-bindings: arm-smmu: Add a new compatible string and a clock
This patch adds a new compatible string for sc7180 and also an additional clock listing needed to power the TBUs and the TCU. Signed-off-by: Sharat Masetty --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 7 +++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6515dbe..15946ac 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sc7180-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,smmu-v2 @@ -113,16 +114,22 @@ properties: present in such cases. clock-names: +minItems: 2 +maxItems: 3 items: - const: bus - const: iface + - const: mem_iface_clk clocks: +minItems: 2 +maxItems: 3 items: - description: bus clock required for downstream bus access and for the smmu ptw - description: interface clock required to access smmu's registers through the TCU's programming interface. + - description: clock required for the SMMU TBUs and the TCU power-domains: maxItems: 1 -- 1.9.1
[PATCH 3/3] drm/msm: Optimize adreno_show_object()
When the userspace tries to read the crashstate dump, the read side implementation in the driver currently ascii85 encodes all the binary buffers and it does this each time the read system call is called. A userspace tool like cat typically does a page by page read and the number of read calls depends on the size of the data captured by the driver. This is certainly not desirable and does not scale well with large captures. This patch encodes the buffer only once in the read path. With this there is an immediate >10X speed improvement in crashstate save time. Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 76 - drivers/gpu/drm/msm/msm_gpu.h | 2 + 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c93702d..e29093e 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -475,34 +475,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state) #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) -static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len) +static char *adreno_gpu_ascii85_encode(u32 *src, size_t len) { - char out[ASCII85_BUFSZ]; - long l, datalen, i; + void *buf; + size_t buf_itr = 0; + long i, l; - if (!ptr || !len) - return; + if (!len) + return NULL; + + l = ascii85_encode_len(len); /* -* Only dump the non-zero part of the buffer - rarely will any data -* completely fill the entire allocated size of the buffer +* ascii85 outputs either a 5 byte string or a 1 byte string. 
So we +* account for the worst case of 5 bytes per dword plus the 1 for '\0' */ - for (datalen = 0, i = 0; i < len >> 2; i++) { - if (ptr[i]) - datalen = (i << 2) + 1; - } + buf = kvmalloc((l * 5) + 1, GFP_KERNEL); + if (!buf) + return NULL; - /* Skip printing the object if it is empty */ - if (datalen == 0) + for (i = 0; i < l; i++) + buf_itr += ascii85_encode_to_buf(src[i], buf + buf_itr); + + return buf; +} + +/* len is expected to be in bytes */ +static void adreno_show_object(struct drm_printer *p, void **ptr, int len, + bool *encoded) +{ + if (!*ptr || !len) return; - l = ascii85_encode_len(datalen); + if (!*encoded) { + long datalen, i; + u32 *buf = *ptr; + + /* +* Only dump the non-zero part of the buffer - rarely will +* any data completely fill the entire allocated size of +* the buffer. +*/ + for (datalen = 0, i = 0; i < len >> 2; i++) { + if (buf[i]) + datalen = ((i + 1) << 2); + } + + /* +* If we reach here, then the originally captured binary buffer +* will be replaced with the ascii85 encoded string +*/ + *ptr = adreno_gpu_ascii85_encode(buf, datalen); + + kvfree(buf); + + *encoded = true; + } + + if (!*ptr) + return; drm_puts(p, "data: !!ascii85 |\n"); drm_puts(p, " "); - for (i = 0; i < l; i++) - drm_puts(p, ascii85_encode(ptr[i], out)); + drm_puts(p, *ptr); drm_puts(p, "\n"); } @@ -534,8 +570,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, drm_printf(p, "wptr: %d\n", state->ring[i].wptr); drm_printf(p, "size: %d\n", MSM_GPU_RINGBUFFER_SZ); - adreno_show_object(p, state->ring[i].data, - state->ring[i].data_size); + adreno_show_object(p, &state->ring[i].data, + state->ring[i].data_size, &state->ring[i].encoded); } if (state->bos) { @@ -546,8 +582,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, state->bos[i].iova); drm_printf(p, "size: %zd\n", state->bos[i].size); - adreno_show_object(p, state->bos[i].data, - state->bos[i].size); + adreno_show_object(p, &state->bos[i].data, + state->bos[i].size, 
&state->bos[i].encoded); } } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index f82bac0..efb49bb 100644 --- a/drivers/gpu/drm/msm/msm_
[PATCH] lib/string: Pass the input gfp flags to kmalloc
Pass the caller-supplied gfp flags to the kmalloc() calls instead of hard-coding GFP_KERNEL. This allows the functions to be called from contexts where the caller needs different allocation flags. Signed-off-by: Sharat Masetty --- lib/string_helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 29c490e..60f9015 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -576,7 +576,7 @@ char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp) char *buffer, *quoted; int i, res; - buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, gfp); if (!buffer) return NULL; @@ -612,7 +612,7 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp) return kstrdup("", gfp); /* We add 11 spaces for ' (deleted)' to be appended */ - temp = kmalloc(PATH_MAX + 11, GFP_KERNEL); + temp = kmalloc(PATH_MAX + 11, gfp); if (!temp) return kstrdup("", gfp); -- 1.9.1
Re: [PATCH net-next,v4] hyperv: Add support for virtual Receive Side Scaling (vRSS)
Hi Zhang, How is this beneficial when compared to RPS (receive packet steering)? Can you please provide more details on what this patch does? Thanks Sharat On Mon, Apr 7, 2014 at 12:42 PM, David Miller wrote: > > The net-next tree is not open yet, I will announce when it is and you can > submit > net-next targeted patches. > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/