[PATCH v3 4/6] arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling

2020-06-05 Thread Sharat Masetty
This patch adds the interconnects property for the gpu node and the
opp-peak-kBps property to the opps of the gpu opp table. This should
help enable DDR bandwidth scaling dynamically and proportionally to the
GPU frequency.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 11fc3f24..6ea6f54 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -3240,6 +3240,8 @@

qcom,gmu = <&gmu>;

+   interconnects = <&mem_noc MASTER_GFX3D &mem_noc 
SLAVE_EBI1>;
+
zap_shader: zap-shader {
memory-region = <&gpu_mem>;
};
@@ -3250,36 +3252,43 @@
opp-71000 {
opp-hz = /bits/ 64 <71000>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-67500 {
opp-hz = /bits/ 64 <67500>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-59600 {
opp-hz = /bits/ 64 <59600>;
opp-level = 
;
+   opp-peak-kBps = <622>;
};

opp-52000 {
opp-hz = /bits/ 64 <52000>;
opp-level = ;
+   opp-peak-kBps = <622>;
};

opp-41400 {
opp-hz = /bits/ 64 <41400>;
opp-level = 
;
+   opp-peak-kBps = <4068000>;
};

opp-34200 {
opp-hz = /bits/ 64 <34200>;
opp-level = ;
+   opp-peak-kBps = <2724000>;
};

opp-25700 {
opp-hz = /bits/ 64 <25700>;
opp-level = 
;
+   opp-peak-kBps = <1648000>;
};
};
};
--
2.7.4



[PATCH v3 1/6] dt-bindings: drm/msm/gpu: Document gpu opp table

2020-06-05 Thread Sharat Masetty
Update documentation to list the gpu opp table bindings including the
newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling.

Signed-off-by: Sharat Masetty 
Acked-by: Rob Herring 
---
 .../devicetree/bindings/display/msm/gpu.txt| 28 ++
 1 file changed, 28 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt 
b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 70025cb..48bd4ab 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -79,6 +79,34 @@ Example a6xx (with GMU):

interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>;

+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = ;
+   opp-peak-kBps = <5412000>;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = ;
+   opp-peak-kBps = <1804000>;
+   };
+   };
+
qcom,gmu = <&gmu>;

zap-shader {
--
2.7.4



[PATCH v3 2/6] drm: msm: a6xx: send opp instead of a frequency

2020-06-05 Thread Sharat Masetty
This patch changes the plumbing to send the devfreq recommended opp rather
than the frequency. Also consolidate and rearrange the code in a6xx to set
the GPU frequency and the icc vote in preparation for the upcoming
changes for GPU->DDR scaling votes.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 62 +++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h |  3 +-
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 748cd37..2d8124b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -100,17 +100,30 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
 }

-static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
+void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
 {
-   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
-   struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
-   struct msm_gpu *gpu = &adreno_gpu->base;
-   int ret;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+   u32 perf_index;
+   unsigned long gpu_freq;
+   int ret = 0;
+
+   gpu_freq = dev_pm_opp_get_freq(opp);
+
+   if (gpu_freq == gmu->freq)
+   return;
+
+   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
+   if (gpu_freq == gmu->gpu_freqs[perf_index])
+   break;
+
+   gmu->current_perf_index = perf_index;

gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);

gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
-   ((3 & 0xf) << 28) | index);
+   ((3 & 0xf) << 28) | perf_index);

/*
 * Send an invalid index as a vote for the bus bandwidth and let the
@@ -126,7 +139,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
if (ret)
dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);

-   gmu->freq = gmu->gpu_freqs[index];
+   gmu->freq = gmu->gpu_freqs[perf_index];

/*
 * Eventually we will want to scale the path vote with the frequency but
@@ -135,25 +148,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
 }

-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
-{
-   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-   struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
-   u32 perf_index = 0;
-
-   if (freq == gmu->freq)
-   return;
-
-   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
-   if (freq == gmu->gpu_freqs[perf_index])
-   break;
-
-   gmu->current_perf_index = perf_index;
-
-   __a6xx_gmu_set_freq(gmu, perf_index);
-}
-
 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -708,6 +702,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
a6xx_gmu_rpmh_off(gmu);
 }

+static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu 
*gmu)
+{
+   struct dev_pm_opp *gpu_opp;
+   unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
+
+   gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   return;
+
+   a6xx_gmu_set_freq(gpu, gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+}
+
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 {
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
@@ -759,8 +766,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
enable_irq(gmu->hfi_irq);

-   /* Set the GPU to the current freq */
-   __a6xx_gmu_set_freq(gmu, gmu->current_perf_index);
+   a6xx_gmu_set_initial_freq(gpu, gmu);

/*
 * "enable" the GX power domain which won't actually do anything but it
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 7239b8b..03ba60d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum 
a6xx_gmu_oob_state state);
 int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
 void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)

[PATCH v3 6/6] arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp

2020-06-05 Thread Sharat Masetty
Add opp-peak-kBps bindings to the GPU opp table, listing the peak
GPU -> DDR bandwidth requirement for each opp level. This will be
used to scale the DDR bandwidth along with the GPU frequency dynamically.

Signed-off-by: Sharat Masetty 
Reviewed-by: Matthias Kaehlcke 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 34004ad..7bef42b 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1505,36 +1505,43 @@
opp-8 {
opp-hz = /bits/ 64 <8>;
opp-level = 
;
+   opp-peak-kBps = <8532000>;
};

opp-65000 {
opp-hz = /bits/ 64 <65000>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-56500 {
opp-hz = /bits/ 64 <56500>;
opp-level = ;
+   opp-peak-kBps = <5412000>;
};

opp-43000 {
opp-hz = /bits/ 64 <43000>;
opp-level = 
;
+   opp-peak-kBps = <5412000>;
};

opp-35500 {
opp-hz = /bits/ 64 <35500>;
opp-level = ;
+   opp-peak-kBps = <3072000>;
};

opp-26700 {
opp-hz = /bits/ 64 <26700>;
opp-level = 
;
+   opp-peak-kBps = <3072000>;
};

opp-18000 {
opp-hz = /bits/ 64 <18000>;
opp-level = 
;
+   opp-peak-kBps = <1804000>;
};
};
};
--
2.7.4



[PATCH v3 3/6] drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR

2020-06-05 Thread Sharat Masetty
This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency. Also since the icc path voting is handled completely
in the opp driver, remove the icc_path handle and its usage in the
drm driver.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 23 ---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  8 
 drivers/gpu/drm/msm/msm_gpu.h   |  2 --
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..1dd8fc5 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

gmu->freq = gmu->gpu_freqs[perf_index];

-   /*
-* Eventually we will want to scale the path vote with the frequency but
-* for now leave it at max so that the performance is nominal.
-*/
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+   dev_pm_opp_set_bw(&gpu->pdev->dev, opp);
 }

 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
@@ -715,6 +711,19 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, 
struct a6xx_gmu *gmu)
dev_pm_opp_put(gpu_opp);
 }

+static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
+{
+   struct dev_pm_opp *gpu_opp;
+   unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
+
+   gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   return;
+
+   dev_pm_opp_set_bw(&gpu->pdev->dev, gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+}
+
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 {
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
@@ -739,7 +748,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
}

/* Set the bus quota to a reasonable value for boot */
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(3072));
+   a6xx_gmu_set_initial_bw(gpu, gmu);

/* Enable the GMU interrupt */
gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0);
@@ -907,7 +916,7 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
a6xx_gmu_shutdown(gmu);

/* Remove the bus vote */
-   icc_set_bw(gpu->icc_path, 0, 0);
+   dev_pm_opp_set_bw(&gpu->pdev->dev, NULL);

/*
 * Make sure the GX domain is off before turning off the GMU (CX)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2d13694..718c705 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -913,11 +913,6 @@ static int adreno_get_pwrlevels(struct device *dev,

DBG("fast_rate=%u, slow_rate=2700", gpu->fast_rate);

-   /* Check for an interconnect path for the bus */
-   gpu->icc_path = of_icc_get(dev, NULL);
-   if (IS_ERR(gpu->icc_path))
-   gpu->icc_path = NULL;
-
return 0;
 }

@@ -958,13 +953,10 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,

 void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 {
-   struct msm_gpu *gpu = &adreno_gpu->base;
unsigned int i;

for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
release_firmware(adreno_gpu->fw[i]);

-   icc_put(gpu->icc_path);
-
msm_gpu_cleanup(&adreno_gpu->base);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index cf0dc6d..c7d74a9 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -112,8 +112,6 @@ struct msm_gpu {
struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
uint32_t fast_rate;

-   struct icc_path *icc_path;
-
/* Hang and Inactivity Detection:
 */
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
--
2.7.4



[PATCH v3 5/6] arm64: dts: qcom: sc7180: Add interconnects property for GPU

2020-06-05 Thread Sharat Masetty
This patch adds the interconnects property to the GPU node. This enables
the GPU->DDR path bandwidth voting.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index eaede5e..34004ad 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1497,6 +1497,8 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;

+   interconnects = <&gem_noc MASTER_GFX3D &mc_virt 
SLAVE_EBI1>;
+
gpu_opp_table: opp-table {
compatible = "operating-points-v2";

--
2.7.4



[PATCH v3 0/6] Add support for GPU DDR BW scaling

2020-06-05 Thread Sharat Masetty
This is a respin of [1]. Incorporated review feedback and fixed issues observed
during testing. Picked up Georgi's series from opp/linux-next [2]; this
series also depends on a helper function needed to set and clear the DDR
bandwidth vote [3]. Patch number 4 in the series adds support for SDM845 as well,
but it is not tested yet (WIP); the SC7180 patches are well tested now.

[1] https://patchwork.freedesktop.org/series/75291/
[2] 
https://kernel.googlesource.com/pub/scm/linux/kernel/git/vireshk/pm/+log/opp/linux-next/
[3] https://patchwork.kernel.org/patch/11590563/

Sharat Masetty (6):
  dt-bindings: drm/msm/gpu: Document gpu opp table
  drm: msm: a6xx: send opp instead of a frequency
  drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR
  arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling
  arm64: dts: qcom: sc7180: Add interconnects property for GPU
  arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp

 .../devicetree/bindings/display/msm/gpu.txt| 28 +++
 arch/arm64/boot/dts/qcom/sc7180.dtsi   |  9 +++
 arch/arm64/boot/dts/qcom/sdm845.dtsi   |  9 +++
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c  | 85 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h  |  2 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c|  8 --
 drivers/gpu/drm/msm/msm_gpu.c  |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h  |  5 +-
 8 files changed, 100 insertions(+), 49 deletions(-)

--
2.7.4



Re: [Freedreno] [PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth

2020-05-28 Thread Sharat Masetty



On 5/27/2020 9:08 PM, Rob Clark wrote:

On Wed, May 27, 2020 at 1:47 AM Sharat Masetty  wrote:

+ more folks

On 5/18/2020 9:55 PM, Rob Clark wrote:

On Mon, May 18, 2020 at 7:23 AM Jordan Crouse  wrote:

On Thu, May 14, 2020 at 04:24:18PM +0530, Sharat Masetty wrote:

This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency.

Signed-off-by: Sharat Masetty 
---
   drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +-
   1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..79433d3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

gmu->freq = gmu->gpu_freqs[perf_index];

- /*
-  * Eventually we will want to scale the path vote with the frequency but
-  * for now leave it at max so that the performance is nominal.
-  */
- icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+ dev_pm_opp_set_bw(&gpu->pdev->dev, opp);
   }

This adds an implicit requirement that all targets need bandwidth settings
defined in the OPP or they won't get a bus vote at all. I would prefer that
there be a default escape valve, but if not you'll need to add
bandwidth values for the sdm845 OPP so that target doesn't regress.


it looks like we could maybe do something like:

ret = dev_pm_opp_set_bw(...);
if (ret) {
dev_warn_once(dev, "no bandwidth settings");
icc_set_bw(...);
}

?

BR,
-R

There is a bit of an issue here - it looks like it's not possible to have two icc
handles to the same path. It's causing double enumeration of the paths
in the icc core and messing up path votes. With [1], since opp/core
already gets a handle to the icc path as part of table add, drm/msm
could do either:

a) Conditionally enumerate the gpu->icc_path handle only when pm/opp core
has not got the icc path handle. I could use something like [2] to
determine whether I should initialize gpu->icc_path.

b) Add peak-opp-configs in 845 dt and mandate all future versions to use
these bindings. With this, I can remove gpu->icc_path from msm/drm
completely and only rely on opp/core for bw voting.

The main thing is that we want to make sure newer dtb always works on
an older kernel without regression.. but, hmm..  I guess the
interconnects/interconnects-names properties haven't landed yet in
sdm845.dtsi?  Maybe that lets us go with the simpler approach (b).
Looks like we haven't wired up interconnect for 8916 or 8996 either,
so probably we can just mandate this for all of them?


I checked all three 845, 820 and 8916 and none of them have the 
interconnect configs for GPU. So, I think we are good here. I'll go with 
option (b) and re-spin v3. Adding interconnects and opp-peak-kBps 
configs for previous chips can be taken up as a separate activity.


Sharat


If we have landed the interconnect dts hookup for gpu somewhere that
I'm overlooking, I guess we would have to go with (a) and keep the
existing interconnects/interconnects-names properties.

BR,
-R


[1] - https://lore.kernel.org/patchwork/cover/1240687/

[2] - https://patchwork.kernel.org/patch/11527573/

Let me know your thoughts

Sharat


Jordan


   unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
--
2.7.4


--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
___
Freedreno mailing list
freedr...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


Re: [Freedreno] [PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth

2020-05-27 Thread Sharat Masetty

+ more folks

On 5/18/2020 9:55 PM, Rob Clark wrote:

On Mon, May 18, 2020 at 7:23 AM Jordan Crouse  wrote:

On Thu, May 14, 2020 at 04:24:18PM +0530, Sharat Masetty wrote:

This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency.

Signed-off-by: Sharat Masetty 
---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +-
  1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..79433d3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

   gmu->freq = gmu->gpu_freqs[perf_index];

- /*
-  * Eventually we will want to scale the path vote with the frequency but
-  * for now leave it at max so that the performance is nominal.
-  */
- icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+ dev_pm_opp_set_bw(&gpu->pdev->dev, opp);
  }

This adds an implicit requirement that all targets need bandwidth settings
defined in the OPP or they won't get a bus vote at all. I would prefer that
there be a default escape valve, but if not you'll need to add
bandwidth values for the sdm845 OPP so that target doesn't regress.


it looks like we could maybe do something like:

   ret = dev_pm_opp_set_bw(...);
   if (ret) {
   dev_warn_once(dev, "no bandwidth settings");
   icc_set_bw(...);
   }

?

BR,
-R


There is a bit of an issue here - it looks like it's not possible to have two icc
handles to the same path. It's causing double enumeration of the paths
in the icc core and messing up path votes. With [1], since opp/core
already gets a handle to the icc path as part of table add, drm/msm
could do either:


a) Conditionally enumerate the gpu->icc_path handle only when pm/opp core
has not got the icc path handle. I could use something like [2] to
determine whether I should initialize gpu->icc_path.


b) Add peak-opp-configs in 845 dt and mandate all future versions to use
these bindings. With this, I can remove gpu->icc_path from msm/drm
completely and only rely on opp/core for bw voting.


[1] - https://lore.kernel.org/patchwork/cover/1240687/

[2] - https://patchwork.kernel.org/patch/11527573/

Let me know your thoughts

Sharat




Jordan


  unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
--
2.7.4


--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
___
Freedreno mailing list
freedr...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth

2020-05-14 Thread Sharat Masetty
This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..79433d3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

gmu->freq = gmu->gpu_freqs[perf_index];

-   /*
-* Eventually we will want to scale the path vote with the frequency but
-* for now leave it at max so that the performance is nominal.
-*/
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+   dev_pm_opp_set_bw(&gpu->pdev->dev, opp);
 }

 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
--
2.7.4


[PATCH 4/6] drm: msm: a6xx: send opp instead of a frequency

2020-05-14 Thread Sharat Masetty
This patch changes the plumbing to send the devfreq recommended opp rather
than the frequency. Also consolidate and rearrange the code in a6xx to set
the GPU frequency and the icc vote in preparation for the upcoming
changes for GPU->DDR scaling votes.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 62 +++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h |  3 +-
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 748cd37..2d8124b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -100,17 +100,30 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
 }

-static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
+void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
 {
-   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
-   struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
-   struct msm_gpu *gpu = &adreno_gpu->base;
-   int ret;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+   u32 perf_index;
+   unsigned long gpu_freq;
+   int ret = 0;
+
+   gpu_freq = dev_pm_opp_get_freq(opp);
+
+   if (gpu_freq == gmu->freq)
+   return;
+
+   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
+   if (gpu_freq == gmu->gpu_freqs[perf_index])
+   break;
+
+   gmu->current_perf_index = perf_index;

gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);

gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
-   ((3 & 0xf) << 28) | index);
+   ((3 & 0xf) << 28) | perf_index);

/*
 * Send an invalid index as a vote for the bus bandwidth and let the
@@ -126,7 +139,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
if (ret)
dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);

-   gmu->freq = gmu->gpu_freqs[index];
+   gmu->freq = gmu->gpu_freqs[perf_index];

/*
 * Eventually we will want to scale the path vote with the frequency but
@@ -135,25 +148,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
 }

-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
-{
-   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-   struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
-   u32 perf_index = 0;
-
-   if (freq == gmu->freq)
-   return;
-
-   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
-   if (freq == gmu->gpu_freqs[perf_index])
-   break;
-
-   gmu->current_perf_index = perf_index;
-
-   __a6xx_gmu_set_freq(gmu, perf_index);
-}
-
 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -708,6 +702,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
a6xx_gmu_rpmh_off(gmu);
 }

+static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu 
*gmu)
+{
+   struct dev_pm_opp *gpu_opp;
+   unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
+
+   gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   return;
+
+   a6xx_gmu_set_freq(gpu, gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+}
+
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 {
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
@@ -759,8 +766,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
enable_irq(gmu->hfi_irq);

-   /* Set the GPU to the current freq */
-   __a6xx_gmu_set_freq(gmu, gmu->current_perf_index);
+   a6xx_gmu_set_initial_freq(gpu, gmu);

/*
 * "enable" the GX power domain which won't actually do anything but it
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 7239b8b..03ba60d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum 
a6xx_gmu_oob_state state);
 int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
 void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)

[PATCH 2/6] arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp

2020-05-14 Thread Sharat Masetty
Add opp-peak-kBps bindings to the GPU opp table, listing the peak
GPU -> DDR bandwidth requirement for each opp level. This will be
used to scale the DDR bandwidth along with the GPU frequency dynamically.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 0ce9921..89f7767 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1392,36 +1392,43 @@
opp-8 {
opp-hz = /bits/ 64 <8>;
opp-level = 
;
+   opp-peak-kBps = <8532000>;
};

opp-65000 {
opp-hz = /bits/ 64 <65000>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-56500 {
opp-hz = /bits/ 64 <56500>;
opp-level = ;
+   opp-peak-kBps = <5412000>;
};

opp-43000 {
opp-hz = /bits/ 64 <43000>;
opp-level = 
;
+   opp-peak-kBps = <5412000>;
};

opp-35500 {
opp-hz = /bits/ 64 <35500>;
opp-level = ;
+   opp-peak-kBps = <3072000>;
};

opp-26700 {
opp-hz = /bits/ 64 <26700>;
opp-level = 
;
+   opp-peak-kBps = <3072000>;
};

opp-18000 {
opp-hz = /bits/ 64 <18000>;
opp-level = 
;
+   opp-peak-kBps = <1804000>;
};
};
};
--
2.7.4


[PATCH 0/6] Add support for GPU DDR BW scaling

2020-05-14 Thread Sharat Masetty
This is a rework of my previous series [1], but this time based on the bindings
from Georgi [2] + a few fixes which look to be fixed in v8 of Georgi's series
[3]. The work is based on the chromeOS tip.

[1]: https://patchwork.freedesktop.org/series/75291/
[2]: https://lore.kernel.org/patchwork/cover/1230626/
[3]: https://lore.kernel.org/patchwork/cover/1240687/

Sharat Masetty (5):
  arm64: dts: qcom: sc7180: Add interconnect bindings for GPU
  arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp
  drm: msm: a6xx: send opp instead of a frequency
  drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth
  dt-bindings: drm/msm/gpu: Document gpu opp table

Sibi Sankar (1):
  OPP: Add and export helper to set bandwidth

 .../devicetree/bindings/display/msm/gpu.txt| 28 +
 arch/arm64/boot/dts/qcom/sc7180.dtsi   |  9 +++
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c  | 68 +++---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h  |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c  |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h  |  3 +-
 drivers/opp/core.c | 43 ++
 include/linux/pm_opp.h |  6 ++
 8 files changed, 125 insertions(+), 37 deletions(-)

--
2.7.4


[PATCH 6/6] dt-bindings: drm/msm/gpu: Document gpu opp table

2020-05-14 Thread Sharat Masetty
Update documentation to list the gpu opp table bindings including the
newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling.

Signed-off-by: Sharat Masetty 
---
 .../devicetree/bindings/display/msm/gpu.txt| 28 ++
 1 file changed, 28 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt 
b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 70025cb..48bd4ab 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -79,6 +79,34 @@ Example a6xx (with GMU):

interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>;

+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = ;
+   opp-peak-kBps = <5412000>;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = ;
+   opp-peak-kBps = <1804000>;
+   };
+   };
+
qcom,gmu = <&gmu>;

zap-shader {
--
2.7.4


[PATCH 1/6] arm64: dts: qcom: sc7180: Add interconnect bindings for GPU

2020-05-14 Thread Sharat Masetty
This patch adds the interconnect bindings to the GPU node. This enables
the GPU->DDR path bandwidth voting.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index b46ee78..0ce9921 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1384,6 +1384,8 @@
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;

+   interconnects = <&gem_noc MASTER_GFX3D &mc_virt 
SLAVE_EBI1>;
+
gpu_opp_table: opp-table {
compatible = "operating-points-v2";

--
2.7.4


[PATCH 3/6] OPP: Add and export helper to set bandwidth

2020-05-14 Thread Sharat Masetty
From: Sibi Sankar 

Add and export 'dev_pm_opp_set_bw' to set the bandwidth
levels associated with an OPP for a given frequency.

Signed-off-by: Sibi Sankar 
Signed-off-by: Sharat Masetty 
---
 drivers/opp/core.c | 43 +++
 include/linux/pm_opp.h |  6 ++
 2 files changed, 49 insertions(+)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index f42b7c4..0f34077 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -828,6 +828,49 @@ static int _set_required_opps(struct device *dev,
 }

 /**
+ * dev_pm_opp_set_bw() - sets bandwidth levels corresponding to an available 
opp
+ * @dev:   device for which we do this operation
+ * @opp:   opp based on which the bandwidth levels are to be configured
+ *
+ * This configures the bandwidth to the levels specified
+ * by the OPP.
+ *
+ * Return: 0 on success or a negative error value.
+ */
+int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp)
+{
+   struct opp_table *opp_table;
+   int ret = -EINVAL;
+   int i;
+
+   if (IS_ERR_OR_NULL(opp) || !opp->available) {
+   dev_err(dev, "%s: Invalid parameters\n", __func__);
+   return -EINVAL;
+   }
+
+   opp_table = _find_opp_table(dev);
+   if (IS_ERR(opp_table)) {
+   dev_err(dev, "%s: device opp table doesn't exist\n", __func__);
+   return PTR_ERR(opp_table);
+   }
+
+   if (opp_table->paths) {
+   for (i = 0; i < opp_table->path_count; i++) {
+   ret = icc_set_bw(opp_table->paths[i],
+opp->bandwidth[i].avg,
+opp->bandwidth[i].peak);
+   if (ret)
+   dev_err(dev, "Failed to set bandwidth[%d]: 
%d\n",
+   i, ret);
+   }
+   }
+
+   dev_pm_opp_put_opp_table(opp_table);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_bw);
+
+/**
  * dev_pm_opp_set_rate() - Configure new OPP based on frequency
  * @dev:device for which we do this operation
  * @target_freq: frequency to achieve
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 76f8c6b..04f7fda 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -156,6 +156,7 @@ struct dev_pm_opp *dev_pm_opp_xlate_opp(struct opp_table 
*src_table,
struct opp_table *dst_table,
struct dev_pm_opp *src_opp);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
+int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask 
*cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask 
*cpumask);
 void dev_pm_opp_remove_table(struct device *dev);
@@ -354,6 +355,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, 
unsigned long target_f
return -ENOTSUPP;
 }

+static inline int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp)
+{
+   return -ENOTSUPP;
+}
+
 static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const 
struct cpumask *cpumask)
 {
return -ENOTSUPP;
--
2.7.4


[PATCH 2/2] dt-bindings: arm-smmu: Add sc7180 compatible string

2020-05-01 Thread Sharat Masetty
This patch simply adds a new compatible string for SC7180 platform.

Signed-off-by: Sharat Masetty 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..986098b 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2
 
-- 
1.9.1


[PATCH 1/2] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-05-01 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
* Remove GCC_DDRSS_GPU_AXI_CLK clock reference from gpu smmu node.

 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 4216b57..de9a054 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1373,6 +1373,108 @@
};
};

+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <&adreno_smmu 0>;
+   operating-points-v2 = <&gpu_opp_table>;
+   qcom,gmu = <&gmu>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+
+   clocks = <&gcc GCC_GPU_MEMNOC_GFX_CLK>,
+   <&gcc GCC_GPU_CFG_AHB_CLK>;
+   clock-names = "bus", "iface";
+
+   power-domains = <&gpucc CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = <&gpucc GPU_CC_CX_GMU_CLK>,
+  <&gpucc GPU_CC_CXO_CLK>,
+  <&gcc GCC_DDRSS_GPU_AXI_CLK>,
+  <&gcc GCC_GPU_MEMNOC_GFX_CLK>;
+   clock-names = "gmu", "cxo", "axi", "memnoc";
+   power-domains = <&gpucc CX_GDSC>, <&

Re: [Freedreno] [PATCH v2] dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock

2020-05-01 Thread Sharat Masetty



On 4/30/2020 11:51 PM, Doug Anderson wrote:

Hi,

On Thu, Apr 30, 2020 at 11:12 AM Jordan Crouse  wrote:

On Thu, Apr 30, 2020 at 09:29:47AM +0530, Sharat Masetty wrote:

This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
v2: Addressed review comments from Doug

  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 8 
  1 file changed, 8 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..ba5dba4 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
- enum:
- qcom,msm8996-smmu-v2
- qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
- qcom,sdm845-smmu-v2
- const: qcom,smmu-v2

@@ -113,16 +114,23 @@ properties:
present in such cases.

clock-names:
+minItems: 2
+maxItems: 3
  items:
- const: bus
- const: iface
+  - const: mem_iface

Hi Sharat -

I think there was a bit of confusion due to renaming between downstream and
upstream.  Currently for the sdm845 and friends we have:

   clocks = <&gcc GCC_GPU_MEMNOC_GFX_CLK>,
  <&gcc GCC_GPU_CFG_AHB_CLK>;
   clock-names = "bus", "iface";

Confusingly these same clocks downstream are "mem_iface_clk" and "iface_clk"
respectively.

It looks like you are trying to add GCC_DDRSS_GPU_AXI_CLK as "mem_iface" which
was formerly "mem_clk" downstream. I'm not sure if the naming change is
intentional or you were trying to make upstream and downstream match and didn't
realize that they were renamed.

I'm not sure if we need DDRSS_GPU_AXI_CLK or not. Empirically it works without
it for sdm845 (I don't have a sc7180 to test) but we should probably loop back
with either the clock team or the hardware designers to be sure there isn't a
corner case that is missing. I agree with Doug that it's always best if we don't
need to add a clock.


Thanks Jordan and Doug for the updates. My intention was to add the 
third clock as listed downstream, but as you said the naming is a bit 
misleading. From the clock GCC_DDRSS_GPU_AXI_CLK description, this is 
needed for the GPU to DDR access and all transactions to the DDR from 
the GPU go through the SMMU. It is listed in the SMMU dt node because 
it's needed by the SMMU to perform pagetable walks.


I think we may be fine by not listing this clock in the SMMU node 
because the same clock is listed in both the GMU and also the GPU.



I can confirm that on sc7180 the GPU seems to come up just fine
without the clock being specified in the iommu node.  Definitely would
be good to know what's broken and if nothing is broken maybe we can
change this patch to just add the sc7180 compatible string and drop
the clock.  I do note that the GMU already has a reference to the same
"GCC_DDRSS_GPU_AXI_CLK" clock.

-Doug
___
Freedreno mailing list
freedr...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[PATCH v2] dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock

2020-04-29 Thread Sharat Masetty
This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
v2: Addressed review comments from Doug

 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 8 
 1 file changed, 8 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..ba5dba4 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2

@@ -113,16 +114,23 @@ properties:
   present in such cases.

   clock-names:
+minItems: 2
+maxItems: 3
 items:
   - const: bus
   - const: iface
+  - const: mem_iface

   clocks:
+minItems: 2
+maxItems: 3
 items:
   - description: bus clock required for downstream bus access and for the
   smmu ptw
   - description: interface clock required to access smmu's registers
   through the TCU's programming interface.
+  - description: clock required for the inner working of SMMU TBUs and the
+  TCU like the pagetable walks and the TLB flushes.

   power-domains:
 maxItems: 1
--
1.9.1


Re: [PATCH] dt-bindings: arm-smmu: Add a new compatible string and a clock

2020-04-29 Thread Sharat Masetty



On 4/29/2020 3:57 AM, Doug Anderson wrote:

Hi,

On Tue, Apr 28, 2020 at 4:39 AM Sharat Masetty  wrote:

This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 7 +++
  1 file changed, 7 insertions(+)

nit: mention sc7180 in subject, like:

dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock



diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..15946ac 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
- enum:
- qcom,msm8996-smmu-v2
- qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
- qcom,sdm845-smmu-v2
- const: qcom,smmu-v2

@@ -113,16 +114,22 @@ properties:
present in such cases.

clock-names:
+minItems: 2
+maxItems: 3
  items:
- const: bus
- const: iface
+  - const: mem_iface_clk

People usually frown on clock-names ending in "_clk".  Just name it "mem_iface".



clocks:
+minItems: 2
+maxItems: 3
  items:
- description: bus clock required for downstream bus access and for the
smmu ptw
- description: interface clock required to access smmu's registers
through the TCU's programming interface.
+  - description: clock required for the SMMU TBUs and the TCU

Is this clock only needed for sc7180, or would it be useful if we
enabled certain features on existing devices?  Please document exactly
when someone would provide this clock and when they'd leave it off.

...also: maybe it's obvious to those that understand IOMMUs in depth,
but to me I have no idea what your description means and why it's
different from the other two clocks.  Any way you could punch up your
description a little bit?

Looking at sdm845 I see that this clock seems to exist but wasn't
listed in the IOMMU device tree node.  Is that a mistake on sdm845?
...or is it just fine because the GPU holds the clock?  Is there a
reason the sdm845 solution and the sc7180 solution shouldn't be the
same (AKA we should either add this clock to the sdm845 device tree
file or remove it from sc7180)?


I went and checked the downstream SDM845 device tree for GPU SMMU and I 
do see this clock listed on there. I am no expert in SMMU either but my 
understanding is that this clock is needed for core working of the SMMU 
like the pagetable walks, TLB invalidations etc, whereas the other two 
clocks are required to access SMMU register space from the host. My 
proposal is to add this clock to SDM845 as well as a follow up effort so 
that we can remove the Min/MaxItems properties which I do not like.


@Jordan, do you remember why this clock was added to SDM845?


Thanks!

-Doug


[PATCH] dt-bindings: arm-smmu: Add a new compatible string and a clock

2020-04-28 Thread Sharat Masetty
This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..15946ac 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2
 
@@ -113,16 +114,22 @@ properties:
   present in such cases.
 
   clock-names:
+minItems: 2
+maxItems: 3
 items:
   - const: bus
   - const: iface
+  - const: mem_iface_clk
 
   clocks:
+minItems: 2
+maxItems: 3
 items:
   - description: bus clock required for downstream bus access and for the
   smmu ptw
   - description: interface clock required to access smmu's registers
   through the TCU's programming interface.
+  - description: clock required for the SMMU TBUs and the TCU
 
   power-domains:
 maxItems: 1
-- 
1.9.1


[PATCH 3/3] drm/msm: Optimize adreno_show_object()

2018-11-05 Thread Sharat Masetty
When the userspace tries to read the crashstate dump, the read side
implementation in the driver currently ascii85 encodes all the binary
buffers and it does this each time the read system call is called.
A userspace tool like cat typically does a page by page read and the
number of read calls depends on the size of the data captured by the
driver. This is certainly not desirable and does not scale well with
large captures.

This patch encodes the buffer only once in the read path. With this there
is an immediate >10X speed improvement in crashstate save time.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 76 -
 drivers/gpu/drm/msm/msm_gpu.h   |  2 +
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index c93702d..e29093e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -475,34 +475,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state)
 
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 
-static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len)
+static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
 {
-   char out[ASCII85_BUFSZ];
-   long l, datalen, i;
+   void *buf;
+   size_t buf_itr = 0;
+   long i, l;
 
-   if (!ptr || !len)
-   return;
+   if (!len)
+   return NULL;
+
+   l = ascii85_encode_len(len);
 
/*
-* Only dump the non-zero part of the buffer - rarely will any data
-* completely fill the entire allocated size of the buffer
+* ascii85 outputs either a 5 byte string or a 1 byte string. So we
+* account for the worst case of 5 bytes per dword plus the 1 for '\0'
 */
-   for (datalen = 0, i = 0; i < len >> 2; i++) {
-   if (ptr[i])
-   datalen = (i << 2) + 1;
-   }
+   buf = kvmalloc((l * 5) + 1, GFP_KERNEL);
+   if (!buf)
+   return NULL;
 
-   /* Skip printing the object if it is empty */
-   if (datalen == 0)
+   for (i = 0; i < l; i++)
+   buf_itr += ascii85_encode_to_buf(src[i], buf + buf_itr);
+
+   return buf;
+}
+
+/* len is expected to be in bytes */
+static void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+   bool *encoded)
+{
+   if (!*ptr || !len)
return;
 
-   l = ascii85_encode_len(datalen);
+   if (!*encoded) {
+   long datalen, i;
+   u32 *buf = *ptr;
+
+   /*
+* Only dump the non-zero part of the buffer - rarely will
+* any data completely fill the entire allocated size of
+* the buffer.
+*/
+   for (datalen = 0, i = 0; i < len >> 2; i++) {
+   if (buf[i])
+   datalen = ((i + 1) << 2);
+   }
+
+   /*
+* If we reach here, then the originally captured binary buffer
+* will be replaced with the ascii85 encoded string
+*/
+   *ptr = adreno_gpu_ascii85_encode(buf, datalen);
+
+   kvfree(buf);
+
+   *encoded = true;
+   }
+
+   if (!*ptr)
+   return;
 
drm_puts(p, "data: !!ascii85 |\n");
drm_puts(p, " ");
 
-   for (i = 0; i < l; i++)
-   drm_puts(p, ascii85_encode(ptr[i], out));
+   drm_puts(p, *ptr);
 
drm_puts(p, "\n");
 }
@@ -534,8 +570,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state 
*state,
drm_printf(p, "wptr: %d\n", state->ring[i].wptr);
drm_printf(p, "size: %d\n", MSM_GPU_RINGBUFFER_SZ);
 
-   adreno_show_object(p, state->ring[i].data,
-   state->ring[i].data_size);
+   adreno_show_object(p, &state->ring[i].data,
+   state->ring[i].data_size, &state->ring[i].encoded);
}
 
if (state->bos) {
@@ -546,8 +582,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state 
*state,
state->bos[i].iova);
drm_printf(p, "size: %zd\n", state->bos[i].size);
 
-   adreno_show_object(p, state->bos[i].data,
-   state->bos[i].size);
+   adreno_show_object(p, &state->bos[i].data,
+   state->bos[i].size, &state->bos[i].encoded);
}
}
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index f82bac0..efb49bb 100644
--- a/drivers/gpu/drm/msm/msm_

[PATCH] lib/string: Pass the input gfp flags to kmalloc

2018-10-15 Thread Sharat Masetty
Pass the caller-supplied gfp flags to the kmalloc() calls. This allows the
functions to be called from whatever context the caller requires.

Signed-off-by: Sharat Masetty 
---
 lib/string_helpers.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 29c490e..60f9015 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -576,7 +576,7 @@ char *kstrdup_quotable_cmdline(struct task_struct *task, 
gfp_t gfp)
char *buffer, *quoted;
int i, res;
 
-   buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+   buffer = kmalloc(PAGE_SIZE, gfp);
if (!buffer)
return NULL;
 
@@ -612,7 +612,7 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
return kstrdup("", gfp);
 
/* We add 11 spaces for ' (deleted)' to be appended */
-   temp = kmalloc(PATH_MAX + 11, GFP_KERNEL);
+   temp = kmalloc(PATH_MAX + 11, gfp);
if (!temp)
return kstrdup("", gfp);
 
-- 
1.9.1



Re: [PATCH net-next,v4] hyperv: Add support for virtual Receive Side Scaling (vRSS)

2014-04-07 Thread Sharat Masetty
Hi Zhang,

How is this beneficial when compared to RPS(receive packet steering)?
Can you please provide more details on what this patch does?

Thanks
Sharat


On Mon, Apr 7, 2014 at 12:42 PM, David Miller  wrote:
>
> The net-next tree is not open yet, I will announce when it is and you can 
> submit
> net-next targeted patches.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/