Re: [PATCH V4 13/45] block: blk-merge: try to make front segments in full size

2018-01-09 Thread Dmitry Osipenko
On 09.01.2018 05:34, Ming Lei wrote:
> On Tue, Jan 09, 2018 at 12:09:27AM +0300, Dmitry Osipenko wrote:
>> On 18.12.2017 15:22, Ming Lei wrote:
>>> When merging one bvec into segment, if the bvec is too big
>>> to merge, current policy is to move the whole bvec into another
>>> new segment.
>>>
>>> This patchset changes the policy into trying to maximize size of
>>> front segments, that means in above situation, part of bvec
>>> is merged into current segment, and the remainder is put
>>> into next segment.
>>>
>>> This patch prepares for support multipage bvec because
>>> it can be quite common to see this case and we should try
>>> to make front segments in full size.
>>>
>>> Signed-off-by: Ming Lei 
>>> ---
>>>  block/blk-merge.c | 54 
>>> +-
>>>  1 file changed, 49 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/block/blk-merge.c b/block/blk-merge.c
>>> index a476337a8ff4..42ceb89bc566 100644
>>> --- a/block/blk-merge.c
>>> +++ b/block/blk-merge.c
>>> @@ -109,6 +109,7 @@ static struct bio *blk_bio_segment_split(struct 
>>> request_queue *q,
>>> bool do_split = true;
>>> struct bio *new = NULL;
>>> const unsigned max_sectors = get_max_io_size(q, bio);
>>> +   unsigned advance = 0;
>>>  
>>> bio_for_each_segment(bv, bio, iter) {
>>> /*
>>> @@ -134,12 +135,32 @@ static struct bio *blk_bio_segment_split(struct 
>>> request_queue *q,
>>> }
>>>  
>>> if (bvprvp && blk_queue_cluster(q)) {
>>> -   if (seg_size + bv.bv_len > queue_max_segment_size(q))
>>> -   goto new_segment;
>>> if (!BIOVEC_PHYS_MERGEABLE(bvprvp, ))
>>> goto new_segment;
>>> if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, ))
>>> goto new_segment;
>>> +   if (seg_size + bv.bv_len > queue_max_segment_size(q)) {
>>> +   /*
>>> +* On assumption is that initial value of
>>> +* @seg_size(equals to bv.bv_len) won't be
>>> +* bigger than max segment size, but will
>>> +* becomes false after multipage bvec comes.
>>> +*/
>>> +   advance = queue_max_segment_size(q) - seg_size;
>>> +
>>> +   if (advance > 0) {
>>> +   seg_size += advance;
>>> +   sectors += advance >> 9;
>>> +   bv.bv_len -= advance;
>>> +   bv.bv_offset += advance;
>>> +   }
>>> +
>>> +   /*
>>> +* Still need to put remainder of current
>>> +* bvec into a new segment.
>>> +*/
>>> +   goto new_segment;
>>> +   }
>>>  
>>> seg_size += bv.bv_len;
>>> bvprv = bv;
>>> @@ -161,6 +182,12 @@ static struct bio *blk_bio_segment_split(struct 
>>> request_queue *q,
>>> seg_size = bv.bv_len;
>>> sectors += bv.bv_len >> 9;
>>>  
>>> +   /* restore the bvec for iterator */
>>> +   if (advance) {
>>> +   bv.bv_len += advance;
>>> +   bv.bv_offset -= advance;
>>> +   advance = 0;
>>> +   }
>>> }
>>>  
>>> do_split = false;
>>> @@ -361,16 +388,29 @@ __blk_segment_map_sg(struct request_queue *q, struct 
>>> bio_vec *bvec,
>>>  {
>>>  
>>> int nbytes = bvec->bv_len;
>>> +   unsigned advance = 0;
>>>  
>>> if (*sg && *cluster) {
>>> -   if ((*sg)->length + nbytes > queue_max_segment_size(q))
>>> -   goto new_segment;
>>> -
>>> if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
>>> goto new_segment;
>>> if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
>>

Re: [PATCH V4 13/45] block: blk-merge: try to make front segments in full size

2018-01-09 Thread Dmitry Osipenko
On 09.01.2018 17:33, Ming Lei wrote:
> On Tue, Jan 09, 2018 at 04:18:39PM +0300, Dmitry Osipenko wrote:
>> On 09.01.2018 05:34, Ming Lei wrote:
>>> On Tue, Jan 09, 2018 at 12:09:27AM +0300, Dmitry Osipenko wrote:
>>>> On 18.12.2017 15:22, Ming Lei wrote:
>>>>> When merging one bvec into segment, if the bvec is too big
>>>>> to merge, current policy is to move the whole bvec into another
>>>>> new segment.
>>>>>
>>>>> This patchset changes the policy into trying to maximize size of
>>>>> front segments, that means in above situation, part of bvec
>>>>> is merged into current segment, and the remainder is put
>>>>> into next segment.
>>>>>
>>>>> This patch prepares for support multipage bvec because
>>>>> it can be quite common to see this case and we should try
>>>>> to make front segments in full size.
>>>>>
>>>>> Signed-off-by: Ming Lei 
>>>>> ---
>>>>>  block/blk-merge.c | 54 
>>>>> +-
>>>>>  1 file changed, 49 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/block/blk-merge.c b/block/blk-merge.c
>>>>> index a476337a8ff4..42ceb89bc566 100644
>>>>> --- a/block/blk-merge.c
>>>>> +++ b/block/blk-merge.c
>>>>> @@ -109,6 +109,7 @@ static struct bio *blk_bio_segment_split(struct 
>>>>> request_queue *q,
>>>>>   bool do_split = true;
>>>>>   struct bio *new = NULL;
>>>>>   const unsigned max_sectors = get_max_io_size(q, bio);
>>>>> + unsigned advance = 0;
>>>>>  
>>>>>   bio_for_each_segment(bv, bio, iter) {
>>>>>   /*
>>>>> @@ -134,12 +135,32 @@ static struct bio *blk_bio_segment_split(struct 
>>>>> request_queue *q,
>>>>>   }
>>>>>  
>>>>>   if (bvprvp && blk_queue_cluster(q)) {
>>>>> - if (seg_size + bv.bv_len > queue_max_segment_size(q))
>>>>> - goto new_segment;
>>>>>   if (!BIOVEC_PHYS_MERGEABLE(bvprvp, ))
>>>>>   goto new_segment;
>>>>>   if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, ))
>>>>>   goto new_segment;
>>>>> + if (seg_size + bv.bv_len > queue_max_segment_size(q)) {
>>>>> + /*
>>>>> +  * On assumption is that initial value of
>>>>> +  * @seg_size(equals to bv.bv_len) won't be
>>>>> +  * bigger than max segment size, but will
>>>>> +  * becomes false after multipage bvec comes.
>>>>> +  */
>>>>> + advance = queue_max_segment_size(q) - seg_size;
>>>>> +
>>>>> + if (advance > 0) {
>>>>> + seg_size += advance;
>>>>> + sectors += advance >> 9;
>>>>> + bv.bv_len -= advance;
>>>>> + bv.bv_offset += advance;
>>>>> + }
>>>>> +
>>>>> + /*
>>>>> +  * Still need to put remainder of current
>>>>> +  * bvec into a new segment.
>>>>> +  */
>>>>> + goto new_segment;
>>>>> + }
>>>>>  
>>>>>   seg_size += bv.bv_len;
>>>>>   bvprv = bv;
>>>>> @@ -161,6 +182,12 @@ static struct bio *blk_bio_segment_split(struct 
>>>>> request_queue *q,
>>>>>   seg_size = bv.bv_len;
>>>>>   sectors += bv.bv_len >> 9;
>>>>>  
>>>>> + /* restore the bvec for iterator */
>>>>> + if (advance) {
>>>>> + bv.bv_len += advance;
>>>>> + bv.bv_offset -= advance;
>>>>> + advance = 0;
>>>>> + }
>>>>>   }
>>>

[PATCH v3 3/3] clk: tegra: Specify VDE clock rate

2018-01-10 Thread Dmitry Osipenko
Currently the VDE clock rate is determined by the clock config left by
the bootloader. Let's not rely on it and instead explicitly specify the
clock rate in the CCF driver.

Signed-off-by: Dmitry Osipenko 
Acked-by: Peter De Schrijver 
---

Change log:
v2: No change.
v3: No change.

 drivers/clk/tegra/clk-tegra114.c | 1 +
 drivers/clk/tegra/clk-tegra124.c | 2 +-
 drivers/clk/tegra/clk-tegra20.c  | 1 +
 drivers/clk/tegra/clk-tegra30.c  | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index c3945c683f60..5d5a22d529f5 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -1189,6 +1189,7 @@ static struct tegra_clk_init_table init_table[] 
__initdata = {
{ TEGRA114_CLK_XUSB_HS_SRC, TEGRA114_CLK_XUSB_SS_DIV2, 6120, 0 },
{ TEGRA114_CLK_XUSB_FALCON_SRC, TEGRA114_CLK_PLL_P, 20400, 0 },
{ TEGRA114_CLK_XUSB_HOST_SRC, TEGRA114_CLK_PLL_P, 10200, 0 },
+   { TEGRA114_CLK_VDE, TEGRA114_CLK_CLK_MAX, 6, 0 },
/* must be the last entry */
{ TEGRA114_CLK_CLK_MAX, TEGRA114_CLK_CLK_MAX, 0, 0 },
 };
diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c
index 230f9a2c1abf..50088e976611 100644
--- a/drivers/clk/tegra/clk-tegra124.c
+++ b/drivers/clk/tegra/clk-tegra124.c
@@ -1267,7 +1267,7 @@ static struct tegra_clk_init_table common_init_table[] 
__initdata = {
{ TEGRA124_CLK_I2S2, TEGRA124_CLK_PLL_A_OUT0, 11289600, 0 },
{ TEGRA124_CLK_I2S3, TEGRA124_CLK_PLL_A_OUT0, 11289600, 0 },
{ TEGRA124_CLK_I2S4, TEGRA124_CLK_PLL_A_OUT0, 11289600, 0 },
-   { TEGRA124_CLK_VDE, TEGRA124_CLK_PLL_P, 0, 0 },
+   { TEGRA124_CLK_VDE, TEGRA124_CLK_CLK_MAX, 6, 0 },
{ TEGRA124_CLK_HOST1X, TEGRA124_CLK_PLL_P, 13600, 1 },
{ TEGRA124_CLK_DSIALP, TEGRA124_CLK_PLL_P, 6800, 0 },
{ TEGRA124_CLK_DSIBLP, TEGRA124_CLK_PLL_P, 6800, 0 },
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index dec95919fbff..0ee56dd04cec 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -1048,6 +1048,7 @@ static struct tegra_clk_init_table init_table[] 
__initdata = {
{ TEGRA20_CLK_DISP2, TEGRA20_CLK_PLL_P, 6, 0 },
{ TEGRA20_CLK_GR2D, TEGRA20_CLK_PLL_C, 3, 0 },
{ TEGRA20_CLK_GR3D, TEGRA20_CLK_PLL_C, 3, 0 },
+   { TEGRA20_CLK_VDE, TEGRA20_CLK_CLK_MAX, 3, 0 },
/* must be the last entry */
{ TEGRA20_CLK_CLK_MAX, TEGRA20_CLK_CLK_MAX, 0, 0 },
 };
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 8428895ad475..b316dfb6f6c7 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -1266,6 +1266,7 @@ static struct tegra_clk_init_table init_table[] 
__initdata = {
{ TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 3, 0 },
{ TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 3, 0 },
{ TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 48000, 0 },
+   { TEGRA30_CLK_VDE, TEGRA30_CLK_CLK_MAX, 6, 0 },
/* must be the last entry */
{ TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0 },
 };
-- 
2.15.1



[PATCH v3 1/3] clk: tegra: Mark HCLK, SCLK and EMC as critical

2018-01-10 Thread Dmitry Osipenko
Machine dies if HCLK, SCLK or EMC is disabled. Hence mark these clocks
as critical.

Signed-off-by: Dmitry Osipenko 
Acked-by: Peter De Schrijver 
---

Change log:
v2: Fixed EMC accidentally not being marked as critical on Tegra30 and
Tegra124. Switched to using the common EMC gate definition on Tegra20
and Tegra30.

v3: Dropped marking PLL_P outputs as critical, because it seems they
are not so critical. However, I still haven't got a definitive answer
about exactly which HW functions are affected by the fixed clocks.
Anyway, it should be cleaner to correct the actual drivers.

 drivers/clk/tegra/clk-emc.c  |  2 +-
 drivers/clk/tegra/clk-tegra-periph.c |  2 +-
 drivers/clk/tegra/clk-tegra-super-gen4.c |  8 +---
 drivers/clk/tegra/clk-tegra114.c |  3 +--
 drivers/clk/tegra/clk-tegra124.c |  7 +++
 drivers/clk/tegra/clk-tegra20.c  | 23 ++-
 drivers/clk/tegra/clk-tegra210.c |  3 +--
 drivers/clk/tegra/clk-tegra30.c  | 14 --
 8 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c
index 11a5066e5c27..5234acd30e89 100644
--- a/drivers/clk/tegra/clk-emc.c
+++ b/drivers/clk/tegra/clk-emc.c
@@ -515,7 +515,7 @@ struct clk *tegra_clk_register_emc(void __iomem *base, 
struct device_node *np,
 
init.name = "emc";
init.ops = _clk_emc_ops;
-   init.flags = 0;
+   init.flags = CLK_IS_CRITICAL;
init.parent_names = emc_parent_clk_names;
init.num_parents = ARRAY_SIZE(emc_parent_clk_names);
 
diff --git a/drivers/clk/tegra/clk-tegra-periph.c 
b/drivers/clk/tegra/clk-tegra-periph.c
index c02711927d79..2acba2986bc6 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -830,7 +830,7 @@ static struct tegra_periph_init_data gate_clks[] = {
GATE("xusb_host", "xusb_host_src", 89, 0, tegra_clk_xusb_host, 0),
GATE("xusb_ss", "xusb_ss_src", 156, 0, tegra_clk_xusb_ss, 0),
GATE("xusb_dev", "xusb_dev_src", 95, 0, tegra_clk_xusb_dev, 0),
-   GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IGNORE_UNUSED),
+   GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IS_CRITICAL),
GATE("sata_cold", "clk_m", 129, TEGRA_PERIPH_ON_APB, 
tegra_clk_sata_cold, 0),
GATE("ispa", "isp", 23, 0, tegra_clk_ispa, 0),
GATE("ispb", "isp", 3, 0, tegra_clk_ispb, 0),
diff --git a/drivers/clk/tegra/clk-tegra-super-gen4.c 
b/drivers/clk/tegra/clk-tegra-super-gen4.c
index 10047107c1dc..89d6b47a27a8 100644
--- a/drivers/clk/tegra/clk-tegra-super-gen4.c
+++ b/drivers/clk/tegra/clk-tegra-super-gen4.c
@@ -125,7 +125,8 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
/* SCLK */
dt_clk = tegra_lookup_dt_id(tegra_clk_sclk, tegra_clks);
if (dt_clk) {
-   clk = clk_register_divider(NULL, "sclk", "sclk_mux", 0,
+   clk = clk_register_divider(NULL, "sclk", "sclk_mux",
+   CLK_IS_CRITICAL,
clk_base + SCLK_DIVIDER, 0, 8,
0, _lock);
*dt_clk = clk;
@@ -137,7 +138,8 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
clk = tegra_clk_register_super_mux("sclk",
gen_info->sclk_parents,
gen_info->num_sclk_parents,
-   CLK_SET_RATE_PARENT,
+   CLK_SET_RATE_PARENT |
+   CLK_IS_CRITICAL,
clk_base + SCLK_BURST_POLICY,
0, 4, 0, 0, NULL);
*dt_clk = clk;
@@ -151,7 +153,7 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
   clk_base + SYSTEM_CLK_RATE, 4, 2, 0,
   _lock);
clk = clk_register_gate(NULL, "hclk", "hclk_div",
-   CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+   CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
clk_base + SYSTEM_CLK_RATE,
7, CLK_GATE_SET_TO_DISABLE, _lock);
*dt_clk = clk;
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 63087d17c3e2..c3945c683f60 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/

[PATCH v3 2/3] clk: tegra20: Correct PLL_C_OUT1 setup

2018-01-10 Thread Dmitry Osipenko
PLL_C_OUT1 can't produce the 216 MHz defined in the init_table. Let's
set it to 240 MHz and explicitly specify the HCLK rate for consistency.

Signed-off-by: Dmitry Osipenko 
Acked-by: Peter De Schrijver 
---

Change log:
v2: No change.
v3: No change.

 drivers/clk/tegra/clk-tegra20.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index e3392ca2c2fc..dec95919fbff 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -1018,9 +1018,9 @@ static struct tegra_clk_init_table init_table[] 
__initdata = {
{ TEGRA20_CLK_PLL_P_OUT3, TEGRA20_CLK_CLK_MAX, 7200, 1 },
{ TEGRA20_CLK_PLL_P_OUT4, TEGRA20_CLK_CLK_MAX, 2400, 1 },
{ TEGRA20_CLK_PLL_C, TEGRA20_CLK_CLK_MAX, 6, 0 },
-   { TEGRA20_CLK_PLL_C_OUT1, TEGRA20_CLK_CLK_MAX, 21600, 0 },
-   { TEGRA20_CLK_SCLK, TEGRA20_CLK_PLL_C_OUT1, 0, 0 },
-   { TEGRA20_CLK_HCLK, TEGRA20_CLK_CLK_MAX, 0, 0 },
+   { TEGRA20_CLK_PLL_C_OUT1, TEGRA20_CLK_CLK_MAX, 24000, 0 },
+   { TEGRA20_CLK_SCLK, TEGRA20_CLK_PLL_C_OUT1, 24000, 0 },
+   { TEGRA20_CLK_HCLK, TEGRA20_CLK_CLK_MAX, 24000, 0 },
{ TEGRA20_CLK_PCLK, TEGRA20_CLK_CLK_MAX, 6000, 0 },
{ TEGRA20_CLK_CSITE, TEGRA20_CLK_CLK_MAX, 0, 1 },
{ TEGRA20_CLK_CCLK, TEGRA20_CLK_CLK_MAX, 0, 1 },
-- 
2.15.1



Re: [PATCH V4 13/45] block: blk-merge: try to make front segments in full size

2018-01-10 Thread Dmitry Osipenko
On 10.01.2018 05:40, Ming Lei wrote:
> On Tue, Jan 09, 2018 at 08:02:53PM +0300, Dmitry Osipenko wrote:
>> On 09.01.2018 17:33, Ming Lei wrote:
>>> On Tue, Jan 09, 2018 at 04:18:39PM +0300, Dmitry Osipenko wrote:
>>>> On 09.01.2018 05:34, Ming Lei wrote:
>>>>> On Tue, Jan 09, 2018 at 12:09:27AM +0300, Dmitry Osipenko wrote:
>>>>>> On 18.12.2017 15:22, Ming Lei wrote:
>>>>>>> When merging one bvec into segment, if the bvec is too big
>>>>>>> to merge, current policy is to move the whole bvec into another
>>>>>>> new segment.
>>>>>>>
>>>>>>> This patchset changes the policy into trying to maximize size of
>>>>>>> front segments, that means in above situation, part of bvec
>>>>>>> is merged into current segment, and the remainder is put
>>>>>>> into next segment.
>>>>>>>
>>>>>>> This patch prepares for support multipage bvec because
>>>>>>> it can be quite common to see this case and we should try
>>>>>>> to make front segments in full size.
>>>>>>>
>>>>>>> Signed-off-by: Ming Lei 
>>>>>>> ---
>>>>>>>  block/blk-merge.c | 54 
>>>>>>> +-
>>>>>>>  1 file changed, 49 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/block/blk-merge.c b/block/blk-merge.c
>>>>>>> index a476337a8ff4..42ceb89bc566 100644
>>>>>>> --- a/block/blk-merge.c
>>>>>>> +++ b/block/blk-merge.c
>>>>>>> @@ -109,6 +109,7 @@ static struct bio *blk_bio_segment_split(struct 
>>>>>>> request_queue *q,
>>>>>>> bool do_split = true;
>>>>>>> struct bio *new = NULL;
>>>>>>> const unsigned max_sectors = get_max_io_size(q, bio);
>>>>>>> +   unsigned advance = 0;
>>>>>>>  
>>>>>>> bio_for_each_segment(bv, bio, iter) {
>>>>>>> /*
>>>>>>> @@ -134,12 +135,32 @@ static struct bio *blk_bio_segment_split(struct 
>>>>>>> request_queue *q,
>>>>>>> }
>>>>>>>  
>>>>>>> if (bvprvp && blk_queue_cluster(q)) {
>>>>>>> -   if (seg_size + bv.bv_len > 
>>>>>>> queue_max_segment_size(q))
>>>>>>> -   goto new_segment;
>>>>>>> if (!BIOVEC_PHYS_MERGEABLE(bvprvp, ))
>>>>>>> goto new_segment;
>>>>>>> if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, ))
>>>>>>> goto new_segment;
>>>>>>> +   if (seg_size + bv.bv_len > 
>>>>>>> queue_max_segment_size(q)) {
>>>>>>> +   /*
>>>>>>> +* On assumption is that initial value 
>>>>>>> of
>>>>>>> +* @seg_size(equals to bv.bv_len) won't 
>>>>>>> be
>>>>>>> +* bigger than max segment size, but 
>>>>>>> will
>>>>>>> +* becomes false after multipage bvec 
>>>>>>> comes.
>>>>>>> +*/
>>>>>>> +   advance = queue_max_segment_size(q) - 
>>>>>>> seg_size;
>>>>>>> +
>>>>>>> +   if (advance > 0) {
>>>>>>> +   seg_size += advance;
>>>>>>> +   sectors += advance >> 9;
>>>>>>> +   bv.bv_len -= advance;
>>>>>>> +   bv.bv_offset += advance;
>>>>>>> +   }
>>>>>>> +
>>>>>>> +   /*
>>>>>>> +   

[PATCH v1] usb: chipidea: tegra: Use aligned DMA on Tegra30

2017-12-18 Thread Dmitry Osipenko
USB Ethernet gadget now works on Tegra30.

Signed-off-by: Dmitry Osipenko 
---
 drivers/usb/chipidea/ci_hdrc_tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_tegra.c 
b/drivers/usb/chipidea/ci_hdrc_tegra.c
index 7b65a1040d2c..7f4d2b6af37a 100644
--- a/drivers/usb/chipidea/ci_hdrc_tegra.c
+++ b/drivers/usb/chipidea/ci_hdrc_tegra.c
@@ -29,7 +29,7 @@ static const struct tegra_udc_soc_info tegra20_udc_soc_info = 
{
 };
 
 static const struct tegra_udc_soc_info tegra30_udc_soc_info = {
-   .flags = 0,
+   .flags = CI_HDRC_REQUIRES_ALIGNED_DMA,
 };
 
 static const struct tegra_udc_soc_info tegra114_udc_soc_info = {
-- 
2.15.1



[PATCH v2 2/2] clk: tegra20: Correct PLL_C_OUT1 setup

2017-12-18 Thread Dmitry Osipenko
PLL_C_OUT1 can't produce the 216 MHz defined in the init_table. Let's
set it to 240 MHz and explicitly specify the HCLK rate for consistency.

Signed-off-by: Dmitry Osipenko 
Acked-By:  Peter De Schrijver 
---

Change log:
v2: No change.

 drivers/clk/tegra/clk-tegra20.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index d143a867968a..06c743988ae2 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -1019,9 +1019,9 @@ static struct tegra_clk_init_table init_table[] 
__initdata = {
{ TEGRA20_CLK_PLL_P_OUT3, TEGRA20_CLK_CLK_MAX, 7200, 0 },
{ TEGRA20_CLK_PLL_P_OUT4, TEGRA20_CLK_CLK_MAX, 2400, 0 },
{ TEGRA20_CLK_PLL_C, TEGRA20_CLK_CLK_MAX, 6, 0 },
-   { TEGRA20_CLK_PLL_C_OUT1, TEGRA20_CLK_CLK_MAX, 21600, 0 },
-   { TEGRA20_CLK_SCLK, TEGRA20_CLK_PLL_C_OUT1, 0, 0 },
-   { TEGRA20_CLK_HCLK, TEGRA20_CLK_CLK_MAX, 0, 0 },
+   { TEGRA20_CLK_PLL_C_OUT1, TEGRA20_CLK_CLK_MAX, 24000, 0 },
+   { TEGRA20_CLK_SCLK, TEGRA20_CLK_PLL_C_OUT1, 24000, 0 },
+   { TEGRA20_CLK_HCLK, TEGRA20_CLK_CLK_MAX, 24000, 0 },
{ TEGRA20_CLK_PCLK, TEGRA20_CLK_CLK_MAX, 6000, 0 },
{ TEGRA20_CLK_CSITE, TEGRA20_CLK_CLK_MAX, 0, 1 },
{ TEGRA20_CLK_CCLK, TEGRA20_CLK_CLK_MAX, 0, 1 },
-- 
2.15.1



[PATCH v2 1/2] clk: tegra: Mark HCLK, SCLK, EMC, MC and PLL_P outputs as critical

2017-12-18 Thread Dmitry Osipenko
The machine dies if HCLK, SCLK or EMC is disabled, hence mark these
clocks as critical. Currently some drivers do not manage clocks properly
and expect certain clocks to be 'always enabled'; these clocks are MC and
the PLL_P outputs. Let's mark the MC and PLL_P outputs as critical for now
and revert this change once the drivers have been corrected.

Signed-off-by: Dmitry Osipenko 
Acked-By:  Peter De Schrijver 
---

Change log:
v2: Fixed EMC accidentally not being marked as critical on Tegra30 and
Tegra124. Switched to using the common EMC gate definition on Tegra20
and Tegra30.

 drivers/clk/tegra/clk-divider.c  |  3 ++-
 drivers/clk/tegra/clk-emc.c  |  2 +-
 drivers/clk/tegra/clk-tegra-periph.c | 27 ++--
 drivers/clk/tegra/clk-tegra-super-gen4.c |  8 ---
 drivers/clk/tegra/clk-tegra114.c |  5 ++---
 drivers/clk/tegra/clk-tegra124.c | 10 -
 drivers/clk/tegra/clk-tegra20.c  | 36 +++-
 drivers/clk/tegra/clk-tegra210.c |  6 +++---
 drivers/clk/tegra/clk-tegra30.c  | 17 ++-
 drivers/clk/tegra/clk.h  |  2 +-
 10 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/drivers/clk/tegra/clk-divider.c b/drivers/clk/tegra/clk-divider.c
index 16e0aee14773..ffae26a7c823 100644
--- a/drivers/clk/tegra/clk-divider.c
+++ b/drivers/clk/tegra/clk-divider.c
@@ -194,6 +194,7 @@ static const struct clk_div_table mc_div_table[] = {
 struct clk *tegra_clk_register_mc(const char *name, const char *parent_name,
  void __iomem *reg, spinlock_t *lock)
 {
-   return clk_register_divider_table(NULL, name, parent_name, 0, reg,
+   return clk_register_divider_table(NULL, name, parent_name,
+ CLK_IS_CRITICAL, reg,
  16, 1, 0, mc_div_table, lock);
 }
diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c
index 11a5066e5c27..5234acd30e89 100644
--- a/drivers/clk/tegra/clk-emc.c
+++ b/drivers/clk/tegra/clk-emc.c
@@ -515,7 +515,7 @@ struct clk *tegra_clk_register_emc(void __iomem *base, 
struct device_node *np,
 
init.name = "emc";
init.ops = _clk_emc_ops;
-   init.flags = 0;
+   init.flags = CLK_IS_CRITICAL;
init.parent_names = emc_parent_clk_names;
init.num_parents = ARRAY_SIZE(emc_parent_clk_names);
 
diff --git a/drivers/clk/tegra/clk-tegra-periph.c 
b/drivers/clk/tegra/clk-tegra-periph.c
index c02711927d79..97bc7b43f40a 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -830,7 +830,7 @@ static struct tegra_periph_init_data gate_clks[] = {
GATE("xusb_host", "xusb_host_src", 89, 0, tegra_clk_xusb_host, 0),
GATE("xusb_ss", "xusb_ss_src", 156, 0, tegra_clk_xusb_ss, 0),
GATE("xusb_dev", "xusb_dev_src", 95, 0, tegra_clk_xusb_dev, 0),
-   GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IGNORE_UNUSED),
+   GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IS_CRITICAL),
GATE("sata_cold", "clk_m", 129, TEGRA_PERIPH_ON_APB, 
tegra_clk_sata_cold, 0),
GATE("ispa", "isp", 23, 0, tegra_clk_ispa, 0),
GATE("ispb", "isp", 3, 0, tegra_clk_ispb, 0),
@@ -971,7 +971,8 @@ static void __init div_clk_init(void __iomem *clk_base,
 
 static void __init init_pllp(void __iomem *clk_base, void __iomem *pmc_base,
struct tegra_clk *tegra_clks,
-   struct tegra_clk_pll_params *pll_params)
+   struct tegra_clk_pll_params *pll_params,
+   bool tegra30)
 {
struct clk *clk;
struct clk **dt_clk;
@@ -987,6 +988,7 @@ static void __init init_pllp(void __iomem *clk_base, void 
__iomem *pmc_base,
}
 
for (i = 0; i < ARRAY_SIZE(pllp_out_clks); i++) {
+   unsigned long flags = CLK_SET_RATE_PARENT;
struct pll_out_data *data;
 
data = pllp_out_clks + i;
@@ -995,14 +997,27 @@ static void __init init_pllp(void __iomem *clk_base, void 
__iomem *pmc_base,
if (!dt_clk)
continue;
 
+   /*
+* On all Tegra generations pll_p_out3 is used as an auxiliary
+* clock source by multiple peripherals.
+*/
+   if (strcmp(data->pll_out_name, "pll_p_out3") == 0)
+   flags |= CLK_IS_CRITICAL;
+
+   /*
+* Only on Tegra30 pll_p_out4 is used as an auxiliary clock
+* source by HDMI hardware block.
+*/
+   if (tegra30 && strcmp(data->pll_out_name, "pll_p_out4") 

Re: [PATCH v2 4/4] usb: host: ehci-tegra: Remove USB_PHY dependencies from Kconfig

2017-12-19 Thread Dmitry Osipenko
On 19.12.2017 20:52, Alan Stern wrote:
> On Sun, 17 Dec 2017, Dmitry Osipenko wrote:
> 
>> Previously tegra-phy driver was built only when ehci-tegra was, now
>> tegra-phy has its own Kconfig entry. Remove the USB_PHY dependencies
>> from ehci-tegra's Kconfig since they aren't useful anymore.
> 
> Are you sure they aren't useful?  Does this mean it is now
> possible/useful to configure a kernel with USB_EHCI_TEGRA enabled and
> USB_PHY disabled?

I'm not entirely sure that USB_TEGRA_PHY isn't useful, because initially (in V1)
I proposed to do it the other way around: to select the PHY when the ehci-tegra /
chipidea drivers are enabled. But that proposal wasn't very successful [0][1].
Yes, it means that ehci-tegra could be built without tegra-phy.

The USB_ULPI option is needed not for compiling ehci-tegra, but for the
tegra-phy driver. I've moved that option to tegra-phy's Kconfig entry, see the
previous patch in the series ("Add Kconfig entry for Tegra PHY driver").

[0] https://marc.info/?l=linux-tegra&m=151307881119328
[1] https://marc.info/?l=linux-tegra&m=151294749618823

>> Signed-off-by: Dmitry Osipenko 
>> ---
>>  drivers/usb/host/Kconfig | 3 ---
>>  1 file changed, 3 deletions(-)
>>
>> diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
>> index 6150bed7cfa8..5042e72c1b76 100644
>> --- a/drivers/usb/host/Kconfig
>> +++ b/drivers/usb/host/Kconfig
>> @@ -234,9 +234,6 @@ config USB_EHCI_TEGRA
>> tristate "NVIDIA Tegra HCD support"
>> depends on ARCH_TEGRA
>> select USB_EHCI_ROOT_HUB_TT
>> -   select USB_PHY
>> -select USB_ULPI
>> -select USB_ULPI_VIEWPORT
> 
> Maybe you should add
> 
>   select USB_TEGRA_PHY
> 
> here.  Also, what happened to USB_ULPI_VIEWPORT?  Is it still 
> important?  If not, then what changed?

USB_ULPI_VIEWPORT is selected by USB_ULPI, so we don't need to duplicate the
selection. See drivers/usb/phy/Kconfig.


Re: [PATCH v2 1/2] clk: tegra: Mark HCLK, SCLK, EMC, MC and PLL_P outputs as critical

2017-12-19 Thread Dmitry Osipenko
On 19.12.2017 22:56, Michael Turquette wrote:
> Quoting Dmitry Osipenko (2017-12-18 19:59:06)
>> Machine dies if HCLK, SCLK or EMC is disabled, hence mark these clocks
>> as critical. Currently some of drivers do not manage clocks properly,
>> expecting clocks to be 'always enabled', these clocks are MC and PLL_P
>> outputs. Let's mark MC or PLL_P outputs as critical for now and revert
>> this change once drivers would be corrected.
> 
> Are the drivers that do not manage their clocks correctly merged
> upstream? If so can we fix those drivers instead of marking clocks as
> critical?

All the drivers have been upstream for quite a long time already. We should be
able to correct them, but I haven't tried yet.

> If not can we annotate the flags below with a comment stating to remove
> the critical clock flag once the consumer driver gets a clue?
I'll try to take a look at how much effort it would take to correct the drivers
during the next few days and will send v3 with either the comment being added or
'always enabled' clocks being dropped from the patch.


Re: [PATCH v2 3/4] usb: phy: Add Kconfig entry for Tegra PHY driver

2017-12-20 Thread Dmitry Osipenko
On 20.12.2017 16:17, kbuild test robot wrote:
> Hi Dmitry,
> 
> Thank you for the patch! Yet something to improve:
> 
> [auto build test ERROR on balbi-usb/next]
> [also build test ERROR on v4.15-rc4 next-20171220]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Dmitry-Osipenko/usb-phy-tegra-Cleanup-error-messages/20171220-142227
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git next
> config: arm-tegra_defconfig (attached as .config)
> compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=arm 
> 
> All errors (new ones prefixed by >>):
> 
>drivers/usb/host/ehci-tegra.o: In function `tegra_ehci_hub_control':
>>> ehci-tegra.c:(.text+0x818): undefined reference to `tegra_usb_phy_preresume'
>>> ehci-tegra.c:(.text+0x9b4): undefined reference to 
>>> `tegra_usb_phy_postresume'

So there is an actual build dependency on the PHY for ehci-tegra, and I haven't
tested it properly. That's not good.

Would it be fine to extend the generic PHY with pre-resume/post-resume
(restore start/end) functions?


[PATCH v3 0/5] Some corrections and improvement for Tegra DRM

2017-12-20 Thread Dmitry Osipenko
I've aggregated all Tegra DRM patches that I've sent before into a single
series.

What's changed:

- Alpha formats have been dropped, in addition to restoring the opaque
  formats on T20/30.

- Reworked the HW cursor patch a tad, since alpha formats have been
  dropped from the overlay plane.

- Fixed warning that was reported by kbuild bot for ARM64 build.

Dmitry Osipenko (5):
  drm/tegra: dc: Link DC1 to DC0 on Tegra20
  drm/tegra: Restore opaque and drop alpha formats on Tegra20/30
  drm/tegra: Trade overlay plane for cursor on older Tegra's
  drm/tegra: gem: Correct iommu_map_sg() error checking
  drm/tegra: Correct timeout in tegra_syncpt_wait

 drivers/gpu/drm/tegra/dc.c| 184 --
 drivers/gpu/drm/tegra/dc.h|   5 +-
 drivers/gpu/drm/tegra/drm.c   |   3 +-
 drivers/gpu/drm/tegra/fb.c|  13 ---
 drivers/gpu/drm/tegra/gem.c   |  15 ++--
 drivers/gpu/drm/tegra/hub.c   |   3 +-
 drivers/gpu/drm/tegra/plane.c |  22 +++--
 drivers/gpu/drm/tegra/plane.h |   2 +-
 8 files changed, 158 insertions(+), 89 deletions(-)

-- 
2.15.1



[PATCH v3 3/5] drm/tegra: Trade overlay plane for cursor on older Tegra's

2017-12-20 Thread Dmitry Osipenko
Older Tegra's do not support the RGBA format for the cursor, but an
overlay plane could be used for it instead. Since there is not much use for
overlays on a regular desktop and a HW-accelerated cursor is much nicer
than the jerky SW cursor, let's trade one overlay plane for the cursor.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/dc.c | 75 +++---
 drivers/gpu/drm/tegra/dc.h |  2 ++
 2 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 460510366bb8..eaff8757bbe0 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -125,9 +125,10 @@ static inline u32 compute_initial_dda(unsigned int in)
return dfixed_frac(inf);
 }
 
-static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
+static void tegra_dc_setup_window(struct tegra_dc *dc, struct drm_plane *plane,
  const struct tegra_dc_window *window)
 {
+   struct tegra_plane *p = to_tegra_plane(plane);
unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
unsigned long value, flags;
bool yuv, planar;
@@ -144,7 +145,7 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, 
unsigned int index,
 
spin_lock_irqsave(>lock, flags);
 
-   value = WINDOW_A_SELECT << index;
+   value = WINDOW_A_SELECT << p->index;
tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
@@ -275,23 +276,29 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, 
unsigned int index,
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_NOKEY);
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_1WIN);
 
-   switch (index) {
+   switch (p->index) {
case 0:
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x08, DC_WIN_BLEND_2WIN_Y);
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
break;
 
case 1:
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
+   tegra_dc_writel(dc, 0x08, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x08, DC_WIN_BLEND_3WIN_XY);
break;
 
case 2:
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
+   if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+   tegra_dc_writel(dc, 0x04, DC_WIN_BLEND_2WIN_X);
+   tegra_dc_writel(dc, 0x04, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x04, DC_WIN_BLEND_3WIN_XY);
+   } else {
+   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
+   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
+   }
break;
}
 
@@ -433,7 +440,6 @@ static void tegra_plane_atomic_update(struct drm_plane 
*plane,
struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
struct drm_framebuffer *fb = plane->state->fb;
-   struct tegra_plane *p = to_tegra_plane(plane);
struct tegra_dc_window window;
unsigned int i;
 
@@ -475,7 +481,7 @@ static void tegra_plane_atomic_update(struct drm_plane 
*plane,
window.stride[i] = fb->pitches[i];
}
 
-   tegra_dc_setup_window(dc, p->index, );
+   tegra_dc_setup_window(dc, plane, );
 }
 
 static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
@@ -706,6 +712,14 @@ static const u32 tegra20_overlay_formats[] = {
DRM_FORMAT_YUV422,
 };
 
+static const u32 tegra20_overlay_cursor_formats[] = {
+   DRM_FORMAT_ARGB,
+   DRM_FORMAT_ARGB1555,
+   DRM_FORMAT_RGBA5551,
+   DRM_FORMAT_ABGR,
+   DRM_FORMAT_ARGB,
+};
+
 static const u32 tegra114_overlay_formats[] = {
DRM_FORMAT_ARGB,
DRM_FORMAT_ARGB1555,
@@ -765,9 +779,11 @@ static const u32 tegra124_overlay_formats[] = {
 
 static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
   struct tegra_dc *dc,
-  unsigned int index)
+  unsigned int index,
+  bool cursor)
 {
struct tegr

[PATCH v3 5/5] drm/tegra: Correct timeout in tegra_syncpt_wait

2017-12-20 Thread Dmitry Osipenko
host1x_syncpt_wait() takes timeout value in jiffies, but DRM passes it in
milliseconds.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/drm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bb98336fa8d7..57396388341b 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -629,7 +629,8 @@ static int tegra_syncpt_wait(struct drm_device *drm, void 
*data,
if (!sp)
return -EINVAL;
 
-   return host1x_syncpt_wait(sp, args->thresh, args->timeout,
+   return host1x_syncpt_wait(sp, args->thresh,
+ msecs_to_jiffies(args->timeout),
  >value);
 }
 
-- 
2.15.1



[PATCH v3 4/5] drm/tegra: gem: Correct iommu_map_sg() error checking

2017-12-20 Thread Dmitry Osipenko
iommu_map_sg() doesn't return an error value, but the size of the requested
IOMMU mapping, or zero in case of error.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/gem.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index ab1e53d434e8..49b9bf28f872 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -114,7 +114,7 @@ static const struct host1x_bo_ops tegra_bo_ops = {
 static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 {
int prot = IOMMU_READ | IOMMU_WRITE;
-   ssize_t err;
+   int err;
 
if (bo->mm)
return -EBUSY;
@@ -128,22 +128,21 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, 
struct tegra_bo *bo)
err = drm_mm_insert_node_generic(>mm,
 bo->mm, bo->gem.size, PAGE_SIZE, 0, 0);
if (err < 0) {
-   dev_err(tegra->drm->dev, "out of I/O virtual memory: %zd\n",
+   dev_err(tegra->drm->dev, "out of I/O virtual memory: %d\n",
err);
goto unlock;
}
 
bo->paddr = bo->mm->start;
 
-   err = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
-  bo->sgt->nents, prot);
-   if (err < 0) {
-   dev_err(tegra->drm->dev, "failed to map buffer: %zd\n", err);
+   bo->size = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
+   bo->sgt->nents, prot);
+   if (!bo->size) {
+   dev_err(tegra->drm->dev, "failed to map buffer\n");
+   err = -ENOMEM;
goto remove;
}
 
-   bo->size = err;
-
mutex_unlock(>mm_lock);
 
return 0;
-- 
2.15.1



[PATCH v3 1/5] drm/tegra: dc: Link DC1 to DC0 on Tegra20

2017-12-20 Thread Dmitry Osipenko
HW reset isn't actually broken on Tegra20, but there is a dependency on
the first display controller being taken out of reset for the second to be
enabled successfully.

Signed-off-by: Dmitry Osipenko 
---

Change log:

v2: Got rid of global variable and now use driver_find_device() instead.

 drivers/gpu/drm/tegra/dc.c | 80 +-
 drivers/gpu/drm/tegra/dc.h |  2 +-
 2 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index e8a0cad5899c..5299185cea2f 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1848,7 +1848,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info 
= {
.supports_block_linear = false,
.pitch_align = 8,
.has_powergate = false,
-   .broken_reset = true,
+   .coupled_pm = true,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra20_primary_formats),
.primary_formats = tegra20_primary_formats,
@@ -1863,7 +1863,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info 
= {
.supports_block_linear = false,
.pitch_align = 8,
.has_powergate = false,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra20_primary_formats),
.primary_formats = tegra20_primary_formats,
@@ -1878,7 +1878,7 @@ static const struct tegra_dc_soc_info 
tegra114_dc_soc_info = {
.supports_block_linear = false,
.pitch_align = 64,
.has_powergate = true,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra114_primary_formats),
.primary_formats = tegra114_primary_formats,
@@ -1893,7 +1893,7 @@ static const struct tegra_dc_soc_info 
tegra124_dc_soc_info = {
.supports_block_linear = true,
.pitch_align = 64,
.has_powergate = true,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra124_primary_formats),
.primary_formats = tegra114_primary_formats,
@@ -1908,7 +1908,7 @@ static const struct tegra_dc_soc_info 
tegra210_dc_soc_info = {
.supports_block_linear = true,
.pitch_align = 64,
.has_powergate = true,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra114_primary_formats),
.primary_formats = tegra114_primary_formats,
@@ -1957,7 +1957,7 @@ static const struct tegra_dc_soc_info 
tegra186_dc_soc_info = {
.supports_block_linear = true,
.pitch_align = 64,
.has_powergate = false,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = true,
.wgrps = tegra186_dc_wgrps,
.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
@@ -2025,6 +2025,11 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc)
return 0;
 }
 
+static int tegra_dc_match(struct device *dev, void *data)
+{
+   return of_device_is_compatible(dev->of_node, "nvidia,tegra20-dc");
+}
+
 static int tegra_dc_probe(struct platform_device *pdev)
 {
struct resource *regs;
@@ -2045,6 +2050,28 @@ static int tegra_dc_probe(struct platform_device *pdev)
if (err < 0)
return err;
 
+   /*
+* On Tegra20 DC1 requires DC0 to be taken out of reset in order to
+* be enabled, otherwise CPU hangs on writing to CMD_DISPLAY_COMMAND /
+* POWER_CONTROL registers during CRTC enabling.
+*/
+   if (dc->pipe == 1 && dc->soc->coupled_pm) {
+   struct device_link *link;
+   struct device *dc0_dev;
+
+   dc0_dev = driver_find_device(pdev->dev.driver, NULL, NULL,
+tegra_dc_match);
+   if (!dc0_dev)
+   return -EPROBE_DEFER;
+
+   link = device_link_add(>dev, dc0_dev,
+  DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE);
+   if (!link) {
+   dev_err(>dev, "failed to link to DC0\n");
+   return -EINVAL;
+   }
+   }
+
dc->clk = devm_clk_get(>dev, NULL);
if (IS_ERR(dc->clk)) {
dev_err(>dev, "failed to get clock\n");
@@ -2058,21 +2085,19 @@ static int tegra_dc_probe(struct platform_device *pdev)
}
 
/* assert reset and disable clock */
-   if (!dc->soc->broken_reset) {
-   err = clk_prepare_enable(dc->clk);
-   if (err < 0)
-   return err;
+   err = clk_prepare_enable(dc->clk);
+   if (err < 0)
+   return err;
 
-   usleep_range(2000, 4000);
+   usleep_range

[PATCH v3 2/5] drm/tegra: Restore opaque and drop alpha formats on Tegra20/30

2017-12-20 Thread Dmitry Osipenko
Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
DRM's MODE_ADDFB IOCTL on Tegra20/30, because IOCTL uses XRGB format if
requested FB depth is 24bpp. As a result, Xorg doesn't work anymore with
both modesetting and opentegra drivers. On older Tegras, each plane has
a blending configuration which should be used to enable / disable alpha
blending, and right now the blending configs are hardcoded to disable
alpha blending. In order to support alpha formats properly, the planes'
blending configuration must be adjusted; until then the alpha formats
are equal to non-alpha.

Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/dc.c| 29 ++---
 drivers/gpu/drm/tegra/dc.h|  1 +
 drivers/gpu/drm/tegra/fb.c| 13 -
 drivers/gpu/drm/tegra/hub.c   |  3 ++-
 drivers/gpu/drm/tegra/plane.c | 22 +-
 drivers/gpu/drm/tegra/plane.h |  2 +-
 6 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 5299185cea2f..460510366bb8 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -299,12 +299,12 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, 
unsigned int index,
 }
 
 static const u32 tegra20_primary_formats[] = {
-   DRM_FORMAT_ARGB,
-   DRM_FORMAT_ARGB1555,
DRM_FORMAT_RGB565,
-   DRM_FORMAT_RGBA5551,
-   DRM_FORMAT_ABGR,
-   DRM_FORMAT_ARGB,
+   /* non-native formats */
+   DRM_FORMAT_XRGB1555,
+   DRM_FORMAT_RGBX5551,
+   DRM_FORMAT_XRGB,
+   DRM_FORMAT_XBGR,
 };
 
 static const u32 tegra114_primary_formats[] = {
@@ -369,7 +369,8 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 
err = tegra_plane_format(state->fb->format->format,
 _state->format,
-_state->swap);
+_state->swap,
+dc->soc->supports_opaque_formats);
if (err < 0)
return err;
 
@@ -692,12 +693,12 @@ static struct drm_plane 
*tegra_dc_cursor_plane_create(struct drm_device *drm,
 }
 
 static const u32 tegra20_overlay_formats[] = {
-   DRM_FORMAT_ARGB,
-   DRM_FORMAT_ARGB1555,
DRM_FORMAT_RGB565,
-   DRM_FORMAT_RGBA5551,
-   DRM_FORMAT_ABGR,
-   DRM_FORMAT_ARGB,
+   /* non-native formats */
+   DRM_FORMAT_XRGB1555,
+   DRM_FORMAT_RGBX5551,
+   DRM_FORMAT_XRGB,
+   DRM_FORMAT_XBGR,
/* planar formats */
DRM_FORMAT_UYVY,
DRM_FORMAT_YUYV,
@@ -1854,6 +1855,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info 
= {
.primary_formats = tegra20_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
.overlay_formats = tegra20_overlay_formats,
+   .supports_opaque_formats = false,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -1869,6 +1871,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info 
= {
.primary_formats = tegra20_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
.overlay_formats = tegra20_overlay_formats,
+   .supports_opaque_formats = false,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -1884,6 +1887,7 @@ static const struct tegra_dc_soc_info 
tegra114_dc_soc_info = {
.primary_formats = tegra114_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
.overlay_formats = tegra114_overlay_formats,
+   .supports_opaque_formats = true,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -1899,6 +1903,7 @@ static const struct tegra_dc_soc_info 
tegra124_dc_soc_info = {
.primary_formats = tegra114_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats),
.overlay_formats = tegra114_overlay_formats,
+   .supports_opaque_formats = true,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -1914,6 +1919,7 @@ static const struct tegra_dc_soc_info 
tegra210_dc_soc_info = {
.primary_formats = tegra114_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
.overlay_formats = tegra114_overlay_formats,
+   .supports_opaque_formats = true,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -1961,6 +1967,7 @@ static const struct tegra_dc_soc_info 
tegra186_dc_soc_info = {
.has_nvdisplay = true,
.wgrps = tegra186_dc_wgrps,
.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
+   .supports_opaque_formats = true,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 8098f49c

Re: [PATCH v3 2/5] drm/tegra: Restore opaque and drop alpha formats on Tegra20/30

2017-12-20 Thread Dmitry Osipenko
On 20.12.2017 21:01, Thierry Reding wrote:
> On Wed, Dec 20, 2017 at 06:46:11PM +0300, Dmitry Osipenko wrote:
>> Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
>> DRM's MODE_ADDFB IOCTL on Tegra20/30, because IOCTL uses XRGB format if
>> requested FB depth is 24bpp. As a result, Xorg doesn't work anymore with
>> both modesetting and opentegra drivers. On older Tegra's each plane has
>> a blending configuration which should be used to enable / disable alpha
>> blending and right now the blending configs are hardcoded to disabled
>> alpha blending. In order to support alpha formats properly, planes
>> blending configuration must be adjusted, until then the alpha formats
>> are equal to non-alpha.
>>
>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>  drivers/gpu/drm/tegra/dc.c| 29 ++---
>>  drivers/gpu/drm/tegra/dc.h|  1 +
>>  drivers/gpu/drm/tegra/fb.c| 13 -
>>  drivers/gpu/drm/tegra/hub.c   |  3 ++-
>>  drivers/gpu/drm/tegra/plane.c | 22 +-
>>  drivers/gpu/drm/tegra/plane.h |  2 +-
>>  6 files changed, 39 insertions(+), 31 deletions(-)
> 
> This kept bugging me, so I spent some time looking at the blending
> programming. I came up with the attached patch which seems to work
> for all scenarios and is fairly similar to your patch. It has the
> added benefit that we can keep support for more formats.
> 
> Any comments?
> 
> Thierry
> --- >8 ---
> From 3d2b7d1a9b8239dc6940477d8783461ac60783bc Mon Sep 17 00:00:00 2001
> From: Thierry Reding 
> Date: Wed, 20 Dec 2017 09:39:14 +0100
> Subject: [PATCH] drm/tegra: dc: Implement legacy blending
> 
> This implements alpha blending on legacy display controllers (Tegra20,
> Tegra30 and Tegra114). While it's theoretically possible to support the
> zpos property to enable userspace to specify the Z-order of each plane
> individually, this is not currently supported and the same fixed Z-
> order as previously defined is used.

Perhaps one variant of implementing zpos could be by making overlays 'virtual',
so each virtual overlay will be backed by the real HW plane and we could swap
the HW planes of the virtual overlays, emulating zpos.

> Reverts commit 71835caa00e8 ("drm/tegra: fb: Force alpha formats") since
> the opaque formats are now supported.
> 
> Reported-by: Dmitry Osipenko 
> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
> Signed-off-by: Thierry Reding 
> ---
>  drivers/gpu/drm/tegra/dc.c| 74 
> ++-
>  drivers/gpu/drm/tegra/dc.h| 13 
>  drivers/gpu/drm/tegra/fb.c| 12 ---
>  drivers/gpu/drm/tegra/plane.c | 41 
>  drivers/gpu/drm/tegra/plane.h |  3 ++
>  5 files changed, 116 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
> index bc65c314e00f..07c687d7f615 100644
> --- a/drivers/gpu/drm/tegra/dc.c
> +++ b/drivers/gpu/drm/tegra/dc.c
> @@ -168,32 +168,46 @@ static inline u32 compute_initial_dda(unsigned int in)
>   return dfixed_frac(inf);
>  }
>  
> -static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane)
> +static void
> +tegra_plane_setup_blending_legacy(struct tegra_plane *plane,
> +   const struct tegra_dc_window *window)
>  {
> + u32 foreground = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255) |
> +  BLEND_COLOR_KEY_NONE;
> + u32 background = BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) |
> +  BLEND_COLOR_KEY_NONE;
> + u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255);
> +
> + /* enable alpha blending if window->alpha */
> + if (window->alpha) {
> + background |= BLEND_CONTROL_DEPENDENT;
> + foreground |= BLEND_CONTROL_ALPHA;
> + }

I think dependent weight means that the window doesn't have alpha transparency. So
we should set the dependent_weight mode for opaque formats and alpha_weight for
formats with an alpha channel.

If the above is correct, then I'm suggesting not to expose alpha formats; we
should properly test all combinations of blending of all the windows. In one
case you could apply my patch for now and then me/you/we could work on a proper
legacy blending implementation based on your patch. In the other case I could
take your patch into v4 (cursor patch would have to be rebased in that case) and
we will correct blending sometime later. I don't mind either case, up to you to
decide.

Is there any ready-made testsuite for the DRM planes blending? Or you have made
a test by 

Re: [PATCH v3 3/5] drm/tegra: Trade overlay plane for cursor on older Tegra's

2017-12-20 Thread Dmitry Osipenko
On 20.12.2017 23:19, Thierry Reding wrote:
> On Wed, Dec 20, 2017 at 06:46:12PM +0300, Dmitry Osipenko wrote:
>> Older Tegra's do not support RGBA format for the cursor, but instead
>> overlay plane could be used for it. Since there is no much use for the
>> overlays on a regular desktop and HW-accelerated cursor is much nicer
>> than the jerky SW cursor, let's trade one overlay plane for the cursor.
>>
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>  drivers/gpu/drm/tegra/dc.c | 75 
>> +++---
>>  drivers/gpu/drm/tegra/dc.h |  2 ++
>>  2 files changed, 59 insertions(+), 18 deletions(-)
> 
> Given the dependency on the alpha formats patch and due to lack of time
> because of the holidays messing up the schedule I'd like to defer this
> to v4.17, unless we can settle all of it until Friday.

I'll rebase and re-test this patch on top of your "Implement legacy blending"
patch ASAP. I'm fine with deferring it if that doesn't work out well.


Re: [PATCH v3 2/5] drm/tegra: Restore opaque and drop alpha formats on Tegra20/30

2017-12-20 Thread Dmitry Osipenko
On 20.12.2017 23:16, Thierry Reding wrote:
> On Wed, Dec 20, 2017 at 11:01:49PM +0300, Dmitry Osipenko wrote:
>> On 20.12.2017 21:01, Thierry Reding wrote:
>>> On Wed, Dec 20, 2017 at 06:46:11PM +0300, Dmitry Osipenko wrote:
>>>> Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
>>>> DRM's MODE_ADDFB IOCTL on Tegra20/30, because IOCTL uses XRGB format if
>>>> requested FB depth is 24bpp. As a result, Xorg doesn't work anymore with
>>>> both modesetting and opentegra drivers. On older Tegra's each plane has
>>>> a blending configuration which should be used to enable / disable alpha
>>>> blending and right now the blending configs are hardcoded to disabled
>>>> alpha blending. In order to support alpha formats properly, planes
>>>> blending configuration must be adjusted, until then the alpha formats
>>>> are equal to non-alpha.
>>>>
>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>> Signed-off-by: Dmitry Osipenko 
>>>> ---
>>>>  drivers/gpu/drm/tegra/dc.c| 29 ++---
>>>>  drivers/gpu/drm/tegra/dc.h|  1 +
>>>>  drivers/gpu/drm/tegra/fb.c| 13 -
>>>>  drivers/gpu/drm/tegra/hub.c   |  3 ++-
>>>>  drivers/gpu/drm/tegra/plane.c | 22 +-
>>>>  drivers/gpu/drm/tegra/plane.h |  2 +-
>>>>  6 files changed, 39 insertions(+), 31 deletions(-)
>>>
>>> This kept bugging me, so I spent some time looking at the blending
>>> programming. I came up with the attached patch which seems to work
>>> for all scenarios and is fairly similar to your patch. It has the
>>> added benefit that we can keep support for more formats.
>>>
>>> Any comments?
>>>
>>> Thierry
>>> --- >8 ---
>>> From 3d2b7d1a9b8239dc6940477d8783461ac60783bc Mon Sep 17 00:00:00 2001
>>> From: Thierry Reding 
>>> Date: Wed, 20 Dec 2017 09:39:14 +0100
>>> Subject: [PATCH] drm/tegra: dc: Implement legacy blending
>>>
>>> This implements alpha blending on legacy display controllers (Tegra20,
>>> Tegra30 and Tegra114). While it's theoretically possible to support the
>>> zpos property to enable userspace to specify the Z-order of each plane
>>> individually, this is not currently supported and the same fixed Z-
>>> order as previously defined is used.
>>
>> Perhaps one variant of implementing zpos could be by making overlays 
>> 'virtual',
>> so each virtual overlay will be backed by the real HW plane and we could swap
>> the HW planes of the virtual overlays, emulating zpos.
>>
>>> Reverts commit 71835caa00e8 ("drm/tegra: fb: Force alpha formats") since
>>> the opaque formats are now supported.
>>>
>>> Reported-by: Dmitry Osipenko 
>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>> Signed-off-by: Thierry Reding 
>>> ---
>>>  drivers/gpu/drm/tegra/dc.c| 74 
>>> ++-
>>>  drivers/gpu/drm/tegra/dc.h| 13 
>>>  drivers/gpu/drm/tegra/fb.c| 12 ---
>>>  drivers/gpu/drm/tegra/plane.c | 41 
>>>  drivers/gpu/drm/tegra/plane.h |  3 ++
>>>  5 files changed, 116 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
>>> index bc65c314e00f..07c687d7f615 100644
>>> --- a/drivers/gpu/drm/tegra/dc.c
>>> +++ b/drivers/gpu/drm/tegra/dc.c
>>> @@ -168,32 +168,46 @@ static inline u32 compute_initial_dda(unsigned int in)
>>> return dfixed_frac(inf);
>>>  }
>>>  
>>> -static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane)
>>> +static void
>>> +tegra_plane_setup_blending_legacy(struct tegra_plane *plane,
>>> + const struct tegra_dc_window *window)
>>>  {
>>> +   u32 foreground = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255) |
>>> +BLEND_COLOR_KEY_NONE;
>>> +   u32 background = BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) |
>>> +BLEND_COLOR_KEY_NONE;
>>> +   u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255);
>>> +
>>> +   /* enable alpha blending if window->alpha */
>>> +   if (window->alpha) {
>>> +   background |= BLEND_CONTROL_DEPENDENT;
>>> +   foreground |= BLEND_CONTROL

Re: [PATCH v3 2/5] drm/tegra: Restore opaque and drop alpha formats on Tegra20/30

2017-12-20 Thread Dmitry Osipenko
On 21.12.2017 01:02, Thierry Reding wrote:
> On Thu, Dec 21, 2017 at 12:05:40AM +0300, Dmitry Osipenko wrote:
>> On 20.12.2017 23:16, Thierry Reding wrote:
>>> On Wed, Dec 20, 2017 at 11:01:49PM +0300, Dmitry Osipenko wrote:
>>>> On 20.12.2017 21:01, Thierry Reding wrote:
>>>>> On Wed, Dec 20, 2017 at 06:46:11PM +0300, Dmitry Osipenko wrote:
>>>>>> Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
>>>>>> DRM's MODE_ADDFB IOCTL on Tegra20/30, because IOCTL uses XRGB format if
>>>>>> requested FB depth is 24bpp. As a result, Xorg doesn't work anymore with
>>>>>> both modesetting and opentegra drivers. On older Tegra's each plane has
>>>>>> a blending configuration which should be used to enable / disable alpha
>>>>>> blending and right now the blending configs are hardcoded to disabled
>>>>>> alpha blending. In order to support alpha formats properly, planes
>>>>>> blending configuration must be adjusted, until then the alpha formats
>>>>>> are equal to non-alpha.
>>>>>>
>>>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>>>> Signed-off-by: Dmitry Osipenko 
>>>>>> ---
>>>>>>  drivers/gpu/drm/tegra/dc.c| 29 ++---
>>>>>>  drivers/gpu/drm/tegra/dc.h|  1 +
>>>>>>  drivers/gpu/drm/tegra/fb.c| 13 -
>>>>>>  drivers/gpu/drm/tegra/hub.c   |  3 ++-
>>>>>>  drivers/gpu/drm/tegra/plane.c | 22 +-
>>>>>>  drivers/gpu/drm/tegra/plane.h |  2 +-
>>>>>>  6 files changed, 39 insertions(+), 31 deletions(-)
>>>>>
>>>>> This kept bugging me, so I spent some time looking at the blending
>>>>> programming. I came up with the attached patch which seems to work
>>>>> for all scenarios and is fairly similar to your patch. It has the
>>>>> added benefit that we can keep support for more formats.
>>>>>
>>>>> Any comments?
>>>>>
>>>>> Thierry
>>>>> --- >8 ---
>>>>> From 3d2b7d1a9b8239dc6940477d8783461ac60783bc Mon Sep 17 00:00:00 2001
>>>>> From: Thierry Reding 
>>>>> Date: Wed, 20 Dec 2017 09:39:14 +0100
>>>>> Subject: [PATCH] drm/tegra: dc: Implement legacy blending
>>>>>
>>>>> This implements alpha blending on legacy display controllers (Tegra20,
>>>>> Tegra30 and Tegra114). While it's theoretically possible to support the
>>>>> zpos property to enable userspace to specify the Z-order of each plane
>>>>> individually, this is not currently supported and the same fixed Z-
>>>>> order as previously defined is used.
>>>>
>>>> Perhaps one variant of implementing zpos could be by making overlays 
>>>> 'virtual',
>>>> so each virtual overlay will be backed by the real HW plane and we could 
>>>> swap
>>>> the HW planes of the virtual overlays, emulating zpos.
>>>>
>>>>> Reverts commit 71835caa00e8 ("drm/tegra: fb: Force alpha formats") since
>>>>> the opaque formats are now supported.
>>>>>
>>>>> Reported-by: Dmitry Osipenko 
>>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>>> Signed-off-by: Thierry Reding 
>>>>> ---
>>>>>  drivers/gpu/drm/tegra/dc.c| 74 
>>>>> ++-
>>>>>  drivers/gpu/drm/tegra/dc.h| 13 
>>>>>  drivers/gpu/drm/tegra/fb.c| 12 ---
>>>>>  drivers/gpu/drm/tegra/plane.c | 41 
>>>>>  drivers/gpu/drm/tegra/plane.h |  3 ++
>>>>>  5 files changed, 116 insertions(+), 27 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
>>>>> index bc65c314e00f..07c687d7f615 100644
>>>>> --- a/drivers/gpu/drm/tegra/dc.c
>>>>> +++ b/drivers/gpu/drm/tegra/dc.c
>>>>> @@ -168,32 +168,46 @@ static inline u32 compute_initial_dda(unsigned int 
>>>>> in)
>>>>>   return dfixed_frac(inf);
>>>>>  }
>>>>>  
>>>>> -static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane)
>>>

Re: [PATCH v3 2/5] drm/tegra: Restore opaque and drop alpha formats on Tegra20/30

2017-12-20 Thread Dmitry Osipenko
On 21.12.2017 01:23, Dmitry Osipenko wrote:
> On 21.12.2017 01:02, Thierry Reding wrote:
>> On Thu, Dec 21, 2017 at 12:05:40AM +0300, Dmitry Osipenko wrote:
>>> On 20.12.2017 23:16, Thierry Reding wrote:
>>>> On Wed, Dec 20, 2017 at 11:01:49PM +0300, Dmitry Osipenko wrote:
>>>>> On 20.12.2017 21:01, Thierry Reding wrote:
>>>>>> On Wed, Dec 20, 2017 at 06:46:11PM +0300, Dmitry Osipenko wrote:
>>>>>>> Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
>>>>>>> DRM's MODE_ADDFB IOCTL on Tegra20/30, because IOCTL uses XRGB format if
>>>>>>> requested FB depth is 24bpp. As a result, Xorg doesn't work anymore with
>>>>>>> both modesetting and opentegra drivers. On older Tegra's each plane has
>>>>>>> a blending configuration which should be used to enable / disable alpha
>>>>>>> blending and right now the blending configs are hardcoded to disabled
>>>>>>> alpha blending. In order to support alpha formats properly, planes
>>>>>>> blending configuration must be adjusted, until then the alpha formats
>>>>>>> are equal to non-alpha.
>>>>>>>
>>>>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>>>>> Signed-off-by: Dmitry Osipenko 
>>>>>>> ---
>>>>>>>  drivers/gpu/drm/tegra/dc.c| 29 ++---
>>>>>>>  drivers/gpu/drm/tegra/dc.h|  1 +
>>>>>>>  drivers/gpu/drm/tegra/fb.c| 13 -
>>>>>>>  drivers/gpu/drm/tegra/hub.c   |  3 ++-
>>>>>>>  drivers/gpu/drm/tegra/plane.c | 22 +-
>>>>>>>  drivers/gpu/drm/tegra/plane.h |  2 +-
>>>>>>>  6 files changed, 39 insertions(+), 31 deletions(-)
>>>>>>
>>>>>> This kept bugging me, so I spent some time looking at the blending
>>>>>> programming. I came up with the attached patch which seems to work
>>>>>> for all scenarios and is fairly similar to your patch. It has the
>>>>>> added benefit that we can keep support for more formats.
>>>>>>
>>>>>> Any comments?
>>>>>>
>>>>>> Thierry
>>>>>> --- >8 ---
>>>>>> From 3d2b7d1a9b8239dc6940477d8783461ac60783bc Mon Sep 17 00:00:00 2001
>>>>>> From: Thierry Reding 
>>>>>> Date: Wed, 20 Dec 2017 09:39:14 +0100
>>>>>> Subject: [PATCH] drm/tegra: dc: Implement legacy blending
>>>>>>
>>>>>> This implements alpha blending on legacy display controllers (Tegra20,
>>>>>> Tegra30 and Tegra114). While it's theoretically possible to support the
>>>>>> zpos property to enable userspace to specify the Z-order of each plane
>>>>>> individually, this is not currently supported and the same fixed Z-
>>>>>> order as previously defined is used.
>>>>>
>>>>> Perhaps one variant of implementing zpos could be by making overlays 
>>>>> 'virtual',
>>>>> so each virtual overlay will be backed by the real HW plane and we could 
>>>>> swap
>>>>> the HW planes of the virtual overlays, emulating zpos.
>>>>>
>>>>>> Reverts commit 71835caa00e8 ("drm/tegra: fb: Force alpha formats") since
>>>>>> the opaque formats are now supported.
>>>>>>
>>>>>> Reported-by: Dmitry Osipenko 
>>>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>>>> Signed-off-by: Thierry Reding 
>>>>>> ---
>>>>>>  drivers/gpu/drm/tegra/dc.c| 74 
>>>>>> ++-
>>>>>>  drivers/gpu/drm/tegra/dc.h| 13 
>>>>>>  drivers/gpu/drm/tegra/fb.c| 12 ---
>>>>>>  drivers/gpu/drm/tegra/plane.c | 41 
>>>>>>  drivers/gpu/drm/tegra/plane.h |  3 ++
>>>>>>  5 files changed, 116 insertions(+), 27 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
>>>>>> index bc65c314e00f..07c687d7f615 100644
>>>>>> --- a/drivers/gpu/drm/tegra/dc.c
>>>>>> +++ b/drivers/gpu/drm/tegra/dc.c
&

Re: [PATCH] media: staging: tegra-vde: select DMA_SHARED_BUFFER

2018-01-05 Thread Dmitry Osipenko
On 05.01.2018 12:43, Arnd Bergmann wrote:
> Without CONFIG_DMA_SHARED_BUFFER we run into a link error for the
> dma_buf_* APIs:
> 
> ERROR: "dma_buf_map_attachment" 
> [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> ERROR: "dma_buf_attach" [drivers/staging/media/tegra-vde/tegra-vde.ko] 
> undefined!
> ERROR: "dma_buf_get" [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> ERROR: "dma_buf_put" [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> ERROR: "dma_buf_detach" [drivers/staging/media/tegra-vde/tegra-vde.ko] 
> undefined!
> ERROR: "dma_buf_unmap_attachment" 
> [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/staging/media/tegra-vde/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/staging/media/tegra-vde/Kconfig 
> b/drivers/staging/media/tegra-vde/Kconfig
> index ec3ebdaa..5c4914674468 100644
> --- a/drivers/staging/media/tegra-vde/Kconfig
> +++ b/drivers/staging/media/tegra-vde/Kconfig
> @@ -1,6 +1,7 @@
>  config TEGRA_VDE
>   tristate "NVIDIA Tegra Video Decoder Engine driver"
>   depends on ARCH_TEGRA || COMPILE_TEST
> + select DMA_SHARED_BUFFER
>   select SRAM
>   help
>   Say Y here to enable support for the NVIDIA Tegra video decoder
> 

Thanks!

Acked-by: Dmitry Osipenko 


Re: [PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-12-05 Thread Dmitry Osipenko
Hi Hans,

On 04.12.2017 17:04, Hans Verkuil wrote:
> Hi Dmitry,
> 
> As you already mention in the TODO, this should become a v4l2 codec driver.
> 
> Good existing examples are the coda, qcom/venus and mtk-vcodec drivers.
> 
> One thing that is not clear from this code is if the tegra hardware is a
> stateful or stateless codec, i.e. does it keep track of the decoder state
> in the hardware, or does the application have to keep track of the state and
> provide the state information together with the video data?
> 
> I ask because at the moment only stateful codecs are supported. Work is 
> ongoing
> to support stateless codecs, but we don't support that for now.
> 

It is stateless. Is there anything ready to try out? If yes, could you please
give a reference to that work?

> Anyway, I'm OK with merging this in staging. Although I think it should go
> to staging/media since we want to keep track of it.
> 

Awesome, I'll move the driver to staging/media in V5. Thanks!


Re: [PATCH 04/10] gpu: host1x: Lock classes during job submission

2017-12-05 Thread Dmitry Osipenko
On 05.12.2017 16:21, Mikko Perttunen wrote:
> On 07.11.2017 23:23, Dmitry Osipenko wrote:
>> On 07.11.2017 15:28, Mikko Perttunen wrote:
>>> On 05.11.2017 18:46, Dmitry Osipenko wrote:
>>>> On 05.11.2017 14:01, Mikko Perttunen wrote:
>>>>> ...
>>>>>
>>>>> +static int mlock_id_for_class(unsigned int class)
>>>>> +{
>>>>> +#if HOST1X_HW >= 6
>>>>> +    switch (class)
>>>>> +    {
>>>>> +    case HOST1X_CLASS_HOST1X:
>>>>> +    return 0;
>>>>> +    case HOST1X_CLASS_VIC:
>>>>> +    return 17;
>>>>
>>>> What is the meaning of returned ID values that you have defined here? Why 
>>>> VIC
>>>> should have different ID on T186?
>>>
>>> On T186, MLOCKs are not "generic" - the HW knows that each MLOCK 
>>> corresponds to
>>> a specific class. Therefore we must map that correctly.
>>>
>>
>> Okay.
>>
>>>>
>>>>> +    default:
>>>>> +    return -EINVAL;
>>>>> +    }
>>>>> +#else
>>>>> +    switch (class)
>>>>> +    {
>>>>> +    case HOST1X_CLASS_HOST1X:
>>>>> +    return 0;
>>>>> +    case HOST1X_CLASS_GR2D:
>>>>> +    return 1;
>>>>> +    case HOST1X_CLASS_GR2D_SB:
>>>>> +    return 2;
>>>>
>>>> Note that we are allowing to switch 2d classes in the same jobs context and
>>>> currently jobs class is somewhat hardcoded to GR2D.
>>>>
>>>> Even though that GR2D and GR2D_SB use different register banks, is it okay 
>>>> to
>>>> trigger execution of different classes simultaneously? Would syncpoint
>>>> differentiate classes on OP_DONE event?
>>>
>>> Good point, we might need to use the same lock for these two.
>>>
>>>>
>>>> I suppose that MLOCK (the module lock) implies the whole module locking,
>>>> wouldn't it make sense to just use the module ID's defined in the TRM?
>>>
>>> Can you point out where these are defined?
>>
>> See INDMODID / REGF_MODULEID fields of HOST1X_CHANNEL_INDOFF2_0 /
>> HOST1X_SYNC_REGF_ADDR_0 registers, bit numbers of HOST1X_SYNC_INTSTATUS_0 /
>> HOST1X_SYNC_INTC0MASK_0 / HOST1X_SYNC_MOD_TEARDOWN_0.
> 
> These values look like they would work on T20, but at least on T124 the module
> numbering for modules we want to lock goes above the number of MLOCKs so the
> indexing scheme would not work there..
> 

Indeed, for some reason I was thinking that there are 32 MLOCKs instead of 16.


Re: [PATCH v1 2/2] drm/tegra: Support disabled CONFIG_PM

2017-12-14 Thread Dmitry Osipenko
On 15.12.2017 00:41, Lucas Stach wrote:
> Am Montag, den 11.12.2017, 18:26 +0300 schrieb Dmitry Osipenko:
>> On 11.12.2017 17:27, Thierry Reding wrote:
>>> On Mon, Dec 11, 2017 at 04:53:56PM +0300, Dmitry Osipenko wrote:
>>>> On 11.12.2017 13:13, Thierry Reding wrote:
>>>>> On Mon, Dec 11, 2017 at 02:19:44AM +0300, Dmitry Osipenko
>>>>> wrote:
>>>>>> Add manual HW power management to drivers probe/remove in
>>>>>> order to
>>>>>> not fail in a case of runtime power management being disabled
>>>>>> in kernel
>>>>>> config.
>>>>>>
>>>>>> Signed-off-by: Dmitry Osipenko 
>>>>>> ---
>>>>>>  drivers/gpu/drm/tegra/dc.c   | 164
>>>>>> +++
>>>>>>  drivers/gpu/drm/tegra/dsi.c  | 138 +--
>>>>>> -
>>>>>>  drivers/gpu/drm/tegra/hdmi.c |  90 
>>>>>>  drivers/gpu/drm/tegra/sor.c  | 103 +--
>>>>>> 
>>>>>>  4 files changed, 310 insertions(+), 185 deletions(-)
>>>>>
>>>>> I think that's the wrong way around. We unconditionally select
>>>>> PM on
>>>>> 64-bit ARM already, and I think we should do the same on 32-bit 
>>>>> ARM.
>>>>> There's really no excuse not to enable runtime PM these days.
>>>>
>>>> What is the rational behind enabling PM unconditionally? It is
>>>> actually a very
>>>> useful debug feature when there is something wrong with the PM.
>>>> It looks like
>>>> Tegra DRM driver is the only driver on Tegra that doesn't work
>>>> properly with PM
>>>> being disabled. Please, let's just fix it.
>>>
>>> What's useful about disabling PM? The problem with allowing !PM is
>>> that
>>> it adds one more combination that needs to be build- and runtime
>>> tested.
>>
>> As I already stated, disabling PM is very useful for debugging when
>> system hangs
>> unexpectedly. I found it very helpful several times.
> 
> This assumes that the bootloader/firmware left the power domains
> powered up. Without PM_GENERIC_DOMAINS, which depends on CONFIG_PM the
> kernel has no means to control the state of the power domains. Probe
> deferral based on the power domain will also not work, so driver may
> probe and try to access power-gated devices, leading to system hangs in
> the common case.

Pre-186 Tegra's do not use generic PM domains, but a custom API. Meanwhile T186
always has CONFIG_PM enabled.


Re: [PATCH v1 2/2] memory: tegra: Introduce memory client hot reset API

2017-12-14 Thread Dmitry Osipenko
On 13.12.2017 06:12, Dmitry Osipenko wrote:
> In order to reset busy HW properly, memory controller needs to be
> involved, otherwise it possible to get corrupted memory if HW was reset
> during DMA. Introduce memory client 'hot reset' API that will be used
> for resetting busy HW. The primary users are memory clients related to
> video (decoder/encoder/camera) and graphics (2d/3d).

I forgot to export the symbol and realized that assert/deassert functions would
be necessary. For now I'll wait for comments on v1.


Re: [PATCH v1 2/2] drm/tegra: Support disabled CONFIG_PM

2017-12-14 Thread Dmitry Osipenko
On 15.12.2017 00:41, Lucas Stach wrote:
> Am Montag, den 11.12.2017, 18:26 +0300 schrieb Dmitry Osipenko:
>> On 11.12.2017 17:27, Thierry Reding wrote:
>>> On Mon, Dec 11, 2017 at 04:53:56PM +0300, Dmitry Osipenko wrote:
>>>> On 11.12.2017 13:13, Thierry Reding wrote:
>>>>> On Mon, Dec 11, 2017 at 02:19:44AM +0300, Dmitry Osipenko
>>>>> wrote:
>>>>>> Add manual HW power management to drivers probe/remove in
>>>>>> order to
>>>>>> not fail in a case of runtime power management being disabled
>>>>>> in kernel
>>>>>> config.
>>>>>>
>>>>>> Signed-off-by: Dmitry Osipenko 
>>>>>> ---
>>>>>>  drivers/gpu/drm/tegra/dc.c   | 164
>>>>>> +++
>>>>>>  drivers/gpu/drm/tegra/dsi.c  | 138 +--
>>>>>> -
>>>>>>  drivers/gpu/drm/tegra/hdmi.c |  90 
>>>>>>  drivers/gpu/drm/tegra/sor.c  | 103 +--
>>>>>> 
>>>>>>  4 files changed, 310 insertions(+), 185 deletions(-)
>>>>>
>>>>> I think that's the wrong way around. We unconditionally select
>>>>> PM on
>>>>> 64-bit ARM already, and I think we should do the same on 32-bit 
>>>>> ARM.
>>>>> There's really no excuse not to enable runtime PM these days.
>>>>
>>>> What is the rational behind enabling PM unconditionally? It is
>>>> actually a very
>>>> useful debug feature when there is something wrong with the PM.
>>>> It looks like
>>>> Tegra DRM driver is the only driver on Tegra that doesn't work
>>>> properly with PM
>>>> being disabled. Please, let's just fix it.
>>>
>>> What's useful about disabling PM? The problem with allowing !PM is
>>> that
>>> it adds one more combination that needs to be build- and runtime
>>> tested.
>>
>> As I already stated, disabling PM is very useful for debugging when
>> system hangs
>> unexpectedly. I found it very helpful several times.
> 
> This assumes that the bootloader/firmware left the power domains
> powered up. Without PM_GENERIC_DOMAINS, which depends on CONFIG_PM the
> kernel has no means to control the state of the power domains. Probe
> deferral based on the power domain will also not work, so driver may
> probe and try to access power-gated devices, leading to system hangs in
> the common case.

BTW, this probably explains why ARM64 has CONFIG_PM enabled, thanks.


Re: [PATCH v1 2/2] usb: tegra: Move UTMI-pads reset from ehci-tegra to tegra-phy

2017-12-14 Thread Dmitry Osipenko
On 11.12.2017 16:31, Dmitry Osipenko wrote:
> On 11.12.2017 13:25, Thierry Reding wrote:
>> On Mon, Dec 11, 2017 at 02:07:38AM +0300, Dmitry Osipenko wrote:
>>> UTMI pads are shared by USB controllers and reset of UTMI pads is shared
>>> with the reset of USB1 controller. Currently reset of UTMI pads is done by
>>> the EHCI driver and ChipIdea UDC works because EHCI driver always happen
>>> to be probed first. Move reset controls from ehci-tegra to tegra-phy in
>>> order to resolve the problem.
>>>
>>> Signed-off-by: Dmitry Osipenko 
>>> ---
>>>  drivers/usb/host/ehci-tegra.c | 87 
>>> ++-
>>>  drivers/usb/phy/phy-tegra-usb.c   | 46 +
>>>  include/linux/usb/tegra_usb_phy.h |  2 +
>>>  3 files changed, 87 insertions(+), 48 deletions(-)
>>
>> I don't think we can do this. For one I don't think shared resets are
>> going to work here because you really won't ever be able to reset after
>> two devices have requested the same reset.
> 
> Ah, indeed. Originally I had the reset being done in the probe, but then 
> changed
> it in the last minute without proper testing. Good catch! I'll revert back 
> patch
> to the origin.
> 
>  Second, utmip_pad_close()
>> could be called at any point and it will have the side-effect of either
>> not doing a reset at all (because it is shared) or resetting the USBD
>> controller at the same time.
> 
> utmip_pad_close() is only called on tegra-phy driver removal, so it is
> absolutely fine.
> 
>> We've been over this code a great deal over the years. I'd love it to be
>> simpler, but every time we tried to simplify it, things broke.
> 
> Well, the current code is already broken quite severely because now we have 
> two
> users of the tegra-phy: ehci-tegra and chipidea-tegra. Things brake if host
> driver is loaded after the UDC because host would reset the UDC. And also pads
> won't be reset if ehci-tegra isn't loaded at all.
> 
> Shared reset seems to be a perfect solution for us and of course it requires
> extra carefulness.

BTW, I think that the current code never did anything useful, because it resets
the UTMI pads without enabling the pads clk. So if a non-USB1 controller gets
probed first, it will enable the non-USB1 clk and reset the pads, while it
probably should reset the pads with the USB1 clk enabled.

Although I'm not familiar with the actual HW, and it could be that the pads clk
isn't needed for a proper reset.


Re: [PATCH] drm/tegra: mark t186 display hub PM functions __maybe_unused

2017-12-15 Thread Dmitry Osipenko
On 15.12.2017 15:51, Arnd Bergmann wrote:
> The newly introduced driver has optional suspend/resume functions,
> causing a warning when CONFIG_PM is disabled:
> 
> drivers/gpu/drm/tegra/hub.c:749:12: error: 'tegra_display_hub_resume' defined 
> but not used [-Werror=unused-function]
> drivers/gpu/drm/tegra/hub.c:733:12: error: 'tegra_display_hub_suspend' 
> defined but not used [-Werror=unused-function]
> 
> This marks them __maybe_unused to shut up the warnings.
> 
> Fixes: c4755fb9064f ("drm/tegra: Add Tegra186 display hub support")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/gpu/drm/tegra/hub.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
> index cccd44711d68..f4911feda9ff 100644
> --- a/drivers/gpu/drm/tegra/hub.c
> +++ b/drivers/gpu/drm/tegra/hub.c
> @@ -730,7 +730,7 @@ static int tegra_display_hub_remove(struct 
> platform_device *pdev)
>   return err;
>  }
>  
> -static int tegra_display_hub_suspend(struct device *dev)
> +static int __maybe_unused tegra_display_hub_suspend(struct device *dev)
>  {
>   struct tegra_display_hub *hub = dev_get_drvdata(dev);
>   int err;
> @@ -746,7 +746,7 @@ static int tegra_display_hub_suspend(struct device *dev)
>   return 0;
>  }
>  
> -static int tegra_display_hub_resume(struct device *dev)
> +static int __maybe_unused tegra_display_hub_resume(struct device *dev)
>  {
>   struct tegra_display_hub *hub = dev_get_drvdata(dev);
>   int err;
> 

It would probably be better to put '#ifdef CONFIG_PM' around these functions for
consistency with the other drm/tegra files.


Re: [PATCH] drm/tegra: mark t186 display hub PM functions __maybe_unused

2017-12-15 Thread Dmitry Osipenko
On 15.12.2017 16:33, Thierry Reding wrote:
> On Fri, Dec 15, 2017 at 01:51:52PM +0100, Arnd Bergmann wrote:
>> The newly introduced driver has optional suspend/resume functions,
>> causing a warning when CONFIG_PM is disabled:
>>
>> drivers/gpu/drm/tegra/hub.c:749:12: error: 'tegra_display_hub_resume' 
>> defined but not used [-Werror=unused-function]
>> drivers/gpu/drm/tegra/hub.c:733:12: error: 'tegra_display_hub_suspend' 
>> defined but not used [-Werror=unused-function]
>>
>> This marks them __maybe_unused to shut up the warnings.
>>
>> Fixes: c4755fb9064f ("drm/tegra: Add Tegra186 display hub support")
>> Signed-off-by: Arnd Bergmann 
>> ---
>>  drivers/gpu/drm/tegra/hub.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> We had touched on this topic in a different thread. The Tegra DRM driver
> currently relies on runtime PM to work properly. I don't see a reason to
> not make that official by adding a select PM to menuconfig ARCH_TEGRA on
> 32-bit ARM just like we already do on 64-bit ARM.

Since you've repeated for the third time that you 'don't see a reason', I'll take
it as a NAK for my patch that made drm/tegra work with PM disabled and drop
it. Next time, please state it clearly if you strongly oppose something.
Thanks.


Re: [PATCH v1 2/2] drm/tegra: Support disabled CONFIG_PM

2017-12-15 Thread Dmitry Osipenko
On 15.12.2017 23:25, Lucas Stach wrote:
> Am Freitag, den 15.12.2017, 01:45 +0300 schrieb Dmitry Osipenko:
>> On 15.12.2017 00:41, Lucas Stach wrote:
>>> Am Montag, den 11.12.2017, 18:26 +0300 schrieb Dmitry Osipenko:
>>>> On 11.12.2017 17:27, Thierry Reding wrote:
>>>>> On Mon, Dec 11, 2017 at 04:53:56PM +0300, Dmitry Osipenko
>>>>> wrote:
>>>>>> On 11.12.2017 13:13, Thierry Reding wrote:
>>>>>>> On Mon, Dec 11, 2017 at 02:19:44AM +0300, Dmitry Osipenko
>>>>>>> wrote:
>>>>>>>> Add manual HW power management to drivers probe/remove in
>>>>>>>> order to
>>>>>>>> not fail in a case of runtime power management being
>>>>>>>> disabled
>>>>>>>> in kernel
>>>>>>>> config.
>>>>>>>>
>>>>>>>> Signed-off-by: Dmitry Osipenko 
>>>>>>>> ---
>>>>>>>>  drivers/gpu/drm/tegra/dc.c   | 164
>>>>>>>> +++
>>>>>>>>  drivers/gpu/drm/tegra/dsi.c  | 138
>>>>>>>> +--
>>>>>>>> -
>>>>>>>>  drivers/gpu/drm/tegra/hdmi.c |  90 -
>>>>>>>> ---
>>>>>>>>  drivers/gpu/drm/tegra/sor.c  | 103 +--
>>>>>>>> 
>>>>>>>> 
>>>>>>>>  4 files changed, 310 insertions(+), 185 deletions(-)
>>>>>>>
>>>>>>> I think that's the wrong way around. We unconditionally
>>>>>>> select
>>>>>>> PM on
>>>>>>> 64-bit ARM already, and I think we should do the same on
>>>>>>> 32-bit 
>>>>>>> ARM.
>>>>>>> There's really no excuse not to enable runtime PM these
>>>>>>> days.
>>>>>>
>>>>>> What is the rational behind enabling PM unconditionally? It
>>>>>> is
>>>>>> actually a very
>>>>>> useful debug feature when there is something wrong with the
>>>>>> PM.
>>>>>> It looks like
>>>>>> Tegra DRM driver is the only driver on Tegra that doesn't
>>>>>> work
>>>>>> properly with PM
>>>>>> being disabled. Please, let's just fix it.
>>>>>
>>>>> What's useful about disabling PM? The problem with allowing !PM
>>>>> is
>>>>> that
>>>>> it adds one more combination that needs to be build- and
>>>>> runtime
>>>>> tested.
>>>>
>>>> As I already stated, disabling PM is very useful for debugging
>>>> when
>>>> system hangs
>>>> unexpectedly. I found it very helpful several times.
>>>
>>> This assumes that the bootloader/firmware left the power domains
>>> powered up. Without PM_GENERIC_DOMAINS, which depends on CONFIG_PM
>>> the
>>> kernel has no means to control the state of the power domains.
>>> Probe
>>> deferral based on the power domain will also not work, so driver
>>> may
>>> probe and try to access power-gated devices, leading to system
>>> hangs in
>>> the common case.
>>
>> Pre-186 Tegra's do not use generic PM domains, but a custom API.
>> Meanwhile T186
>> always has CONFIG_PM enabled.
> 
> This is incorrect since a38045121bf4 (soc/tegra: pmc: Add generic PM
> domain support), i.e. kernel 4.7.

Power domains are defined in DT only by T210/T186, and both SoCs are ARM64; all
ARM32 SoCs use the custom API. Moreover, T210 doesn't define a power domain for
DC in DT, so for DC it uses the legacy API.

BTW, I'll drop this patch since Thierry isn't positive about having !CONFIG_PM.


Re: [PATCH v3 2/5] drm/tegra: Restore opaque and drop alpha formats on Tegra20/30

2017-12-21 Thread Dmitry Osipenko
On 21.12.2017 17:10, Thierry Reding wrote:
> On Thu, Dec 21, 2017 at 01:38:31AM +0300, Dmitry Osipenko wrote:
>> On 21.12.2017 01:23, Dmitry Osipenko wrote:
>>> On 21.12.2017 01:02, Thierry Reding wrote:
>>>> On Thu, Dec 21, 2017 at 12:05:40AM +0300, Dmitry Osipenko wrote:
>>>>> On 20.12.2017 23:16, Thierry Reding wrote:
>>>>>> On Wed, Dec 20, 2017 at 11:01:49PM +0300, Dmitry Osipenko wrote:
>>>>>>> On 20.12.2017 21:01, Thierry Reding wrote:
>>>>>>>> On Wed, Dec 20, 2017 at 06:46:11PM +0300, Dmitry Osipenko wrote:
>>>>>>>>> Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
>>>>>>>>> DRM's MODE_ADDFB IOCTL on Tegra20/30, because IOCTL uses XRGB format 
>>>>>>>>> if
>>>>>>>>> requested FB depth is 24bpp. As a result, Xorg doesn't work anymore 
>>>>>>>>> with
>>>>>>>>> both modesetting and opentegra drivers. On older Tegra's each plane 
>>>>>>>>> has
>>>>>>>>> a blending configuration which should be used to enable / disable 
>>>>>>>>> alpha
>>>>>>>>> blending and right now the blending configs are hardcoded to disabled
>>>>>>>>> alpha blending. In order to support alpha formats properly, planes
>>>>>>>>> blending configuration must be adjusted, until then the alpha formats
>>>>>>>>> are equal to non-alpha.
>>>>>>>>>
>>>>>>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>>>>>>> Signed-off-by: Dmitry Osipenko 
>>>>>>>>> ---
>>>>>>>>>  drivers/gpu/drm/tegra/dc.c| 29 ++---
>>>>>>>>>  drivers/gpu/drm/tegra/dc.h|  1 +
>>>>>>>>>  drivers/gpu/drm/tegra/fb.c| 13 -
>>>>>>>>>  drivers/gpu/drm/tegra/hub.c   |  3 ++-
>>>>>>>>>  drivers/gpu/drm/tegra/plane.c | 22 +-
>>>>>>>>>  drivers/gpu/drm/tegra/plane.h |  2 +-
>>>>>>>>>  6 files changed, 39 insertions(+), 31 deletions(-)
>>>>>>>>
>>>>>>>> This kept bugging me, so I spent some time looking at the blending
>>>>>>>> programming. I came up with the attached patch which seems to work
>>>>>>>> for all scenarios and is fairly similar to your patch. It has the
>>>>>>>> added benefit that we can keep support for more formats.
>>>>>>>>
>>>>>>>> Any comments?
>>>>>>>>
>>>>>>>> Thierry
>>>>>>>> --- >8 ---
>>>>>>>> From 3d2b7d1a9b8239dc6940477d8783461ac60783bc Mon Sep 17 00:00:00 2001
>>>>>>>> From: Thierry Reding 
>>>>>>>> Date: Wed, 20 Dec 2017 09:39:14 +0100
>>>>>>>> Subject: [PATCH] drm/tegra: dc: Implement legacy blending
>>>>>>>>
>>>>>>>> This implements alpha blending on legacy display controllers (Tegra20,
>>>>>>>> Tegra30 and Tegra114). While it's theoretically possible to support the
>>>>>>>> zpos property to enable userspace to specify the Z-order of each plane
>>>>>>>> individually, this is not currently supported and the same fixed Z-
>>>>>>>> order as previously defined is used.
>>>>>>>
>>>>>>> Perhaps one variant of implementing zpos could be by making overlays 
>>>>>>> 'virtual',
>>>>>>> so each virtual overlay will be backed by the real HW plane and we 
>>>>>>> could swap
>>>>>>> the HW planes of the virtual overlays, emulating zpos.
>>>>>>>
>>>>>>>> Reverts commit 71835caa00e8 ("drm/tegra: fb: Force alpha formats") 
>>>>>>>> since
>>>>>>>> the opaque formats are now supported.
>>>>>>>>
>>>>>>>> Reported-by: Dmitry Osipenko 
>>>>>>>> Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
>>>>>>>> Signed-off-by: Thierry Reding 
>>>>>>>>

[PATCH v2] usb: phy: tegra: Increase PHY clock stabilization timeout

2017-12-17 Thread Dmitry Osipenko
This fixes "utmi_phy_clk_enable: timeout waiting for phy to stabilize"
error message.

Signed-off-by: Dmitry Osipenko 
---

Change log:
v2: Increased delay for the poll retry from 1us to 2000us, thanks to
Thierry Reding for the suggestion.

 drivers/usb/phy/phy-tegra-usb.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c
index f668bfb708d3..0e8d23e51732 100644
--- a/drivers/usb/phy/phy-tegra-usb.c
+++ b/drivers/usb/phy/phy-tegra-usb.c
@@ -16,7 +16,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -305,14 +305,10 @@ static int utmip_pad_power_off(struct tegra_usb_phy *phy)
 
 static int utmi_wait_register(void __iomem *reg, u32 mask, u32 result)
 {
-   unsigned long timeout = 2000;
-   do {
-   if ((readl(reg) & mask) == result)
-   return 0;
-   udelay(1);
-   timeout--;
-   } while (timeout);
-   return -1;
+   u32 tmp;
+
+   return readl_poll_timeout(reg, tmp, (tmp & mask) == result,
+ 2000, 6000);
 }
 
 static void utmi_phy_clk_disable(struct tegra_usb_phy *phy)
-- 
2.15.1



[PATCH v2 1/4] usb: phy: tegra: Cleanup error messages

2017-12-17 Thread Dmitry Osipenko
Tegra's PHY driver has a mix of pr_err() and dev_err(). Let's switch to
dev_err() and use a common error message format across the driver for
consistency.

Signed-off-by: Dmitry Osipenko 
---

Change log:
v2: Removed function names as per Thierry's suggestion.

 drivers/usb/phy/phy-tegra-usb.c | 69 -
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c
index 0e8d23e51732..e46219e7fa93 100644
--- a/drivers/usb/phy/phy-tegra-usb.c
+++ b/drivers/usb/phy/phy-tegra-usb.c
@@ -236,10 +236,14 @@ static void set_phcd(struct tegra_usb_phy *phy, bool 
enable)
 
 static int utmip_pad_open(struct tegra_usb_phy *phy)
 {
+   int err;
+
phy->pad_clk = devm_clk_get(phy->u_phy.dev, "utmi-pads");
if (IS_ERR(phy->pad_clk)) {
-   pr_err("%s: can't get utmip pad clock\n", __func__);
-   return PTR_ERR(phy->pad_clk);
+   err = PTR_ERR(phy->pad_clk);
+   dev_err(phy->u_phy.dev,
+   "Failed to get UTMIP pad clock: %d\n", err);
+   return err;
}
 
return 0;
@@ -282,7 +286,7 @@ static int utmip_pad_power_off(struct tegra_usb_phy *phy)
void __iomem *base = phy->pad_regs;
 
if (!utmip_pad_count) {
-   pr_err("%s: utmip pad already powered off\n", __func__);
+   dev_err(phy->u_phy.dev, "UTMIP pad already powered off\n");
return -EINVAL;
}
 
@@ -338,7 +342,8 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy)
set_phcd(phy, true);
 
if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, 0) < 0)
-   pr_err("%s: timeout waiting for phy to stabilize\n", __func__);
+   dev_err(phy->u_phy.dev,
+   "Timeout waiting for PHY to stabilize on disable\n");
 }
 
 static void utmi_phy_clk_enable(struct tegra_usb_phy *phy)
@@ -370,7 +375,8 @@ static void utmi_phy_clk_enable(struct tegra_usb_phy *phy)
 
if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID,
 USB_PHY_CLK_VALID))
-   pr_err("%s: timeout waiting for phy to stabilize\n", __func__);
+   dev_err(phy->u_phy.dev,
+   "Timeout waiting for PHY to stabilize on enable\n");
 }
 
 static int utmi_phy_power_on(struct tegra_usb_phy *phy)
@@ -617,15 +623,15 @@ static int ulpi_phy_power_on(struct tegra_usb_phy *phy)
 
ret = gpio_direction_output(phy->reset_gpio, 0);
if (ret < 0) {
-   dev_err(phy->u_phy.dev, "gpio %d not set to 0\n",
-   phy->reset_gpio);
+   dev_err(phy->u_phy.dev, "GPIO %d not set to 0: %d\n",
+   phy->reset_gpio, ret);
return ret;
}
msleep(5);
ret = gpio_direction_output(phy->reset_gpio, 1);
if (ret < 0) {
-   dev_err(phy->u_phy.dev, "gpio %d not set to 1\n",
-   phy->reset_gpio);
+   dev_err(phy->u_phy.dev, "GPIO %d not set to 1: %d\n",
+   phy->reset_gpio, ret);
return ret;
}
 
@@ -661,13 +667,13 @@ static int ulpi_phy_power_on(struct tegra_usb_phy *phy)
/* Fix VbusInvalid due to floating VBUS */
ret = usb_phy_io_write(phy->ulpi, 0x40, 0x08);
if (ret) {
-   pr_err("%s: ulpi write failed\n", __func__);
+   dev_err(phy->u_phy.dev, "ULPI write failed: %d\n", ret);
return ret;
}
 
ret = usb_phy_io_write(phy->ulpi, 0x80, 0x0B);
if (ret) {
-   pr_err("%s: ulpi write failed\n", __func__);
+   dev_err(phy->u_phy.dev, "ULPI write failed: %d\n", ret);
return ret;
}
 
@@ -728,28 +734,30 @@ static int ulpi_open(struct tegra_usb_phy *phy)
 
phy->clk = devm_clk_get(phy->u_phy.dev, "ulpi-link");
if (IS_ERR(phy->clk)) {
-   pr_err("%s: can't get ulpi clock\n", __func__);
-   return PTR_ERR(phy->clk);
+   err = PTR_ERR(phy->clk);
+   dev_err(phy->u_phy.dev, "Failed to get ULPI clock: %d\n", err);
+   return err;
}
 
err = devm_gpio_request(phy->u_phy.dev, phy->reset_gpio,
"ulpi_phy_reset_b");
if (err < 0) {
-   dev_err(phy->u_phy.dev, "request failed for gpio: %d\n",
-  phy->reset_gpio);
+   dev_err(phy->u_phy.dev, "Request failed for G

[PATCH v2 3/4] usb: phy: Add Kconfig entry for Tegra PHY driver

2017-12-17 Thread Dmitry Osipenko
Currently the tegra-phy driver is built only when ehci-tegra is. Add a dedicated
Kconfig entry for tegra-phy so that drivers other than ehci-tegra (like
ChipIdea) can work without ehci-tegra.

Signed-off-by: Dmitry Osipenko 
---

Change log:
v2: Added the missing USB_ULPI dependency to USB_TEGRA_PHY.

 drivers/usb/phy/Kconfig  | 9 +
 drivers/usb/phy/Makefile | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 0f8ab981d572..b9b0a44be679 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -159,6 +159,15 @@ config USB_MXS_PHY
 
  MXS Phy is used by some of the i.MX SoCs, for example imx23/28/6x.
 
+config USB_TEGRA_PHY
+   tristate "NVIDIA Tegra USB PHY Driver"
+   depends on ARCH_TEGRA
+   select USB_PHY
+   select USB_ULPI
+   help
+ This driver provides PHY support for the USB controllers found
+ on NVIDIA Tegra SoC's.
+
 config USB_ULPI
bool "Generic ULPI Transceiver Driver"
depends on ARM || ARM64
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 25e579fb92b8..df1d99010079 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_AM335X_CONTROL_USB)  += phy-am335x-control.o
 obj-$(CONFIG_AM335X_PHY_USB)   += phy-am335x.o
 obj-$(CONFIG_OMAP_OTG) += phy-omap-otg.o
 obj-$(CONFIG_TWL6030_USB)  += phy-twl6030-usb.o
-obj-$(CONFIG_USB_EHCI_TEGRA)   += phy-tegra-usb.o
+obj-$(CONFIG_USB_TEGRA_PHY)+= phy-tegra-usb.o
 obj-$(CONFIG_USB_GPIO_VBUS)+= phy-gpio-vbus-usb.o
 obj-$(CONFIG_USB_ISP1301)  += phy-isp1301.o
 obj-$(CONFIG_USB_MV_OTG)   += phy-mv-usb.o
-- 
2.15.1



[PATCH v2 4/4] usb: host: ehci-tegra: Remove USB_PHY dependencies from Kconfig

2017-12-17 Thread Dmitry Osipenko
Previously the tegra-phy driver was built only when ehci-tegra was; now
tegra-phy has its own Kconfig entry. Remove the USB_PHY dependencies
from ehci-tegra's Kconfig since they aren't useful anymore.

Signed-off-by: Dmitry Osipenko 
---
 drivers/usb/host/Kconfig | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 6150bed7cfa8..5042e72c1b76 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -234,9 +234,6 @@ config USB_EHCI_TEGRA
tristate "NVIDIA Tegra HCD support"
depends on ARCH_TEGRA
select USB_EHCI_ROOT_HUB_TT
-   select USB_PHY
-   select USB_ULPI
-   select USB_ULPI_VIEWPORT
help
  This driver enables support for the internal USB Host Controllers
  found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
-- 
2.15.1



[PATCH v2 2/4] usb: tegra: Move utmi-pads reset from ehci-tegra to tegra-phy

2017-12-17 Thread Dmitry Osipenko
UTMI pads are shared by the USB controllers, and the reset of the UTMI pads is
shared with the reset of the USB1 controller. Currently the UTMI pads reset is
done by the EHCI driver, and the ChipIdea UDC works because the EHCI driver
always happens to be probed first. Move the reset controls from ehci-tegra to
tegra-phy in order to resolve the problem.

Signed-off-by: Dmitry Osipenko 
---

Change log:
v2: Corrected UTMI pads reset by moving reset assert/deassert to the
PHY's probe.

 drivers/usb/host/ehci-tegra.c | 87 ++-
 drivers/usb/phy/phy-tegra-usb.c   | 79 ---
 include/linux/usb/tegra_usb_phy.h |  2 +
 3 files changed, 115 insertions(+), 53 deletions(-)

diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index c809f7d2f08f..63294892e198 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -36,7 +36,6 @@
 #define DRV_NAME "tegra-ehci"
 
 static struct hc_driver __read_mostly tegra_ehci_hc_driver;
-static bool usb1_reset_attempted;
 
 struct tegra_ehci_soc_config {
bool has_hostpc;
@@ -51,67 +50,54 @@ struct tegra_ehci_hcd {
enum tegra_usb_phy_port_speed port_speed;
 };
 
-/*
- * The 1st USB controller contains some UTMI pad registers that are global for
- * all the controllers on the chip. Those registers are also cleared when
- * reset is asserted to the 1st controller. This means that the 1st controller
- * can only be reset when no other controlled has finished probing. So we'll
- * reset the 1st controller before doing any other setup on any of the
- * controllers, and then never again.
- *
- * Since this is a PHY issue, the Tegra PHY driver should probably be doing
- * the resetting of the USB controllers. But to keep compatibility with old
- * device trees that don't have reset phandles in the PHYs, do it here.
- * Those old DTs will be vulnerable to total USB breakage if the 1st EHCI
- * device isn't the first one to finish probing, so warn them.
- */
 static int tegra_reset_usb_controller(struct platform_device *pdev)
 {
struct device_node *phy_np;
struct usb_hcd *hcd = platform_get_drvdata(pdev);
struct tegra_ehci_hcd *tegra =
(struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv;
-   bool has_utmi_pad_registers = false;
+   struct reset_control *rst;
+   int err;
 
phy_np = of_parse_phandle(pdev->dev.of_node, "nvidia,phy", 0);
if (!phy_np)
return -ENOENT;
 
-   if (of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers"))
-   has_utmi_pad_registers = true;
+   /*
+* The 1st USB controller contains some UTMI pad registers that are
+* global for all the controllers on the chip. Those registers are
+* also cleared when reset is asserted to the 1st controller.
+*/
+   rst = of_reset_control_get_shared(phy_np, "utmi-pads");
+   if (IS_ERR(rst)) {
+   dev_warn(>dev,
+"can't get utmi-pads reset from the PHY\n");
+   dev_warn(>dev,
+"continuing, but please update your DT\n");
+   } else {
+   /*
+* PHY driver performs UTMI-pads reset in a case of
+* non-legacy DT.
+*/
+   reset_control_put(rst);
+   }
 
-   if (!usb1_reset_attempted) {
-   struct reset_control *usb1_reset;
+   of_node_put(phy_np);
 
-   if (!has_utmi_pad_registers)
-   usb1_reset = of_reset_control_get(phy_np, "utmi-pads");
-   else
-   usb1_reset = tegra->rst;
-
-   if (IS_ERR(usb1_reset)) {
-   dev_warn(>dev,
-"can't get utmi-pads reset from the PHY\n");
-   dev_warn(>dev,
-"continuing, but please update your DT\n");
-   } else {
-   reset_control_assert(usb1_reset);
-   udelay(1);
-   reset_control_deassert(usb1_reset);
-
-   if (!has_utmi_pad_registers)
-   reset_control_put(usb1_reset);
-   }
+   /* reset control is shared, hence initialize it first */
+   err = reset_control_deassert(tegra->rst);
+   if (err)
+   return err;
 
-   usb1_reset_attempted = true;
-   }
+   err = reset_control_assert(tegra->rst);
+   if (err)
+   return err;
 
-   if (!has_utmi_pad_registers) {
-   reset_control_assert(tegra->rst);
-   udelay(1);
-   reset_control_deassert(tegra->rst);
-   }
+   udelay(1);
 
-   of_node_put(phy_np);
+   err = reset_control_deassert(tegra->rs

[PATCH v1] drm/tegra: Correct timeout in tegra_syncpt_wait

2017-12-17 Thread Dmitry Osipenko
host1x_syncpt_wait() takes timeout value in jiffies, but DRM passes it in
milliseconds.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/drm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bb98336fa8d7..57396388341b 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -629,7 +629,8 @@ static int tegra_syncpt_wait(struct drm_device *drm, void 
*data,
if (!sp)
return -EINVAL;
 
-   return host1x_syncpt_wait(sp, args->thresh, args->timeout,
+   return host1x_syncpt_wait(sp, args->thresh,
+ msecs_to_jiffies(args->timeout),
  >value);
 }
 
-- 
2.15.1



[PATCH v2] drm/tegra: dc: Link DC1 to DC0 on Tegra20

2017-12-17 Thread Dmitry Osipenko
HW reset isn't actually broken on Tegra20, but the first display controller
must be taken out of reset before the second one can be enabled
successfully.

Signed-off-by: Dmitry Osipenko 
---

Change log:
v2: Got rid of global variable and now use driver_find_device() instead.

 drivers/gpu/drm/tegra/dc.c | 80 +-
 drivers/gpu/drm/tegra/dc.h |  2 +-
 2 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index e963e40d8a25..be84c36ad81f 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1842,7 +1842,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info 
= {
.supports_block_linear = false,
.pitch_align = 8,
.has_powergate = false,
-   .broken_reset = true,
+   .coupled_pm = true,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra20_primary_formats),
.primary_formats = tegra20_primary_formats,
@@ -1857,7 +1857,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info 
= {
.supports_block_linear = false,
.pitch_align = 8,
.has_powergate = false,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra20_primary_formats),
.primary_formats = tegra20_primary_formats,
@@ -1872,7 +1872,7 @@ static const struct tegra_dc_soc_info 
tegra114_dc_soc_info = {
.supports_block_linear = false,
.pitch_align = 64,
.has_powergate = true,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra114_primary_formats),
.primary_formats = tegra114_primary_formats,
@@ -1887,7 +1887,7 @@ static const struct tegra_dc_soc_info 
tegra124_dc_soc_info = {
.supports_block_linear = true,
.pitch_align = 64,
.has_powergate = true,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra124_primary_formats),
.primary_formats = tegra114_primary_formats,
@@ -1902,7 +1902,7 @@ static const struct tegra_dc_soc_info 
tegra210_dc_soc_info = {
.supports_block_linear = true,
.pitch_align = 64,
.has_powergate = true,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = false,
.num_primary_formats = ARRAY_SIZE(tegra114_primary_formats),
.primary_formats = tegra114_primary_formats,
@@ -1951,7 +1951,7 @@ static const struct tegra_dc_soc_info 
tegra186_dc_soc_info = {
.supports_block_linear = true,
.pitch_align = 64,
.has_powergate = false,
-   .broken_reset = false,
+   .coupled_pm = false,
.has_nvdisplay = true,
.wgrps = tegra186_dc_wgrps,
.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
@@ -2019,6 +2019,11 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc)
return 0;
 }
 
+static int tegra_dc_match(struct device *dev, void *data)
+{
+   return of_device_is_compatible(dev->of_node, "nvidia,tegra20-dc");
+}
+
 static int tegra_dc_probe(struct platform_device *pdev)
 {
struct resource *regs;
@@ -2039,6 +2044,28 @@ static int tegra_dc_probe(struct platform_device *pdev)
if (err < 0)
return err;
 
+   /*
+* On Tegra20 DC1 requires DC0 to be taken out of reset in order to
+* be enabled, otherwise CPU hangs on writing to CMD_DISPLAY_COMMAND /
+* POWER_CONTROL registers during CRTC enabling.
+*/
+   if (dc->pipe == 1 && dc->soc->coupled_pm) {
+   struct device_link *link;
+   struct device *dc0_dev;
+
+   dc0_dev = driver_find_device(pdev->dev.driver, NULL, NULL,
+tegra_dc_match);
+   if (!dc0_dev)
+   return -EPROBE_DEFER;
+
+   link = device_link_add(>dev, dc0_dev,
+  DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE);
+   if (!link) {
+   dev_err(>dev, "failed to link to DC0\n");
+   return -EINVAL;
+   }
+   }
+
dc->clk = devm_clk_get(>dev, NULL);
if (IS_ERR(dc->clk)) {
dev_err(>dev, "failed to get clock\n");
@@ -2052,21 +2079,19 @@ static int tegra_dc_probe(struct platform_device *pdev)
}
 
/* assert reset and disable clock */
-   if (!dc->soc->broken_reset) {
-   err = clk_prepare_enable(dc->clk);
-   if (err < 0)
-   return err;
+   err = clk_prepare_enable(dc->clk);
+   if (err < 0)
+   return err;
 
-   usleep_range(2000, 4000);
+   usleep_range

[PATCH v1] drm/tegra: Restore opaque formats on Tegra20/30

2017-12-17 Thread Dmitry Osipenko
Commit 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats") broke
DRM's MODE_ADDFB IOCTL on Tegra20/30, because it uses XRGBA format if
requested FB depth is 24bpp. As a result, Xorg doesn't work anymore with
both modesetting and opentegra drivers. On all Tegra's each plane has a
blending configuration which should be used to enable / disable alpha
blending and right now the blending configs are hardcoded with alpha
blending being disabled. In order to support alpha formats properly,
planes blending configuration must be adjusted, until then the alpha
formats are equal to non-alpha.

Fixes: 7772fdaef939 ("drm/tegra: Support ARGB and ABGR formats")
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/dc.c| 19 ++-
 drivers/gpu/drm/tegra/dc.h|  1 +
 drivers/gpu/drm/tegra/fb.c| 13 -
 drivers/gpu/drm/tegra/hub.c   |  3 ++-
 drivers/gpu/drm/tegra/plane.c | 22 +-
 drivers/gpu/drm/tegra/plane.h |  2 +-
 6 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index be84c36ad81f..7e58143f4145 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -305,6 +305,11 @@ static const u32 tegra20_primary_formats[] = {
DRM_FORMAT_RGBA5551,
DRM_FORMAT_ABGR,
DRM_FORMAT_ARGB,
+   /* non-native formats */
+   DRM_FORMAT_XRGB1555,
+   DRM_FORMAT_RGBX5551,
+   DRM_FORMAT_XRGB,
+   DRM_FORMAT_XBGR,
 };
 
 static const u32 tegra114_primary_formats[] = {
@@ -369,7 +374,8 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 
err = tegra_plane_format(state->fb->format->format,
 _state->format,
-_state->swap);
+_state->swap,
+dc->soc->supports_opaque_formats);
if (err < 0)
return err;
 
@@ -698,6 +704,11 @@ static const u32 tegra20_overlay_formats[] = {
DRM_FORMAT_RGBA5551,
DRM_FORMAT_ABGR,
DRM_FORMAT_ARGB,
+   /* non-native formats */
+   DRM_FORMAT_XRGB1555,
+   DRM_FORMAT_RGBX5551,
+   DRM_FORMAT_XRGB,
+   DRM_FORMAT_XBGR,
/* planar formats */
DRM_FORMAT_UYVY,
DRM_FORMAT_YUYV,
@@ -1848,6 +1859,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info 
= {
.primary_formats = tegra20_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
.overlay_formats = tegra20_overlay_formats,
+   .supports_opaque_formats = false,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -1863,6 +1875,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info 
= {
.primary_formats = tegra20_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
.overlay_formats = tegra20_overlay_formats,
+   .supports_opaque_formats = false,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -1878,6 +1891,7 @@ static const struct tegra_dc_soc_info 
tegra114_dc_soc_info = {
.primary_formats = tegra114_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
.overlay_formats = tegra114_overlay_formats,
+   .supports_opaque_formats = true,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -1893,6 +1907,7 @@ static const struct tegra_dc_soc_info 
tegra124_dc_soc_info = {
.primary_formats = tegra114_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats),
.overlay_formats = tegra114_overlay_formats,
+   .supports_opaque_formats = true,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -1908,6 +1923,7 @@ static const struct tegra_dc_soc_info 
tegra210_dc_soc_info = {
.primary_formats = tegra114_primary_formats,
.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
.overlay_formats = tegra114_overlay_formats,
+   .supports_opaque_formats = true,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -1955,6 +1971,7 @@ static const struct tegra_dc_soc_info 
tegra186_dc_soc_info = {
.has_nvdisplay = true,
.wgrps = tegra186_dc_wgrps,
.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
+   .supports_opaque_formats = true,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 8098f49c0d96..3a66a1127ee7 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -65,6 +65,7 @@ struct tegra_dc_soc_info {
unsigned int num_primary_formats;
const u32 *overlay_formats;
unsigned int num_overlay_formats;
+   bool supports_opaque_formats;
 };
 
 struct tegra_dc {
diff --git a/drivers/gpu/dr

[PATCH v1] drm/tegra: Trade overlay plane for cursor on older Tegra's

2017-12-17 Thread Dmitry Osipenko
Older Tegra's do not support the RGBA format for the cursor, but an
overlay plane could be used for it instead. Since there is not much use for
the overlays on a regular desktop and a HW-accelerated cursor is much nicer
than the jerky SW cursor, let's trade one overlay plane for the cursor.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/dc.c | 52 +-
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 7e58143f4145..3282aa911351 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -125,9 +125,10 @@ static inline u32 compute_initial_dda(unsigned int in)
return dfixed_frac(inf);
 }
 
-static void tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
+static void tegra_dc_setup_window(struct tegra_dc *dc, struct drm_plane *plane,
  const struct tegra_dc_window *window)
 {
+   struct tegra_plane *p = to_tegra_plane(plane);
unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
unsigned long value, flags;
bool yuv, planar;
@@ -144,7 +145,7 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, 
unsigned int index,
 
spin_lock_irqsave(>lock, flags);
 
-   value = WINDOW_A_SELECT << index;
+   value = WINDOW_A_SELECT << p->index;
tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
@@ -275,23 +276,29 @@ static void tegra_dc_setup_window(struct tegra_dc *dc, 
unsigned int index,
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_NOKEY);
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_1WIN);
 
-   switch (index) {
+   switch (p->index) {
case 0:
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x08, DC_WIN_BLEND_2WIN_Y);
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
break;
 
case 1:
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
+   tegra_dc_writel(dc, 0x08, DC_WIN_BLEND_3WIN_XY);
break;
 
case 2:
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
-   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
+   if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+   tegra_dc_writel(dc, 0x04, DC_WIN_BLEND_2WIN_X);
+   tegra_dc_writel(dc, 0x04, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x04, DC_WIN_BLEND_3WIN_XY);
+   } else {
+   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_X);
+   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_2WIN_Y);
+   tegra_dc_writel(dc, 0x00, DC_WIN_BLEND_3WIN_XY);
+   }
break;
}
 
@@ -438,7 +445,6 @@ static void tegra_plane_atomic_update(struct drm_plane 
*plane,
struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
struct drm_framebuffer *fb = plane->state->fb;
-   struct tegra_plane *p = to_tegra_plane(plane);
struct tegra_dc_window window;
unsigned int i;
 
@@ -480,7 +486,7 @@ static void tegra_plane_atomic_update(struct drm_plane 
*plane,
window.stride[i] = fb->pitches[i];
}
 
-   tegra_dc_setup_window(dc, p->index, );
+   tegra_dc_setup_window(dc, plane, );
 }
 
 static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
@@ -775,9 +781,11 @@ static const u32 tegra124_overlay_formats[] = {
 
 static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
   struct tegra_dc *dc,
-  unsigned int index)
+  unsigned int index,
+  bool cursor)
 {
struct tegra_plane *plane;
+   enum drm_plane_type type;
unsigned int num_formats;
const u32 *formats;
int err;
@@ -793,11 +801,14 @@ static struct drm_plane 
*tegra_dc_overlay_plane_create(struct drm_device *drm,
 
num_formats = dc->soc->num_overlay_formats;
formats = dc->soc->overlay_formats;
+   type = DRM_PLANE_TYPE_OVERLAY;
+
+   if (cursor)
+   type = DRM_PLANE_TYPE_CURSOR;
 
err = drm_universal_plane_init(drm, >base, 1 << d

[PATCH v1] drm/tegra: gem: Correct iommu_map_sg() error checking

2017-12-17 Thread Dmitry Osipenko
iommu_map_sg() doesn't return an error value, but the size of the requested
IOMMU mapping, or zero in case of error.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/gem.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index ab1e53d434e8..710d3c289b2e 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -114,7 +114,7 @@ static const struct host1x_bo_ops tegra_bo_ops = {
 static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 {
int prot = IOMMU_READ | IOMMU_WRITE;
-   ssize_t err;
+   int err;
 
if (bo->mm)
return -EBUSY;
@@ -135,15 +135,14 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, 
struct tegra_bo *bo)
 
bo->paddr = bo->mm->start;
 
-   err = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
-  bo->sgt->nents, prot);
-   if (err < 0) {
-   dev_err(tegra->drm->dev, "failed to map buffer: %zd\n", err);
+   bo->size = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
+   bo->sgt->nents, prot);
+   if (!bo->size) {
+   dev_err(tegra->drm->dev, "failed to map buffer\n");
+   err = -ENOMEM;
goto remove;
}
 
-   bo->size = err;
-
mutex_unlock(>mm_lock);
 
return 0;
-- 
2.15.1



Re: [PATCH v5 0/4] NVIDIA Tegra video decoder driver

2017-12-17 Thread Dmitry Osipenko
On 12.12.2017 03:26, Dmitry Osipenko wrote:
> VDE driver provides accelerated video decoding to NVIDIA Tegra SoC's,
> it is a result of reverse-engineering efforts. Driver has been tested on
> Toshiba AC100 and Acer A500, it should work on any Tegra20 device.
> 
> In userspace this driver is utilized by libvdpau-tegra [0] that implements
> VDPAU interface, so any video player that supports VDPAU can provide
> accelerated video decoding on Tegra20 on Linux.
> 
> [0] https://github.com/grate-driver/libvdpau-tegra

Thierry, driver has been approved by media maintainers and should appear in 4.16
(it is already in -next). Please schedule the DT patches for 4.16, thanks.


Re: T20 Cpuidle Freeze

2017-11-03 Thread Dmitry Osipenko
On 03.11.2017 16:07, Marcel Ziswiler wrote:
> Hi Rafael, dear community
> 
> One of our customers reported seeing freezes when running the LTS Linux
> kernel 4.9.x on our Toradex Colibri T20 modules [1]. I was able to
> reproduce a complete SoC lock-up after a few minutes also running the
> latest 4.14-rc7 while LTS 4.4.x seemed to run stable.
> 
> Having attempted a multi-level bisection points towards the following
> first bad commit:
> 
> 9c4b2867ed7c8c8784dd417ffd16e705e81eb145
> 
> cpuidle: menu: Fix menu_select() for CPUIDLE_DRIVER_STATE_START == 0
> 
> Unfortunately as drivers/cpuidle/governors/menu.c has gotten further
> edits since it seems not trivial to just revert it.
> 
> However I found out that it indeed has to do with CPU idle as when I
> did disable the CONFIG_CPU_IDLE Linux kernel configuration option also
> LTS 4.9.59 as well as latest 4.14-rc7 run now stable overnight.
> 
> Does anybody have any clue what exactly may be happening and/or why
> cpuidle may not run stable on T20? Or is everybody always just
> disabling cpuidle on T20 anyway?
> 
> Thanks!
> 
> [1] https://www.toradex.com/community/questions/16838/actual-lts-kernel
> -49-on-colibri-t20.html

I haven't seen any problems with cpuidle on -next, and 4.14-rc7 works fine too.

# cat /sys/devices/system/cpu/cpu[0-1]/cpuidle/state[0-1]/usage
162283
32905
254669
32905

# cat /sys/devices/system/cpu/cpu[0-1]/cpuidle/state[0-1]/time
436981763
2110484666
458260707
2121781516

# uptime
 18:50:24 up 44 min,  1 user,  load average: 0.15, 0.08, 0.07

It could be that cpuidle unmasks some other issue on the Colibri.


Re: T20 Cpuidle Freeze

2017-11-05 Thread Dmitry Osipenko
On 04.11.2017 23:49, Marcel Ziswiler wrote:
> On Fri, 2017-11-03 at 21:52 +0300, Dmitry Osipenko wrote:
>> I haven't seen any problems with the cpuidle on next and 4.14-rc7
>> works fine.
>>
>> # cat /sys/devices/system/cpu/cpu[0-1]/cpuidle/state[0-1]/usage
>> 162283
>> 32905
>> 254669
>> 32905
>>
>> # cat /sys/devices/system/cpu/cpu[0-1]/cpuidle/state[0-1]/time
>> 436981763
>> 2110484666
>> 458260707
>> 2121781516
>>
>> # uptime
>>  18:50:24 up 44 min,  1 user,  load average: 0.15, 0.08, 0.07
> 
> OK, thanks. Good to know.
> 
>> It could be that cpuidle unmasks some other issue on the Colibri.
> 
> Yes, that's also my thinking.
> 
> What hardware did you run this on?
> 

I ran it on an Acer A500 tablet, which is Tegra20.

> Does your kernel configuration differ from the stock tegra_defconfig I used?
> 

Doesn't differ, used stock tegra_defconfig as well.

> What exact device tree source are you using (or just stock)?
> 

https://gist.github.com/digetx/2f624a0df4caff657ef28863b5354d5b

> Maybe the compiler version you are using could also have some influence?
> 

Compiler bugs aren't rare; I'm using armv7a-hardfloat-linux-gnueabi-gcc
(Gentoo 6.4.0 p1.0) 6.4.0

> If you have any additional suggestions on what else could be relevant please 
> let me know.
> 

1) I suppose you could attach JTAG and see where the hang happens.

2) Enable all kernel debug Kconfig options.

3) Disable all non-critical device drivers in Kconfig, so that you could boot.
See if it makes a difference.

4) Probably just adding some debug printk's would be good enough to localize the
offending place in the code.


Re: [PATCH 04/10] gpu: host1x: Lock classes during job submission

2017-11-05 Thread Dmitry Osipenko
On 05.11.2017 14:01, Mikko Perttunen wrote:
> Host1x has a feature called MLOCKs which allow a certain class
> (~HW unit) to be locked (in the mutex sense) and unlocked during
> command execution, preventing other channels from accessing the
> class while it is locked. This is necessary to prevent concurrent
> jobs from messing up class state.
> 
> This has not been necessary so far since due to our channel allocation
> model, there has only been a single hardware channel submitting
> commands to each class. Future patches, however, change the channel
> allocation model to allow hardware-scheduled concurrency, and as such
> we need to start locking.
> 
> This patch implements locking on all platforms from Tegra20 to
> Tegra186.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/cdma.c  |   1 +
>  drivers/gpu/host1x/cdma.h  |   1 +
>  drivers/gpu/host1x/hw/cdma_hw.c| 122 
> +
>  drivers/gpu/host1x/hw/channel_hw.c |  71 ++
>  drivers/gpu/host1x/hw/host1x01_hardware.h  |  10 ++
>  drivers/gpu/host1x/hw/host1x02_hardware.h  |  10 ++
>  drivers/gpu/host1x/hw/host1x04_hardware.h  |  10 ++
>  drivers/gpu/host1x/hw/host1x05_hardware.h  |  10 ++
>  drivers/gpu/host1x/hw/host1x06_hardware.h  |  10 ++
>  drivers/gpu/host1x/hw/hw_host1x01_sync.h   |   6 ++
>  drivers/gpu/host1x/hw/hw_host1x02_sync.h   |   6 ++
>  drivers/gpu/host1x/hw/hw_host1x04_sync.h   |   6 ++
>  drivers/gpu/host1x/hw/hw_host1x05_sync.h   |   6 ++
>  drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h |   5 +
>  14 files changed, 257 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
> index 28541b280739..f787cfe69c11 100644
> --- a/drivers/gpu/host1x/cdma.c
> +++ b/drivers/gpu/host1x/cdma.c
> @@ -232,6 +232,7 @@ static void cdma_start_timer_locked(struct host1x_cdma 
> *cdma,
>   }
>  
>   cdma->timeout.client = job->client;
> + cdma->timeout.class = job->class;
>   cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id);
>   cdma->timeout.syncpt_val = job->syncpt_end;
>   cdma->timeout.start_ktime = ktime_get();
> diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
> index 286d49386be9..e72660fc83c9 100644
> --- a/drivers/gpu/host1x/cdma.h
> +++ b/drivers/gpu/host1x/cdma.h
> @@ -59,6 +59,7 @@ struct buffer_timeout {
>   ktime_t start_ktime;/* starting time */
>   /* context timeout information */
>   int client;
> + u32 class;
>  };
>  
>  enum cdma_event {
> diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
> index ce320534cbed..4d5970d863d5 100644
> --- a/drivers/gpu/host1x/hw/cdma_hw.c
> +++ b/drivers/gpu/host1x/hw/cdma_hw.c
> @@ -16,6 +16,7 @@
>   * along with this program.  If not, see .
>   */
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -243,6 +244,125 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 
> getptr)
>   cdma_timeout_restart(cdma, getptr);
>  }
>  
> +static int mlock_id_for_class(unsigned int class)
> +{
> +#if HOST1X_HW >= 6
> + switch (class)
> + {
> + case HOST1X_CLASS_HOST1X:
> + return 0;
> + case HOST1X_CLASS_VIC:
> + return 17;

What is the meaning of the returned ID values that you have defined here? Why
should VIC have a different ID on T186?

> + default:
> + return -EINVAL;
> + }
> +#else
> + switch (class)
> + {
> + case HOST1X_CLASS_HOST1X:
> + return 0;
> + case HOST1X_CLASS_GR2D:
> + return 1;
> + case HOST1X_CLASS_GR2D_SB:
> + return 2;

Note that we allow switching between 2D classes within the same job's context,
and currently the job's class is somewhat hardcoded to GR2D.

Even though GR2D and GR2D_SB use different register banks, is it okay to
trigger execution of different classes simultaneously? Would the syncpoint
differentiate classes on an OP_DONE event?

I suppose that MLOCK (the module lock) implies locking of the whole module;
wouldn't it make sense to just use the module IDs defined in the TRM?

> + case HOST1X_CLASS_VIC:
> + return 3;
> + case HOST1X_CLASS_GR3D:
> + return 4;
> + default:
> + return -EINVAL;
> + }
> +#endif
> +}
> +
> +static void timeout_release_mlock(struct host1x_cdma *cdma)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x_channel *ch = cdma_to_channel(cdma);
> + struct host1x *host = cdma_to_host1x(cdma);
> + u32 pb_pos, pb_temp[3], val;
> + int err, mlock_id;
> +
> + if (!host->hv_regs)
> + return;
> +
> + mlock_id = mlock_id_for_class(cdma->timeout.class);
> + if (WARN(mlock_id < 0, "Invalid class ID"))
> + return;
> +
> + val = host1x_hypervisor_readl(host, HOST1X_HV_MLOCK(mlock_id));
> + if 

Re: [PATCH 10/10] gpu: host1x: Optionally block when acquiring channel

2017-11-05 Thread Dmitry Osipenko
On 05.11.2017 14:01, Mikko Perttunen wrote:
> Add an option to host1x_channel_request to interruptibly wait for a
> free channel. This allows IOCTLs that acquire a channel to block
> the userspace.
> 

Wouldn't it be more optimal to request the channel and block after the job's
pinning, when all patching and checks are completed? Note that right now we
have locking around submission in DRM, which I suppose should go away by making
the locking fine-grained.

Or maybe it would be more optimal to just iterate over channels, like I
suggested before [0]?

[0]
https://github.com/cyndis/linux/commit/9e6d87f40afb01fbe13ba65c73cb617bdfcd80b2#commitcomment-25012960

> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/drm/tegra/drm.c  |  9 +
>  drivers/gpu/drm/tegra/gr2d.c |  6 +++---
>  drivers/gpu/drm/tegra/gr3d.c |  6 +++---
>  drivers/gpu/host1x/channel.c | 40 ++--
>  drivers/gpu/host1x/channel.h |  1 +
>  include/linux/host1x.h   |  2 +-
>  6 files changed, 43 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index 658bc8814f38..19f77c1a76c0 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -389,7 +389,8 @@ static int host1x_waitchk_copy_from_user(struct 
> host1x_waitchk *dest,
>   * Request a free hardware host1x channel for this user context, or if the
>   * context already has one, bump its refcount.
>   *
> - * Returns 0 on success, or -EBUSY if there were no free hardware channels.
> + * Returns 0 on success, -EINTR if wait for a free channel was interrupted,
> + * or other error.
>   */
>  int tegra_drm_context_get_channel(struct tegra_drm_context *context)
>  {
> @@ -398,10 +399,10 @@ int tegra_drm_context_get_channel(struct 
> tegra_drm_context *context)
>   mutex_lock(>lock);
>  
>   if (context->pending_jobs == 0) {
> - context->channel = host1x_channel_request(client->dev);
> - if (!context->channel) {
> + context->channel = host1x_channel_request(client->dev, true);
> + if (IS_ERR(context->channel)) {
>   mutex_unlock(>lock);
> - return -EBUSY;
> + return PTR_ERR(context->channel);
>   }
>   }
>  
> diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
> index 3db3bcac48b9..c1853402f69b 100644
> --- a/drivers/gpu/drm/tegra/gr2d.c
> +++ b/drivers/gpu/drm/tegra/gr2d.c
> @@ -32,9 +32,9 @@ static int gr2d_init(struct host1x_client *client)
>   unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
>   struct gr2d *gr2d = to_gr2d(drm);
>  
> - gr2d->channel = host1x_channel_request(client->dev);
> - if (!gr2d->channel)
> - return -ENOMEM;
> + gr2d->channel = host1x_channel_request(client->dev, false);
> + if (IS_ERR(gr2d->channel))
> + return PTR_ERR(gr2d->channel);
>  
>   client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
>   if (!client->syncpts[0]) {
> diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
> index 279438342c8c..793a91d577cb 100644
> --- a/drivers/gpu/drm/tegra/gr3d.c
> +++ b/drivers/gpu/drm/tegra/gr3d.c
> @@ -42,9 +42,9 @@ static int gr3d_init(struct host1x_client *client)
>   unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
>   struct gr3d *gr3d = to_gr3d(drm);
>  
> - gr3d->channel = host1x_channel_request(client->dev);
> - if (!gr3d->channel)
> - return -ENOMEM;
> + gr3d->channel = host1x_channel_request(client->dev, false);
> + if (IS_ERR(gr3d->channel))
> + return PTR_ERR(gr3d->channel);
>  
>   client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
>   if (!client->syncpts[0]) {
> diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
> index 9d8cad12f9d8..eebcd51261df 100644
> --- a/drivers/gpu/host1x/channel.c
> +++ b/drivers/gpu/host1x/channel.c
> @@ -43,6 +43,7 @@ int host1x_channel_list_init(struct host1x_channel_list 
> *chlist,
>   bitmap_zero(chlist->allocated_channels, num_channels);
>  
>   mutex_init(>lock);
> + sema_init(>sema, num_channels);
>  
>   return 0;
>  }
> @@ -99,6 +100,8 @@ static void release_channel(struct kref *kref)
>   host1x_cdma_deinit(>cdma);
>  
>   clear_bit(channel->id, chlist->allocated_channels);
> +
> + up(>sema);
>  }
>  
>  void host1x_channel_put(struct host1x_channel *channel)
> @@ -107,19 +110,30 @@ void host1x_channel_put(struct host1x_channel *channel)
>  }
>  EXPORT_SYMBOL(host1x_channel_put);
>  
> -static struct host1x_channel *acquire_unused_channel(struct host1x *host)
> +static struct host1x_channel *acquire_unused_channel(struct host1x *host,
> +  bool wait)
>  {
>   struct host1x_channel_list *chlist = >channel_list;
>   unsigned int max_channels = host->info->nb_channels;
>   unsigned int index;
> + 

Re: [PATCH 08/10] drm/tegra: Implement dynamic channel allocation model

2017-11-05 Thread Dmitry Osipenko
On 05.11.2017 14:01, Mikko Perttunen wrote:
> In the traditional channel allocation model, a single hardware channel
> was allocated for each client. This is simple from an implementation
> perspective but prevents use of hardware scheduling.
> 
> This patch implements a channel allocation model where when a user
> submits a job for a context, a hardware channel is allocated for
> that context. The same channel is kept for as long as there are
> incomplete jobs for that context. This way we can use hardware
> scheduling and channel isolation between userspace processes, but
> also prevent idling contexts from taking up hardware resources.
> 

The dynamic allocation of channel resources (pushbuf) is very expensive,
negating all the benefits that this model should bring, at least in the
non-IOMMU case. We could statically preallocate the channel resources or defer
freeing them.

> For now, this patch only adapts VIC to the new model.
> 

I think VIC's conversion should be a distinct patch.

> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/drm/tegra/drm.c | 46 ++
>  drivers/gpu/drm/tegra/drm.h |  7 +++-
>  drivers/gpu/drm/tegra/vic.c | 79 
> +++--
>  3 files changed, 92 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index b964e18e3058..658bc8814f38 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -382,6 +382,51 @@ static int host1x_waitchk_copy_from_user(struct 
> host1x_waitchk *dest,
>   return 0;
>  }
>  
> +/**
> + * tegra_drm_context_get_channel() - Get a channel for submissions
> + * @context: Context for which to get a channel for
> + *
> + * Request a free hardware host1x channel for this user context, or if the
> + * context already has one, bump its refcount.
> + *
> + * Returns 0 on success, or -EBUSY if there were no free hardware channels.
> + */
> +int tegra_drm_context_get_channel(struct tegra_drm_context *context)
> +{
> + struct host1x_client *client = >client->base;
> +
> + mutex_lock(>lock);
> +
> + if (context->pending_jobs == 0) {
> + context->channel = host1x_channel_request(client->dev);
> + if (!context->channel) {
> + mutex_unlock(>lock);
> + return -EBUSY;
> + }
> + }
> +
> + context->pending_jobs++;
> +
> + mutex_unlock(>lock);
> +
> + return 0;
> +}
> +
> +/**
> + * tegra_drm_context_put_channel() - Put a previously gotten channel
> + * @context: Context which channel is no longer needed
> + *
> + * Decrease the refcount of the channel associated with this context,
> + * freeing it if the refcount drops to zero.
> + */
> +void tegra_drm_context_put_channel(struct tegra_drm_context *context)
> +{
> + mutex_lock(>lock);
> + if (--context->pending_jobs == 0)
> + host1x_channel_put(context->channel);
> + mutex_unlock(>lock);
> +}
> +
>  static void tegra_drm_job_done(struct host1x_job *job)
>  {
>   struct tegra_drm_context *context = job->callback_data;
> @@ -737,6 +782,7 @@ static int tegra_open_channel(struct drm_device *drm, 
> void *data,
>   kfree(context);
>  
>   kref_init(>ref);
> + mutex_init(>lock);
>  
>   mutex_unlock(>lock);
>   return err;
> diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
> index 11d690846fd0..d0c3f1f779f6 100644
> --- a/drivers/gpu/drm/tegra/drm.h
> +++ b/drivers/gpu/drm/tegra/drm.h
> @@ -78,9 +78,12 @@ struct tegra_drm_context {
>   struct kref ref;
>  
>   struct tegra_drm_client *client;
> + unsigned int id;
> +
> + struct mutex lock;
>   struct host1x_channel *channel;
>   struct host1x_syncpt *syncpt;
> - unsigned int id;
> + unsigned int pending_jobs;
>  };
>  
>  struct tegra_drm_client_ops {
> @@ -95,6 +98,8 @@ struct tegra_drm_client_ops {
>   void (*submit_done)(struct tegra_drm_context *context);
>  };
>  
> +int tegra_drm_context_get_channel(struct tegra_drm_context *context);
> +void tegra_drm_context_put_channel(struct tegra_drm_context *context);
>  int tegra_drm_submit(struct tegra_drm_context *context,
>struct drm_tegra_submit *args, struct drm_device *drm,
>struct drm_file *file);
> diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
> index efe5f3af933e..0cacf023a890 100644
> --- a/drivers/gpu/drm/tegra/vic.c
> +++ b/drivers/gpu/drm/tegra/vic.c
> @@ -33,7 +33,6 @@ struct vic {
>  
>   void __iomem *regs;
>   struct tegra_drm_client client;
> - struct host1x_channel *channel;
>   struct iommu_domain *domain;
>   struct device *dev;
>   struct clk *clk;
> @@ -161,28 +160,12 @@ static int vic_init(struct host1x_client *client)
>   goto detach_device;
>   }
>  
> - vic->channel = host1x_channel_request(client->dev);
> - if (!vic->channel) {
> 

Re: [PATCH 10/10] gpu: host1x: Optionally block when acquiring channel

2017-11-10 Thread Dmitry Osipenko
On 07.11.2017 18:29, Dmitry Osipenko wrote:
> On 07.11.2017 16:11, Mikko Perttunen wrote:
>> On 05.11.2017 19:14, Dmitry Osipenko wrote:
>>> On 05.11.2017 14:01, Mikko Perttunen wrote:
>>>> Add an option to host1x_channel_request to interruptibly wait for a
>>>> free channel. This allows IOCTLs that acquire a channel to block
>>>> the userspace.
>>>>
>>>
>>> Wouldn't it be more optimal to request channel and block after job's pining,
>>> when all patching and checks are completed? Note that right now we have 
>>> locking
>>> around submission in DRM, which I suppose should go away by making locking 
>>> fine
>>> grained.
>>
>> That would be possible, but I don't think it should matter much since 
>> contention
>> here should not be the common case.
>>
>>>
>>> Or maybe it would be more optimal to just iterate over channels, like I
>>> suggested before [0]?
>>
>> Somehow I hadn't noticed this before, but this would break the invariant of
>> having one client/class per channel.
>>
> 
> Yes, currently there is a weak relation of channel and clients device, but 
> seems
> channels device is only used for printing dev_* messages and device could be
> borrowed from the channels job. I don't see any real point of hardwiring 
> channel
> to a specific device or client.

Although, it won't work with syncpoint assignment to channel.



Re: [PATCH 10/10] gpu: host1x: Optionally block when acquiring channel

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 00:15, Dmitry Osipenko wrote:
> On 07.11.2017 18:29, Dmitry Osipenko wrote:
>> On 07.11.2017 16:11, Mikko Perttunen wrote:
>>> On 05.11.2017 19:14, Dmitry Osipenko wrote:
>>>> On 05.11.2017 14:01, Mikko Perttunen wrote:
>>>>> Add an option to host1x_channel_request to interruptibly wait for a
>>>>> free channel. This allows IOCTLs that acquire a channel to block
>>>>> the userspace.
>>>>>
>>>>
>>>> Wouldn't it be more optimal to request channel and block after job's 
>>>> pining,
>>>> when all patching and checks are completed? Note that right now we have 
>>>> locking
>>>> around submission in DRM, which I suppose should go away by making locking 
>>>> fine
>>>> grained.
>>>
>>> That would be possible, but I don't think it should matter much since 
>>> contention
>>> here should not be the common case.
>>>
>>>>
>>>> Or maybe it would be more optimal to just iterate over channels, like I
>>>> suggested before [0]?
>>>
>>> Somehow I hadn't noticed this before, but this would break the invariant of
>>> having one client/class per channel.
>>>
>>
>> Yes, currently there is a weak relation of channel and clients device, but 
>> seems
>> channels device is only used for printing dev_* messages and device could be
>> borrowed from the channels job. I don't see any real point of hardwiring 
>> channel
>> to a specific device or client.
> 
> Although, it won't work with syncpoint assignment to channel.

On the other hand.. it should work if one syncpoint could be assigned to
multiple channels, couldn't it?


Re: [PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 17:06, Vladimir Zapolskiy wrote:
> Hi Dmitry,
> 
> I'll add just a couple of minor comments, in general the code looks
> very good.
> 

Thank you very much for the review!

> On 10/20/2017 12:34 AM, Dmitry Osipenko wrote:
>> NVIDIA Tegra20/30/114/124/132 SoC's have video decoder engine that
>> supports standard set of video formats like H.264 / MPEG-4 / WMV / VC1.
>> Currently implemented decoding of CAVLC H.264 on Tegra20 only.
>>
>> Signed-off-by: Dmitry Osipenko 
> 
> [snip]
> 
>> +++ b/drivers/staging/tegra-vde/uapi.h
>> @@ -0,0 +1,101 @@
>> +/*
>> + * Copyright (C) 2016-2017 Dmitry Osipenko 
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
> 
> From the specified MODULE_LICENSE("GPL") I'd rather expect to see a reference
> to GPLv2+ license in the header, and here the text resembles MIT license only.
> 
> I understand that it is a UAPI header file and it may happen that different
> rules are applied to this kind of sources, hopefully Greg can give the right
> directions.

Indeed, I probably copied the license text from some other UAPI header without
giving it much thought. Will change it to GPL, thanks.

> 
> In general you may avoid the headache with the custom UAPI, if you reuse
> V4L2 interfaces, if I remember correctly drivers/media/platform/coda does it.
> Also from my point of view the custom UAPI is the only reason why the driver
> is pushed to the staging folder.

Thanks for the pointer. I see that coda driver does some raw bitstream parsing
in the driver, which is a bit icky, but probably is a good enough variant. I'll
take a closer look at implementing V4L interface at some point later, meanwhile
custom UAPI + VDPAU userspace serves us pretty well.

> 
> [snip]
> 
>> +struct tegra_vde {
>> +void __iomem *sxe;
>> +void __iomem *bsev;
>> +void __iomem *mbe;
>> +void __iomem *ppe;
>> +void __iomem *mce;
>> +void __iomem *tfe;
>> +void __iomem *ppb;
>> +void __iomem *vdma;
>> +void __iomem *frameid;
> 
> Please find a comment in tegra_vde_probe() function regarding
> devm_ioremap_resource() calls.
> 
>> +struct mutex lock;
>> +struct miscdevice miscdev;
>> +struct reset_control *rst;
>> +struct gen_pool *iram_pool;
>> +struct completion decode_completion;
>> +struct clk *clk;
>> +dma_addr_t iram_lists_addr;
>> +u32 *iram;
>> +};
> 
> [snip]
> 
>> +static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
>> +{
>> +struct device *dev = vde->miscdev.parent;
>> +u32 value;
>> +int err;
>> +
>> +err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
>> + !(value & BIT(2)), 1, 100);
>> +if (err) {
>> +dev_err(dev, "BSEV unknown bit timeout\n");
>> +return err;
>> +}
>> +
>> +err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
>> + (value & BSE_ICMDQUE_EMPTY), 1, 100);
>> +if (err) {
>> +dev_err(dev, "BSEV ICMDQUE flush timeout\n");
>> +return err;
>> +}
>> +
>> +if (!wait_dma)
>> +return 0;
>> +
>> +err = readl_relaxed_poll_timeout(

Re: [PATCH v4 2/5] media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 17:21, Vladimir Zapolskiy wrote:
> Hi Dmitry,
> 
> On 10/20/2017 12:34 AM, Dmitry Osipenko wrote:
>> Add binding documentation for the Video Decoder Engine which is found
>> on NVIDIA Tegra20/30/114/124/132 SoC's.
>>
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>  .../devicetree/bindings/media/nvidia,tegra-vde.txt | 55 
>> ++
>>  1 file changed, 55 insertions(+)
>>  create mode 100644 
>> Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
>>
>> diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt 
>> b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
>> new file mode 100644
>> index ..470237ed6fe5
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
>> @@ -0,0 +1,55 @@
>> +NVIDIA Tegra Video Decoder Engine
>> +
>> +Required properties:
>> +- compatible : Must contain one of the following values:
>> +   - "nvidia,tegra20-vde"
>> +   - "nvidia,tegra30-vde"
>> +   - "nvidia,tegra114-vde"
>> +   - "nvidia,tegra124-vde"
>> +   - "nvidia,tegra132-vde"
>> +- reg : Must contain an entry for each entry in reg-names.
>> +- reg-names : Must include the following entries:
>> +  - sxe
>> +  - bsev
>> +  - mbe
>> +  - ppe
>> +  - mce
>> +  - tfe
>> +  - ppb
>> +  - vdma
>> +  - frameid
> 
> I've already mentioned it in my review of the driver code, but the
> version from v3 with a single region is more preferable.
> 
> Also it implies that "reg-names" property will be removed.
> 

Please see my reply to the drivers code review.

>> +- iram : Must contain phandle to the mmio-sram device node that represents
>> + IRAM region used by VDE.
>> +- interrupts : Must contain an entry for each entry in interrupt-names.
>> +- interrupt-names : Must include the following entries:
>> +  - sync-token
>> +  - bsev
>> +  - sxe
>> +- clocks : Must include the following entries:
>> +  - vde
>> +- resets : Must include the following entries:
>> +  - vde
>> +
>> +Example:
>> +
>> +video-codec@6001a000 {
>> +compatible = "nvidia,tegra20-vde";
>> +reg = <0x6001a000 0x1000 /* Syntax Engine */
>> +   0x6001b000 0x1000 /* Video Bitstream Engine */
>> +   0x6001c000  0x100 /* Macroblock Engine */
>> +   0x6001c200  0x100 /* Post-processing Engine */
>> +   0x6001c400  0x100 /* Motion Compensation Engine */
>> +   0x6001c600  0x100 /* Transform Engine */
>> +   0x6001c800  0x100 /* Pixel prediction block */
>> +   0x6001ca00  0x100 /* Video DMA */
>> +   0x6001d800  0x300 /* Video frame controls */>;
>> +reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
>> +"tfe", "ppb", "vdma", "frameid";
>> +iram = <_pool>; /* IRAM region */
>> +interrupts = , /* Sync token interrupt 
>> */
>> + , /* BSE-V interrupt */
>> + ; /* SXE interrupt */
>> +interrupt-names = "sync-token", "bsev", "sxe";
>> +clocks = <_car TEGRA20_CLK_VDE>;
>> +resets = <_car 61>;
>> +};
>>



Re: [PATCH v4 1/5] ARM: tegra: Add device tree node to describe IRAM

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 17:18, Vladimir Zapolskiy wrote:
> Hi Dmitry,
> 
> On 10/20/2017 12:34 AM, Dmitry Osipenko wrote:
>> From: Vladimir Zapolskiy 
>>
>> All Tegra SoCs contain 256KiB IRAM, which is used to store CPU resume code
>> and by hardware engines like a video decoder.
>>
>> Signed-off-by: Vladimir Zapolskiy 
> 
> Please add also your own closing "Signed-off-by" tag, please reference
> to "Developer's Certificate of Origin 1.1", point (c), it is found in
> Documentation/process/submitting-patches.rst
> 

Indeed, thanks!

>> ---
>>  arch/arm/boot/dts/tegra114.dtsi | 8 
>>  arch/arm/boot/dts/tegra124.dtsi | 8 
>>  arch/arm/boot/dts/tegra20.dtsi  | 8 
>>  arch/arm/boot/dts/tegra30.dtsi  | 8 
> 
> My assumption is that Thierry would prefer to get 4 separate patches,
> one for each platform, please split the patch.
> 

Thierry, would you?

> Also thanks for your time and your efforts applied to push my occasional
> change, please feel free to take your own authorship for 3 out of 4 patches.
> 

Okay.

>>  4 files changed, 32 insertions(+)
>>
>> diff --git a/arch/arm/boot/dts/tegra114.dtsi 
>> b/arch/arm/boot/dts/tegra114.dtsi
>> index 8932ea3afd5f..13f6087790c8 100644
>> --- a/arch/arm/boot/dts/tegra114.dtsi
>> +++ b/arch/arm/boot/dts/tegra114.dtsi
>> @@ -10,6 +10,14 @@
>>  compatible = "nvidia,tegra114";
>>  interrupt-parent = <>;
>>  
>> +iram@4000 {
>> +compatible = "mmio-sram";
> 
> Unfortunately Thierry hasn't yet replied, but my assumption is that
> the list of compatibles should be extended with one more SoC specific
> value like
> 
>   compatible = "nvidia,tegra114-sysram", "mmio-sram";
> 
> I'm not sure, if Tegra maintainers want to see a new compatible
> described in Documentation/devicetree/bindings.
> 

The custom compatible string shouldn't be needed. AFAIK, IRAM doesn't have any
exposed controls, so just a generic "mmio-sram" suits well here.

>> +reg = <0x4000 0x4>;
>> +#address-cells = <1>;
>> +#size-cells = <1>;
>> +ranges = <0 0x4000 0x4>;
>> +};
>> +
>>  host1x@5000 {
>>  compatible = "nvidia,tegra114-host1x", "simple-bus";
>>  reg = <0x5000 0x00028000>;
>> diff --git a/arch/arm/boot/dts/tegra124.dtsi 
>> b/arch/arm/boot/dts/tegra124.dtsi
>> index 8baf00b89efb..a3585ed82646 100644
>> --- a/arch/arm/boot/dts/tegra124.dtsi
>> +++ b/arch/arm/boot/dts/tegra124.dtsi
> 
> The considerations from above are applicable to the rest of
> the touched platforms.



Re: [PATCH 08/10] drm/tegra: Implement dynamic channel allocation model

2017-11-13 Thread Dmitry Osipenko
On 07.11.2017 15:29, Mikko Perttunen wrote:
> On 05.11.2017 19:43, Dmitry Osipenko wrote:
>> On 05.11.2017 14:01, Mikko Perttunen wrote:
>>> In the traditional channel allocation model, a single hardware channel
>>> was allocated for each client. This is simple from an implementation
>>> perspective but prevents use of hardware scheduling.
>>>
>>> This patch implements a channel allocation model where when a user
>>> submits a job for a context, a hardware channel is allocated for
>>> that context. The same channel is kept for as long as there are
>>> incomplete jobs for that context. This way we can use hardware
>>> scheduling and channel isolation between userspace processes, but
>>> also prevent idling contexts from taking up hardware resources.
>>>
>>
>> The dynamic channels resources (pushbuf) allocation is very expensive,
>> neglecting all benefits that this model should bring at least in non-IOMMU 
>> case.
>> We could have statically preallocated channels resources or defer resources
>> freeing.
> 
> This is true. I'll try to figure out a nice way to keep the pushbuf 
> allocations.

One variant could be to have all channel resources statically preallocated in
the non-IOMMU case, because CMA is expensive. Then you should measure the impact
of the allocations in the IOMMU case and, if it is significant, maybe implement
Host1x PM autosuspend, releasing all channels when Host1x becomes idle.

I think the above should be efficient and easy to implement.


Re: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

2017-09-22 Thread Dmitry Osipenko
On 22.09.2017 17:02, Mikko Perttunen wrote:
> On 09/05/2017 04:33 PM, Dmitry Osipenko wrote:
>> On 05.09.2017 11:10, Mikko Perttunen wrote:
>>> ... >> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
>>> index 8447a56c41ca..0161da331702 100644
>>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>>> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>>>     syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>>>   +    /* assign syncpoint to channel */
>>> +    host1x_hw_syncpt_assign_channel(host, sp, ch);
>>
>> This function could be renamed to host1x_hw_assign_syncpt_to_channel() and 
>> then
>> comment to it won't be needed.
> 
> Maybe host1x_hw_syncpt_assign_to_channel? I'd like to keep the current 
> noun_verb
> format. Though IMHO even the current name is pretty descriptive in itself.
> 

That variant sounds good to me as well.

>>
>> It is not very nice that channel would be re-assigned on each submit. Maybe 
>> that
>> assignment should be done by host1x_syncpt_request() ?
> 
> host1x_syncpt_request doesn't know about the channel so we'd have to thread 
> this
> information there and through each client driver in drm/tegra/, so I decided 
> not
> to do this at this time. I'm planning a new channel allocation model which 
> will
> change that side of the code anyway, so I'd like to revisit this at that 
> point.
> For our current channel model, the current implementation doesn't have any
> functional downsides anyway.
> 

Another very minor downside is that it causes an extra dummy invocation on
pre-Tegra186. Of course that could be changed later in a follow-up patch, not a
big deal :)

>>
>>> +
>>>   job->syncpt_end = syncval;
>>>     /* add a setclass for modules that require it */
>>> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c
>>> b/drivers/gpu/host1x/hw/syncpt_hw.c
>>> index 7b0270d60742..dc7a44614fef 100644
>>> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
>>> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
>>> @@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp,
>>> void *patch_addr)
>>>   return 0;
>>>   }
>>>   +/**
>>> + * syncpt_assign_channel() - Assign syncpoint to channel
>>> + * @sp: syncpoint
>>> + * @ch: channel
>>> + *
>>> + * On chips with the syncpoint protection feature (Tegra186+), assign @sp 
>>> to
>>> + * @ch, preventing other channels from incrementing the syncpoints. If @ch 
>>> is
>>> + * NULL, unassigns the syncpoint.
>>> + *
>>> + * On older chips, do nothing.
>>> + */
>>> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
>>> +  struct host1x_channel *ch)
>>> +{
>>> +#if HOST1X_HW >= 6
>>> +    struct host1x *host = sp->host;
>>> +
>>> +    if (!host->hv_regs)
>>> +    return;
>>> +
>>> +    host1x_sync_writel(host,
>>> +   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
>>> +   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
>>> +#endif
>>> +}
>>> +
>>> +/**
>>> + * syncpt_enable_protection() - Enable syncpoint protection
>>> + * @host: host1x instance
>>> + *
>>> + * On chips with the syncpoint protection feature (Tegra186+), enable this
>>> + * feature. On older chips, do nothing.
>>> + */
>>> +static void syncpt_enable_protection(struct host1x *host)
>>> +{
>>> +#if HOST1X_HW >= 6
>>> +    if (!host->hv_regs)
>>> +    return;
>>> +
>>> +    host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
>>> + HOST1X_HV_SYNCPT_PROT_EN);
>>> +#endif
>>> +}
>>> +
>>>   static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>>>   .restore = syncpt_restore,
>>>   .restore_wait_base = syncpt_restore_wait_base,
>>> @@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops 
>>> = {
>>>   .load = syncpt_load,
>>>   .cpu_incr = syncpt_cpu_incr,
>>>   .patch_wait = syncpt_patch_wait,
>>> +    .assign_channel = syncpt_assign_channel,
>>> +    .enable_protection = syncpt_enable_protection,
>>>   };
>>> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
>>> index 048ac9e344ce..4c7a4

Re: [PATCH 3/4] gpu: host1x: Improve debug disassembly formatting

2017-08-18 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> The host1x driver prints out "disassembly" dumps of the command FIFO
> and gather contents on submission timeouts. However, the output has
> been quite difficult to read with unnecessary newlines and occasional
> missing parentheses.
> 
> Fix these problems by using pr_cont to remove unnecessary newlines
> and by fixing other small issues.
> 
> Signed-off-by: Mikko Perttunen 
> ---
It's indeed a bit more readable now.

Reviewed-by: Dmitry Osipenko 
Tested-by: Dmitry Osipenko 

-- 
Dmitry


Re: [PATCH 4/4] drm/tegra: Use u64_to_user_ptr helper

2017-08-18 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
> to user pointers instead of writing out the cast manually.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/drm/tegra/drm.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index e3331a2bc082..78c98736b0a5 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -389,11 +389,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>   unsigned int num_relocs = args->num_relocs;
>   unsigned int num_waitchks = args->num_waitchks;
>   struct drm_tegra_cmdbuf __user *cmdbufs =
> - (void __user *)(uintptr_t)args->cmdbufs;
> - struct drm_tegra_reloc __user *relocs =
> - (void __user *)(uintptr_t)args->relocs;
> + u64_to_user_ptr(args->cmdbufs);
> + struct drm_tegra_reloc __user *relocs = u64_to_user_ptr(args->relocs);
>   struct drm_tegra_waitchk __user *waitchks =
> - (void __user *)(uintptr_t)args->waitchks;
> + u64_to_user_ptr(args->waitchks);

What about factoring out the 'u64_to_user_ptr()' assignments to reduce the
messiness a tad? Like this:

struct drm_tegra_waitchk __user *waitchks;
struct drm_tegra_cmdbuf __user *cmdbufs;
struct drm_tegra_reloc __user *relocs;
...
waitchks = u64_to_user_ptr(args->waitchks);
cmdbufs = u64_to_user_ptr(args->cmdbufs);
relocs = u64_to_user_ptr(args->relocs);


>   struct drm_tegra_syncpt syncpt;
>   struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
>   struct host1x_syncpt *sp;
> @@ -520,7 +519,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>   }
>   }
>  
> - if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
> + if (copy_from_user(&syncpt, u64_to_user_ptr(args->syncpts),

What about defining and using 'struct drm_tegra_syncpt __user *syncpts' for
consistency with the other '__user' definitions?

>  sizeof(syncpt))) {
>   err = -EFAULT;
>   goto fail;
> 

-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-18 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/dev.h   | 16 
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++
>  drivers/gpu/host1x/syncpt.c|  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>   u32 (*load)(struct host1x_syncpt *syncpt);
>   int (*cpu_incr)(struct host1x_syncpt *syncpt);
>   int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> +struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
> host1x *host,
>   return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +struct host1x_syncpt *sp,
> +struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>   void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>   syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
>   job->syncpt_end = syncval;
>  
>   /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
> b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
> void *patch_addr)
>   return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +   struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> +HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> +  enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +  HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>   .restore = syncpt_restore,
>   .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = 
> {
>   .load = syncpt_load,
>   .cpu_incr = syncpt_cpu_incr,
>   .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>   for (i = 0; i < host->info->nb_pts; i++) {
>   syncpt[i].id = i;
>   syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>   }
>  
>   for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>   host->bases = bases;
>  
>   host1x_syncpt_restore(host);
> + 

Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Dmitry Osipenko
On 19.08.2017 11:10, Mikko Perttunen wrote:
[snip]
>>> +host1x_hw_syncpt_set_protection(host, true);
>>
>> Is it really okay to force the protection? Maybe protection should be enabled
>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>> avoid software jobs validation for Tegra124+.
> 
> I don't quite get your comment. The hardware syncpt protection layer being
> enabled should never hurt - it doesn't mess with any valid jobs. It's also 
> only
> on Tegra186 so I'm not sure where the Tegra124 comes from.

Right, it's the gather filter on T124+, my bad. This raises several questions.

1) Why do we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it always be enforced, or
do we actually want to be a bit more flexible and allow disabling it? Imagine
that you are making a custom application and want to utilize channels in a
different way.

2) Since syncpoint protection is a T186 feature, what about previous
generations? Should we validate syncpoints in software for them? We have
'syncpoint validation' patch staged in grate's kernel
https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
(I'll start sending out this and other patches after a bit more thorough
testing.) Improperly used syncpoints potentially could allow one program to
damage others.

3) What exactly does the gather filter do? Could you list all the commands that
it filters out, please?

4) What about T30/T114 that do not have gather filter? Should we validate those
commands for them in a software firewall?

So maybe we should implement several layers of validation in the SW firewall.
Like all layers for T20 (memory boundaries validation etc), software gather
filter for T30/114 and software syncpoint validation for T30/114/124/210.

-- 
Dmitry


Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-19 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> The gather filter is a feature present on Tegra124 and newer where the
> hardware prevents GATHERed command buffers from executing commands
> normally reserved for the CDMA pushbuffer which is maintained by the
> kernel driver.
> 
> This commit enables the gather filter on all supporting hardware.
> 
> Signed-off-by: Mikko Perttunen 
> ---

The TRM says that an "Invalid Gbuffer cmd" interrupt is raised when filtering
happens. Is that interrupt disabled by default, or would it cause an 'unhandled
interrupt'?

-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Dmitry Osipenko
On 19.08.2017 13:35, Mikko Perttunen wrote:
> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>> [snip]
>>>>> +host1x_hw_syncpt_set_protection(host, true);
>>>>
>>>> Is it really okay to force the protection? Maybe protection should be 
>>>> enabled
>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have 
>>>> to
>>>> avoid software jobs validation for Tegra124+.
>>>
>>> I don't quite get your comment. The hardware syncpt protection layer being
>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also 
>>> only
>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>
>> Right, it's the gather filter on T124+, my bad. This raises several 
>> questions.
>>
>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or 
>> we
>> actually want to be a bit more flexible and allow to disable it. Imagine that
>> you are making a custom application and want to utilize channels in a
>> different way.
> 
> I think it should be up to the user to decide whether they want the firewall 
> or
> not. It's clearly the most useful on the older chips - especially Tegra20 due 
> to
> lack of IOMMU. The performance penalty is too great to force it on always.
> 

Of course there is some overhead, but it is not that great. Usually a command
buffer contains just a dozen commands. It should be an interesting challenge to
optimize its performance though.

> The programming model should always be considered the same - the rules of what
> you are allowed to do are the same whether the firewall, or any
> hardware-implemented protection features, are on or not.
> 

Well, okay.

>>
>> 2) Since syncpoint protection is a T186 feature, what about previous
>> generations? Should we validate syncpoints in software for them? We have
>> 'syncpoint validation' patch staged in grate's kernel
>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>
>> (I'll start sending out this and other patches after a bit more thorough
>> testing.) Improperly used syncpoints potentially could allow one program to
>> damage others.
> 
> Yes, I think the firewall should have this feature for older generations. We
> could disable the check on Tegra186, as you point towards in question 4.
> 
>>
>> 3) What exactly does gather filter? Could you list all the commands that it
>> filters out, please?
> 
> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and 
> EXTEND
> are filtered.
> 

Okay, then what about the SETSTRMID command? I don't see its disassembly in the
host1x gather debug dump. Was it accidentally missed?

>>
>> 4) What about T30/T114 that do not have gather filter? Should we validate 
>> those
>> commands for them in a software firewall?
> 
> Yes, the firewall should validate that.
> 
>>
>> So maybe we should implement several layers of validation in the SW firewall.
>> Like all layers for T20 (memory boundaries validation etc), software gather
>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>
> 
> That seems like a good idea.

Alright, factoring the firewall out of job.c should probably be the first step.

-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Dmitry Osipenko
On 19.08.2017 14:32, Mikko Perttunen wrote:
> 
> 
> On 08/19/2017 02:11 PM, Dmitry Osipenko wrote:
>> On 19.08.2017 13:35, Mikko Perttunen wrote:
>>> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>>>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>>>> [snip]
>>>>>>> +host1x_hw_syncpt_set_protection(host, true);
>>>>>>
>>>>>> Is it really okay to force the protection? Maybe protection should be 
>>>>>> enabled
>>>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would 
>>>>>> have to
>>>>>> avoid software jobs validation for Tegra124+.
>>>>>
>>>>> I don't quite get your comment. The hardware syncpt protection layer being
>>>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also
>>>>> only
>>>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>>>
>>>> Right, it's the gather filter on T124+, my bad. This raises several 
>>>> questions.
>>>>
>>>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced 
>>>> or we
>>>> actually want to be a bit more flexible and allow to disable it. Imagine 
>>>> that
>>>> you are making a custom application and want to utilize channels in a
>>>> different way.
>>>
>>> I think it should be up to the user to decide whether they want the 
>>> firewall or
>>> not. It's clearly the most useful on the older chips - especially Tegra20 
>>> due to
>>> lack of IOMMU. The performance penalty is too great to force it on always.
>>>
>>
>> Of course there is some overhead but is not that great. Usually command 
>> buffer
>> contains just a dozen of commands. It should be an interesting challenge to
>> optimize its performance though.
>>
>>> The programming model should always be considered the same - the rules of 
>>> what
>>> you are allowed to do are the same whether the firewall, or any
>>> hardware-implemented protection features, are on or not.
>>>
>>
>> Well, okay.
>>
>>>>
>>>> 2) Since syncpoint protection is a T186 feature, what about previous
>>>> generations? Should we validate syncpoints in software for them? We have
>>>> 'syncpoint validation' patch staged in grate's kernel
>>>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>>>
>>>>
>>>> (I'll start sending out this and other patches after a bit more thorough
>>>> testing.) Improperly used syncpoints potentially could allow one program to
>>>> damage others.
>>>
>>> Yes, I think the firewall should have this feature for older generations. We
>>> could disable the check on Tegra186, as you point towards in question 4.
>>>
>>>>
>>>> 3) What exactly does gather filter? Could you list all the commands that it
>>>> filters out, please?
>>>
>>> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and 
>>> EXTEND
>>> are filtered.
>>>
>>
>> Okay, then what about SETSTRMID command, I don't see its disassembly in the
>> host1x gather debug dump. Is it accidentally missed?
>>
> 
> True, it's a new command in Tegra186 and I missed adding it to the 
> disassembler.
> It's probably fine to add it in another patch since it's only intended for
> kernel use and it's useless without IOMMU support anyway (which we don't have
> currently on Tegra186).
> 

Yeah, but it would probably be preferable for that patch to precede the one
enabling the "gather filter".

>>>>
>>>> 4) What about T30/T114 that do not have gather filter? Should we validate 
>>>> those
>>>> commands for them in a software firewall?
>>>
>>> Yes, the firewall should validate that.
>>>
>>>>
>>>> So maybe we should implement several layers of validation in the SW 
>>>> firewall.
>>>> Like all layers for T20 (memory boundaries validation etc), software gather
>>>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>>>
>>>
>>> That seems like a good idea.
>>
>> Alright, factoring out firewall from job.c probably should be the first step.
>>


-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-19 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen 
> ---
[snip]
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>   for (i = 0; i < host->info->nb_pts; i++) {
>   syncpt[i].id = i;
>   syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>   }

How about factoring out that assignment and adding a comment, something like this:

/* clear syncpoint-channel assignments on Tegra186+ */
for (i = 0; i < host->info->nb_pts; i++)
host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);

And maybe even add an inline function for clarity, like:

static inline void host1x_hw_syncpt_deassign_channel(struct host1x *host,
 struct host1x_syncpt *sp)
{
return host->syncpt_op->assign_channel(sp, NULL);
}

-- 
Dmitry


Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-19 Thread Dmitry Osipenko
On 19.08.2017 13:46, Mikko Perttunen wrote:
> On 08/19/2017 01:42 PM, Dmitry Osipenko wrote:
>> On 18.08.2017 19:15, Mikko Perttunen wrote:
>>> The gather filter is a feature present on Tegra124 and newer where the
>>> hardware prevents GATHERed command buffers from executing commands
>>> normally reserved for the CDMA pushbuffer which is maintained by the
>>> kernel driver.
>>>
>>> This commit enables the gather filter on all supporting hardware.
>>>
>>> Signed-off-by: Mikko Perttunen 
>>> ---
>>
>> TRM says that "Invalid Gbuffer cmd" interrupt would be raised when filtering
>> happens. Is that interrupt disabled by default or it would cause 'unhandled
>> interrupt'?
>>
> 
> It's disabled by default. Jobs that are stopped by the filter are then handled
> by the usual timeout mechanism.
> 

Alright, then it looks good to me.

Reviewed-by: Dmitry Osipenko 

-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-20 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/dev.h   | 16 
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++
>  drivers/gpu/host1x/syncpt.c|  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>   u32 (*load)(struct host1x_syncpt *syncpt);
>   int (*cpu_incr)(struct host1x_syncpt *syncpt);
>   int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> +struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
> host1x *host,
>   return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +struct host1x_syncpt *sp,
> +struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>   void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>   syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
>   job->syncpt_end = syncval;
>  
>   /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
> b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
> void *patch_addr)
>   return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +   struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> +HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> +  enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +  HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>   .restore = syncpt_restore,
>   .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = 
> {
>   .load = syncpt_load,
>   .cpu_incr = syncpt_cpu_incr,
>   .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>   for (i = 0; i < host->info->nb_pts; i++) {
>   syncpt[i].id = i;
>   syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>   }
>  
>   for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>   host->bases = bases;
>  
>   host1x_syncpt_restore(host);
> + 

Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-20 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> The gather filter is a feature present on Tegra124 and newer where the
> hardware prevents GATHERed command buffers from executing commands
> normally reserved for the CDMA pushbuffer which is maintained by the
> kernel driver.
> 
> This commit enables the gather filter on all supporting hardware.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
>  drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
>  drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
>  3 files changed, 46 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
> index 0161da331702..5c0dc6bb51d1 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
>   return err;
>  }
>  
> +static void enable_gather_filter(struct host1x *host,
> +  struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + u32 val;
> +
> + if (!host->hv_regs)
> + return;

Is it really possible for the gather filter to be absent on HW without a
hypervisor? Maybe there is another way to enable it in that case?

Is it possible at all for the hypervisor to be missing?

> +
> + val = host1x_hypervisor_readl(
> + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
> + val |= BIT(ch->id % 32);
> + host1x_hypervisor_writel(
> + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
> +#elif HOST1X_HW >= 4
> + host1x_ch_writel(ch,
> +  HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
> +  HOST1X_CHANNEL_CHANNELCTRL);
> +#endif
> +}
> +
>  static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
>  unsigned int index)
>  {
>   ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
> + enable_gather_filter(dev, ch);
>   return 0;
>  }
>  
> diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h 
> b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
> index 95e6f96142b9..2e8b635aa660 100644
> --- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
> +++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
> @@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
>  }
>  #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
>   host1x_channel_dmactrl_dmainitget()
> +static inline u32 host1x_channel_channelctrl_r(void)
> +{
> + return 0x98;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL \
> + host1x_channel_channelctrl_r()
> +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
> +{
> + return (v & 0x1) << 2;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
> + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
>  
>  #endif
> diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h 
> b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
> index fce6e2c1ff4c..abbbc2641ce6 100644
> --- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
> +++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
> @@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
>  }
>  #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
>   host1x_channel_dmactrl_dmainitget()
> +static inline u32 host1x_channel_channelctrl_r(void)
> +{
> + return 0x98;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL \
> + host1x_channel_channelctrl_r()
> +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
> +{
> + return (v & 0x1) << 2;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
> + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
>  
>  #endif
> 


-- 
Dmitry


Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-20 Thread Dmitry Osipenko
On 20.08.2017 19:24, Dmitry Osipenko wrote:
> On 18.08.2017 19:15, Mikko Perttunen wrote:
>> The gather filter is a feature present on Tegra124 and newer where the
>> hardware prevents GATHERed command buffers from executing commands
>> normally reserved for the CDMA pushbuffer which is maintained by the
>> kernel driver.
>>
>> This commit enables the gather filter on all supporting hardware.
>>
>> Signed-off-by: Mikko Perttunen 
>> ---
>>  drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
>>  drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
>>  drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
>>  3 files changed, 46 insertions(+)
>>
>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
>> b/drivers/gpu/host1x/hw/channel_hw.c
>> index 0161da331702..5c0dc6bb51d1 100644
>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
>>  return err;
>>  }
>>  
>> +static void enable_gather_filter(struct host1x *host,
>> + struct host1x_channel *ch)
>> +{
>> +#if HOST1X_HW >= 6
>> +u32 val;
>> +
>> +if (!host->hv_regs)
>> +return;
> 
> Is it really possible that gather filter could be not present on HW without
> hypervisor? Maybe there is other way to enable it in that case?
> 
> Is possible at all that hypervisor could be missed?

BTW, this is also inconsistent with the 'syncpoint protection' patch, which doesn't
check for hypervisor presence.

-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-20 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/dev.h   | 16 
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++
>  drivers/gpu/host1x/syncpt.c|  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>   u32 (*load)(struct host1x_syncpt *syncpt);
>   int (*cpu_incr)(struct host1x_syncpt *syncpt);
>   int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> +struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
> host1x *host,
>   return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +struct host1x_syncpt *sp,
> +struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>   void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>   syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
>   job->syncpt_end = syncval;
>  
>   /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
> b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
> void *patch_addr)
>   return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +   struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;

This check should be placed in syncpt_set_protection().

> +
> + host1x_sync_writel(host,
> +HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> +  enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +  HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>   .restore = syncpt_restore,
>   .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = 
> {
>   .load = syncpt_load,
>   .cpu_incr = syncpt_cpu_incr,
>   .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>   for (i = 0; i < host->info->nb_pts; i++) {
>   syncpt[i].id = i;
>   syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>   }
>  
>   for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>   host->bases = bases;
>  
>   

Re: [PATCH 2/4] gpu: host1x: Enable gather filter

2017-08-20 Thread Dmitry Osipenko
On 20.08.2017 19:44, Dmitry Osipenko wrote:
> On 20.08.2017 19:24, Dmitry Osipenko wrote:
>> On 18.08.2017 19:15, Mikko Perttunen wrote:
>>> The gather filter is a feature present on Tegra124 and newer where the
>>> hardware prevents GATHERed command buffers from executing commands
>>> normally reserved for the CDMA pushbuffer which is maintained by the
>>> kernel driver.
>>>
>>> This commit enables the gather filter on all supporting hardware.
>>>
>>> Signed-off-by: Mikko Perttunen 
>>> ---
>>>  drivers/gpu/host1x/hw/channel_hw.c  | 22 ++
>>>  drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 
>>>  drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 
>>>  3 files changed, 46 insertions(+)
>>>
>>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
>>> b/drivers/gpu/host1x/hw/channel_hw.c
>>> index 0161da331702..5c0dc6bb51d1 100644
>>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>>> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
>>> return err;
>>>  }
>>>  
>>> +static void enable_gather_filter(struct host1x *host,
>>> +struct host1x_channel *ch)
>>> +{
>>> +#if HOST1X_HW >= 6
>>> +   u32 val;
>>> +
>>> +   if (!host->hv_regs)
>>> +   return;
>>
>> Is it really possible that gather filter could be not present on HW without
>> hypervisor? Maybe there is other way to enable it in that case?
>>
>> Is possible at all that hypervisor could be missed?
> 
> BTW, this is also incoherent with the 'syncpoint protection' patch which 
> doesn't
> check for hypervisor presence.
> 

However, I have now noticed that check, and it's wrongly placed ;) See my comment on
the 'syncpoint protection' patch.

-- 
Dmitry


Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

2017-08-20 Thread Dmitry Osipenko
On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/dev.h   | 16 
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 26 ++
>  drivers/gpu/host1x/syncpt.c|  3 +++
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>   u32 (*load)(struct host1x_syncpt *syncpt);
>   int (*cpu_incr)(struct host1x_syncpt *syncpt);
>   int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> +struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
> host1x *host,
>   return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +struct host1x_syncpt *sp,
> +struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> +bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>   void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>   syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +

Since there is one client per channel, it would probably make sense to assign the
client's syncpoints in host1x_channel_request().

>   job->syncpt_end = syncval;
>  
>   /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
> b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
> void *patch_addr)
>   return 0;
>  }
>  
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +   struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> +HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> +  enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> +  HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>   .restore = syncpt_restore,
>   .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = 
> {
>   .load = syncpt_load,
>   .cpu_incr = syncpt_cpu_incr,
>   .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
>  };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>   for (i = 0; i < host->info->nb_pts; i++) {
>   syncpt[i].id = i;
>   syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>   }
>  
>   for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int 

Re: [PATCH v2 6/6] drm/tegra: Use u64_to_user_ptr helper

2017-09-05 Thread Dmitry Osipenko
On 05.09.2017 11:10, Mikko Perttunen wrote:
> Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
> to user pointers instead of writing out the cast manually.
> 
> Signed-off-by: Mikko Perttunen 
> ---

This patch doesn't apply to linux-next; you should probably rebase this series.

>  drivers/gpu/drm/tegra/drm.c | 18 ++
>  1 file changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index e3331a2bc082..72d5c0021540 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -388,18 +388,21 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>   unsigned int num_cmdbufs = args->num_cmdbufs;
>   unsigned int num_relocs = args->num_relocs;
>   unsigned int num_waitchks = args->num_waitchks;
> - struct drm_tegra_cmdbuf __user *cmdbufs =
> - (void __user *)(uintptr_t)args->cmdbufs;
> - struct drm_tegra_reloc __user *relocs =
> - (void __user *)(uintptr_t)args->relocs;
> - struct drm_tegra_waitchk __user *waitchks =
> - (void __user *)(uintptr_t)args->waitchks;
> + struct drm_tegra_cmdbuf __user *cmdbufs;
> + struct drm_tegra_reloc __user *relocs;
> + struct drm_tegra_waitchk __user *waitchks;
> + struct drm_tegra_syncpt __user *user_syncpt;
>   struct drm_tegra_syncpt syncpt;
>   struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
>   struct host1x_syncpt *sp;
>   struct host1x_job *job;
>   int err;
>  
> + cmdbufs = u64_to_user_ptr(args->cmdbufs);
> + relocs = u64_to_user_ptr(args->relocs);
> + waitchks = u64_to_user_ptr(args->waitchks);

How about prefixing these variables with 'user_' for consistency?

> + user_syncpt = u64_to_user_ptr(args->syncpts);
> +
>   /* We don't yet support other than one syncpt_incr struct per submit */
>   if (args->num_syncpts != 1)
>   return -EINVAL;
> @@ -520,8 +523,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>   }
>   }
>  
> - if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
> -sizeof(syncpt))) {
> + if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
>   err = -EFAULT;
>   goto fail;
>   }
> 


-- 
Dmitry


Re: [PATCH v2 4/6] gpu: host1x: Disassemble more instructions

2017-09-05 Thread Dmitry Osipenko
On 05.09.2017 11:10, Mikko Perttunen wrote:
> The disassembler for debug dumps was missing some newer host1x opcodes.
> Add disassembly support for these.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/hw/debug_hw.c  | 57 
> ---
>  drivers/gpu/host1x/hw/debug_hw_1x01.c |  3 +-
>  drivers/gpu/host1x/hw/debug_hw_1x06.c |  3 +-
>  3 files changed, 57 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/hw/debug_hw.c 
> b/drivers/gpu/host1x/hw/debug_hw.c
> index 1e67667e308c..de2a0ba7a32d 100644
> --- a/drivers/gpu/host1x/hw/debug_hw.c
> +++ b/drivers/gpu/host1x/hw/debug_hw.c
> @@ -30,6 +30,13 @@ enum {
>   HOST1X_OPCODE_IMM   = 0x04,
>   HOST1X_OPCODE_RESTART   = 0x05,
>   HOST1X_OPCODE_GATHER= 0x06,
> + HOST1X_OPCODE_SETSTRMID = 0x07,
> + HOST1X_OPCODE_SETAPPID  = 0x08,
> + HOST1X_OPCODE_SETPYLD   = 0x09,
> + HOST1X_OPCODE_INCR_W= 0x0a,
> + HOST1X_OPCODE_NONINCR_W = 0x0b,
> + HOST1X_OPCODE_GATHER_W  = 0x0c,
> + HOST1X_OPCODE_RESTART_W = 0x0d,
>   HOST1X_OPCODE_EXTEND= 0x0e,
>  };
>  
> @@ -38,11 +45,16 @@ enum {
>   HOST1X_OPCODE_EXTEND_RELEASE_MLOCK  = 0x01,
>  };
>  
> -static unsigned int show_channel_command(struct output *o, u32 val)
> +#define INVALID_PAYLOAD  0xffffffff
> +
> +static unsigned int show_channel_command(struct output *o, u32 val,
> +  u32 *payload)
>  {
> - unsigned int mask, subop, num;
> + unsigned int mask, subop, num, opcode;
> +
> + opcode = val >> 28;
>  
> - switch (val >> 28) {
> + switch (opcode) {
>   case HOST1X_OPCODE_SETCLASS:
>   mask = val & 0x3f;
>   if (mask) {
> @@ -97,6 +109,42 @@ static unsigned int show_channel_command(struct output 
> *o, u32 val)
>   val >> 14 & 0x1, val & 0x3fff);
>   return 1;
>  

The opcodes below aren't relevant to older Tegras; it seems "#if HOST1X_HW >= 6"
should be added here.

> + case HOST1X_OPCODE_SETSTRMID:
> + host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
> +   val & 0x3fffff);
> + return 0;
> +
> + case HOST1X_OPCODE_SETAPPID:
> + host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
> + return 0;
> +
> + case HOST1X_OPCODE_SETPYLD:
> + *payload = val & 0xffff;
> + host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
> + return 0;
> +
> + case HOST1X_OPCODE_INCR_W:
> + case HOST1X_OPCODE_NONINCR_W:
> + host1x_debug_cont(o, "%s(offset=%06x, ",
> +   opcode == HOST1X_OPCODE_INCR_W ?
> + "INCR_W" : "NONINCR_W",
> +   val & 0x3fffff);
> + if (*payload == 0) {
> + host1x_debug_cont(o, "[])\n");
> + return 0;
> + } else if (*payload == INVALID_PAYLOAD) {
> + host1x_debug_cont(o, "unknown)\n");
> + return 0;
> + } else {
> + host1x_debug_cont(o, "[");
> + return *payload;
> + }
> +
> + case HOST1X_OPCODE_GATHER_W:
> + host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
> +   val & 0x3fff);
> + return 2;
> +
>   case HOST1X_OPCODE_EXTEND:
>   subop = val >> 24 & 0xf;
>   if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
> @@ -122,6 +170,7 @@ static void show_gather(struct output *o, phys_addr_t 
> phys_addr,
>   /* Map dmaget cursor to corresponding mem handle */
>   u32 offset = phys_addr - pin_addr;
>   unsigned int data_count = 0, i;
> + u32 payload = INVALID_PAYLOAD;
>  
>   /*
>* Sometimes we're given different hardware address to the same
> @@ -139,7 +188,7 @@ static void show_gather(struct output *o, phys_addr_t 
> phys_addr,
>  
>   if (!data_count) {
>   host1x_debug_output(o, "%08x: %08x: ", addr, val);
> - data_count = show_channel_command(o, val);
> + data_count = show_channel_command(o, val, &payload);
>   } else {
>   host1x_debug_cont(o, "%08x%s", val,
>   data_count > 1 ? ", " : "])\n");
> diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c 
> b/drivers/gpu/host1x/hw/debug_hw_1x01.c
> index 09e1aa7bb5dd..7d1401c6c193 100644
> --- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
> +++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
> @@ -78,6 +78,7 @@ static void host1x_debug_show_channel_fifo(struct host1x 
> *host,
>  struct output *o)
>  {
>   u32 val, rd_ptr, wr_ptr, start, end;
> + u32 payload = INVALID_PAYLOAD;
>   unsigned int data_count = 0;
>  
>   

Re: [PATCH v2 5/6] gpu: host1x: Fix incorrect comment for channel_request

2017-09-05 Thread Dmitry Osipenko
On 05.09.2017 11:10, Mikko Perttunen wrote:
> This function actually doesn't sleep in the version that was merged.
> 
> Signed-off-by: Mikko Perttunen 
> ---
>  drivers/gpu/host1x/channel.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
> index db9b91d1384c..2fb93c27c1d9 100644
> --- a/drivers/gpu/host1x/channel.c
> +++ b/drivers/gpu/host1x/channel.c
> @@ -128,8 +128,7 @@ static struct host1x_channel 
> *acquire_unused_channel(struct host1x *host)
>   * host1x_channel_request() - Allocate a channel
>   * @device: Host1x unit this channel will be used to send commands to
>   *
> - * Allocates a new host1x channel for @device. If there are no free channels,
> - * this will sleep until one becomes available. May return NULL if CDMA
> + * Allocates a new host1x channel for @device. May return NULL if CDMA
>   * initialization fails.
>   */
>  struct host1x_channel *host1x_channel_request(struct device *dev)
> 

Reviewed-by: Dmitry Osipenko 

-- 
Dmitry


Re: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

2017-09-05 Thread Dmitry Osipenko
On 05.09.2017 11:10, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
> 
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
> 
> Signed-off-by: Mikko Perttunen 
> ---
> 
> Notes:
> v2:
> - Changed from set_protection(bool) to enable_protection
> - Added some comments
> - Added missing check for hv_regs being NULL in
>   enable_protection
> 
>  drivers/gpu/host1x/dev.h   | 15 +
>  drivers/gpu/host1x/hw/channel_hw.c |  3 +++
>  drivers/gpu/host1x/hw/syncpt_hw.c  | 46 
> ++
>  drivers/gpu/host1x/syncpt.c|  8 +++
>  4 files changed, 72 insertions(+)
> 
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..7497cc5ead9e 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>   u32 (*load)(struct host1x_syncpt *syncpt);
>   int (*cpu_incr)(struct host1x_syncpt *syncpt);
>   int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> +struct host1x_channel *channel);
> + void (*enable_protection)(struct host1x *host);
>  };
>  
>  struct host1x_intr_ops {
> @@ -186,6 +189,18 @@ static inline int host1x_hw_syncpt_patch_wait(struct 
> host1x *host,
>   return host->syncpt_op->patch_wait(sp, patch_addr);
>  }
>  
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> +struct host1x_syncpt *sp,
> +struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
> +{
> + return host->syncpt_op->enable_protection(host);
> +}
> +
>  static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>   void (*syncpt_thresh_work)(struct work_struct *))
>  {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
> b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>  
>   syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>  
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);

This function could be renamed to host1x_hw_assign_syncpt_to_channel(), and then
the comment on it wouldn't be needed.

It is not very nice that the channel would be re-assigned on each submit. Maybe that
assignment should be done by host1x_syncpt_request()?

> +
>   job->syncpt_end = syncval;
>  
>   /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
> b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..dc7a44614fef 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, 
> void *patch_addr)
>   return 0;
>  }
>  
> +/**
> + * syncpt_assign_channel() - Assign syncpoint to channel
> + * @sp: syncpoint
> + * @ch: channel
> + *
> + * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
> + * @ch, preventing other channels from incrementing the syncpoints. If @ch is
> + * NULL, unassigns the syncpoint.
> + *
> + * On older chips, do nothing.
> + */
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> +   struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> +HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> +HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +/**
> + * syncpt_enable_protection() - Enable syncpoint protection
> + * @host: host1x instance
> + *
> + * On chips with the syncpoint protection feature (Tegra186+), enable this
> + * feature. On older chips, do nothing.
> + */
> +static void syncpt_enable_protection(struct host1x *host)
> +{
> +#if HOST1X_HW >= 6
> + if (!host->hv_regs)
> + return;
> +
> + host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
> +  HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
>  static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>   .restore = syncpt_restore,
>   .restore_wait_base = syncpt_restore_wait_base,
> 

[PATCH v4 14/15] memory: tegra: Add Tegra30 memory controller hot resets

2018-04-13 Thread Dmitry Osipenko
Define the table of memory controller hot resets for Tegra30.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/tegra30.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c
index d2ba50ed0490..bee5314ed404 100644
--- a/drivers/memory/tegra/tegra30.c
+++ b/drivers/memory/tegra/tegra30.c
@@ -960,6 +960,36 @@ static const struct tegra_smmu_soc tegra30_smmu_soc = {
.num_asids = 4,
 };
 
+#define TEGRA30_MC_RESET(_name, _control, _status, _bit)   \
+   {   \
+   .name = #_name, \
+   .id = TEGRA30_MC_RESET_##_name, \
+   .control = _control,\
+   .status = _status,  \
+   .bit = _bit,\
+   }
+
+static const struct tegra_mc_reset tegra30_mc_resets[] = {
+   TEGRA30_MC_RESET(AFI,  0x200, 0x204,  0),
+   TEGRA30_MC_RESET(AVPC, 0x200, 0x204,  1),
+   TEGRA30_MC_RESET(DC,   0x200, 0x204,  2),
+   TEGRA30_MC_RESET(DCB,  0x200, 0x204,  3),
+   TEGRA30_MC_RESET(EPP,  0x200, 0x204,  4),
+   TEGRA30_MC_RESET(2D,   0x200, 0x204,  5),
+   TEGRA30_MC_RESET(HC,   0x200, 0x204,  6),
+   TEGRA30_MC_RESET(HDA,  0x200, 0x204,  7),
+   TEGRA30_MC_RESET(ISP,  0x200, 0x204,  8),
+   TEGRA30_MC_RESET(MPCORE,   0x200, 0x204,  9),
+   TEGRA30_MC_RESET(MPCORELP, 0x200, 0x204, 10),
+   TEGRA30_MC_RESET(MPE,  0x200, 0x204, 11),
+   TEGRA30_MC_RESET(3D,   0x200, 0x204, 12),
+   TEGRA30_MC_RESET(3D2,  0x200, 0x204, 13),
+   TEGRA30_MC_RESET(PPCS, 0x200, 0x204, 14),
+   TEGRA30_MC_RESET(SATA, 0x200, 0x204, 15),
+   TEGRA30_MC_RESET(VDE,  0x200, 0x204, 16),
+   TEGRA30_MC_RESET(VI,   0x200, 0x204, 17),
+};
+
 const struct tegra_mc_soc tegra30_mc_soc = {
.clients = tegra30_mc_clients,
.num_clients = ARRAY_SIZE(tegra30_mc_clients),
@@ -969,4 +999,7 @@ const struct tegra_mc_soc tegra30_mc_soc = {
.smmu = &tegra30_smmu_soc,
.intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
   MC_INT_DECERR_EMEM,
+   .reset_ops = &terga_mc_reset_ops_common,
+   .resets = tegra30_mc_resets,
+   .num_resets = ARRAY_SIZE(tegra30_mc_resets),
 };
-- 
2.17.0



[PATCH v4 13/15] memory: tegra: Add Tegra114 memory controller hot resets

2018-04-13 Thread Dmitry Osipenko
Define the table of memory controller hot resets for Tegra114.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/tegra114.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c
index 7560b2f558a7..12528aa3062b 100644
--- a/drivers/memory/tegra/tegra114.c
+++ b/drivers/memory/tegra/tegra114.c
@@ -938,6 +938,36 @@ static const struct tegra_smmu_soc tegra114_smmu_soc = {
.num_asids = 4,
 };
 
+#define TEGRA114_MC_RESET(_name, _control, _status, _bit)  \
+   {   \
+   .name = #_name, \
+   .id = TEGRA114_MC_RESET_##_name,\
+   .control = _control,\
+   .status = _status,  \
+   .bit = _bit,\
+   }
+
+static const struct tegra_mc_reset tegra114_mc_resets[] = {
+   TEGRA114_MC_RESET(AFI,  0x200, 0x204,  0),
+   TEGRA114_MC_RESET(AVPC, 0x200, 0x204,  1),
+   TEGRA114_MC_RESET(DC,   0x200, 0x204,  2),
+   TEGRA114_MC_RESET(DCB,  0x200, 0x204,  3),
+   TEGRA114_MC_RESET(EPP,  0x200, 0x204,  4),
+   TEGRA114_MC_RESET(2D,   0x200, 0x204,  5),
+   TEGRA114_MC_RESET(HC,   0x200, 0x204,  6),
+   TEGRA114_MC_RESET(HDA,  0x200, 0x204,  7),
+   TEGRA114_MC_RESET(ISP,  0x200, 0x204,  8),
+   TEGRA114_MC_RESET(MPCORE,   0x200, 0x204,  9),
+   TEGRA114_MC_RESET(MPCORELP, 0x200, 0x204, 10),
+   TEGRA114_MC_RESET(MPE,  0x200, 0x204, 11),
+   TEGRA114_MC_RESET(3D,   0x200, 0x204, 12),
+   TEGRA114_MC_RESET(3D2,  0x200, 0x204, 13),
+   TEGRA114_MC_RESET(PPCS, 0x200, 0x204, 14),
+   TEGRA114_MC_RESET(SATA, 0x200, 0x204, 15),
+   TEGRA114_MC_RESET(VDE,  0x200, 0x204, 16),
+   TEGRA114_MC_RESET(VI,   0x200, 0x204, 17),
+};
+
 const struct tegra_mc_soc tegra114_mc_soc = {
.clients = tegra114_mc_clients,
.num_clients = ARRAY_SIZE(tegra114_mc_clients),
@@ -947,4 +977,7 @@ const struct tegra_mc_soc tegra114_mc_soc = {
.smmu = &tegra114_smmu_soc,
.intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
   MC_INT_DECERR_EMEM,
+   .reset_ops = &terga_mc_reset_ops_common,
+   .resets = tegra114_mc_resets,
+   .num_resets = ARRAY_SIZE(tegra114_mc_resets),
 };
-- 
2.17.0



[PATCH v4 11/15] memory: tegra: Add Tegra210 memory controller hot resets

2018-04-13 Thread Dmitry Osipenko
From: Thierry Reding 

Define the table of memory controller hot resets for Tegra210.

Signed-off-by: Thierry Reding 
---
 drivers/memory/tegra/tegra210.c | 45 +
 1 file changed, 45 insertions(+)

diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c
index b729f49ffc8f..d00a77160407 100644
--- a/drivers/memory/tegra/tegra210.c
+++ b/drivers/memory/tegra/tegra210.c
@@ -1080,6 +1080,48 @@ static const struct tegra_smmu_soc tegra210_smmu_soc = {
.num_asids = 128,
 };
 
+#define TEGRA210_MC_RESET(_name, _control, _status, _bit)  \
+   {   \
+   .name = #_name, \
+   .id = TEGRA210_MC_RESET_##_name,\
+   .control = _control,\
+   .status = _status,  \
+   .bit = _bit,\
+   }
+
+static const struct tegra_mc_reset tegra210_mc_resets[] = {
+   TEGRA210_MC_RESET(AFI,   0x200, 0x204,  0),
+   TEGRA210_MC_RESET(AVPC,  0x200, 0x204,  1),
+   TEGRA210_MC_RESET(DC,0x200, 0x204,  2),
+   TEGRA210_MC_RESET(DCB,   0x200, 0x204,  3),
+   TEGRA210_MC_RESET(HC,0x200, 0x204,  6),
+   TEGRA210_MC_RESET(HDA,   0x200, 0x204,  7),
+   TEGRA210_MC_RESET(ISP2,  0x200, 0x204,  8),
+   TEGRA210_MC_RESET(MPCORE,0x200, 0x204,  9),
+   TEGRA210_MC_RESET(NVENC, 0x200, 0x204, 11),
+   TEGRA210_MC_RESET(PPCS,  0x200, 0x204, 14),
+   TEGRA210_MC_RESET(SATA,  0x200, 0x204, 15),
+   TEGRA210_MC_RESET(VI,0x200, 0x204, 17),
+   TEGRA210_MC_RESET(VIC,   0x200, 0x204, 18),
+   TEGRA210_MC_RESET(XUSB_HOST, 0x200, 0x204, 19),
+   TEGRA210_MC_RESET(XUSB_DEV,  0x200, 0x204, 20),
+   TEGRA210_MC_RESET(A9AVP, 0x200, 0x204, 21),
+   TEGRA210_MC_RESET(TSEC,  0x200, 0x204, 22),
+   TEGRA210_MC_RESET(SDMMC1,0x200, 0x204, 29),
+   TEGRA210_MC_RESET(SDMMC2,0x200, 0x204, 30),
+   TEGRA210_MC_RESET(SDMMC3,0x200, 0x204, 31),
+   TEGRA210_MC_RESET(SDMMC4,0x970, 0x974,  0),
+   TEGRA210_MC_RESET(ISP2B, 0x970, 0x974,  1),
+   TEGRA210_MC_RESET(GPU,   0x970, 0x974,  2),
+   TEGRA210_MC_RESET(NVDEC, 0x970, 0x974,  5),
+   TEGRA210_MC_RESET(APE,   0x970, 0x974,  6),
+   TEGRA210_MC_RESET(SE,0x970, 0x974,  7),
+   TEGRA210_MC_RESET(NVJPG, 0x970, 0x974,  8),
+   TEGRA210_MC_RESET(AXIAP, 0x970, 0x974, 11),
+   TEGRA210_MC_RESET(ETR,   0x970, 0x974, 12),
+   TEGRA210_MC_RESET(TSECB, 0x970, 0x974, 13),
+};
+
 const struct tegra_mc_soc tegra210_mc_soc = {
.clients = tegra210_mc_clients,
.num_clients = ARRAY_SIZE(tegra210_mc_clients),
@@ -1090,4 +1132,7 @@ const struct tegra_mc_soc tegra210_mc_soc = {
.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+   .reset_ops = &terga_mc_reset_ops_common,
+   .resets = tegra210_mc_resets,
+   .num_resets = ARRAY_SIZE(tegra210_mc_resets),
 };
-- 
2.17.0



[PATCH v4 15/15] memory: tegra: Add Tegra20 memory controller hot resets

2018-04-13 Thread Dmitry Osipenko
Define the table of memory controller hot resets for Tegra20 and add
Tegra20-specific hot reset operations.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/tegra20.c | 118 +
 1 file changed, 118 insertions(+)

diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c
index 512a3418cb80..7119e532471c 100644
--- a/drivers/memory/tegra/tegra20.c
+++ b/drivers/memory/tegra/tegra20.c
@@ -6,6 +6,8 @@
  * published by the Free Software Foundation.
  */
 
+#include 
+
 #include "mc.h"
 
 static const struct tegra_mc_client tegra20_mc_clients[] = {
@@ -168,6 +170,119 @@ static const struct tegra_mc_client tegra20_mc_clients[] 
= {
},
 };
 
+#define TEGRA20_MC_RESET(_name, _control, _status, _reset, _bit)   \
+   {   \
+   .name = #_name, \
+   .id = TEGRA20_MC_RESET_##_name, \
+   .control = _control,\
+   .status = _status,  \
+   .reset = _reset,\
+   .bit = _bit,\
+   }
+
+static const struct tegra_mc_reset tegra20_mc_resets[] = {
+   TEGRA20_MC_RESET(AVPC,   0x100, 0x140, 0x104,  0),
+   TEGRA20_MC_RESET(DC, 0x100, 0x144, 0x104,  1),
+   TEGRA20_MC_RESET(DCB,0x100, 0x148, 0x104,  2),
+   TEGRA20_MC_RESET(EPP,0x100, 0x14c, 0x104,  3),
+   TEGRA20_MC_RESET(2D, 0x100, 0x150, 0x104,  4),
+   TEGRA20_MC_RESET(HC, 0x100, 0x154, 0x104,  5),
+   TEGRA20_MC_RESET(ISP,0x100, 0x158, 0x104,  6),
+   TEGRA20_MC_RESET(MPCORE, 0x100, 0x15c, 0x104,  7),
+   TEGRA20_MC_RESET(MPEA,   0x100, 0x160, 0x104,  8),
+   TEGRA20_MC_RESET(MPEB,   0x100, 0x164, 0x104,  9),
+   TEGRA20_MC_RESET(MPEC,   0x100, 0x168, 0x104, 10),
+   TEGRA20_MC_RESET(3D, 0x100, 0x16c, 0x104, 11),
+   TEGRA20_MC_RESET(PPCS,   0x100, 0x170, 0x104, 12),
+   TEGRA20_MC_RESET(VDE,0x100, 0x174, 0x104, 13),
+   TEGRA20_MC_RESET(VI, 0x100, 0x178, 0x104, 14),
+};
+
+static int terga20_mc_hotreset_assert(struct tegra_mc *mc,
+ const struct tegra_mc_reset *rst)
+{
+   unsigned long flags;
+   u32 value;
+
+   spin_lock_irqsave(&mc->lock, flags);
+
+   value = mc_readl(mc, rst->reset);
+   mc_writel(mc, value & ~BIT(rst->bit), rst->reset);
+
+   spin_unlock_irqrestore(&mc->lock, flags);
+
+   return 0;
+}
+
+static int terga20_mc_hotreset_deassert(struct tegra_mc *mc,
+   const struct tegra_mc_reset *rst)
+{
+   unsigned long flags;
+   u32 value;
+
+   spin_lock_irqsave(&mc->lock, flags);
+
+   value = mc_readl(mc, rst->reset);
+   mc_writel(mc, value | BIT(rst->bit), rst->reset);
+
+   spin_unlock_irqrestore(&mc->lock, flags);
+
+   return 0;
+}
+
+static int terga20_mc_block_dma(struct tegra_mc *mc,
+   const struct tegra_mc_reset *rst)
+{
+   unsigned long flags;
+   u32 value;
+
+   spin_lock_irqsave(&mc->lock, flags);
+
+   value = mc_readl(mc, rst->control) & ~BIT(rst->bit);
+   mc_writel(mc, value, rst->control);
+
+   spin_unlock_irqrestore(&mc->lock, flags);
+
+   return 0;
+}
+
+static bool terga20_mc_dma_idling(struct tegra_mc *mc,
+ const struct tegra_mc_reset *rst)
+{
+   return mc_readl(mc, rst->status) == 0;
+}
+
+static int terga20_mc_reset_status(struct tegra_mc *mc,
+  const struct tegra_mc_reset *rst)
+{
+   return (mc_readl(mc, rst->reset) & BIT(rst->bit)) == 0;
+}
+
+static int terga20_mc_unblock_dma(struct tegra_mc *mc,
+ const struct tegra_mc_reset *rst)
+{
+   unsigned long flags;
+   u32 value;
+
+   spin_lock_irqsave(&mc->lock, flags);
+
+   value = mc_readl(mc, rst->control) | BIT(rst->bit);
+   mc_writel(mc, value, rst->control);
+
+   spin_unlock_irqrestore(&mc->lock, flags);
+
+   return 0;
+}
+
+const struct tegra_mc_reset_ops terga20_mc_reset_ops = {
+   .hotreset_assert = terga20_mc_hotreset_assert,
+   .hotreset_deassert = terga20_mc_hotreset_deassert,
+   .block_dma = terga20_mc_block_dma,
+   .dma_idling = terga20_mc_dma_idling,
+   .unblock_dma = terga20_mc_unblock_dma,
+   .reset_status = terga20_mc_reset_status,
+};
+
 const struct tegra_mc_soc tegra20_mc_soc = {
.clients = tegra20_mc_clients,
.num_clients = ARRAY_SIZE(tegra20_mc_clients),
@@ -175,4 +290,7 @@ const struct tegra_mc_soc tegra20_mc_soc = {
.client_id_mask = 0x3f,
.intmask = MC_INT_SECURITY_VIOLATION | MC_INT_IN

[PATCH v4 10/15] memory: tegra: Introduce memory client hot reset

2018-04-13 Thread Dmitry Osipenko
In order to reset busy HW properly, the memory controller needs to be
involved; otherwise it is possible to corrupt memory or hang the machine
if the HW is reset during DMA. Introduce a memory client 'hot reset' that will
be used for resetting busy HW.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/mc.c | 210 ++
 drivers/memory/tegra/mc.h |   2 +
 include/soc/tegra/mc.h|  33 ++
 3 files changed, 245 insertions(+)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 5932ab33202a..6b211daa99bf 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -7,6 +7,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -72,6 +73,207 @@ static const struct of_device_id tegra_mc_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_mc_of_match);
 
+static int terga_mc_block_dma_common(struct tegra_mc *mc,
+const struct tegra_mc_reset *rst)
+{
+   unsigned long flags;
+   u32 value;
+
+   spin_lock_irqsave(&mc->lock, flags);
+
+   value = mc_readl(mc, rst->control) | BIT(rst->bit);
+   mc_writel(mc, value, rst->control);
+
+   spin_unlock_irqrestore(&mc->lock, flags);
+
+   return 0;
+}
+
+static bool terga_mc_dma_idling_common(struct tegra_mc *mc,
+  const struct tegra_mc_reset *rst)
+{
+   return (mc_readl(mc, rst->status) & BIT(rst->bit)) != 0;
+}
+
+static int terga_mc_unblock_dma_common(struct tegra_mc *mc,
+  const struct tegra_mc_reset *rst)
+{
+   unsigned long flags;
+   u32 value;
+
+   spin_lock_irqsave(&mc->lock, flags);
+
+   value = mc_readl(mc, rst->control) & ~BIT(rst->bit);
+   mc_writel(mc, value, rst->control);
+
+   spin_unlock_irqrestore(&mc->lock, flags);
+
+   return 0;
+}
+
+static int terga_mc_reset_status_common(struct tegra_mc *mc,
+   const struct tegra_mc_reset *rst)
+{
+   return (mc_readl(mc, rst->control) & BIT(rst->bit)) != 0;
+}
+
+const struct tegra_mc_reset_ops terga_mc_reset_ops_common = {
+   .block_dma = terga_mc_block_dma_common,
+   .dma_idling = terga_mc_dma_idling_common,
+   .unblock_dma = terga_mc_unblock_dma_common,
+   .reset_status = terga_mc_reset_status_common,
+};
+
+static inline struct tegra_mc *reset_to_mc(struct reset_controller_dev *rcdev)
+{
+   return container_of(rcdev, struct tegra_mc, reset);
+}
+
+static const struct tegra_mc_reset *tegra_mc_reset_find(struct tegra_mc *mc,
+   unsigned long id)
+{
+   unsigned int i;
+
+   for (i = 0; i < mc->soc->num_resets; i++)
+   if (mc->soc->resets[i].id == id)
+   return &mc->soc->resets[i];
+
+   return NULL;
+}
+
+static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev,
+   unsigned long id)
+{
+   struct tegra_mc *mc = reset_to_mc(rcdev);
+   const struct tegra_mc_reset_ops *rst_ops;
+   const struct tegra_mc_reset *rst;
+   int retries = 500;
+   int err;
+
+   rst = tegra_mc_reset_find(mc, id);
+   if (!rst)
+   return -ENODEV;
+
+   rst_ops = mc->soc->reset_ops;
+   if (!rst_ops)
+   return -ENODEV;
+
+   if (rst_ops->block_dma) {
+   /* block clients DMA requests */
+   err = rst_ops->block_dma(mc, rst);
+   if (err) {
+   dev_err(mc->dev, "Failed to block %s DMA: %d\n",
+   rst->name, err);
+   return err;
+   }
+   }
+
+   if (rst_ops->dma_idling) {
+   /* wait for completion of the outstanding DMA requests */
+   while (!rst_ops->dma_idling(mc, rst)) {
+   if (!retries--) {
+   dev_err(mc->dev, "Failed to flush %s DMA\n",
+   rst->name);
+   return -EBUSY;
+   }
+
+   usleep_range(10, 100);
+   }
+   }
+
+   if (rst_ops->hotreset_assert) {
+   /* clear clients DMA requests sitting before arbitration */
+   err = rst_ops->hotreset_assert(mc, rst);
+   if (err) {
+   dev_err(mc->dev, "Failed to hot reset %s: %d\n",
+   rst->name, err);
+   return err;
+   }
+   }
+
+   return 0;
+}
+
+static int tegra_mc_hotreset_deassert(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+   struct tegra_mc *mc = reset_to_mc(rcdev);
+   const struct tegra_mc_reset_ops *rst_ops;
+

[PATCH v4 12/15] memory: tegra: Add Tegra124 memory controller hot resets

2018-04-13 Thread Dmitry Osipenko
Define the table of memory controller hot resets for Tegra124.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/tegra124.c | 42 +
 1 file changed, 42 insertions(+)

diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c
index bd16555cca0f..b561a1fe7f46 100644
--- a/drivers/memory/tegra/tegra124.c
+++ b/drivers/memory/tegra/tegra124.c
@@ -1012,6 +1012,42 @@ static const struct tegra_smmu_group_soc 
tegra124_groups[] = {
},
 };
 
+#define TEGRA124_MC_RESET(_name, _control, _status, _bit)  \
+   {   \
+   .name = #_name, \
+   .id = TEGRA124_MC_RESET_##_name,\
+   .control = _control,\
+   .status = _status,  \
+   .bit = _bit,\
+   }
+
+static const struct tegra_mc_reset tegra124_mc_resets[] = {
+   TEGRA124_MC_RESET(AFI,   0x200, 0x204,  0),
+   TEGRA124_MC_RESET(AVPC,  0x200, 0x204,  1),
+   TEGRA124_MC_RESET(DC,0x200, 0x204,  2),
+   TEGRA124_MC_RESET(DCB,   0x200, 0x204,  3),
+   TEGRA124_MC_RESET(HC,0x200, 0x204,  6),
+   TEGRA124_MC_RESET(HDA,   0x200, 0x204,  7),
+   TEGRA124_MC_RESET(ISP2,  0x200, 0x204,  8),
+   TEGRA124_MC_RESET(MPCORE,0x200, 0x204,  9),
+   TEGRA124_MC_RESET(MPCORELP,  0x200, 0x204, 10),
+   TEGRA124_MC_RESET(MSENC, 0x200, 0x204, 11),
+   TEGRA124_MC_RESET(PPCS,  0x200, 0x204, 14),
+   TEGRA124_MC_RESET(SATA,  0x200, 0x204, 15),
+   TEGRA124_MC_RESET(VDE,   0x200, 0x204, 16),
+   TEGRA124_MC_RESET(VI,0x200, 0x204, 17),
+   TEGRA124_MC_RESET(VIC,   0x200, 0x204, 18),
+   TEGRA124_MC_RESET(XUSB_HOST, 0x200, 0x204, 19),
+   TEGRA124_MC_RESET(XUSB_DEV,  0x200, 0x204, 20),
+   TEGRA124_MC_RESET(TSEC,  0x200, 0x204, 21),
+   TEGRA124_MC_RESET(SDMMC1,0x200, 0x204, 22),
+   TEGRA124_MC_RESET(SDMMC2,0x200, 0x204, 23),
+   TEGRA124_MC_RESET(SDMMC3,0x200, 0x204, 25),
+   TEGRA124_MC_RESET(SDMMC4,0x970, 0x974,  0),
+   TEGRA124_MC_RESET(ISP2B, 0x970, 0x974,  1),
+   TEGRA124_MC_RESET(GPU,   0x970, 0x974,  2),
+};
+
 #ifdef CONFIG_ARCH_TEGRA_124_SOC
 static const struct tegra_smmu_soc tegra124_smmu_soc = {
.clients = tegra124_mc_clients,
@@ -1038,6 +1074,9 @@ const struct tegra_mc_soc tegra124_mc_soc = {
.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+   .reset_ops = &terga_mc_reset_ops_common,
+   .resets = tegra124_mc_resets,
+   .num_resets = ARRAY_SIZE(tegra124_mc_resets),
 };
 #endif /* CONFIG_ARCH_TEGRA_124_SOC */
 
@@ -1065,5 +1104,8 @@ const struct tegra_mc_soc tegra132_mc_soc = {
.intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+   .reset_ops = &terga_mc_reset_ops_common,
+   .resets = tegra124_mc_resets,
+   .num_resets = ARRAY_SIZE(tegra124_mc_resets),
 };
 #endif /* CONFIG_ARCH_TEGRA_132_SOC */
-- 
2.17.0



Re: [PATCH] ARM: tegra: fix ulpi regression on tegra20

2018-04-20 Thread Dmitry Osipenko
On 20.04.2018 11:52, Marc Dietrich wrote:
> Hi Marcel,
> 
> Am Montag, 19. Februar 2018, 16:12:52 CEST schrieb Marcel Ziswiler:
>> From: Marcel Ziswiler 
>>
>> Since commit f8f8f1d04494 ("clk: Don't touch hardware when reparenting
>> during registration") ULPI has been broken on Tegra20 leading to the
>> following error message during boot:
>>
>> [1.974698] ulpi_phy_power_on: ulpi write failed
>> [1.979384] tegra-ehci c5004000.usb: Failed to power on the phy
>> [1.985434] tegra-ehci: probe of c5004000.usb failed with error -110
>>
>> Debugging through the changes and finally also consulting the TRM
>> revealed that rather than the CDEV2 clock off OSC requiring such pin
>> muxing actually the PLL_P_OUT4 clock is in use. It looks like so far it
>> just worked by chance of that one having been enabled which Stephen's
>> commit now changed when reparenting sclk away from pll_p_out4 leaving
>> that one disabled. Fix this by properly assigning the PLL_P_OUT4 clock
>> as the ULPI PHY clock.
> 
> I booted 4.17-rc1 (which includes this fix) on an AC100 (T20 paz00 board) and 
> the error above is still there. Surprisingly the error vanishes when I revert 
> your patch. So this patch actually *causes* the problem above on my board. 
> Could it be, that we need all four clocks? Dimitry mentioned on IRC that it 
> could also be a problem in the clock init table. I don't have the technical 
> background myself to fix it, but I still wonder what could be so different 
> between TrimSlice and AC100.

I managed to find the CDEV clocks in the TRM this time. It seems that assigning the CDEV2 clock to
"ulpi-link" was correct and both CDEV2 and PLL_P_OUT4 should be enabled: CDEV2
should gate the PLL_P_OUT4 that feeds the USB HW, and PLL_P_OUT4 should be
always enabled because it is enabled by the init_table, but apparently it is getting
disabled erroneously.

Marcel, could you please revert your patch, add
"trace_event=clk_enable,clk_disable,clk_set_parent tp_printk" to the kernel's cmdline
and post the log?

It looks like there is some clk framework bug, but just in case please also try
to apply this patch:

diff --git a/drivers/clk/tegra/clk-tegra-periph.c
b/drivers/clk/tegra/clk-tegra-periph.c
index 2acba2986bc6..407bd0c0ac2f 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -1024,7 +1024,7 @@ static void __init init_pllp(void __iomem *clk_base, void
__iomem *pmc_base,
if (dt_clk) {
clk = tegra_clk_register_pll_out("pll_p_out4",
"pll_p_out4_div", clk_base + PLLP_OUTB,
-   17, 16, CLK_IGNORE_UNUSED |
+   17, 16, CLK_IS_CRITICAL |
CLK_SET_RATE_PARENT, 0,
&PLLP_OUTB_lock);
*dt_clk = clk;


[PATCH v1] gpu: host1x: Fix dma_free_wc() argument in the error path

2018-04-23 Thread Dmitry Osipenko
If IOVA allocation or IOMMU mapping fails, dma_free_wc() is invoked with
size=0 because of a typo, which triggers "kernel BUG at mm/vmalloc.c:124!".

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/host1x/cdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 93df28228721..0724122afeac 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -127,7 +127,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
 iommu_free_iova:
__free_iova(&host1x->iova, alloc);
 iommu_free_mem:
-   dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+   dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);
 
return err;
 }
-- 
2.17.0



[PATCH v4 00/15] Memory controller hot reset

2018-04-09 Thread Dmitry Osipenko
Tegra's memory controller has a "memory hot reset" functionality that
blocks all memory transactions for the memory client, which is required
for properly resetting the HW. The HW could be in the process of performing DMA
while being reset, which could lead to a system hang or memory corruption;
the memory hot reset blocks all interactions of the HW with memory so that
it can be reset safely.

Changelog:

V4:
Fixed compilation warning about unused function for the case where
Tegra20 SoC isn't enabled in Kconfig, corrected typos in commit
messages.

V3:
Addressed review comments to V2 from Thierry Reding. MC now
uses generic reset controller API instead of a custom one,
hence DT changes are now involved and so Rob Herring is CC'd
for a review.

Added couple minor cleanup/correction patches.

V2:
Basically a re-send of V1 with some minor changes.


Dmitry Osipenko (14):
  dt-bindings: arm: tegra: Remove duplicated Tegra30+ MC binding
  dt-bindings: memory: tegra: Document #reset-cells property of the
Tegra30 MC
  dt-bindings: arm: tegra: Document #reset-cells property of the Tegra20
MC
  dt-bindings: memory: tegra: Add hot resets definitions
  memory: tegra: Do not handle spurious interrupts
  memory: tegra: Setup interrupts mask before requesting IRQ
  memory: tegra: Apply interrupts mask per SoC
  memory: tegra: Remove unused headers inclusions
  memory: tegra: Squash tegra20-mc into common tegra-mc driver
  memory: tegra: Introduce memory client hot reset
  memory: tegra: Add Tegra124 memory controller hot resets
  memory: tegra: Add Tegra114 memory controller hot resets
  memory: tegra: Add Tegra30 memory controller hot resets
  memory: tegra: Add Tegra20 memory controller hot resets

Thierry Reding (1):
  memory: tegra: Add Tegra210 memory controller hot resets

 .../bindings/arm/tegra/nvidia,tegra20-mc.txt   |  12 +-
 .../bindings/arm/tegra/nvidia,tegra30-mc.txt   |  18 --
 .../memory-controllers/nvidia,tegra30-mc.txt   |   5 +
 drivers/memory/Kconfig |  10 -
 drivers/memory/Makefile|   1 -
 drivers/memory/tegra/Makefile  |   1 +
 drivers/memory/tegra/mc.c  | 358 +++--
 drivers/memory/tegra/mc.h  |  22 ++
 drivers/memory/tegra/tegra114.c|  35 ++
 drivers/memory/tegra/tegra124.c|  48 +++
 drivers/memory/tegra/tegra20.c | 296 +
 drivers/memory/tegra/tegra210.c|  53 ++-
 drivers/memory/tegra/tegra30.c |  35 ++
 drivers/memory/tegra20-mc.c| 254 ---
 include/dt-bindings/memory/tegra114-mc.h   |  19 ++
 include/dt-bindings/memory/tegra124-mc.h   |  25 ++
 include/dt-bindings/memory/tegra20-mc.h|  21 ++
 include/dt-bindings/memory/tegra210-mc.h   |  31 ++
 include/dt-bindings/memory/tegra30-mc.h|  19 ++
 include/soc/tegra/mc.h |  37 ++-
 20 files changed, 978 insertions(+), 322 deletions(-)
 delete mode 100644 
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt
 create mode 100644 drivers/memory/tegra/tegra20.c
 delete mode 100644 drivers/memory/tegra20-mc.c
 create mode 100644 include/dt-bindings/memory/tegra20-mc.h

-- 
2.16.3



[PATCH v4 09/15] memory: tegra: Squash tegra20-mc into common tegra-mc driver

2018-04-09 Thread Dmitry Osipenko
Tegra30+ has some minor differences in register / bit layout compared
to Tegra20. Let's squash the Tegra20 driver into the common tegra-mc driver
in preparation for the upcoming MC hot reset controls implementation,
avoiding code duplication.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/Kconfig |  10 --
 drivers/memory/Makefile|   1 -
 drivers/memory/tegra/Makefile  |   1 +
 drivers/memory/tegra/mc.c  | 120 +--
 drivers/memory/tegra/mc.h  |  11 ++
 drivers/memory/tegra/tegra20.c | 178 +
 drivers/memory/tegra20-mc.c| 254 -
 include/soc/tegra/mc.h |   2 +-
 8 files changed, 299 insertions(+), 278 deletions(-)
 create mode 100644 drivers/memory/tegra/tegra20.c
 delete mode 100644 drivers/memory/tegra20-mc.c

diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 19a0e83f260d..8d731d6c3e54 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -104,16 +104,6 @@ config MVEBU_DEVBUS
  Armada 370 and Armada XP. This controller allows to handle flash
  devices such as NOR, NAND, SRAM, and FPGA.
 
-config TEGRA20_MC
-   bool "Tegra20 Memory Controller(MC) driver"
-   default y
-   depends on ARCH_TEGRA_2x_SOC
-   help
- This driver is for the Memory Controller(MC) module available
- in Tegra20 SoCs, mainly for a address translation fault
- analysis, especially for IOMMU/GART(Graphics Address
- Relocation Table) module.
-
 config FSL_CORENET_CF
tristate "Freescale CoreNet Error Reporting"
depends on FSL_SOC_BOOKE
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index 66f55240830e..a01ab3e22f94 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_OMAP_GPMC)   += omap-gpmc.o
 obj-$(CONFIG_FSL_CORENET_CF)   += fsl-corenet-cf.o
 obj-$(CONFIG_FSL_IFC)  += fsl_ifc.o
 obj-$(CONFIG_MVEBU_DEVBUS) += mvebu-devbus.o
-obj-$(CONFIG_TEGRA20_MC)   += tegra20-mc.o
 obj-$(CONFIG_JZ4780_NEMC)  += jz4780-nemc.o
 obj-$(CONFIG_MTK_SMI)  += mtk-smi.o
 obj-$(CONFIG_DA8XX_DDRCTL) += da8xx-ddrctl.o
diff --git a/drivers/memory/tegra/Makefile b/drivers/memory/tegra/Makefile
index ce87a9470034..94ab16ba075b 100644
--- a/drivers/memory/tegra/Makefile
+++ b/drivers/memory/tegra/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 tegra-mc-y := mc.o
 
+tegra-mc-$(CONFIG_ARCH_TEGRA_2x_SOC)  += tegra20.o
 tegra-mc-$(CONFIG_ARCH_TEGRA_3x_SOC)  += tegra30.o
 tegra-mc-$(CONFIG_ARCH_TEGRA_114_SOC) += tegra114.o
 tegra-mc-$(CONFIG_ARCH_TEGRA_124_SOC) += tegra124.o
diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 60509f0a386b..5932ab33202a 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -37,6 +37,10 @@
 
 #define MC_ERR_ADR 0x0c
 
+#define MC_GART_ERROR_REQ  0x30
+#define MC_DECERR_EMEM_OTHERS_STATUS   0x58
+#define MC_SECURITY_VIOLATION_STATUS   0x74
+
 #define MC_EMEM_ARB_CFG 0x90
 #define  MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE(x)  (((x) & 0x1ff) << 0)
 #define  MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE_MASK0x1ff
@@ -46,6 +50,9 @@
 #define MC_EMEM_ADR_CFG_EMEM_NUMDEV BIT(0)
 
 static const struct of_device_id tegra_mc_of_match[] = {
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+   { .compatible = "nvidia,tegra20-mc", .data = _mc_soc },
+#endif
 #ifdef CONFIG_ARCH_TEGRA_3x_SOC
{ .compatible = "nvidia,tegra30-mc", .data = _mc_soc },
 #endif
@@ -221,6 +228,7 @@ static int tegra_mc_setup_timings(struct tegra_mc *mc)
 static const char *const status_names[32] = {
[ 1] = "External interrupt",
[ 6] = "EMEM address decode error",
+   [ 7] = "GART page fault",
[ 8] = "Security violation",
[ 9] = "EMEM arbitration error",
[10] = "Page fault",
@@ -334,11 +342,85 @@ static irqreturn_t tegra_mc_irq(int irq, void *data)
return IRQ_HANDLED;
 }
 
+static __maybe_unused irqreturn_t tegra20_mc_irq(int irq, void *data)
+{
+   struct tegra_mc *mc = data;
+   unsigned long status;
+   unsigned int bit;
+
+   /* mask all interrupts to avoid flooding */
+   status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
+   if (!status)
+   return IRQ_NONE;
+
+   for_each_set_bit(bit, , 32) {
+   const char *direction = "read", *secure = "";
+   const char *error = status_names[bit];
+   const char *client, *desc;
+   phys_addr_t addr;
+   u32 value, reg;
+   u8 id, type;
+
+   switch (BIT(bit)) {
+   case MC_INT_DECERR_EMEM:
+   reg = MC_DECERR_EMEM_OTHERS_STATUS;
+   value = mc_readl(mc, reg);
+
+

[PATCH v4 02/15] dt-bindings: memory: tegra: Document #reset-cells property of the Tegra30 MC

2018-04-09 Thread Dmitry Osipenko
Memory Controller has a memory client "hot reset" functionality, which
resets the DMA interface of a memory client. So MC is a reset controller
in addition to IOMMU.

Signed-off-by: Dmitry Osipenko 
Reviewed-by: Rob Herring 
---
 .../devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt 
b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt
index 14968b048cd3..a878b5908a4d 100644
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.txt
@@ -12,6 +12,9 @@ Required properties:
 - clock-names: Must include the following entries:
   - mc: the module's clock input
 - interrupts: The interrupt outputs from the controller.
+- #reset-cells : Should be 1. This cell represents memory client module ID.
+  The assignments may be found in header file <dt-bindings/memory/tegra30-mc.h>
+  or in the TRM documentation.
 
 Required properties for Tegra30, Tegra114, Tegra124, Tegra132 and Tegra210:
 - #iommu-cells: Should be 1. The single cell of the IOMMU specifier defines
@@ -72,12 +75,14 @@ Example SoC include file:
interrupts = ;
 
#iommu-cells = <1>;
+   #reset-cells = <1>;
};
 
sdhci@700b {
compatible = "nvidia,tegra124-sdhci";
...
iommus = < TEGRA_SWGROUP_SDMMC1A>;
+   resets = < TEGRA124_MC_RESET_SDMMC1>;
};
 };
 
-- 
2.16.3



[PATCH v4 08/15] memory: tegra: Remove unused headers inclusions

2018-04-09 Thread Dmitry Osipenko
The Tegra210 MC driver includes some unused leftover headers; remove them
for consistency.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/tegra210.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c
index 3b8d0100088c..b729f49ffc8f 100644
--- a/drivers/memory/tegra/tegra210.c
+++ b/drivers/memory/tegra/tegra210.c
@@ -6,11 +6,6 @@
  * published by the Free Software Foundation.
  */
 
-#include 
-#include 
-
-#include 
-
 #include 
 
 #include "mc.h"
-- 
2.16.3



[PATCH v4 07/15] memory: tegra: Apply interrupts mask per SoC

2018-04-09 Thread Dmitry Osipenko
Currently we enable handling of interrupts specific to Tegra124+, which
happen to overlap with previous generations. Let's specify the interrupt
mask per SoC generation for consistency and in preparation for squashing
the Tegra20 driver into the common one, which will enable handling of GART
faults that may be undesirable on newer generations.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/mc.c   | 21 +++--
 drivers/memory/tegra/mc.h   |  9 +
 drivers/memory/tegra/tegra114.c |  2 ++
 drivers/memory/tegra/tegra124.c |  6 ++
 drivers/memory/tegra/tegra210.c |  3 +++
 drivers/memory/tegra/tegra30.c  |  2 ++
 include/soc/tegra/mc.h  |  2 ++
 7 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index e55b9733bd83..60509f0a386b 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -20,14 +20,6 @@
 #include "mc.h"
 
 #define MC_INTSTATUS 0x000
-#define  MC_INT_DECERR_MTS (1 << 16)
-#define  MC_INT_SECERR_SEC (1 << 13)
-#define  MC_INT_DECERR_VPR (1 << 12)
-#define  MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
-#define  MC_INT_INVALID_SMMU_PAGE (1 << 10)
-#define  MC_INT_ARBITRATION_EMEM (1 << 9)
-#define  MC_INT_SECURITY_VIOLATION (1 << 8)
-#define  MC_INT_DECERR_EMEM (1 << 6)
 
 #define MC_INTMASK 0x004
 
@@ -248,13 +240,11 @@ static const char *const error_names[8] = {
 static irqreturn_t tegra_mc_irq(int irq, void *data)
 {
struct tegra_mc *mc = data;
-   unsigned long status, mask;
+   unsigned long status;
unsigned int bit;
 
/* mask all interrupts to avoid flooding */
-   mask = mc_readl(mc, MC_INTMASK);
-   status = mc_readl(mc, MC_INTSTATUS) & mask;
-
+   status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
if (!status)
return IRQ_NONE;
 
@@ -349,7 +339,6 @@ static int tegra_mc_probe(struct platform_device *pdev)
const struct of_device_id *match;
struct resource *res;
struct tegra_mc *mc;
-   u32 value;
int err;
 
match = of_match_node(tegra_mc_of_match, pdev->dev.of_node);
@@ -409,11 +398,7 @@ static int tegra_mc_probe(struct platform_device *pdev)
 
WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n");
 
-   value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
-   MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
-   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM;
-
-   mc_writel(mc, value, MC_INTMASK);
+   mc_writel(mc, mc->soc->intmask, MC_INTMASK);
 
err = devm_request_irq(>dev, mc->irq, tegra_mc_irq, IRQF_SHARED,
   dev_name(>dev), mc);
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index ddb16676c3af..24e020b4609b 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -14,6 +14,15 @@
 
 #include 
 
+#define MC_INT_DECERR_MTS (1 << 16)
+#define MC_INT_SECERR_SEC (1 << 13)
+#define MC_INT_DECERR_VPR (1 << 12)
+#define MC_INT_INVALID_APB_ASID_UPDATE (1 << 11)
+#define MC_INT_INVALID_SMMU_PAGE (1 << 10)
+#define MC_INT_ARBITRATION_EMEM (1 << 9)
+#define MC_INT_SECURITY_VIOLATION (1 << 8)
+#define MC_INT_DECERR_EMEM (1 << 6)
+
 static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
 {
return readl(mc->regs + offset);
diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c
index b20e6e3e208e..7560b2f558a7 100644
--- a/drivers/memory/tegra/tegra114.c
+++ b/drivers/memory/tegra/tegra114.c
@@ -945,4 +945,6 @@ const struct tegra_mc_soc tegra114_mc_soc = {
.atom_size = 32,
.client_id_mask = 0x7f,
.smmu = _smmu_soc,
+   .intmask = MC_INT_INVALID_SMMU_PAGE | MC_INT_SECURITY_VIOLATION |
+  MC_INT_DECERR_EMEM,
 };
diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c
index 8b6360eabb8a..bd16555cca0f 100644
--- a/drivers/memory/tegra/tegra124.c
+++ b/drivers/memory/tegra/tegra124.c
@@ -1035,6 +1035,9 @@ const struct tegra_mc_soc tegra124_mc_soc = {
.smmu = _smmu_soc,
.emem_regs = tegra124_mc_emem_regs,
.num_emem_regs = ARRAY_SIZE(tegra124_mc_emem_regs),
+   .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+  MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
+  MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 };
 #endif /* CONFIG_ARCH_TEGRA_124_SOC */
 
@@ -1059,5 +1062,8 @@ const struct tegra_mc_soc tegra132_mc_soc = {
.atom_size = 32,
.client_id_mask = 0x7f,
.smmu = _smmu_soc,
+   .intmask = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+  MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID

[PATCH v4 06/15] memory: tegra: Setup interrupts mask before requesting IRQ

2018-04-09 Thread Dmitry Osipenko
This avoids an unwanted interrupt during MC driver probe.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/mc.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index d2005b995821..e55b9733bd83 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -407,14 +407,6 @@ static int tegra_mc_probe(struct platform_device *pdev)
return mc->irq;
}
 
-   err = devm_request_irq(>dev, mc->irq, tegra_mc_irq, IRQF_SHARED,
-  dev_name(>dev), mc);
-   if (err < 0) {
-   dev_err(>dev, "failed to request IRQ#%u: %d\n", mc->irq,
-   err);
-   return err;
-   }
-
WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n");
 
value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
@@ -423,6 +415,14 @@ static int tegra_mc_probe(struct platform_device *pdev)
 
mc_writel(mc, value, MC_INTMASK);
 
+   err = devm_request_irq(>dev, mc->irq, tegra_mc_irq, IRQF_SHARED,
+  dev_name(>dev), mc);
+   if (err < 0) {
+   dev_err(>dev, "failed to request IRQ#%u: %d\n", mc->irq,
+   err);
+   return err;
+   }
+
return 0;
 }
 
-- 
2.16.3



[PATCH v4 04/15] dt-bindings: memory: tegra: Add hot resets definitions

2018-04-09 Thread Dmitry Osipenko
Add definitions for the Tegra20+ memory controller hot resets.

Signed-off-by: Dmitry Osipenko 
Reviewed-by: Rob Herring 
---
 include/dt-bindings/memory/tegra114-mc.h | 19 +++
 include/dt-bindings/memory/tegra124-mc.h | 25 +
 include/dt-bindings/memory/tegra20-mc.h  | 21 +
 include/dt-bindings/memory/tegra210-mc.h | 31 +++
 include/dt-bindings/memory/tegra30-mc.h  | 19 +++
 5 files changed, 115 insertions(+)
 create mode 100644 include/dt-bindings/memory/tegra20-mc.h

diff --git a/include/dt-bindings/memory/tegra114-mc.h 
b/include/dt-bindings/memory/tegra114-mc.h
index 27c8386987ff..54a12adec7b8 100644
--- a/include/dt-bindings/memory/tegra114-mc.h
+++ b/include/dt-bindings/memory/tegra114-mc.h
@@ -23,4 +23,23 @@
 #define TEGRA_SWGROUP_EMUCIF   18
 #define TEGRA_SWGROUP_TSEC 19
 
+#define TEGRA114_MC_RESET_AFI  0
+#define TEGRA114_MC_RESET_AVPC 1
+#define TEGRA114_MC_RESET_DC   2
+#define TEGRA114_MC_RESET_DCB  3
+#define TEGRA114_MC_RESET_EPP  4
+#define TEGRA114_MC_RESET_2D   5
+#define TEGRA114_MC_RESET_HC   6
+#define TEGRA114_MC_RESET_HDA  7
+#define TEGRA114_MC_RESET_ISP  8
+#define TEGRA114_MC_RESET_MPCORE   9
+#define TEGRA114_MC_RESET_MPCORELP 10
+#define TEGRA114_MC_RESET_MPE  11
+#define TEGRA114_MC_RESET_3D   12
+#define TEGRA114_MC_RESET_3D2  13
+#define TEGRA114_MC_RESET_PPCS 14
+#define TEGRA114_MC_RESET_SATA 15
+#define TEGRA114_MC_RESET_VDE  16
+#define TEGRA114_MC_RESET_VI   17
+
 #endif
diff --git a/include/dt-bindings/memory/tegra124-mc.h 
b/include/dt-bindings/memory/tegra124-mc.h
index f534d7c06019..186e6b7e9b35 100644
--- a/include/dt-bindings/memory/tegra124-mc.h
+++ b/include/dt-bindings/memory/tegra124-mc.h
@@ -29,4 +29,29 @@
 #define TEGRA_SWGROUP_VIC  24
 #define TEGRA_SWGROUP_VI   25
 
+#define TEGRA124_MC_RESET_AFI  0
+#define TEGRA124_MC_RESET_AVPC 1
+#define TEGRA124_MC_RESET_DC   2
+#define TEGRA124_MC_RESET_DCB  3
+#define TEGRA124_MC_RESET_HC   4
+#define TEGRA124_MC_RESET_HDA  5
+#define TEGRA124_MC_RESET_ISP2 6
+#define TEGRA124_MC_RESET_MPCORE   7
+#define TEGRA124_MC_RESET_MPCORELP 8
+#define TEGRA124_MC_RESET_MSENC9
+#define TEGRA124_MC_RESET_PPCS 10
+#define TEGRA124_MC_RESET_SATA 11
+#define TEGRA124_MC_RESET_VDE  12
+#define TEGRA124_MC_RESET_VI   13
+#define TEGRA124_MC_RESET_VIC  14
+#define TEGRA124_MC_RESET_XUSB_HOST15
+#define TEGRA124_MC_RESET_XUSB_DEV 16
+#define TEGRA124_MC_RESET_TSEC 17
+#define TEGRA124_MC_RESET_SDMMC1   18
+#define TEGRA124_MC_RESET_SDMMC2   19
+#define TEGRA124_MC_RESET_SDMMC3   20
+#define TEGRA124_MC_RESET_SDMMC4   21
+#define TEGRA124_MC_RESET_ISP2B22
+#define TEGRA124_MC_RESET_GPU  23
+
 #endif
diff --git a/include/dt-bindings/memory/tegra20-mc.h 
b/include/dt-bindings/memory/tegra20-mc.h
new file mode 100644
index ..35e131eee198
--- /dev/null
+++ b/include/dt-bindings/memory/tegra20-mc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DT_BINDINGS_MEMORY_TEGRA20_MC_H
+#define DT_BINDINGS_MEMORY_TEGRA20_MC_H
+
+#define TEGRA20_MC_RESET_AVPC  0
+#define TEGRA20_MC_RESET_DC1
+#define TEGRA20_MC_RESET_DCB   2
+#define TEGRA20_MC_RESET_EPP   3
+#define TEGRA20_MC_RESET_2D4
+#define TEGRA20_MC_RESET_HC5
+#define TEGRA20_MC_RESET_ISP   6
+#define TEGRA20_MC_RESET_MPCORE7
+#define TEGRA20_MC_RESET_MPEA  8
+#define TEGRA20_MC_RESET_MPEB  9
+#define TEGRA20_MC_RESET_MPEC  10
+#define TEGRA20_MC_RESET_3D11
+#define TEGRA20_MC_RESET_PPCS  12
+#define TEGRA20_MC_RESET_VDE   13
+#define TEGRA20_MC_RESET_VI14
+
+#endif
diff --git a/include/dt-bindings/memory/tegra210-mc.h 
b/include/dt-bindings/memory/tegra210-mc.h
index 4490f7cf4772..cacf05617e03 100644
--- a/include/dt-bindings/memory/tegra210-mc.h
+++ b/include/dt-bindings/memory/tegra210-mc.h
@@ -34,4 +34,35 @@
 #define TEGRA_SWGROUP_ETR  29
 #define TEGRA_SWGROUP_TSECB30
 
+#define TEGRA210_MC_RESET_AFI  0
+#define TEGRA210_MC_RESET_AVPC 1
+#define TEGRA210_MC_RESET_DC   2
+#define TEGRA210_MC_RESET_DCB  3
+#define TEGRA210_MC_RESET_HC   4
+#define TEGRA210_MC_RESET_HDA  5
+#define TEGRA210_MC_RESET_ISP2 6
+#define TEGRA210_MC_RESET_MPCORE   7
+#define TEGRA210_MC_RESET_NVENC8
+#define TEGRA210_MC_RESET_PPCS 9
+#define TEGRA210_MC_RESET_SATA 10
+#define TEGRA210_MC_RESET_VI   11
+#define TEGRA210_MC_RESET_VIC  12
+#define TEGRA210_MC_RESET_XUSB_HOST13
+#define

[PATCH v4 03/15] dt-bindings: arm: tegra: Document #reset-cells property of the Tegra20 MC

2018-04-09 Thread Dmitry Osipenko
Memory Controller has a memory client "hot reset" functionality, which
resets the DMA interface of a memory client, so MC is a reset controller.

Signed-off-by: Dmitry Osipenko 
Reviewed-by: Rob Herring 
---
 .../devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt  | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
index f9632bacbd04..7d60a50a4fa1 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
@@ -6,11 +6,21 @@ Required properties:
   example below. Note that the MC registers are interleaved with the
   GART registers, and hence must be represented as multiple ranges.
 - interrupts : Should contain MC General interrupt.
+- #reset-cells : Should be 1. This cell represents memory client module ID.
+  The assignments may be found in header file <dt-bindings/memory/tegra20-mc.h>
+  or in the TRM documentation.
 
 Example:
-   memory-controller@7000f000 {
+   mc: memory-controller@7000f000 {
compatible = "nvidia,tegra20-mc";
reg = <0x7000f000 0x024
   0x7000f03c 0x3c4>;
interrupts = <0 77 0x04>;
+   #reset-cells = <1>;
+   };
+
+   video-codec@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   ...
+   resets = < TEGRA20_MC_RESET_VDE>;
};
-- 
2.16.3



[PATCH v4 05/15] memory: tegra: Do not handle spurious interrupts

2018-04-09 Thread Dmitry Osipenko
The ISR reads the interrupt-enable mask, but doesn't utilize it. Apply the
mask to the interrupt status and don't handle interrupts that the MC driver
hasn't asked for. The kernel would disable a spurious MC IRQ and report the
error. This would happen only in the case of a very severe bug.

Signed-off-by: Dmitry Osipenko 
---
 drivers/memory/tegra/mc.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index a4803ac192bb..d2005b995821 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -252,8 +252,11 @@ static irqreturn_t tegra_mc_irq(int irq, void *data)
unsigned int bit;
 
/* mask all interrupts to avoid flooding */
-   status = mc_readl(mc, MC_INTSTATUS);
mask = mc_readl(mc, MC_INTMASK);
+   status = mc_readl(mc, MC_INTSTATUS) & mask;
+
+   if (!status)
+   return IRQ_NONE;
 
for_each_set_bit(bit, , 32) {
const char *error = status_names[bit] ?: "unknown";
-- 
2.16.3



[PATCH v4 01/15] dt-bindings: arm: tegra: Remove duplicated Tegra30+ MC binding

2018-04-09 Thread Dmitry Osipenko
There are two bindings for the same Memory Controller. One of the bindings
became obsolete a long time ago and was probably left unnoticed; remove it
for consistency.

Signed-off-by: Dmitry Osipenko 
Reviewed-by: Rob Herring 
---
 .../bindings/arm/tegra/nvidia,tegra30-mc.txt   | 18 --
 1 file changed, 18 deletions(-)
 delete mode 100644 
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt

diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt
deleted file mode 100644
index bdf1a612422b..
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-NVIDIA Tegra30 MC(Memory Controller)
-
-Required properties:
-- compatible : "nvidia,tegra30-mc"
-- reg : Should contain 4 register ranges(address and length); see the
-  example below. Note that the MC registers are interleaved with the
-  SMMU registers, and hence must be represented as multiple ranges.
-- interrupts : Should contain MC General interrupt.
-
-Example:
-   memory-controller {
-   compatible = "nvidia,tegra30-mc";
-   reg = <0x7000f000 0x010
-  0x7000f03c 0x1b4
-  0x7000f200 0x028
-  0x7000f284 0x17c>;
-   interrupts = <0 77 0x04>;
-   };
-- 
2.16.3



[PATCH v1 3/4] iommu/tegra: gart: Constify number of GART pages

2018-04-09 Thread Dmitry Osipenko
GART has a fixed aperture size, hence the number of pages is constant.

Signed-off-by: Dmitry Osipenko 
---
 drivers/iommu/tegra-gart.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 89ec24c6952c..4a0607669d34 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -33,6 +33,8 @@
 
 #include 
 
+#define GART_APERTURE_SIZE SZ_32M
+
 /* bitmap of the page sizes currently supported */
 #define GART_IOMMU_PGSIZES (SZ_4K)
 
@@ -47,6 +49,8 @@
 #define GART_PAGE_MASK \
(~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID)
 
+#define GART_PAGECOUNT (GART_APERTURE_SIZE >> GART_PAGE_SHIFT)
+
 struct gart_client {
struct device   *dev;
struct list_headlist;
@@ -55,7 +59,6 @@ struct gart_client {
 struct gart_device {
void __iomem*regs;
u32 *savedata;
-   u32 page_count; /* total remappable size */
dma_addr_t  iovmm_base; /* offset to vmm_area */
spinlock_t  pte_lock;   /* for pagetable */
struct list_headclient;
@@ -91,7 +94,7 @@ static struct gart_domain *to_gart_domain(struct iommu_domain 
*dom)
 
 #define for_each_gart_pte(gart, iova)  \
for (iova = gart->iovmm_base;   \
-iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \
+iova < gart->iovmm_base + GART_APERTURE_SIZE;  \
 iova += GART_PAGE_SIZE)
 
 static inline void gart_set_pte(struct gart_device *gart,
@@ -158,7 +161,7 @@ static inline bool gart_iova_range_valid(struct gart_device 
*gart,
iova_start = iova;
iova_end = iova_start + bytes - 1;
gart_start = gart->iovmm_base;
-   gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1;
+   gart_end = gart_start + GART_APERTURE_SIZE - 1;
 
if (iova_start < gart_start)
return false;
@@ -241,7 +244,7 @@ static struct iommu_domain 
*gart_iommu_domain_alloc(unsigned type)
gart_domain->gart = gart;
gart_domain->domain.geometry.aperture_start = gart->iovmm_base;
gart_domain->domain.geometry.aperture_end = gart->iovmm_base +
-   gart->page_count * GART_PAGE_SIZE - 1;
+   GART_APERTURE_SIZE - 1;
gart_domain->domain.geometry.force_aperture = true;
 
return _domain->domain;
@@ -463,9 +466,8 @@ static int tegra_gart_probe(struct platform_device *pdev)
INIT_LIST_HEAD(>client);
gart->regs = gart_regs;
gart->iovmm_base = (dma_addr_t)res_remap->start;
-   gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT);
 
-   gart->savedata = vmalloc(sizeof(u32) * gart->page_count);
+   gart->savedata = vmalloc(sizeof(u32) * GART_PAGECOUNT);
if (!gart->savedata) {
dev_err(dev, "failed to allocate context save area\n");
return -ENOMEM;
-- 
2.16.3



[PATCH v1 1/4] iommu/tegra: gart: Add debugging facility

2018-04-09 Thread Dmitry Osipenko
A page mapping could be overwritten by accident (a bug). We can catch this
case by checking the 'VALID' bit of the GART's page entry prior to mapping a
page. Since that check introduces a small performance impact, it should be
enabled explicitly using the GART kernel module's new 'gart_debug' parameter.

Signed-off-by: Dmitry Osipenko 
---
 drivers/iommu/tegra-gart.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index b62f790ad1ba..4c0abdcd1ad2 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -72,6 +72,8 @@ struct gart_domain {
 
 static struct gart_device *gart_handle; /* unique for a system */
 
+static bool gart_debug;
+
 #define GART_PTE(_pfn) \
(GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT))
 
@@ -271,6 +273,7 @@ static int gart_iommu_map(struct iommu_domain *domain, 
unsigned long iova,
struct gart_device *gart = gart_domain->gart;
unsigned long flags;
unsigned long pfn;
+   unsigned long pte;
 
if (!gart_iova_range_valid(gart, iova, bytes))
return -EINVAL;
@@ -282,6 +285,14 @@ static int gart_iommu_map(struct iommu_domain *domain, 
unsigned long iova,
spin_unlock_irqrestore(>pte_lock, flags);
return -EINVAL;
}
+   if (gart_debug) {
+   pte = gart_read_pte(gart, iova);
+   if (pte & GART_ENTRY_PHYS_ADDR_VALID) {
+   spin_unlock_irqrestore(>pte_lock, flags);
+   dev_err(gart->dev, "Page entry is in-use\n");
+   return -EBUSY;
+   }
+   }
gart_set_pte(gart, iova, GART_PTE(pfn));
FLUSH_GART_REGS(gart);
spin_unlock_irqrestore(>pte_lock, flags);
@@ -515,7 +526,9 @@ static void __exit tegra_gart_exit(void)
 
 subsys_initcall(tegra_gart_init);
 module_exit(tegra_gart_exit);
+module_param(gart_debug, bool, 0644);
 
+MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
 MODULE_DESCRIPTION("IOMMU API for GART in Tegra20");
 MODULE_AUTHOR("Hiroshi DOYU ");
 MODULE_ALIAS("platform:tegra-gart");
-- 
2.16.3



[PATCH v1 4/4] iommu/tegra: gart: Optimize map/unmap

2018-04-09 Thread Dmitry Osipenko
Currently GART writes one page entry at a time. It is more optimal to
aggregate the writes and flush the BUS buffer at the end; this gives map/unmap
a 10-40% (depending on the size of the mapping) performance boost compared to
flushing after each entry update.

Signed-off-by: Dmitry Osipenko 
---
 drivers/iommu/tegra-gart.c | 63 +++---
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 4a0607669d34..9f59f5f17661 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -36,7 +36,7 @@
 #define GART_APERTURE_SIZE SZ_32M
 
 /* bitmap of the page sizes currently supported */
-#define GART_IOMMU_PGSIZES (SZ_4K)
+#define GART_IOMMU_PGSIZES GENMASK(24, 12)
 
 #define GART_REG_BASE  0x24
 #define GART_CONFIG(0x24 - GART_REG_BASE)
@@ -269,25 +269,21 @@ static void gart_iommu_domain_free(struct iommu_domain 
*domain)
kfree(gart_domain);
 }
 
-static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t bytes, int prot)
+static int gart_iommu_map_page(struct gart_device *gart,
+  unsigned long iova,
+  phys_addr_t pa)
 {
-   struct gart_domain *gart_domain = to_gart_domain(domain);
-   struct gart_device *gart = gart_domain->gart;
unsigned long flags;
unsigned long pfn;
unsigned long pte;
 
-   if (!gart_iova_range_valid(gart, iova, bytes))
-   return -EINVAL;
-
-   spin_lock_irqsave(>pte_lock, flags);
pfn = __phys_to_pfn(pa);
if (!pfn_valid(pfn)) {
dev_err(gart->dev, "Invalid page: %pa\n", );
-   spin_unlock_irqrestore(>pte_lock, flags);
return -EINVAL;
}
+
+   spin_lock_irqsave(>pte_lock, flags);
if (gart_debug) {
pte = gart_read_pte(gart, iova);
if (pte & GART_ENTRY_PHYS_ADDR_VALID) {
@@ -297,8 +293,41 @@ static int gart_iommu_map(struct iommu_domain *domain, 
unsigned long iova,
}
}
gart_set_pte(gart, iova, GART_PTE(pfn));
+   spin_unlock_irqrestore(>pte_lock, flags);
+
+   return 0;
+}
+
+static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t pa, size_t bytes, int prot)
+{
+   struct gart_domain *gart_domain = to_gart_domain(domain);
+   struct gart_device *gart = gart_domain->gart;
+   size_t mapped;
+   int ret = -1;
+
+   if (!gart_iova_range_valid(gart, iova, bytes))
+   return -EINVAL;
+
+   for (mapped = 0; mapped < bytes; mapped += GART_PAGE_SIZE) {
+   ret = gart_iommu_map_page(gart, iova + mapped, pa + mapped);
+   if (ret)
+   break;
+   }
+
FLUSH_GART_REGS(gart);
+   return ret;
+}
+
+static int gart_iommu_unmap_page(struct gart_device *gart,
+unsigned long iova)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(>pte_lock, flags);
+   gart_set_pte(gart, iova, 0);
spin_unlock_irqrestore(>pte_lock, flags);
+
return 0;
 }
 
@@ -307,16 +336,20 @@ static size_t gart_iommu_unmap(struct iommu_domain 
*domain, unsigned long iova,
 {
struct gart_domain *gart_domain = to_gart_domain(domain);
struct gart_device *gart = gart_domain->gart;
-   unsigned long flags;
+   size_t unmapped;
+   int ret;
 
if (!gart_iova_range_valid(gart, iova, bytes))
return 0;
 
-   spin_lock_irqsave(>pte_lock, flags);
-   gart_set_pte(gart, iova, 0);
+   for (unmapped = 0; unmapped < bytes; unmapped += GART_PAGE_SIZE) {
+   ret = gart_iommu_unmap_page(gart, iova + unmapped);
+   if (ret)
+   break;
+   }
+
FLUSH_GART_REGS(gart);
-   spin_unlock_irqrestore(>pte_lock, flags);
-   return bytes;
+   return unmapped;
 }
 
 static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
-- 
2.16.3



[PATCH v1 0/4] Tegra GART fixes and improvements

2018-04-09 Thread Dmitry Osipenko
The GART driver has never been utilized upstream, but this should finally
change sometime soon with the Tegra DRM driver rework. In general the GART
driver works fine, though there are a couple of things that could be improved.

Dmitry Osipenko (4):
  iommu/tegra: gart: Add debugging facility
  iommu/tegra: gart: Fix gart_iommu_unmap()
  iommu/tegra: gart: Constify number of GART pages
  iommu/tegra: gart: Optimize map/unmap

 drivers/iommu/tegra-gart.c | 90 +++---
 1 file changed, 69 insertions(+), 21 deletions(-)

-- 
2.16.3



[PATCH v1 2/4] iommu/tegra: gart: Fix gart_iommu_unmap()

2018-04-09 Thread Dmitry Osipenko
It must return the number of unmapped bytes on success. Returning 0 means
that unmapping failed, and as a result only one page is unmapped.

Signed-off-by: Dmitry Osipenko 
---
 drivers/iommu/tegra-gart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 4c0abdcd1ad2..89ec24c6952c 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -313,7 +313,7 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, 
unsigned long iova,
gart_set_pte(gart, iova, 0);
FLUSH_GART_REGS(gart);
spin_unlock_irqrestore(>pte_lock, flags);
-   return 0;
+   return bytes;
 }
 
 static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
-- 
2.16.3



[PATCH v3 3/3] usb: phy: Add Kconfig entry for Tegra PHY driver

2018-04-09 Thread Dmitry Osipenko
Tegra's EHCI driver has a build dependency on Tegra's PHY driver, and
currently Tegra's PHY driver is built only when Tegra's EHCI driver is
built. Add a dedicated Kconfig entry for Tegra's PHY driver so that drivers
other than ehci-tegra (like ChipIdea UDC) can work with the ehci-tegra
driver disabled in the kernel config, by allowing the user to manually
select the PHY driver.

Signed-off-by: Dmitry Osipenko 
---
 drivers/usb/host/Kconfig | 4 +---
 drivers/usb/phy/Kconfig  | 9 +
 drivers/usb/phy/Makefile | 2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 5d958da8e1bc..9f0aeb068acb 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -234,9 +234,7 @@ config USB_EHCI_TEGRA
tristate "NVIDIA Tegra HCD support"
depends on ARCH_TEGRA
select USB_EHCI_ROOT_HUB_TT
-   select USB_PHY
-   select USB_ULPI
-   select USB_ULPI_VIEWPORT
+   select USB_TEGRA_PHY
help
  This driver enables support for the internal USB Host Controllers
  found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 0f8ab981d572..b9b0a44be679 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -159,6 +159,15 @@ config USB_MXS_PHY
 
  MXS Phy is used by some of the i.MX SoCs, for example imx23/28/6x.
 
+config USB_TEGRA_PHY
+   tristate "NVIDIA Tegra USB PHY Driver"
+   depends on ARCH_TEGRA
+   select USB_PHY
+   select USB_ULPI
+   help
+ This driver provides PHY support for the USB controllers found
+ on NVIDIA Tegra SoC's.
+
 config USB_ULPI
bool "Generic ULPI Transceiver Driver"
depends on ARM || ARM64
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 25e579fb92b8..df1d99010079 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_AM335X_CONTROL_USB)  += phy-am335x-control.o
 obj-$(CONFIG_AM335X_PHY_USB)   += phy-am335x.o
 obj-$(CONFIG_OMAP_OTG) += phy-omap-otg.o
 obj-$(CONFIG_TWL6030_USB)  += phy-twl6030-usb.o
-obj-$(CONFIG_USB_EHCI_TEGRA)   += phy-tegra-usb.o
+obj-$(CONFIG_USB_TEGRA_PHY)+= phy-tegra-usb.o
 obj-$(CONFIG_USB_GPIO_VBUS)+= phy-gpio-vbus-usb.o
 obj-$(CONFIG_USB_ISP1301)  += phy-isp1301.o
 obj-$(CONFIG_USB_MV_OTG)   += phy-mv-usb.o
-- 
2.16.3



<    6   7   8   9   10   11   12   13   14   15   >