Re: [4.4-RT PATCH RFC/RFT] drivers: net: cpsw: mark rx/tx irq as IRQF_NO_THREAD

2017-04-07 Thread Grygorii Strashko
Hi Sebastian,

On 09/09/2016 07:46 AM, Grygorii Strashko wrote:
> On 09/08/2016 07:00 PM, Sebastian Andrzej Siewior wrote:
>> On 2016-08-19 17:29:16 [+0300], Grygorii Strashko wrote:
>>> I've collected trace before first occurrence of  "NOHZ: 
>>> local_softirq_pending 80"
>>>
> 
>>
>>>  irq/354-4848400-85[000]90.642393: softirq_exit: vec=3 
>>> [action=NET_RX]
>>>  irq/354-4848400-85[000]90.642419: sched_switch: 
>>> irq/354-4848400:85 [49] S ==> rcuc/0:11 [98]
>>
>> We don't serve TIMER & SCHED because those two are pushed to the
>> ksoftirq thread(s). So we keep mostly doing NET_RX and now we switch to
>> the next best thing which is RCU.
>>
>>>   rcuc/0-11[000]90.642430: irq_handler_entry:irq=354 
>>> name=48484000.ethernet
>> but not for long.
>>
>>>   rcuc/0-11[000]90.642432: irq_handler_exit: irq=354 
>>> ret=handled
>>>   rcuc/0-11[000]90.642435: sched_waking: 
>>> comm=irq/354-4848400 pid=85 prio=49 target_cpu=000
>>>   rcuc/0-11[000]90.642442: sched_migrate_task:   
>>> comm=irq/354-4848400 pid=85 prio=49 orig_cpu=0 dest_cpu=1
>>>   rcuc/0-11[000]90.642453: sched_wakeup: 
>>> irq/354-4848400:85 [49] success=1 CPU:001
>>>iperf-1284  [001]90.642462: sched_stat_runtime:   comm=iperf 
>>> pid=1284 runtime=113053 [ns] vruntime=2106997666 [ns]
>>>   rcuc/0-11[000]90.642464: irq_handler_entry:irq=355 
>>> name=48484000.ethernet
>>>   rcuc/0-11[000]90.642466: irq_handler_exit: irq=355 
>>> ret=handled
>>>   rcuc/0-11[000]90.642469: sched_waking: 
>>> comm=irq/355-4848400 pid=86 prio=49 target_cpu=001
>>>iperf-1284  [001]90.642473: sched_switch: iperf:1284 
>>> [120] R ==> irq/354-4848400:85 [49]
>>>  irq/354-4848400-85[001]90.642481: softirq_raise:vec=3 
>>> [action=NET_RX]
>>>   rcuc/0-11[000]90.642483: sched_wakeup: 
>>> irq/355-4848400:86 [49] success=1 CPU:001
>>>  irq/354-4848400-85[001]90.642493: softirq_entry:vec=3 
>>> [action=NET_RX]
>>>   rcuc/0-11[000]90.642497: sched_migrate_task:   
>>> comm=irq/355-4848400 pid=86 prio=49 orig_cpu=1 dest_cpu=0
>> ach that IRQ thread no pinned. Good. We migrate.
>>
> 
> It looks like scheduler playing ping-pong between CPUs with threaded irqs 
> irq/354-355.
> And seems this might be the case - if I pin both threaded IRQ handlers to CPU0
> I can see better latency and netperf improvement
> cyclictest -m -Sp98 -q  -D4m
> T: 0 ( 1318) P:98 I:1000 C: 24 Min:  9 Act:   14 Avg:   15 Max:  
> 42
> T: 1 ( 1319) P:98 I:1500 C: 159909 Min:  9 Act:   14 Avg:   16 Max:  
> 39
> 
> if I arrange hwirqs  and pin pin both threaded IRQ handlers on CPU1 
> I can observe more less similar results as with this patch. 
> 
> 
>>>   rcuc/0-11[000]90.642515: irq_handler_entry:irq=354 
>>> name=48484000.ethernet
>>>   rcuc/0-11[000]90.642516: irq_handler_exit: irq=354 
>>> ret=handled
> 
>>
>> As you see ksoftirqd left the CPU with a D so I would assume it is
>> blocked on a lock and waits. 
>> NET_RX is in progress but scheduled out due to RCUC which is also
>> scheduled out.
>>
>> I assume we got to softirq because nothing else can run. It will see
>> that NET_RX is pending and tries it but blocks on the lock
>> (lock_softirq()). It schedules out. Nothing left -> idle.
>>
>> The idle code checks to see if a softirq is pending and in fact there is
>> SCHED on the list and ksoftirq was about to handle it but due to
>> ordering complication (NET_RX before SCHED) it can't. And we have the
>> warning.
>>
>> This 
>>
>> --- a/kernel/softirq.c
>> +++ b/kernel/softirq.c
>> @@ -105,6 +105,7 @@ void softirq_check_pending_idle(void)
>>  {
>>  static int rate_limit;
>>  struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
>> +struct task_struct *ksoft_tsk = __this_cpu_read(ksoftirqd);
>>  u32 warnpending;
>>  int i;
>>  
>> @@ -112,7 +113,7 @@ void softirq_check_pending_idle(void)
>>  return;
>>  
>>  warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
>> -for (i = 0; i &l

[PATCH] net: phy: micrel: KSZ9031: fix crash when statistic requested

2017-04-10 Thread Grygorii Strashko
Now the command:
ethtool --phy-statistics eth0
will cause system crash with meassage "Unable to handle kernel NULL pointer
dereference at virtual address 0010" from:

 (kszphy_get_stats) from [] (ethtool_get_phy_stats+0xd8/0x210)
 (ethtool_get_phy_stats) from [] (dev_ethtool+0x5b8/0x228c)
 (dev_ethtool) from [] (dev_ioctl+0x3fc/0x964)
 (dev_ioctl) from [] (sock_ioctl+0x170/0x2c0)
 (sock_ioctl) from [] (do_vfs_ioctl+0xa8/0x95c)
 (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64)
 (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x44)

The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
defined. As result, struct phy_device *phydev->priv pointer will not be
initializes (null).

Fix it by adding additional checks for !phydev->priv in
kszphy_get_stats(), kszphy_get_strings() and kszphy_get_sset_count()

Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
Signed-off-by: Grygorii Strashko 
---
 drivers/net/phy/micrel.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070..8dbc1be 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -574,7 +574,6 @@ static int ksz9031_config_init(struct phy_device *phydev)
MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
tx_data_skews, 4);
}
-
return ksz9031_center_flp_timing(phydev);
 }
 
@@ -654,6 +653,9 @@ ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int ptrad, 
int devnum,
 
 static int kszphy_get_sset_count(struct phy_device *phydev)
 {
+   if (!phydev->priv)
+   return -EOPNOTSUPP;
+
return ARRAY_SIZE(kszphy_hw_stats);
 }
 
@@ -661,6 +663,9 @@ static void kszphy_get_strings(struct phy_device *phydev, 
u8 *data)
 {
int i;
 
+   if (!phydev->priv)
+   return;
+
for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) {
memcpy(data + i * ETH_GSTRING_LEN,
   kszphy_hw_stats[i].string, ETH_GSTRING_LEN);
@@ -694,6 +699,9 @@ static void kszphy_get_stats(struct phy_device *phydev,
 {
int i;
 
+   if (!phydev->priv)
+   return;
+
for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++)
data[i] = kszphy_get_stat(phydev, i);
 }
-- 
2.10.1



Re: [PATCH] net: phy: micrel: KSZ9031: fix crash when statistic requested

2017-04-11 Thread Grygorii Strashko


On 04/10/2017 06:40 PM, Florian Fainelli wrote:
> On 04/10/2017 04:33 PM, Grygorii Strashko wrote:
>> Now the command:
>>  ethtool --phy-statistics eth0
>> will cause system crash with meassage "Unable to handle kernel NULL pointer
>> dereference at virtual address 0010" from:
>>
>>  (kszphy_get_stats) from [] (ethtool_get_phy_stats+0xd8/0x210)
>>  (ethtool_get_phy_stats) from [] (dev_ethtool+0x5b8/0x228c)
>>  (dev_ethtool) from [] (dev_ioctl+0x3fc/0x964)
>>  (dev_ioctl) from [] (sock_ioctl+0x170/0x2c0)
>>  (sock_ioctl) from [] (do_vfs_ioctl+0xa8/0x95c)
>>  (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64)
>>  (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x44)
>>
>> The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
>> defined. As result, struct phy_device *phydev->priv pointer will not be
>> initializes (null).
> 
> This is a strange way to fix the problem, presumably this PHY supports
> fetching statistics, if that is the case it sounds like we would want to
> sort of provide two probe function:
> 
> - one which is just allocating the PHY device's private structure so we
> have enough room for statistics
> - another one which is doing all the reference clock fetching and so on
> 
> By adding a NULL pointer check here, you'd be better off just removing
> all the function pointers pertaining to ethtool statistics.

I've considered all this option, honestly, but I do not know if this
phys (issue should affect KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, 
KS8737)
support fetching statistics or not and was hoping to get feedback from 
community.

Simples way for me was to block statistic callbacks for them,
but I, of course, can just remove those callback for above phys if this is 
acceptable.

> 
>>
>> Fix it by adding additional checks for !phydev->priv in
>> kszphy_get_stats(), kszphy_get_strings() and kszphy_get_sset_count()
>>
>> Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
>> Signed-off-by: Grygorii Strashko 
>> ---
>>  drivers/net/phy/micrel.c | 10 +-
>>  1 file changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
>> index 6742070..8dbc1be 100644
>> --- a/drivers/net/phy/micrel.c
>> +++ b/drivers/net/phy/micrel.c
>> @@ -574,7 +574,6 @@ static int ksz9031_config_init(struct phy_device *phydev)
>>  MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
>>  tx_data_skews, 4);
>>  }
>> -
>>  return ksz9031_center_flp_timing(phydev);
>>  }
>>  
>> @@ -654,6 +653,9 @@ ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int 
>> ptrad, int devnum,
>>  
>>  static int kszphy_get_sset_count(struct phy_device *phydev)
>>  {
>> +if (!phydev->priv)
>> +return -EOPNOTSUPP;
>> +
>>  return ARRAY_SIZE(kszphy_hw_stats);
>>  }
>>  
>> @@ -661,6 +663,9 @@ static void kszphy_get_strings(struct phy_device 
>> *phydev, u8 *data)
>>  {
>>  int i;
>>  
>> +if (!phydev->priv)
>> +return;
>> +
>>  for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) {
>>  memcpy(data + i * ETH_GSTRING_LEN,
>> kszphy_hw_stats[i].string, ETH_GSTRING_LEN);
>> @@ -694,6 +699,9 @@ static void kszphy_get_stats(struct phy_device *phydev,
>>  {
>>  int i;
>>  
>> +if (!phydev->priv)
>> +return;
>> +
>>  for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++)
>>  data[i] = kszphy_get_stat(phydev, i);
>>  }
>>
> 
> 

-- 
regards,
-grygorii


Re: [PATCH] net: phy: micrel: KSZ9031: fix crash when statistic requested

2017-04-11 Thread Grygorii Strashko



On 04/11/2017 11:36 AM, Andrew Lunn wrote:

I've considered all this option, honestly, but I do not know if this
phys (issue should affect KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, 
KS8737)
support fetching statistics or not and was hoping to get feedback from 
community.


Hi Grygorii

Microchip are one of the good guys. All there datesheets are open. So
you should be able to check this.



Below is what i've found
0xa 0x15 action
KS8737  -   +remove stats? - reg 10(0xa) reserved
KSZ8061MNX/MNG  -   +remove stats? - reg 10(0xa) reserved
KSZ9021GN   +   +add .probe()
KSZ9031 +   +add .probe()
KSZ8873MLL/FLL/RLL  -   -remove stats
KSZ886X/KSZ8862 -   -remove stats

The kszphy_probe() can be reused as .probe() callback as it seems
not phy specific.


--
regards,
-grygorii


[PATCH v2] net: phy: micrel: fix crash when statistic requested for KSZ9031 phy

2017-04-12 Thread Grygorii Strashko
Now the command:
ethtool --phy-statistics eth0
will cause system crash with meassage "Unable to handle kernel NULL pointer
dereference at virtual address 0010" from:

 (kszphy_get_stats) from [] (ethtool_get_phy_stats+0xd8/0x210)
 (ethtool_get_phy_stats) from [] (dev_ethtool+0x5b8/0x228c)
 (dev_ethtool) from [] (dev_ioctl+0x3fc/0x964)
 (dev_ioctl) from [] (sock_ioctl+0x170/0x2c0)
 (sock_ioctl) from [] (do_vfs_ioctl+0xa8/0x95c)
 (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64)
 (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x44)

The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
defined. As result, struct phy_device *phydev->priv pointer will not be
initializes (null).
This issue will affect also following phys:
 KSZ8795, KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, KS8737

Fix it by:
- adding .probe() = kszphy_probe() callback to KSZ9031, KSZ9021
phys. The kszphy_probe() can be re-used as it doesn't do any phy specific
settings.
- removing statistic callbacks from other phys (KSZ8795, KSZ886X,
KSZ8873MLL, KSZ8061, KS8737) as they doesn't have corresponding
statistic counters.

Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
Signed-off-by: Grygorii Strashko 
---
changes in v2:
 - probe callback added to KSZ9031, KSZ9021
 - statistic callback removed from KSZ8795, KSZ886X, KSZ8873MLL, KSZ8061, KS8737

Link on v1:
 https://lkml.org/lkml/2017/4/10/1183

 drivers/net/phy/micrel.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070..6f207e6 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -574,7 +574,6 @@ static int ksz9031_config_init(struct phy_device *phydev)
MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
tx_data_skews, 4);
}
-
return ksz9031_center_flp_timing(phydev);
 }
 
@@ -798,9 +797,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status= genphy_read_status,
.ack_interrupt  = kszphy_ack_interrupt,
.config_intr= kszphy_config_intr,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -940,9 +936,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status= genphy_read_status,
.ack_interrupt  = kszphy_ack_interrupt,
.config_intr= kszphy_config_intr,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -952,6 +945,7 @@ static struct phy_driver ksphy_driver[] = {
.features   = PHY_GBIT_FEATURES,
.flags  = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data= &ksz9021_type,
+   .probe  = kszphy_probe,
.config_init= ksz9021_config_init,
.config_aneg= genphy_config_aneg,
.read_status= genphy_read_status,
@@ -971,6 +965,7 @@ static struct phy_driver ksphy_driver[] = {
.features   = PHY_GBIT_FEATURES,
.flags  = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data= &ksz9021_type,
+   .probe  = kszphy_probe,
.config_init= ksz9031_config_init,
.config_aneg= genphy_config_aneg,
.read_status= ksz9031_read_status,
@@ -989,9 +984,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init= kszphy_config_init,
.config_aneg= ksz8873mll_config_aneg,
.read_status= ksz8873mll_read_status,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -1003,9 +995,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init= kszphy_config_init,
.config_aneg= genphy_config_aneg,
.read_status= genphy_read_status,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -1017,9 +1006,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init= kszphy_config_init,
.config_aneg= ksz8873mll_config_aneg,
.read_status= ksz8873mll_read_status,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 } };
-- 
2.10.1



Re: [PATCH v2] net: phy: micrel: fix crash when statistic requested for KSZ9031 phy

2017-04-13 Thread Grygorii Strashko



On 04/13/2017 01:51 PM, Andrew Lunn wrote:

On Wed, Apr 12, 2017 at 05:55:10PM -0500, Grygorii Strashko wrote:

Now the command:
ethtool --phy-statistics eth0
will cause system crash with meassage "Unable to handle kernel NULL pointer
dereference at virtual address 0010" from:

 (kszphy_get_stats) from [] (ethtool_get_phy_stats+0xd8/0x210)
 (ethtool_get_phy_stats) from [] (dev_ethtool+0x5b8/0x228c)
 (dev_ethtool) from [] (dev_ioctl+0x3fc/0x964)
 (dev_ioctl) from [] (sock_ioctl+0x170/0x2c0)
 (sock_ioctl) from [] (do_vfs_ioctl+0xa8/0x95c)
 (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64)
 (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x44)

The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
defined. As result, struct phy_device *phydev->priv pointer will not be
initializes (null).
This issue will affect also following phys:
 KSZ8795, KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, KS8737

Fix it by:
- adding .probe() = kszphy_probe() callback to KSZ9031, KSZ9021
phys. The kszphy_probe() can be re-used as it doesn't do any phy specific
settings.
- removing statistic callbacks from other phys (KSZ8795, KSZ886X,
KSZ8873MLL, KSZ8061, KS8737) as they doesn't have corresponding
statistic counters.

Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
Signed-off-by: Grygorii Strashko 
---
changes in v2:
 - probe callback added to KSZ9031, KSZ9021
 - statistic callback removed from KSZ8795, KSZ886X, KSZ8873MLL, KSZ8061, KS8737

Link on v1:
 https://lkml.org/lkml/2017/4/10/1183

 drivers/net/phy/micrel.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070..6f207e6 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -574,7 +574,6 @@ static int ksz9031_config_init(struct phy_device *phydev)
MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
tx_data_skews, 4);
}
-
return ksz9031_center_flp_timing(phydev);
 }


Hi Grygorii

Whitespace changed like this should be in a separate patch, or not
made at all.



Oh. sry i've missed it. Will resend




Otherwise, thanks for looking at the datasheets and fixing this up.

Reviewed-by: Andrew Lunn 



--
regards,
-grygorii


[PATCH v3] net: phy: micrel: fix crash when statistic requested for KSZ9031 phy

2017-04-13 Thread Grygorii Strashko
Now the command:
ethtool --phy-statistics eth0
will cause system crash with meassage "Unable to handle kernel NULL pointer
dereference at virtual address 0010" from:

 (kszphy_get_stats) from [] (ethtool_get_phy_stats+0xd8/0x210)
 (ethtool_get_phy_stats) from [] (dev_ethtool+0x5b8/0x228c)
 (dev_ethtool) from [] (dev_ioctl+0x3fc/0x964)
 (dev_ioctl) from [] (sock_ioctl+0x170/0x2c0)
 (sock_ioctl) from [] (do_vfs_ioctl+0xa8/0x95c)
 (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64)
 (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x44)

The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
defined. As result, struct phy_device *phydev->priv pointer will not be
initializes (null).
This issue will affect also following phys:
 KSZ8795, KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, KS8737

Fix it by:
- adding .probe() = kszphy_probe() callback to KSZ9031, KSZ9021
phys. The kszphy_probe() can be re-used as it doesn't do any phy specific
settings.
- removing statistic callbacks from other phys (KSZ8795, KSZ886X,
KSZ8873MLL, KSZ8061, KS8737) as they doesn't have corresponding
statistic counters.

Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
Signed-off-by: Grygorii Strashko 
Reviewed-by: Andrew Lunn 
---
changes in v3:
- occasional whitespace change removed

changes in v2:
 - probe callback added to KSZ9031, KSZ9021
 - statistic callback removed from KSZ8795, KSZ886X, KSZ8873MLL, KSZ8061, KS8737

Links
v2: https://patchwork.ozlabs.org/patch/750194/
v1: https://lkml.org/lkml/2017/4/10/1183

 drivers/net/phy/micrel.c | 17 ++---
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070..1326d99 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -798,9 +798,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status= genphy_read_status,
.ack_interrupt  = kszphy_ack_interrupt,
.config_intr= kszphy_config_intr,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -940,9 +937,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status= genphy_read_status,
.ack_interrupt  = kszphy_ack_interrupt,
.config_intr= kszphy_config_intr,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -952,6 +946,7 @@ static struct phy_driver ksphy_driver[] = {
.features   = PHY_GBIT_FEATURES,
.flags  = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data= &ksz9021_type,
+   .probe  = kszphy_probe,
.config_init= ksz9021_config_init,
.config_aneg= genphy_config_aneg,
.read_status= genphy_read_status,
@@ -971,6 +966,7 @@ static struct phy_driver ksphy_driver[] = {
.features   = PHY_GBIT_FEATURES,
.flags  = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data= &ksz9021_type,
+   .probe  = kszphy_probe,
.config_init= ksz9031_config_init,
.config_aneg= genphy_config_aneg,
.read_status= ksz9031_read_status,
@@ -989,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init= kszphy_config_init,
.config_aneg= ksz8873mll_config_aneg,
.read_status= ksz8873mll_read_status,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -1003,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init= kszphy_config_init,
.config_aneg= genphy_config_aneg,
.read_status= genphy_read_status,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 }, {
@@ -1017,9 +1007,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init= kszphy_config_init,
.config_aneg= ksz8873mll_config_aneg,
.read_status= ksz8873mll_read_status,
-   .get_sset_count = kszphy_get_sset_count,
-   .get_strings= kszphy_get_strings,
-   .get_stats  = kszphy_get_stats,
.suspend= genphy_suspend,
.resume = genphy_resume,
 } };
-- 
2.10.1



[PATCH] net: ethernet: ti: cpsw: fix NULL pointer dereference in switch mode

2017-01-31 Thread Grygorii Strashko
In switch mode on struct cpsw_slave->ndev field will be initialized with
proper value only for the one cpsw slave port, as result
cpsw_get_usage_count() will generate "Unable to handle kernel NULL pointer
dereference" exception when first ethernet interface is opening
cpsw_ndo_open(). This issue causes boot regression on AM335x EVM and
reproducible on am57xx-evm (switch mode).
Fix it by adding additional check for !cpsw->slaves[i].ndev in
cpsw_get_usage_count().

Cc: Ivan Khoronzhuk 
fixes: 03fd01ad0eea ("net: ethernet: ti: cpsw: don't duplicate ndev_running")
Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 67b7323..35a95dc 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -677,7 +677,7 @@ static int cpsw_get_usage_count(struct cpsw_common *cpsw)
u32 usage_count = 0;
 
for (i = 0; i < cpsw->data.slaves; i++)
-   if (netif_running(cpsw->slaves[i].ndev))
+   if (cpsw->slaves[i].ndev && netif_running(cpsw->slaves[i].ndev))
usage_count++;
 
return usage_count;
-- 
2.10.1.dirty



Re: [PATCH 2/2] ARM: dts: am335x-icev2: Add CPSW ethernet0 and ethernet1

2017-03-13 Thread Grygorii Strashko



On 03/13/2017 08:42 AM, Roger Quadros wrote:

Enable the 2 ethernet ports as CPSW ports in dual-mac mode

Signed-off-by: Roger Quadros 
[nsek...@ti.com: use AM33XX_IOPAD()]
Signed-off-by: Sekhar Nori 
---
 arch/arm/boot/dts/am335x-icev2.dts | 113 +
 1 file changed, 113 insertions(+)

diff --git a/arch/arm/boot/dts/am335x-icev2.dts 
b/arch/arm/boot/dts/am335x-icev2.dts
index a2ad076..cc343b0 100644
--- a/arch/arm/boot/dts/am335x-icev2.dts
+++ b/arch/arm/boot/dts/am335x-icev2.dts
@@ -201,6 +201,69 @@
AM33XX_IOPAD(0x938, PIN_OUTPUT_PULLUP | MUX_MODE1) /* 
(L16) gmii1_rxd2.uart3_txd */
>;
};
+




 &i2c0 {
@@ -350,3 +413,53 @@
pinctrl-0 = <&uart3_pins_default>;
status = "okay";
 };
+
+&gpio3 {
+   p4 {
+   gpio-hog;
+   gpios = <4 GPIO_ACTIVE_HIGH>;
+   output-high;
+   line-name = "PR1_MII_CTRL";
+   };
+
+   p10 {
+   gpio-hog;
+   gpios = <10 GPIO_ACTIVE_HIGH>;
+   /* ETH1 mux: Low for MII-PRU, high for RMII-CPSW */
+   output-high;
+   line-name = "MUX_MII_CTL1";
+   };
+};
+
+&cpsw_emac0 {
+   phy_id = <&davinci_mdio>, <1>;


this is deprecated definition.
pls, use phy-handle.


+   phy-mode = "rmii";
+   dual_emac_res_vlan = <1>;
+};
+
+&cpsw_emac1 {
+   phy_id = <&davinci_mdio>, <3>;


same


+   phy-mode = "rmii";
+   dual_emac_res_vlan = <2>;
+};
+
+&mac {
+   pinctrl-names = "default", "sleep";
+   pinctrl-0 = <&cpsw_default>;
+   pinctrl-1 = <&cpsw_sleep>;
+   status = "okay";
+   dual_emac;
+};
+
+&phy_sel {
+   rmii-clock-ext;
+};
+
+&davinci_mdio {
+   pinctrl-names = "default", "sleep";
+   pinctrl-0 = <&davinci_mdio_default>;
+   pinctrl-1 = <&davinci_mdio_sleep>;
+   status = "okay";
+   reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
+   reset-delay-us = <2>;   /* PHY datasheet states 1uS min */
+};



--
regards,
-grygorii


Re: [PATCH 0/4] net: ethernet: ti: cpsw: correct common res usage

2017-01-09 Thread Grygorii Strashko

Hi Ivan,

On 01/08/2017 10:40 AM, Ivan Khoronzhuk wrote:

This series is intended to remove unneeded redundancies connected with
common resource usage function.

Based on net-next/master
Tested on am572x idk

Ivan Khoronzhuk (4):
  net: ethernet: ti: cpsw: remove dual check from common res usage
function
  net: ethernet: ti: cpsw: don't disable interrupts in ndo_open
  net: ethernet: ti: cpsw: don't duplicate ndev_running
  net: ethernet: ti: cpsw: don't duplicate common res in rx handler



thanks for the patches - I'll need some time to review them.


 drivers/net/ethernet/ti/cpsw.c | 57 ++
 1 file changed, 19 insertions(+), 38 deletions(-)



--
regards,
-grygorii


Re: [PATCH 3/4] net: ethernet: ti: cpsw: don't duplicate ndev_running

2017-01-09 Thread Grygorii Strashko


On 01/08/2017 10:41 AM, Ivan Khoronzhuk wrote:
> No need to create additional vars to identify if interface is running.
> So simplify code by removing redundant var and checking usage counter
> instead.
> 
> Signed-off-by: Ivan Khoronzhuk 
> ---
>  drivers/net/ethernet/ti/cpsw.c | 14 --
>  1 file changed, 4 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index 40d7fc9..daae87f 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -357,7 +357,6 @@ struct cpsw_slave {
>   struct phy_device   *phy;
>   struct net_device   *ndev;
>   u32 port_vlan;
> - u32 open_stat;
>  };
>  
>  static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
> @@ -1241,7 +1240,7 @@ static int cpsw_common_res_usage_state(struct 
> cpsw_common *cpsw)
>   u32 usage_count = 0;
>  
>   for (i = 0; i < cpsw->data.slaves; i++)
> - if (cpsw->slaves[i].open_stat)
> + if (netif_running(cpsw->slaves[i].ndev))
>   usage_count++;

Not sure this will work as you expected, but may be I've missed smth :(

code in static int __dev_open(struct net_device *dev)
..
set_bit(__LINK_STATE_START, &dev->state);

if (ops->ndo_validate_addr)
ret = ops->ndo_validate_addr(dev);

if (!ret && ops->ndo_open)
ret = ops->ndo_open(dev);

netpoll_poll_enable(dev);

if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
..

so, netif_running(ndev) will start returning true before calling 
ops->ndo_open(dev);

>  
>   return usage_count;
> @@ -1502,7 +1501,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
>CPSW_RTL_VERSION(reg));
>  
>   /* initialize host and slave ports */
> - if (!cpsw_common_res_usage_state(cpsw))
> + if (cpsw_common_res_usage_state(cpsw) < 2)

Ah. You've changed the condition here.

I think it might be reasonable to hide this inside cpsw_common_res_usage_state()
and seems it can be renamed to smth like cpsw_is_running().


>   cpsw_init_host_port(priv);
>   for_each_slave(priv, cpsw_slave_open, priv);
>  
> @@ -1513,7 +1512,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
>   cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan,
> ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
>  
> - if (!cpsw_common_res_usage_state(cpsw)) {
> + if (cpsw_common_res_usage_state(cpsw) < 2) {
>   /* disable priority elevation */
>   __raw_writel(0, &cpsw->regs->ptype);
>  
> @@ -1556,9 +1555,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
>   cpdma_ctlr_start(cpsw->dma);
>   cpsw_intr_enable(cpsw);
>  
> - if (cpsw->data.dual_emac)
> - cpsw->slaves[priv->emac_port].open_stat = true;
> -
>   return 0;
>  
>  err_cleanup:
> @@ -1578,7 +1574,7 @@ static int cpsw_ndo_stop(struct net_device *ndev)
>   netif_tx_stop_all_queues(priv->ndev);
>   netif_carrier_off(priv->ndev);
>  
> - if (cpsw_common_res_usage_state(cpsw) <= 1) {
> + if (!cpsw_common_res_usage_state(cpsw)) {

and here __LINK_STATE_START will be cleared before calling ops->ndo_stop(dev);
So, from one side netif_running(ndev) usage will simplify 
cpsw_common_res_usage_state() internals,
but from another side - it will make places where it's used even more entangled 
:( as for me,
because when cpsw_common_res_usage_state() will return 1 in cpsw_ndo_open() it 
will mean
"no interfaces is really running yet", but the same value 1 in cpsw_ndo_stop()
will mean "there are still one is running".

>   napi_disable(&cpsw->napi_rx);
>   napi_disable(&cpsw->napi_tx);
>   cpts_unregister(cpsw->cpts);
> @@ -1592,8 +1588,6 @@ static int cpsw_ndo_stop(struct net_device *ndev)
>   cpsw_split_res(ndev);
>  
>   pm_runtime_put_sync(cpsw->dev);
> - if (cpsw->data.dual_emac)
> - cpsw->slaves[priv->emac_port].open_stat = false;
>   return 0;
>  }
>  
> 

-- 
regards,
-grygorii


Re: [PATCH 2/2] ARM: dts: dra72-evm-revc: enable irqs for dp83867 eth phys

2017-01-09 Thread Grygorii Strashko



On 01/06/2017 03:54 PM, Tony Lindgren wrote:

* Grygorii Strashko  [170106 12:56]:

TI DRA72-EVM Rev C has two DP83867 ethernet phys which support IRQ
generation in case of phy/link status changes. The INT/PWDN lines from both
DP83867 phys are wired to DRA7 gpio6.16, so reflect the same in DT.


Hmm not seeing the patch 1/2 here.. Can this one be queued separately?
Is it for v4.11 or a fix?


This is for next (v4.11) and there is just a mistake in subj, Sry.


Signed-off-by: Grygorii Strashko 
---
 arch/arm/boot/dts/dra72-evm-revc.dts | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/dra72-evm-revc.dts 
b/arch/arm/boot/dts/dra72-evm-revc.dts
index c3d939c..3ecac56 100644
--- a/arch/arm/boot/dts/dra72-evm-revc.dts
+++ b/arch/arm/boot/dts/dra72-evm-revc.dts
@@ -68,6 +68,8 @@
ti,tx-internal-delay = ;
ti,fifo-depth = ;
ti,min-output-impedance;
+   interrupt-parent = <&gpio6>;
+   interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
};

dp83867_1: ethernet-phy@3 {
@@ -75,6 +77,8 @@
ti,rx-internal-delay = ;
ti,tx-internal-delay = ;
ti,fifo-depth = ;
-   ti,min-output-imepdance;
+   ti,min-output-impedance;
+   interrupt-parent = <&gpio6>;
+   interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
};
 };
--
2.10.1.dirty



--
regards,
-grygorii


Re: [PATCH 3/4] net: ethernet: ti: cpsw: don't duplicate ndev_running

2017-01-12 Thread Grygorii Strashko


On 01/10/2017 07:56 PM, Ivan Khoronzhuk wrote:
> On Mon, Jan 09, 2017 at 11:25:38AM -0600, Grygorii Strashko wrote:
>>
>>
>> On 01/08/2017 10:41 AM, Ivan Khoronzhuk wrote:
>>> No need to create additional vars to identify if interface is running.
>>> So simplify code by removing redundant var and checking usage counter
>>> instead.
>>>
>>> Signed-off-by: Ivan Khoronzhuk 
>>> ---
>>>  drivers/net/ethernet/ti/cpsw.c | 14 --
>>>  1 file changed, 4 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
>>> index 40d7fc9..daae87f 100644
>>> --- a/drivers/net/ethernet/ti/cpsw.c
>>> +++ b/drivers/net/ethernet/ti/cpsw.c
>>> @@ -357,7 +357,6 @@ struct cpsw_slave {
>>> struct phy_device   *phy;
>>> struct net_device   *ndev;
>>> u32 port_vlan;
>>> -   u32 open_stat;
>>>  };
>>>  
>>>  static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
>>> @@ -1241,7 +1240,7 @@ static int cpsw_common_res_usage_state(struct 
>>> cpsw_common *cpsw)
>>> u32 usage_count = 0;
>>>  
>>> for (i = 0; i < cpsw->data.slaves; i++)
>>> -   if (cpsw->slaves[i].open_stat)
>>> +   if (netif_running(cpsw->slaves[i].ndev))
>>> usage_count++;
>>
>> Not sure this will work as you expected, but may be I've missed smth :(
> I've changed conditions, will work.
> 
>>
>> code in static int __dev_open(struct net_device *dev)
>> ..
>>  set_bit(__LINK_STATE_START, &dev->state);
>>
>>  if (ops->ndo_validate_addr)
>>  ret = ops->ndo_validate_addr(dev);
>>
>>  if (!ret && ops->ndo_open)
>>  ret = ops->ndo_open(dev);
>>
>>  netpoll_poll_enable(dev);
>>
>>  if (ret)
>>  clear_bit(__LINK_STATE_START, &dev->state);
>> ..
>>
>> so, netif_running(ndev) will start returning true before calling 
>> ops->ndo_open(dev);
> Yes, It's done bearing it in mind of course.
> 
>>
>>>  
>>> return usage_count;
>>> @@ -1502,7 +1501,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
>>>  CPSW_RTL_VERSION(reg));
>>>  
>>> /* initialize host and slave ports */
>>> -   if (!cpsw_common_res_usage_state(cpsw))
>>> +   if (cpsw_common_res_usage_state(cpsw) < 2)
>>
>> Ah. You've changed the condition here.
>>
>> I think it might be reasonable to hide this inside 
>> cpsw_common_res_usage_state()
>> and seems it can be renamed to smth like cpsw_is_running().
> It probably needs to be renamed to smth a little different,
> like cpsw_get_usage_count ...or cpsw_get_open_ndev_count

cpsw_get_usage_count () sounds good

> 
>>
>>
>>> cpsw_init_host_port(priv);
>>> for_each_slave(priv, cpsw_slave_open, priv);
>>>  
>>> @@ -1513,7 +1512,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
>>> cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan,
>>>   ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
>>>  
>>> -   if (!cpsw_common_res_usage_state(cpsw)) {
>>> +   if (cpsw_common_res_usage_state(cpsw) < 2) {
>>> /* disable priority elevation */
>>> __raw_writel(0, &cpsw->regs->ptype);
>>>  
>>> @@ -1556,9 +1555,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
>>> cpdma_ctlr_start(cpsw->dma);
>>> cpsw_intr_enable(cpsw);
>>>  
>>> -   if (cpsw->data.dual_emac)
>>> -   cpsw->slaves[priv->emac_port].open_stat = true;
>>> -
>>> return 0;
>>>  
>>>  err_cleanup:
>>> @@ -1578,7 +1574,7 @@ static int cpsw_ndo_stop(struct net_device *ndev)
>>> netif_tx_stop_all_queues(priv->ndev);
>>> netif_carrier_off(priv->ndev);
>>>  
>>> -   if (cpsw_common_res_usage_state(cpsw) <= 1) {
>>> +   if (!cpsw_common_res_usage_state(cpsw)) {
>>
>> and here __LINK_STATE_START will be cleared before calling 
>> ops->ndo_stop(dev);
> Actually it's changed because of it.
> 
>> So, from one side netif_running(ndev) usage will simplify 
>> cpsw_common_res_usage_state() internals,
>>

[PATCH] ARM: dts: am57xx-beagle-x15: implement errata "Ethernet RGMII2 Limited to 10/100 Mbps"

2017-01-12 Thread Grygorii Strashko
According to errata i880 description the speed of Ethernet port 1 on AM572x
SoCs rev 1.1 shuld be limited to 10/100Mbps, because RGMII2 Switching
Characteristics are not compatible with 1000 Mbps operation [1].
The issue is fixed with Rev 2.0 silicon.

Hence, rework Beagle-X15 and Begale-X15-revb1 to use phy-handle instead of
phy_id and apply corresponding limitation to the Ethernet Phy 1.

[1] http://www.ti.com/lit/er/sprz429j/sprz429j.pdf

Signed-off-by: Grygorii Strashko 
---
 arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi | 14 --
 arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts   |  5 +
 arch/arm/boot/dts/am57xx-beagle-x15.dts |  5 +
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi 
b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
index 78bee26..0429fa0 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
@@ -421,19 +421,29 @@
  <&dra7_pmx_core 0x3f8>;
 };
 
+&davinci_mdio {
+   phy0: ethernet-phy@1 {
+   reg = <1>;
+   };
+
+   phy1: ethernet-phy@2 {
+   reg = <2>;
+   };
+};
+
 &mac {
status = "okay";
dual_emac;
 };
 
 &cpsw_emac0 {
-   phy_id = <&davinci_mdio>, <1>;
+   phy-handle = <&phy0>;
phy-mode = "rgmii";
dual_emac_res_vlan = <1>;
 };
 
 &cpsw_emac1 {
-   phy_id = <&davinci_mdio>, <2>;
+   phy-handle = <&phy1>;
phy-mode = "rgmii";
dual_emac_res_vlan = <2>;
 };
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts 
b/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts
index ca85570..39a92af 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts
@@ -22,3 +22,8 @@
vmmc-supply = <&vdd_3v3>;
vmmc-aux-supply = <&ldo1_reg>;
 };
+
+/* errata i880 "Ethernet RGMII2 Limited to 10/100 Mbps" */
+&phy1 {
+   max-speed = <100>;
+};
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts 
b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 8c66f2e..19a60a1 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -22,3 +22,8 @@
 &mmc1 {
vmmc-supply = <&ldo1_reg>;
 };
+
+/* errata i880 "Ethernet RGMII2 Limited to 10/100 Mbps" */
+&phy1 {
+   max-speed = <100>;
+};
-- 
2.10.1.dirty



Re: [PATCH v2 0/5] net: ethernet: ti: cpsw: correct common res usage

2017-01-19 Thread Grygorii Strashko



On 01/19/2017 10:58 AM, Ivan Khoronzhuk wrote:

This series is intended to remove unneeded redundancies connected with
common resource usage function.

Since v1:
- changed name to cpsw_get_usage_count()
- added comments to open/closw for cpsw_get_usage_count()
- added patch:
  net: ethernet: ti: cpsw: clarify ethtool ops changing num of descs

Based on net-next/master

Ivan Khoronzhuk (5):
  net: ethernet: ti: cpsw: remove dual check from common res usage
function
  net: ethernet: ti: cpsw: don't disable interrupts in ndo_open
  net: ethernet: ti: cpsw: don't duplicate ndev_running
  net: ethernet: ti: cpsw: don't duplicate common res in rx handler
  net: ethernet: ti: cpsw: clarify ethtool ops changing num of descs

 drivers/net/ethernet/ti/cpsw.c | 200 ++---
 1 file changed, 88 insertions(+), 112 deletions(-)



Reviewed-by: Grygorii Strashko 

--
regards,
-grygorii


Re: [PATCH net 3/3] cpsw/netcp: work around reverse cpts dependency

2016-12-21 Thread Grygorii Strashko



On 12/16/2016 03:19 AM, Arnd Bergmann wrote:

The dependency is reversed: cpsw and netcp call into cpts,
but cpts depends on the other two in Kconfig. This can lead
to cpts being a loadable module and its callers built-in:

drivers/net/ethernet/ti/cpsw.o: In function `cpsw_remove':
cpsw.c:(.text.cpsw_remove+0xd0): undefined reference to `cpts_release'
drivers/net/ethernet/ti/cpsw.o: In function `cpsw_rx_handler':
cpsw.c:(.text.cpsw_rx_handler+0x2dc): undefined reference to `cpts_rx_timestamp'
drivers/net/ethernet/ti/cpsw.o: In function `cpsw_tx_handler':
cpsw.c:(.text.cpsw_tx_handler+0x7c): undefined reference to `cpts_tx_timestamp'
drivers/net/ethernet/ti/cpsw.o: In function `cpsw_ndo_stop':

As a workaround, I'm introducing another Kconfig symbol to
control the compilation of cpts, while making the actual
module controlled by a silent symbol that is =y when necessary.

Fixes: 6246168b4a38 ("net: ethernet: ti: netcp: add support of cpts")
Signed-off-by: Arnd Bergmann 


Thanks for the fix.
 I've tried to test as much combination as possible,
but seems not of them:(

Reviewed-by: Grygorii Strashko 


---
 drivers/net/ethernet/ti/Kconfig | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 366e29ff8605..c114efcd1575 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -73,8 +73,8 @@ config TI_CPSW
  To compile this driver as a module, choose M here: the module
  will be called cpsw.

-config TI_CPTS
-   tristate "TI Common Platform Time Sync (CPTS) Support"
+config TI_CPTS_ENABLE
+   bool "TI Common Platform Time Sync (CPTS) Support"
depends on TI_CPSW || TI_KEYSTONE_NETCP
depends on POSIX_TIMERS
select PTP_1588_CLOCK
@@ -84,6 +84,12 @@ config TI_CPTS
  The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the
  driver offers a PTP Hardware Clock.

+config TI_CPTS
+   tristate
+   depends on TI_CPTS_ENABLE
+   default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y
+   default m
+
 config TI_KEYSTONE_NETCP
tristate "TI Keystone NETCP Core Support"
select TI_CPSW_ALE



--
regards,
-grygorii


[PATCH] net: ethernet: ti: davinci_cpdma: fix access to uninitialized variable in cpdma_chan_set_descs()

2016-12-28 Thread Grygorii Strashko
Now below code sequence causes "Unable to handle kernel NULL pointer
dereference.." exception and system crash during CPSW CPDMA initialization:

cpsw_probe
|-cpdma_chan_create (TX channel)
  |-cpdma_chan_split_pool
|-cpdma_chan_set_descs(for TX channels)
|-cpdma_chan_set_descs(for RX channels) [1]

- and -
static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
 int rx, int desc_num,
 int per_ch_desc)
{
struct cpdma_chan *chan, *most_chan = NULL;

...

for (i = min; i < max; i++) {
chan = ctlr->channels[i];
if (!chan)
continue;
...

if (most_dnum < chan->desc_num) {
most_dnum = chan->desc_num;
most_chan = chan;
}
}
/* use remains */
most_chan->desc_num += desc_cnt; [2]
}

So, most_chan value will never be reassigned when cpdma_chan_set_descs() is
called second time [1], because there are no RX channels yet and system
will crash at [2].

Hence, fix the issue by checking most_chan for NULL before accessing it.

Fixes: 0fc6432cc78d ("net: ethernet: ti: davinci_cpdma: add weight function for 
channels")
Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index 36518fc..b349d572 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -708,7 +708,8 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
}
}
/* use remains */
-   most_chan->desc_num += desc_cnt;
+   if (most_chan)
+   most_chan->desc_num += desc_cnt;
 }
 
 /**
-- 
2.10.1.dirty



Re: [PATCH] net: ethernet: ti: davinci_cpdma: fix access to uninitialized variable in cpdma_chan_set_descs()

2016-12-29 Thread Grygorii Strashko
Hi Ivan,

On 12/28/2016 07:49 PM, Ivan Khoronzhuk wrote:
> On Wed, Dec 28, 2016 at 05:42:13PM -0600, Grygorii Strashko wrote:
>> Now below code sequence causes "Unable to handle kernel NULL pointer
>> dereference.." exception and system crash during CPSW CPDMA initialization:
>>
>> cpsw_probe
>> |-cpdma_chan_create (TX channel)
>>   |-cpdma_chan_split_pool
>> |-cpdma_chan_set_descs(for TX channels)
>> |-cpdma_chan_set_descs(for RX channels) [1]
>>
>> - and -
>> static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
>>   int rx, int desc_num,
>>   int per_ch_desc)
>> {
>>  struct cpdma_chan *chan, *most_chan = NULL;
>>
>> ...
>>
>>  for (i = min; i < max; i++) {
>>  chan = ctlr->channels[i];
>>  if (!chan)
>>  continue;
>> ...
>>
>>  if (most_dnum < chan->desc_num) {
>>  most_dnum = chan->desc_num;
>>  most_chan = chan;
>>  }
>>  }
>>  /* use remains */
>>  most_chan->desc_num += desc_cnt; [2]
>> }
>>
>> So, most_chan value will never be reassigned when cpdma_chan_set_descs() is
>> called second time [1], because there are no RX channels yet and system
>> will crash at [2].
> 
> How did you get this?
> I just remember as I fixed it before sending patchset.
> 
> Maybe it was some experiment with it.
> I just wonder and want to find actual reason what's happening.
> 
> Look bellow:
> 
> cpsw_probe
> |-cpdma_chan_create (TX channel)
>   |-cpdma_chan_split_pool
> |-cpdma_chan_set_descs(for TX channels)
> |-cpdma_chan_set_descs(for RX channels) [1]
> 
> |-cpdma_chan_set_descs(for RX channels) in case you'be described has to be
> called with rx_desc_num = 0, because all descs are assigned already for tx
> channel. And, if desc_num = 0, cpdma_chan_set_descs just exits and no issues.
> So, could you please explain how you get this, in what circumstances.

You are right. I've hit this issue while working on other feature which allows
to split pool between RX and TX path and as part of it cpdma_chan_set_descs()
is called with different set of arguments. I probably will just squash it in my 
changes or 
or send as part of my series.


-- 
regards,
-grygorii


[PATCH] net: phy: dp83867: fix irq generation

2017-01-05 Thread Grygorii Strashko
For proper IRQ generation by DP83867 phy the INT/PWDN pin has to be
programmed as an interrupt output instead of a Powerdown input in
Configuration Register 3 (CFG3), Address 0x001E, bit 7 INT_OE = 1. The
current driver doesn't do this and as result IRQs will not be generated by
DP83867 phy even if they are properly configured in DT.

Hence, fix IRQ generation by properly configuring CFG3.INT_OE bit and
ensure that Link Status Change (LINK_STATUS_CHNG_INT) and Auto-Negotiation
Complete (AUTONEG_COMP_INT) interrupt are enabled. After this the DP83867
driver will work properly in interrupt enabled mode.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/phy/dp83867.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 1b63924..e84ae08 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -29,6 +29,7 @@
 #define MII_DP83867_MICR   0x12
 #define MII_DP83867_ISR0x13
 #define DP83867_CTRL   0x1f
+#define DP83867_CFG3   0x1e
 
 /* Extended Registers */
 #define DP83867_RGMIICTL   0x0032
@@ -98,6 +99,8 @@ static int dp83867_config_intr(struct phy_device *phydev)
micr_status |=
(MII_DP83867_MICR_AN_ERR_INT_EN |
MII_DP83867_MICR_SPEED_CHNG_INT_EN |
+   MII_DP83867_MICR_AUTONEG_COMP_INT_EN |
+   MII_DP83867_MICR_LINK_STS_CHNG_INT_EN |
MII_DP83867_MICR_DUP_MODE_CHNG_INT_EN |
MII_DP83867_MICR_SLEEP_MODE_CHNG_INT_EN);
 
@@ -214,6 +217,13 @@ static int dp83867_config_init(struct phy_device *phydev)
}
}
 
+   /* Enable Interrupt output INT_OE in CFG3 register */
+   if (phy_interrupt_is_valid(phydev)) {
+   val = phy_read(phydev, DP83867_CFG3);
+   val |= BIT(7);
+   phy_write(phydev, DP83867_CFG3, val);
+   }
+
return 0;
 }
 
-- 
2.10.1.dirty



Re: [PATCH] net: phy: dp83867: fix irq generation

2017-01-06 Thread Grygorii Strashko



On 01/05/2017 04:10 PM, Florian Fainelli wrote:

On 01/05/2017 12:48 PM, Grygorii Strashko wrote:

For proper IRQ generation by DP83867 phy the INT/PWDN pin has to be
programmed as an interrupt output instead of a Powerdown input in
Configuration Register 3 (CFG3), Address 0x001E, bit 7 INT_OE = 1. The
current driver doesn't do this and as result IRQs will not be generated by
DP83867 phy even if they are properly configured in DT.

Hence, fix IRQ generation by properly configuring CFG3.INT_OE bit and
ensure that Link Status Change (LINK_STATUS_CHNG_INT) and Auto-Negotiation
Complete (AUTONEG_COMP_INT) interrupt are enabled. After this the DP83867
driver will work properly in interrupt enabled mode.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/phy/dp83867.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 1b63924..e84ae08 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -29,6 +29,7 @@
 #define MII_DP83867_MICR   0x12
 #define MII_DP83867_ISR0x13
 #define DP83867_CTRL   0x1f
+#define DP83867_CFG3   0x1e

 /* Extended Registers */
 #define DP83867_RGMIICTL   0x0032
@@ -98,6 +99,8 @@ static int dp83867_config_intr(struct phy_device *phydev)
micr_status |=
(MII_DP83867_MICR_AN_ERR_INT_EN |
MII_DP83867_MICR_SPEED_CHNG_INT_EN |
+   MII_DP83867_MICR_AUTONEG_COMP_INT_EN |
+   MII_DP83867_MICR_LINK_STS_CHNG_INT_EN |
MII_DP83867_MICR_DUP_MODE_CHNG_INT_EN |
MII_DP83867_MICR_SLEEP_MODE_CHNG_INT_EN);

@@ -214,6 +217,13 @@ static int dp83867_config_init(struct phy_device *phydev)
}
}

+   /* Enable Interrupt output INT_OE in CFG3 register */
+   if (phy_interrupt_is_valid(phydev)) {
+   val = phy_read(phydev, DP83867_CFG3);
+   val |= BIT(7);
+   phy_write(phydev, DP83867_CFG3, val);
+   }


Don't you need to clear that bit in the case phy_interrupt_is_valid()
returns false?


Not sure I need to touch it in this case - default value is 0
and Linux will not configure any IRQ.



Other than that:

Reviewed-by: Florian Fainelli 



--
regards,
-grygorii


[PATCH v2 4/7] net: ethernet: ti: cpdma: use devm_ioremap

2017-01-06 Thread Grygorii Strashko
Use devm_ioremap() and simplify the code.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index 7080eb9..b229bf3 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -195,8 +195,6 @@ static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr)
if (pool->cpumap)
dma_free_coherent(ctlr->dev, pool->mem_size, pool->cpumap,
  pool->phys);
-   else
-   iounmap(pool->iomap);
 }
 
 /*
@@ -231,7 +229,8 @@ int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr)
 
if (cpdma_params->desc_mem_phys) {
pool->phys  = cpdma_params->desc_mem_phys;
-   pool->iomap = ioremap(pool->phys, pool->mem_size);
+   pool->iomap = devm_ioremap(ctlr->dev, pool->phys,
+  pool->mem_size);
pool->hw_addr = cpdma_params->desc_hw_addr;
} else {
pool->cpumap = dma_alloc_coherent(ctlr->dev,  pool->mem_size,
-- 
2.10.1.dirty



[PATCH v2 0/7] net: ethernet: ti: cpsw: support placing CPDMA descriptors into DDR

2017-01-06 Thread Grygorii Strashko
This series intended to add support for placing CPDMA descriptors into DDR by
introducing new module parameter "descs_pool_size" to specify size of 
descriptor's
pool. The "descs_pool_size" defines total number of CPDMA
CPPI descriptors to be used for both ingress/egress packets
processing. If not specified - the default value 256 will be used
which will allow to place descriptor's pool into the internal CPPI
RAM.

In addition, added ability to re-split CPDMA pool of descriptors between RX and 
TX
path via ethtool '-G' command wich will allow to configure and fix number
of descriptors used by RX and TX path, which, then, will be split between
RX/TX channels proportionally depending on number of RX/TX channels and
its weight. 

This allows significantly to reduce UDP packets drop rate
for bandwidth >301 Mbits/sec (am57x).  

Before enabling this feature, the am437x SoC has to be fixed as it's proved
that it's not working when CPDMA descriptors placed in DDR.
So, the patch 1 fixes this issue.

Grygorii Strashko (7):
  net: ethernet: ti: cpdma: am437x: allow descs to be plased in ddr
  net: ethernet: ti: cpdma: fix desc re-queuing
  net: ethernet: ti: cpdma: minimize number of parameters in
cpdma_desc_pool_create/destroy()
  net: ethernet: ti: cpdma: use devm_ioremap
  net: ethernet: ti: cpsw: add support for descs pool size configuration
  net: ethernet: ti: cpsw: add support for ringparam configuration
  Documentation: DT: net: cpsw: remove no_bd_ram property

 Documentation/devicetree/bindings/net/cpsw.txt |   3 -
 arch/arm/boot/dts/am33xx.dtsi  |   1 -
 arch/arm/boot/dts/am4372.dtsi  |   1 -
 arch/arm/boot/dts/dm814x.dtsi  |   1 -
 arch/arm/boot/dts/dra7.dtsi|   1 -
 drivers/net/ethernet/ti/cpsw.c |  98 ++-
 drivers/net/ethernet/ti/davinci_cpdma.c| 161 +++--
 drivers/net/ethernet/ti/davinci_cpdma.h|   5 +
 8 files changed, 199 insertions(+), 72 deletions(-)

-- 
2.10.1.dirty



[PATCH v2 2/7] net: ethernet: ti: cpdma: fix desc re-queuing

2017-01-06 Thread Grygorii Strashko
The currently processing cpdma descriptor with EOQ flag set may
contain two values in Next Descriptor Pointer field:
- valid pointer: means CPDMA missed addition of new desc in queue;
- null: no more descriptors in queue.
In the later case, it's not required to write to HDP register, but now
CPDMA does it.

Hence, add additional check for Next Descriptor Pointer != null in
cpdma_chan_process() function before writing in HDP register.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index d6f0ded..a53b384 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1159,7 +1159,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
chan->count--;
chan->stats.good_dequeue++;
 
-   if (status & CPDMA_DESC_EOQ) {
+   if ((status & CPDMA_DESC_EOQ) && chan->head) {
chan->stats.requeue++;
chan_write(chan, hdp, desc_phys(pool, chan->head));
}
-- 
2.10.1.dirty



[PATCH v2 7/7] Documentation: DT: net: cpsw: remove no_bd_ram property

2017-01-06 Thread Grygorii Strashko
Even if no_bd_ram property is described in TI CPSW bindings the support for
it has never been introduced in CPSW driver, so there are no real users of
it. Hence, remove no_bd_ram property from documentation and DT files.

Cc: 'Rob Herring '
Signed-off-by: Grygorii Strashko 
---
 Documentation/devicetree/bindings/net/cpsw.txt | 3 ---
 arch/arm/boot/dts/am33xx.dtsi  | 1 -
 arch/arm/boot/dts/am4372.dtsi  | 1 -
 arch/arm/boot/dts/dm814x.dtsi  | 1 -
 arch/arm/boot/dts/dra7.dtsi| 1 -
 5 files changed, 7 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
b/Documentation/devicetree/bindings/net/cpsw.txt
index ebda7c9..7cc15c9 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -23,7 +23,6 @@ Required properties:
 
 Optional properties:
 - ti,hwmods: Must be "cpgmac0"
-- no_bd_ram: Must be 0 or 1
 - dual_emac: Specifies Switch to act as Dual EMAC
 - syscon   : Phandle to the system control device node, which is
  the control module device of the am33x
@@ -70,7 +69,6 @@ Examples:
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
-   no_bd_ram = <0>;
rx_descs = <64>;
mac_control = <0x20>;
slaves = <2>;
@@ -99,7 +97,6 @@ Examples:
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
-   no_bd_ram = <0>;
rx_descs = <64>;
mac_control = <0x20>;
slaves = <2>;
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 64c8aa9..d458ceb 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -781,7 +781,6 @@
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
-   no_bd_ram = <0>;
mac_control = <0x20>;
slaves = <2>;
active_slave = <0>;
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index ac55f93..837aff1 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -669,7 +669,6 @@
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
-   no_bd_ram = <0>;
mac_control = <0x20>;
slaves = <2>;
active_slave = <0>;
diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi
index 1facc5f..4cbeb5c 100644
--- a/arch/arm/boot/dts/dm814x.dtsi
+++ b/arch/arm/boot/dts/dm814x.dtsi
@@ -509,7 +509,6 @@
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
-   no_bd_ram = <0>;
mac_control = <0x20>;
slaves = <2>;
active_slave = <0>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index addb753..b69df91 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1707,7 +1707,6 @@
cpdma_channels = <8>;
ale_entries = <1024>;
bd_ram_size = <0x2000>;
-   no_bd_ram = <0>;
mac_control = <0x20>;
slaves = <2>;
active_slave = <0>;
-- 
2.10.1.dirty



[PATCH v2 3/7] net: ethernet: ti: cpdma: minimize number of parameters in cpdma_desc_pool_create/destroy()

2017-01-06 Thread Grygorii Strashko
Update cpdma_desc_pool_create/destroy() to accept only one parameter
struct cpdma_ctlr*, as this structure contains all required
information for pool creation/destruction.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 62 -
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index a53b384..7080eb9 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -181,8 +181,10 @@ static struct cpdma_control_info controls[] = {
 (directed << CPDMA_TO_PORT_SHIFT));\
} while (0)
 
-static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool)
+static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr)
 {
+   struct cpdma_desc_pool *pool = ctlr->pool;
+
if (!pool)
return;
 
@@ -191,7 +193,7 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool 
*pool)
 gen_pool_size(pool->gen_pool),
 gen_pool_avail(pool->gen_pool));
if (pool->cpumap)
-   dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap,
+   dma_free_coherent(ctlr->dev, pool->mem_size, pool->cpumap,
  pool->phys);
else
iounmap(pool->iomap);
@@ -203,37 +205,37 @@ static void cpdma_desc_pool_destroy(struct 
cpdma_desc_pool *pool)
  * devices (e.g. cpsw switches) use plain old memory.  Descriptor pools
  * abstract out these details
  */
-static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
-   int size, int align)
+int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr)
 {
+   struct cpdma_params *cpdma_params = &ctlr->params;
struct cpdma_desc_pool *pool;
-   int ret;
+   int ret = -ENOMEM;
 
-   pool = devm_kzalloc(dev, sizeof(*pool), GFP_KERNEL);
+   pool = devm_kzalloc(ctlr->dev, sizeof(*pool), GFP_KERNEL);
if (!pool)
goto gen_pool_create_fail;
+   ctlr->pool = pool;
 
-   pool->dev   = dev;
-   pool->mem_size  = size;
-   pool->desc_size = ALIGN(sizeof(struct cpdma_desc), align);
-   pool->num_desc  = size / pool->desc_size;
+   pool->mem_size  = cpdma_params->desc_mem_size;
+   pool->desc_size = ALIGN(sizeof(struct cpdma_desc),
+   cpdma_params->desc_align);
+   pool->num_desc  = pool->mem_size / pool->desc_size;
 
-   pool->gen_pool = devm_gen_pool_create(dev, ilog2(pool->desc_size), -1,
- "cpdma");
+   pool->gen_pool = devm_gen_pool_create(ctlr->dev, ilog2(pool->desc_size),
+ -1, "cpdma");
if (IS_ERR(pool->gen_pool)) {
-   dev_err(dev, "pool create failed %ld\n",
-   PTR_ERR(pool->gen_pool));
+   ret = PTR_ERR(pool->gen_pool);
+   dev_err(ctlr->dev, "pool create failed %d\n", ret);
goto gen_pool_create_fail;
}
 
-   if (phys) {
-   pool->phys  = phys;
-   pool->iomap = ioremap(phys, size); /* should be memremap? */
-   pool->hw_addr = hw_addr;
+   if (cpdma_params->desc_mem_phys) {
+   pool->phys  = cpdma_params->desc_mem_phys;
+   pool->iomap = ioremap(pool->phys, pool->mem_size);
+   pool->hw_addr = cpdma_params->desc_hw_addr;
} else {
-   pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr,
- GFP_KERNEL);
+   pool->cpumap = dma_alloc_coherent(ctlr->dev,  pool->mem_size,
+ &pool->hw_addr, GFP_KERNEL);
pool->iomap = (void __iomem __force *)pool->cpumap;
pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this 
value */
}
@@ -244,16 +246,17 @@ cpdma_desc_pool_create(struct device *dev, u32 phys, 
dma_addr_t hw_addr,
ret = gen_pool_add_virt(pool->gen_pool, (unsigned long)pool->iomap,
pool->phys, pool->mem_size, -1);
if (ret < 0) {
-   dev_err(dev, "pool add failed %d\n", ret);
+   dev_err(ctlr->dev, "pool add failed %d\n", ret);
goto gen_pool_add_virt_fail;
}
 
-   return pool;
+   return 0;
 
 gen_pool_add_virt_fail:
-   cpdma_desc_pool_destroy(pool);
+   cpdma_desc_pool_destroy(ctlr);
 gen_pool_create_fail:
-   return NULL;
+   ctlr->po

[PATCH v2 1/7] net: ethernet: ti: cpdma: am437x: allow descs to be plased in ddr

2017-01-06 Thread Grygorii Strashko
It's observed that cpsw/cpdma is not working properly when CPPI
descriptors are placed in DDR instead of internal CPPI RAM on am437x
SoC:
- rx/tx silently stops processing packets;
- or - after boot it's working for sometime, but stuck once Network
load is increased (ping is working, but iperf is not).
(The same issue has not been reproduced on am335x and am57xx).

It seems that write to HDP register processed faster by interconnect
than writing of descriptor memory buffer in DDR, which is probably
caused by store buffer / write buffer differences as these functions
are implemented differently across devices. So, to fix this i come up
with two minimal, required changes:

1) all accesses to the channel register HDP/CP/RXFREE registers should
be done using sync IO accessors readl()/writel(), because all previous
memory writes writes have to be completed before starting channel
(write to HDP) or completing desc processing.

2) the change 1 only doesn't work on am437x and additional reading of
desc's field is required right after the new descriptor was filled
with data and before pointer on it will be stored in
prev_desc->hw_next field or HDP register.

In addition, to above changes this patch eliminates all relaxed ordering
I/O accessors in this driver as suggested by David Miller to avoid such
kind of issues in the future, but with one exception - relaxed IO accessors
will still be used to fill desc in cpdma_chan_submit(), which is safe as
there is read barrier at the end of write sequence, and because sync IO
accessors usage here will affect on net performance.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 40 ++---
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index 36518fc..d6f0ded 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -166,12 +166,12 @@ static struct cpdma_control_info controls[] = {
 #define num_chan   params.num_chan
 
 /* various accessors */
-#define dma_reg_read(ctlr, ofs)__raw_readl((ctlr)->dmaregs + 
(ofs))
-#define chan_read(chan, fld)   __raw_readl((chan)->fld)
-#define desc_read(desc, fld)   __raw_readl(&(desc)->fld)
-#define dma_reg_write(ctlr, ofs, v)__raw_writel(v, (ctlr)->dmaregs + (ofs))
-#define chan_write(chan, fld, v)   __raw_writel(v, (chan)->fld)
-#define desc_write(desc, fld, v)   __raw_writel((u32)(v), &(desc)->fld)
+#define dma_reg_read(ctlr, ofs)readl((ctlr)->dmaregs + (ofs))
+#define chan_read(chan, fld)   readl((chan)->fld)
+#define desc_read(desc, fld)   readl(&(desc)->fld)
+#define dma_reg_write(ctlr, ofs, v)writel(v, (ctlr)->dmaregs + (ofs))
+#define chan_write(chan, fld, v)   writel(v, (chan)->fld)
+#define desc_write(desc, fld, v)   writel((u32)(v), &(desc)->fld)
 
 #define cpdma_desc_to_port(chan, mode, directed)   \
do {\
@@ -542,10 +542,10 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
}
 
for (i = 0; i < ctlr->num_chan; i++) {
-   __raw_writel(0, ctlr->params.txhdp + 4 * i);
-   __raw_writel(0, ctlr->params.rxhdp + 4 * i);
-   __raw_writel(0, ctlr->params.txcp + 4 * i);
-   __raw_writel(0, ctlr->params.rxcp + 4 * i);
+   writel(0, ctlr->params.txhdp + 4 * i);
+   writel(0, ctlr->params.rxhdp + 4 * i);
+   writel(0, ctlr->params.txcp + 4 * i);
+   writel(0, ctlr->params.rxcp + 4 * i);
}
 
dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0x);
@@ -1061,13 +1061,17 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void 
*token, void *data,
mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
cpdma_desc_to_port(chan, mode, directed);
 
-   desc_write(desc, hw_next,   0);
-   desc_write(desc, hw_buffer, buffer);
-   desc_write(desc, hw_len,len);
-   desc_write(desc, hw_mode,   mode | len);
-   desc_write(desc, sw_token,  token);
-   desc_write(desc, sw_buffer, buffer);
-   desc_write(desc, sw_len,len);
+   /* Relaxed IO accessors can be used here as there is read barrier
+* at the end of write sequence.
+*/
+   writel_relaxed(0, &desc->hw_next);
+   writel_relaxed(buffer, &desc->hw_buffer);
+   writel_relaxed(len, &desc->hw_len);
+   writel_relaxed(mode | len, &desc->hw_mode);
+   writel_relaxed(token, &desc->sw_token);
+   writel_relaxed(buffer, &desc->sw_buffer);
+   writel_relaxed(len, &desc->sw_len);
+   desc_read(desc, sw_len);
 
__cpdma_chan_submit(chan, desc);
 
@@ -1136

[PATCH v2 5/7] net: ethernet: ti: cpsw: add support for descs pool size configuration

2017-01-06 Thread Grygorii Strashko
The CPSW CPDMA can process buffer descriptors placed as in internal
CPPI RAM as in DDR. This patch adds support in CPSW and CPDMA for
descs_pool_size mudule parameter, which defines total number of CPDMA CPPI
descriptors to be used for both ingress/egress packets processing:
 - memory size, required for CPDMA descriptor pool, is calculated basing
on number of descriptors specified by user in descs_pool_size and
CPDMA descriptor size and allocated from coherent memory (CMA area);
 - CPDMA descriptor pool will be allocated in DDR if pool memory size >
internal CPPI RAM or use internal CPPI RAM otherwise;
 - if descs_pool_size not specified in DT - the default value 256 will
be used which will allow to place CPDMA descriptors pool into the
internal CPPI RAM (current default behaviour);
 - CPDMA will ignore descs_pool_size if descs_pool_size = 0 for
backward comaptiobility with davinci_emac.

descs_pool_size is boot time setting and can't be changed once
CPSW/CPDMA is initialized.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c  | 12 +---
 drivers/net/ethernet/ti/davinci_cpdma.c | 12 
 drivers/net/ethernet/ti/davinci_cpdma.h |  1 +
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b203143..d39875e 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -145,6 +145,7 @@ do {
\
cpsw->data.active_slave)
 #define IRQ_NUM2
 #define CPSW_MAX_QUEUES8
+#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
 
 static int debug_level;
 module_param(debug_level, int, 0);
@@ -158,6 +159,10 @@ static int rx_packet_max = CPSW_MAX_PACKET_SIZE;
 module_param(rx_packet_max, int, 0);
 MODULE_PARM_DESC(rx_packet_max, "maximum receive packet size (bytes)");
 
+static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT;
+module_param(descs_pool_size, int, 0444);
+MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
+
 struct cpsw_wr_regs {
u32 id_ver;
u32 soft_reset;
@@ -2969,6 +2974,7 @@ static int cpsw_probe(struct platform_device *pdev)
dma_params.has_ext_regs = true;
dma_params.desc_hw_addr = dma_params.desc_mem_phys;
dma_params.bus_freq_mhz = cpsw->bus_freq_mhz;
+   dma_params.descs_pool_size  = descs_pool_size;
 
cpsw->dma = cpdma_ctlr_create(&dma_params);
if (!cpsw->dma) {
@@ -3072,9 +3078,9 @@ static int cpsw_probe(struct platform_device *pdev)
goto clean_ale_ret;
}
 
-   cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n",
-   &ss_res->start, ndev->irq);
-
+   cpsw_notice(priv, probe,
+   "initialized device (regs %pa, irq %d, pool size %d)\n",
+   &ss_res->start, ndev->irq, dma_params.descs_pool_size);
if (cpsw->data.dual_emac) {
ret = cpsw_probe_dual_emac(priv);
if (ret) {
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index b229bf3..65e2f12 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -219,6 +219,18 @@ int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr)
cpdma_params->desc_align);
pool->num_desc  = pool->mem_size / pool->desc_size;
 
+   if (cpdma_params->descs_pool_size) {
+   /* recalculate memory size required cpdma descriptor pool
+* basing on number of descriptors specified by user and
+* if memory size > CPPI internal RAM size (desc_mem_size)
+* then switch to use DDR
+*/
+   pool->num_desc = cpdma_params->descs_pool_size;
+   pool->mem_size = pool->desc_size * pool->num_desc;
+   if (pool->mem_size > cpdma_params->desc_mem_size)
+   cpdma_params->desc_mem_phys = 0;
+   }
+
pool->gen_pool = devm_gen_pool_create(ctlr->dev, ilog2(pool->desc_size),
  -1, "cpdma");
if (IS_ERR(pool->gen_pool)) {
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h 
b/drivers/net/ethernet/ti/davinci_cpdma.h
index 4a167db..cb45f8f 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -37,6 +37,7 @@ struct cpdma_params {
int desc_mem_size;
int desc_align;
u32 bus_freq_mhz;
+   u32 descs_pool_size;
 
/*
 * Some instances of embedded cpdma controllers have extra control and
-- 
2.10.1.dirty



[PATCH v2 6/7] net: ethernet: ti: cpsw: add support for ringparam configuration

2017-01-06 Thread Grygorii Strashko
The CPDMA uses one pool of descriptors for both RX and TX which by default
split between all channels proportionally depending on total number of
CPDMA channels and number of TX and RX channels. As result, more
descriptors will be consumed by TX path if there are more TX channels and
there is no way now to dedicate more descriptors for RX path.

So, add the ability to re-split CPDMA pool of descriptors between RX and TX
path via ethtool '-G' command wich will allow to configure and fix number
of descriptors used by RX and TX path, which, then, will be split between
RX/TX channels proportionally depending on RX/TX channels number and
weight. ethtool '-G' command will accept only number of RX entries and rest
of descriptors will be arranged for TX automatically.

Command:
  ethtool -G  rx 

defaults and limitations:
- minimum number of rx descriptors is 10% of total number of descriptors in
  CPDMA pool
- maximum number of rx descriptors is 90% of total number of descriptors in
  CPDMA pool
- by default, descriptors will be split equally between RX/TX path
- any values passed in "tx" parameter will be ignored

Usage:

 # ethtool -g eth0
Pre-set maximums:
RX: 7372
RX Mini:0
RX Jumbo:   0
TX: 0
Current hardware settings:
RX: 4096
RX Mini:0
RX Jumbo:   0
TX: 4096

 # ethtool -G eth0 rx 7372
 # ethtool -g eth0
Ring parameters for eth0:
Pre-set maximums:
RX: 7372
RX Mini:0
RX Jumbo:   0
TX: 0
Current hardware settings:
RX: 7372
RX Mini:0
RX Jumbo:   0
TX:     820

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c  | 86 +
 drivers/net/ethernet/ti/davinci_cpdma.c | 40 ---
 drivers/net/ethernet/ti/davinci_cpdma.h |  4 ++
 3 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index d39875e..f339268 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2484,6 +2484,90 @@ static int cpsw_nway_reset(struct net_device *ndev)
return -EOPNOTSUPP;
 }
 
+static void cpsw_get_ringparam(struct net_device *ndev,
+  struct ethtool_ringparam *ering)
+{
+   struct cpsw_priv *priv = netdev_priv(ndev);
+   struct cpsw_common *cpsw = priv->cpsw;
+
+   /* not supported */
+   ering->tx_max_pending = 0;
+   ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma);
+   /* Max 90% RX buffers */
+   ering->rx_max_pending = (descs_pool_size * 9) / 10;
+   ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma);
+}
+
+static int cpsw_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ering)
+{
+   struct cpsw_priv *priv = netdev_priv(ndev);
+   struct cpsw_common *cpsw = priv->cpsw;
+   struct cpsw_slave *slave;
+   int i, ret;
+
+   /* ignore ering->tx_pending - only rx_pending adjustment is supported */
+
+   if (ering->rx_mini_pending || ering->rx_jumbo_pending ||
+   ering->rx_pending < (descs_pool_size / 10) ||
+   ering->rx_pending > ((descs_pool_size * 9) / 10))
+   return -EINVAL;
+
+   if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma))
+   return 0;
+
+   /* Disable NAPI scheduling */
+   cpsw_intr_disable(cpsw);
+
+   /* Stop all transmit queues for every network device.
+* Disable re-using rx descriptors with dormant_on.
+*/
+   for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+   if (!(slave->ndev && netif_running(slave->ndev)))
+   continue;
+
+   netif_tx_stop_all_queues(slave->ndev);
+   netif_dormant_on(slave->ndev);
+   }
+
+   /* Handle rest of tx packets and stop cpdma channels */
+   cpdma_ctlr_stop(cpsw->dma);
+
+   cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
+
+   for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+   if (!(slave->ndev && netif_running(slave->ndev)))
+   continue;
+
+   /* Enable rx packets handling */
+   netif_dormant_off(slave->ndev);
+   }
+
+   if (cpsw_common_res_usage_state(cpsw)) {
+   cpdma_chan_split_pool(cpsw->dma);
+
+   ret = cpsw_fill_rx_channels(priv);
+   if (ret)
+   goto err;
+
+   /* After this receive is started */
+   cpdma_ctlr_start(cpsw->dma);
+   cpsw_intr_en

[PATCH 2/2] ARM: dts: dra72-evm-revc: enable irqs for dp83867 eth phys

2017-01-06 Thread Grygorii Strashko
TI DRA72-EVM Rev C has two DP83867 ethernet phys which support IRQ
generation in case of phy/link status changes. The INT/PWDN lines from both
DP83867 phys are wired to DRA7 gpio6.16, so reflect the same in DT.

Signed-off-by: Grygorii Strashko 
---
 arch/arm/boot/dts/dra72-evm-revc.dts | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/dra72-evm-revc.dts 
b/arch/arm/boot/dts/dra72-evm-revc.dts
index c3d939c..3ecac56 100644
--- a/arch/arm/boot/dts/dra72-evm-revc.dts
+++ b/arch/arm/boot/dts/dra72-evm-revc.dts
@@ -68,6 +68,8 @@
ti,tx-internal-delay = ;
ti,fifo-depth = ;
ti,min-output-impedance;
+   interrupt-parent = <&gpio6>;
+   interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
};
 
dp83867_1: ethernet-phy@3 {
@@ -75,6 +77,8 @@
ti,rx-internal-delay = ;
ti,tx-internal-delay = ;
ti,fifo-depth = ;
-   ti,min-output-imepdance;
+   ti,min-output-impedance;
+   interrupt-parent = <&gpio6>;
+   interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
};
 };
-- 
2.10.1.dirty



Re: [PATCH 0/2] net: ethernet: ti: cpsw: fix susp/resume

2017-02-10 Thread Grygorii Strashko



On 02/09/2017 07:45 PM, David Miller wrote:

From: Ivan Khoronzhuk 
Date: Fri, 10 Feb 2017 00:54:24 +0200


On Thu, Feb 09, 2017 at 05:21:26PM -0500, David Miller wrote:

From: Ivan Khoronzhuk 
Date: Thu,  9 Feb 2017 02:07:34 +0200


These two patches fix suspend/resume chain.


Patch 2 doesn't apply cleanly to the 'net' tree, please
respin this series.


Strange, I've just checked it on net-next/master, it was applied w/o any
warnings.


It makes no sense to test "net-next" when I am telling you that it is
the "net" tree it doesn't apply to.

This is a bug fix, so it should be targetting the "net" tree.



Looks like the first fix is for net, but the second one is for net-next
I do not see
03fd01ad0eead23eb79294b6fb4d71dcac493855
"net: ethernet: ti: cpsw: don't duplicate ndev_running"
in net.

--
regards,
-grygorii


[PATCH] net: ethernet: ti: cpsw: adjust cpsw fifos depth for fullduplex flow control

2017-05-08 Thread Grygorii Strashko
When users set flow control using ethtool the bits are set properly in the
CPGMAC_SL MACCONTROL register, but the FIFO depth in the respective Port n
Maximum FIFO Blocks (Pn_MAX_BLKS) registers remains set to the minimum size
reset value. When receive flow control is enabled on a port, the port's
associated FIFO block allocation must be adjusted. The port RX allocation
must increase to accommodate the flow control runout. The TRM recommends
numbers of 5 or 6.

Hence, apply required Port FIFO configuration to
Pn_MAX_BLKS.Pn_TX_MAX_BLKS=0xF and Pn_MAX_BLKS.Pn_RX_MAX_BLKS=0x5 during
interface initialization.

Cc: Schuyler Patton 
Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 20fd14c..7cf9c79 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -287,6 +287,10 @@ struct cpsw_ss_regs {
 /* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */
 #define CPSW_V1_SEQ_ID_OFS_SHIFT   16
 
+#define CPSW_MAX_BLKS_TX   15
+#define CPSW_MAX_BLKS_TX_SHIFT 4
+#define CPSW_MAX_BLKS_RX   5
+
 struct cpsw_host_regs {
u32 max_blks;
u32 blk_cnt;
@@ -1278,11 +1282,23 @@ static void cpsw_slave_open(struct cpsw_slave *slave, 
struct cpsw_priv *priv)
switch (cpsw->version) {
case CPSW_VERSION_1:
slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP);
+   /* Increase RX FIFO size to 5 for supporting fullduplex
+* flow control mode
+*/
+   slave_write(slave,
+   (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) |
+   CPSW_MAX_BLKS_RX, CPSW1_MAX_BLKS);
break;
case CPSW_VERSION_2:
case CPSW_VERSION_3:
case CPSW_VERSION_4:
slave_write(slave, TX_PRIORITY_MAPPING, CPSW2_TX_PRI_MAP);
+   /* Increase RX FIFO size to 5 for supporting fullduplex
+* flow control mode
+*/
+   slave_write(slave,
+   (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) |
+   CPSW_MAX_BLKS_RX, CPSW2_MAX_BLKS);
break;
}
 
-- 
2.10.1



Re: [PATCH 2/4] net: ethernet: ti: cpsw: add multi queue support

2016-07-08 Thread Grygorii Strashko
On 06/30/2016 10:04 PM, Ivan Khoronzhuk wrote:
> The cpsw h/w supports up to 8 tx and 8 rx channels.This patch adds
> multi-queue support to the driver. An ability to configure h/w
> shaper will be added with separate patch. Default shaper mode, as
> before, priority mode.
> 
> The poll function handles all unprocessed channels, till all of
> them are free, beginning from hi priority channel.
> 
> The statistic for every channel can be read with:
> ethtool -S ethX
> 
> Signed-off-by: Ivan Khoronzhuk 
> ---
>   drivers/net/ethernet/ti/cpsw.c  | 334 
> +---
>   drivers/net/ethernet/ti/davinci_cpdma.c |  12 ++
>   drivers/net/ethernet/ti/davinci_cpdma.h |   2 +
>   3 files changed, 237 insertions(+), 111 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index a713336..14d53eb 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -140,6 +140,8 @@ do {  
> \
>   #define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT)
>   #define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1)
>   
> +#define CPSW_MAX_QUEUES  8
> +
>   #define cpsw_slave_index(priv)  \
>   ((priv->data.dual_emac) ? priv->emac_port : \
>   priv->data.active_slave)
> @@ -383,7 +385,8 @@ struct cpsw_priv {
>   u8  mac_addr[ETH_ALEN];
>   struct cpsw_slave   *slaves;
>   struct cpdma_ctlr   *dma;
> - struct cpdma_chan   *txch, *rxch;
> + struct cpdma_chan   *txch[CPSW_MAX_QUEUES];
> + struct cpdma_chan   *rxch[CPSW_MAX_QUEUES];
>   struct cpsw_ale *ale;
>   boolrx_pause;
>   booltx_pause;
> @@ -395,6 +398,7 @@ struct cpsw_priv {
>   u32 num_irqs;
>   struct cpts *cpts;
>   u32 emac_port;
> + int rx_ch_num, tx_ch_num;
>   };
>   

[...]

>   
> @@ -989,26 +1024,50 @@ update_return:
>   
>   static int cpsw_get_sset_count(struct net_device *ndev, int sset)
>   {
> + struct cpsw_priv *priv = netdev_priv(ndev);
> +
>   switch (sset) {
>   case ETH_SS_STATS:
> - return CPSW_STATS_LEN;
> + return (CPSW_STATS_COMMON_LEN +
> +(priv->rx_ch_num + priv->tx_ch_num) *
> +CPSW_STATS_CH_LEN);
>   default:
>   return -EOPNOTSUPP;
>   }
>   }
>   
> +static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir)
> +{
> + int ch_stats_len;
> + int line;
> + int i;
> +
> + ch_stats_len = CPSW_STATS_CH_LEN * ch_num;
> + for (i = 0; i < ch_stats_len; i++) {
> + line = i % CPSW_STATS_CH_LEN;
> + sprintf(*p, "%s DMA chan %d: %s", rx_dir ? "Rx" : "Tx",
> + i / CPSW_STATS_CH_LEN,

snprintf(,ETH_GSTRING_LEN,) ?

> + cpsw_gstrings_ch_stats[line].stat_string);
> + *p += ETH_GSTRING_LEN;
> + }
> +}
> +
>   static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 
> *data)
>   {
> + struct cpsw_priv *priv = netdev_priv(ndev);
>   u8 *p = data;
>   int i;
>   
>   switch (stringset) {
>   case ETH_SS_STATS:
> - for (i = 0; i < CPSW_STATS_LEN; i++) {
> + for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) {
>   memcpy(p, cpsw_gstrings_stats[i].stat_string,
>  ETH_GSTRING_LEN);
>   p += ETH_GSTRING_LEN;
>   }
> +
> + cpsw_add_ch_strings(&p, priv->rx_ch_num, 1);
> + cpsw_add_ch_strings(&p, priv->tx_ch_num, 0);
>   break;
>   }
>   }
> @@ -1017,35 +1076,38 @@ static void cpsw_get_ethtool_stats(struct net_device 
> *ndev,
>   struct ethtool_stats *stats, u64 *data)
>   {
>   struct cpsw_priv *priv = netdev_priv(ndev);
> - struct cpdma_chan_stats rx_stats;
> - struct cpdma_chan_stats tx_stats;
> - u32 val;
> + struct cpdma_chan_stats ch_stats;
> + int i, l, ch, ret;
>   u8 *p;
> - int i;
> +
> + ret = pm_runtime_get_sync(&priv->pdev->dev);
> + if (ret < 0) {
> + pm_runtime_put_noidle(&priv->pdev->dev);
> + return;
> + }

You probably need to base you work on top of net-next.git

>   
>   /* Collect Davinci CPDMA stats for Rx and Tx Channel */
> - cpdma_chan_get_stats(priv->rxch, &rx_stats);
> - cpdma_chan_get_stats(priv->txch, &tx_stats);
> -
> - for (i = 0; i < CPSW_STATS_LEN; i++) {
> - switch (cpsw_gstrings_stats[i].type) {
> - case CPSW_STATS:
> - val = readl(priv->hw_stats +
> - cpsw_gstrings_stats[i].stat_offset);
> - data[i] = val;
> - 

Re: [PATCH 4/4] net: ethernet: ti: cpsw: add ethtool channels support

2016-07-08 Thread Grygorii Strashko
On 06/30/2016 10:04 PM, Ivan Khoronzhuk wrote:
> These ops allow to control number of channels driver is allowed to
> work with. The maximum number of channels is 8 for rx and 8 for tx.
> After this patch the following commands are possible:
> 
> $ ethtool -l eth0
> $ ethtool -L eth0 rx 6 tx 6


Could you add some description here about switch vs dual_mac behavior? 

> 
> Signed-off-by: Ivan Khoronzhuk 
> ---
>   drivers/net/ethernet/ti/cpsw.c | 188 
> +
>   1 file changed, 188 insertions(+)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index 595ed56..729b8be 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -740,6 +740,11 @@ static void cpsw_rx_handler(void *token, int len, int 
> status)
>   }
>   
>   requeue:
> + if (netif_dormant(ndev)) {
> + dev_kfree_skb_any(new_skb);
> + return;
> + }
> +
>   ch = priv->rxch[skb_get_queue_mapping(new_skb)];
>   ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
>   skb_tailroom(new_skb), 0);
> @@ -2077,6 +2082,187 @@ static void cpsw_ethtool_op_complete(struct 
> net_device *ndev)
>   cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
>   }
>   
> +static void cpsw_get_channels(struct net_device *dev,
> +   struct ethtool_channels *ch)
> +{
> + struct cpsw_priv *priv = netdev_priv(dev);
> +
> + ch->max_combined = 0;
> + ch->max_rx = CPSW_MAX_QUEUES;
> + ch->max_tx = CPSW_MAX_QUEUES;
> + ch->max_other = 0;
> + ch->other_count = 0;
> + ch->rx_count = priv->rx_ch_num;
> + ch->tx_count = priv->tx_ch_num;
> + ch->combined_count = 0;
> +}
> +
> +static int cpsw_check_ch_settings(struct cpsw_priv *priv,
> +   struct ethtool_channels *ch)
> +{
> + if (ch->combined_count)
> + return -EINVAL;
> +
> + /* verify we have at least one channel in each direction */
> + if (!ch->rx_count || !ch->tx_count)
> + return -EINVAL;
> +
> + if (ch->rx_count > priv->data.channels ||
> + ch->tx_count > priv->data.channels)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +static void cpsw_sync_dual_ch_list(struct net_device *sdev,
> +struct net_device *ddev)
> +{
> + struct cpsw_priv *priv_s, *priv_d;
> + int i;
> +
> + priv_s = netdev_priv(sdev);
> + priv_d = netdev_priv(ddev);
> +
> + priv_d->rx_ch_num = priv_s->rx_ch_num;
> + priv_d->tx_ch_num = priv_s->tx_ch_num;
> +
> + for (i = 0; i < priv_d->tx_ch_num; i++)
> + priv_d->txch[i] = priv_s->txch[i];
> + for (i = 0; i < priv_d->rx_ch_num; i++)
> + priv_d->rxch[i] = priv_s->rxch[i];
> +}
> +
> +static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int 
> rx)
> +{
> + int (*poll)(struct napi_struct *, int);
> + void (*handler)(void *, int, int);
> + struct cpdma_chan **chan;
> + int *ch;
> + int ret;
> +
> + if (rx) {
> + ch = &priv->rx_ch_num;
> + chan = priv->rxch;
> + handler = cpsw_rx_handler;
> + poll = cpsw_rx_poll;
> + } else {
> + ch = &priv->tx_ch_num;
> + chan = priv->txch;
> + handler = cpsw_tx_handler;
> + poll = cpsw_tx_poll;
> + }
> +
> + while (*ch < ch_num) {
> + chan[*ch] = cpdma_chan_create(priv->dma, *ch, handler, rx);
> +
> + if (IS_ERR(chan[*ch]))
> + return PTR_ERR(chan[*ch]);
> +
> + if (!chan[*ch])
> + return -EINVAL;
> +
> + dev_info(priv->dev, "created new %d %s channel\n", *ch,
> +  (rx ? "rx" : "tx"));
> + (*ch)++;
> + }
> +
> + while (*ch > ch_num) {
> + int tch = *ch - 1;

why tch? can you use more informative names

> +
> + ret = cpdma_chan_destroy(chan[tch]);
> + if (ret)
> + return ret;
> +
> + dev_info(priv->dev, "destroyed %d %s channel\n", tch,
> +  (rx ? "rx" : "tx"));
> + (*ch)--;
> + }
> +
> + return 0;
> +}
> +
> +static int cpsw_update_channels(struct net_device *dev,
> + struct ethtool_channels *ch)
> +{
> + struct cpsw_priv *priv;
> + int ret;
> +
> + priv = netdev_priv(dev);
> +
> + ret = cpsw_update_channels_res(priv, ch->rx_count, 1);
> + if (ret)
> + return ret;
> +
> + ret = cpsw_update_channels_res(priv, ch->tx_count, 0);
> + if (ret)
> + return ret;
> +
> + if (priv->data.dual_emac) {
> + int i;
> + /* mirror channels for another SL */
> + for (i = 0; i < priv->data.slaves; i++) {
> + if (priv->slaves[i].ndev == dev)
> +   

[PATCH 2/3] drivers: net: cpsw: fix wrong regs access in cpsw_remove

2016-07-22 Thread Grygorii Strashko
The L3 error will be generated and system will crash during unloading
of CPSW driver if CPSW is used as module and ethX devices are down.
This happens because CPSW can be power off by PM runtime now when ethX
devices are down.

Hence, ensure that CPSW powered up by PM runtime before performing any
deinitialization actions which require CPSW registers access. In case
of PM runtime error just leave cpsw_remove() as we can't do anything
anymore.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 46423dd..a4d6eb5 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2589,6 +2589,13 @@ static int cpsw_remove(struct platform_device *pdev)
 {
struct net_device *ndev = platform_get_drvdata(pdev);
struct cpsw_priv *priv = netdev_priv(ndev);
+   int ret;
+
+   ret = pm_runtime_get_sync(&pdev->dev);
+   if (ret < 0) {
+   pm_runtime_put_noidle(&pdev->dev);
+   return ret;
+   }
 
if (priv->data.dual_emac)
unregister_netdev(cpsw_get_slave_ndev(priv, 1));
@@ -2596,8 +2603,9 @@ static int cpsw_remove(struct platform_device *pdev)
 
cpsw_ale_destroy(priv->ale);
cpdma_ctlr_destroy(priv->dma);
-   pm_runtime_disable(&pdev->dev);
device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
+   pm_runtime_put_sync(&pdev->dev);
+   pm_runtime_disable(&pdev->dev);
if (priv->data.dual_emac)
free_netdev(cpsw_get_slave_ndev(priv, 1));
free_netdev(ndev);
-- 
2.9.2



[PATCH 1/3] net: ethernet: ti: cpdma: fix lockup in cpdma_ctlr_destroy()

2016-07-22 Thread Grygorii Strashko
Fix deadlock in cpdma_ctlr_destroy() which is triggered now on
cpsw module removal:
 cpsw_remove()
 - cpdma_ctlr_destroy()
   - spin_lock_irqsave(&ctlr->lock, flags)
   - cpdma_ctlr_stop()
 - spin_lock_irqsave(&ctlr->lock, flags); <- deadlock
   - cpdma_chan_destroy()
 - spin_lock_irqsave(&ctlr->lock, flags); <- deadlock

The issue has not been observed before because CPDMA channels have
been destroyed manually by CPSW until commit d941ebe88a41 ("net:
ethernet: ti: cpsw: use destroy ctlr to destroy channels") was merged.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index a68652a..89242e9 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -436,7 +436,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
if (!ctlr)
return -EINVAL;
 
-   spin_lock_irqsave(&ctlr->lock, flags);
if (ctlr->state != CPDMA_STATE_IDLE)
cpdma_ctlr_stop(ctlr);
 
@@ -444,7 +443,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
cpdma_chan_destroy(ctlr->channels[i]);
 
cpdma_desc_pool_destroy(ctlr->pool);
-   spin_unlock_irqrestore(&ctlr->lock, flags);
return ret;
 }
 EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
-- 
2.9.2



[PATCH 3/3] drivers: net: cpsw: use of_platform_depopulate()

2016-07-22 Thread Grygorii Strashko
Use of_platform_depopulate() in cpsw_remove() instead of
of_device_unregister(), because CSPW child devices will not be
recreated otherwise on next insmod. of_platform_depopulate() is
correct way now as it will ensure that all steps done in
of_platform_populate() are reverted, including cleaning up of
OF_POPULATED flag.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index a4d6eb5..ca5890f 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2576,15 +2576,6 @@ clean_ndev_ret:
return ret;
 }
 
-static int cpsw_remove_child_device(struct device *dev, void *c)
-{
-   struct platform_device *pdev = to_platform_device(dev);
-
-   of_device_unregister(pdev);
-
-   return 0;
-}
-
 static int cpsw_remove(struct platform_device *pdev)
 {
struct net_device *ndev = platform_get_drvdata(pdev);
@@ -2603,7 +2594,7 @@ static int cpsw_remove(struct platform_device *pdev)
 
cpsw_ale_destroy(priv->ale);
cpdma_ctlr_destroy(priv->dma);
-   device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
+   of_platform_depopulate(&pdev->dev);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (priv->data.dual_emac)
-- 
2.9.2



[PATCH 0/3] drivers: net: cpsw: fix driver loading/unloading

2016-07-22 Thread Grygorii Strashko
This series fixes set of isssues observed when CPSW driver module is 
unloaded/loaded:
1) rmmod: deadlock in cpdma_ctlr_destroy
2) rmmod: L3 back-trace and crash if all net interfaces are down, because CPSW
can be powerred down by PM runtime in this case.
3) insmod: mdio device is not recreated on next insmod
 - need to use of_platform_depopulate() in cpsw_remove().

Grygorii Strashko (3):
  net: ethernet: ti: cpdma: fix lockup in cpdma_ctlr_destroy()
  drivers: net: cpsw: fix wrong regs access in cpsw_remove
  drivers: net: cpsw: use of_platform_depopulate()

 drivers/net/ethernet/ti/cpsw.c  | 19 +--
 drivers/net/ethernet/ti/davinci_cpdma.c |  2 --
 2 files changed, 9 insertions(+), 12 deletions(-)

-- 
2.9.2



Re: [PATCH v2] net: davinci_cpdma: remove excessive dump of register values to kernel log

2016-07-26 Thread Grygorii Strashko

On 07/26/2016 03:57 AM, David Miller wrote:

From: Uwe Kleine-König 
Date: Mon, 25 Jul 2016 11:54:45 +0200


Such a big dump of register values is hardly useful on a production
system.

Another downside of the now removed functions is that calling
emac_dump_regs resulted in at least 87 calls to dev_info while holding a
spinlock and having irqs off which is a big source of latency.

Signed-off-by: Uwe Kleine-König 


Applied.



:( To be honest I was really enjoyed using cpdma_ctlr_dump()
for debug purposed during the past few weeks in cpsw where I've added 
calls to this func manually. And I even thinking about adding debug_fs 
entry for the same :(


Pretty fast merge :(


--
regards,
-grygorii


Re: [PATCH 2/2] net: davinci_cpdma: reduce time holding chan->lock in cpdma_chan_submit

2016-07-26 Thread Grygorii Strashko
On 07/26/2016 03:02 PM, Uwe Kleine-König wrote:
> Allocating and preparing a dma descriptor doesn't need to happen under
> the channel's lock. So do this before taking the channel's lock. The only
> down side is that the dma descriptor might be allocated even though the
> channel is about to be stopped. This is unlikely though.
> 
> Signed-off-by: Uwe Kleine-König 
> ---
>  drivers/net/ethernet/ti/davinci_cpdma.c | 38 
> +
>  1 file changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
> b/drivers/net/ethernet/ti/davinci_cpdma.c
> index 5ffa04a306c6..ba3462707ae3 100644
> --- a/drivers/net/ethernet/ti/davinci_cpdma.c
> +++ b/drivers/net/ethernet/ti/davinci_cpdma.c
> @@ -542,24 +542,10 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void 
> *token, void *data,
>   u32 mode;
>   int ret = 0;
>  
> - spin_lock_irqsave(&chan->lock, flags);
> -
> - if (chan->state == CPDMA_STATE_TEARDOWN) {
> - ret = -EINVAL;
> - goto unlock_ret;
> - }
> -
> - if (chan->count >= chan->desc_num)  {
> - chan->stats.desc_alloc_fail++;
> - ret = -ENOMEM;
> - goto unlock_ret;
> - }

I'm not sure this is right thing to do. This check is expected to be strict
and means "channel has exhausted the available descriptors, so further descs 
allocation does not allowed".


This also might affect on Ivan's work [1] "[PATCH 0/4]  net: ethernet: ti: 
cpsw: add multi-queue support"



[1] https://lkml.org/lkml/2016/6/30/603
-- 
regards,
-grygorii


Re: [PATCH v2] net: davinci_cpdma: remove excessive dump of register values to kernel log

2016-07-26 Thread Grygorii Strashko

+ CC: Ivan

On 07/26/2016 05:09 PM, Grygorii Strashko wrote:

On 07/26/2016 03:57 AM, David Miller wrote:

From: Uwe Kleine-König 
Date: Mon, 25 Jul 2016 11:54:45 +0200


Such a big dump of register values is hardly useful on a production
system.

Another downside of the now removed functions is that calling
emac_dump_regs resulted in at least 87 calls to dev_info while holding a
spinlock and having irqs off which is a big source of latency.

Signed-off-by: Uwe Kleine-König 


Applied.



:( To be honest I was really enjoyed using cpdma_ctlr_dump()
for debug purposed during the past few weeks in cpsw where I've added
calls to this func manually. And I even thinking about adding debug_fs
entry for the same :(

Pretty fast merge :(





--
regards,
-grygorii


Re: [PATCH 0/2] net: davinci_cpdma: reduce latency on -rt

2016-07-26 Thread Grygorii Strashko

On 07/26/2016 03:02 PM, Uwe Kleine-König wrote:

Hello,

these patches are based on next-20160726. I didn't check yet how latency
improves by using these patches, but even if the improvment is small,
it's still a good idea to have them.


Sry, but how this will affect on -RT? This is not a raw locks, so
they will be converted to rt-mutexes which are sleepable.
Or I've missed smth?



A second pair of eyes checking what I did would be great.

Best regards
Uwe


Uwe Kleine-König (2):
  net: davinci_cpdma: reduce time holding ctlr->lock in
cpdma_control_set
  net: davinci_cpdma: reduce time holding chan->lock in
cpdma_chan_submit

 drivers/net/ethernet/ti/davinci_cpdma.c | 58 -
 1 file changed, 29 insertions(+), 29 deletions(-)




--
regards,
-grygorii


Re: [PATCH 1/3] net: ethernet: ti: cpdma: fix lockup in cpdma_ctlr_destroy()

2016-07-26 Thread Grygorii Strashko
On 07/23/2016 09:24 AM, Ivan Khoronzhuk wrote:
> 
> 
> On 22.07.16 16:58, Grygorii Strashko wrote:
>> Fix deadlock in cpdma_ctlr_destroy() which is triggered now on
>> cpsw module removal:
>>  cpsw_remove()
>>  - cpdma_ctlr_destroy()
>>- spin_lock_irqsave(&ctlr->lock, flags)
>>- cpdma_ctlr_stop()
>>  - spin_lock_irqsave(&ctlr->lock, flags); <- deadlock
>>- cpdma_chan_destroy()
>>  - spin_lock_irqsave(&ctlr->lock, flags); <- deadlock
>>
>> The issue has not been observed before because CPDMA channels have
>> been destroyed manually by CPSW until commit d941ebe88a41 ("net:
>> ethernet: ti: cpsw: use destroy ctlr to destroy channels") was merged.
>>
>> Signed-off-by: Grygorii Strashko 
>> ---
>>  drivers/net/ethernet/ti/davinci_cpdma.c | 2 --
>>  1 file changed, 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c
>> b/drivers/net/ethernet/ti/davinci_cpdma.c
>> index a68652a..89242e9 100644
>> --- a/drivers/net/ethernet/ti/davinci_cpdma.c
>> +++ b/drivers/net/ethernet/ti/davinci_cpdma.c
>> @@ -436,7 +436,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
>>  if (!ctlr)
>>  return -EINVAL;
>>
>> -spin_lock_irqsave(&ctlr->lock, flags);
> Should ctlr->state be checked under lock?
> Seems like here should be used unlocked static versions of
> cpdma_ctlr_stop() and cpdma_chan_destroy() instead.

As per my understanding it's not expected the ctlr->state will be changed at 
this 
moment as all net devices has been unregistered already.

> 
>>  if (ctlr->state != CPDMA_STATE_IDLE)

May be I can move above check in cpdma_ctlr_stop() instead.
What do you think?

>>  cpdma_ctlr_stop(ctlr);
>>
>> @@ -444,7 +443,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
>>  cpdma_chan_destroy(ctlr->channels[i]);
>>
>>  cpdma_desc_pool_destroy(ctlr->pool);
>> -spin_unlock_irqrestore(&ctlr->lock, flags);
>>  return ret;
>>  }
>>  EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
>>
> 


-- 
regards,
-grygorii


Re: [PATCH 2/2] net: davinci_cpdma: reduce time holding chan->lock in cpdma_chan_submit

2016-07-27 Thread Grygorii Strashko
On 07/27/2016 10:12 AM, Uwe Kleine-König wrote:
> Hello,
> 
> On Tue, Jul 26, 2016 at 05:25:58PM +0300, Grygorii Strashko wrote:
>> On 07/26/2016 03:02 PM, Uwe Kleine-König wrote:
>>> Allocating and preparing a dma descriptor doesn't need to happen under
>>> the channel's lock. So do this before taking the channel's lock. The only
>>> down side is that the dma descriptor might be allocated even though the
>>> channel is about to be stopped. This is unlikely though.
>>>
>>> Signed-off-by: Uwe Kleine-König 
>>> ---
>>>  drivers/net/ethernet/ti/davinci_cpdma.c | 38 
>>> +
>>>  1 file changed, 20 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
>>> b/drivers/net/ethernet/ti/davinci_cpdma.c
>>> index 5ffa04a306c6..ba3462707ae3 100644
>>> --- a/drivers/net/ethernet/ti/davinci_cpdma.c
>>> +++ b/drivers/net/ethernet/ti/davinci_cpdma.c
>>> @@ -542,24 +542,10 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void 
>>> *token, void *data,
>>> u32 mode;
>>> int ret = 0;
>>>  
>>> -   spin_lock_irqsave(&chan->lock, flags);
>>> -
>>> -   if (chan->state == CPDMA_STATE_TEARDOWN) {
>>> -   ret = -EINVAL;
>>> -   goto unlock_ret;
>>> -   }
>>> -
>>> -   if (chan->count >= chan->desc_num)  {
>>> -   chan->stats.desc_alloc_fail++;
>>> -   ret = -ENOMEM;
>>> -   goto unlock_ret;
>>> -   }
>>
>> I'm not sure this is right thing to do. This check is expected to be strict
>> and means "channel has exhausted the available descriptors, so further descs 
>> allocation does not allowed".
> 
> I developed this patch basing on a 4.4 kernel which doesn't have
> 742fb20fd4c7 ("net: ethernet: ti: cpdma: switch to use genalloc"). There
> my patch is more obviously correct. As currently chan->count is
> protected by chan->lock we must hold the lock for this check. If a
> failing check means we must not call cpdma_desc_alloc in the first
> place, that's bad.

Yes. That's intention of this check :(
Now it'll work as following for two (rx/tx) channels, as example
RX desc_num = 16 (max allowed number of descriptors)
TX desc_num = 16 (max allowed number of descriptors)
and with current code number of allocated descriptors will never exceed 16.

with your change, in corner case when TX channel's already utilized 16 
descriptors the
following will happen:
cpdma_chan_submit()
 - cpdma_desc_alloc() -  will allocate 17th desc
 - lock
 - check for chan->count - fail
 - unlock
 - cpdma_desc_free() 

so your patch will add additional desc_alloc/desc_free in the above corner case
and that's what i'm worry about (TEARDOWN seems ok) especially taking into 
account
further multi-queue feature development.

Above corner case seems might happen very rare, because of the guard check in 
cpsw_ndo_start_xmit(), 
but it could.

> 
> But I'm not sure this is the case here. After all cpdma_desc_alloc
> doesn't do anything relevant for the hardware, right?

Right.

Thanks. I'd try to do some measurement also.
-- 
regards,
-grygorii


Re: [PATCH 0/2] net: davinci_cpdma: reduce latency on -rt

2016-07-27 Thread Grygorii Strashko
On 07/27/2016 10:03 AM, Uwe Kleine-König wrote:
> On Tue, Jul 26, 2016 at 05:36:49PM +0300, Grygorii Strashko wrote:
>> On 07/26/2016 03:02 PM, Uwe Kleine-König wrote:
>>> Hello,
>>>
>>> these patches are based on next-20160726. I didn't check yet how latency
>>> improves by using these patches, but even if the improvment is small,
>>> it's still a good idea to have them.
>>
>> Sry, but how this will affect on -RT? This is not a raw locks, so
>> they will be converted to rt-mutexes which are sleepable.
>> Or I've missed smth?
> 
> They are still locks after all. On -rt I saw for the relevant
> application:
> 
>   send package |
> take lock  |
> write pckt to hw   |
>| rcv irq
>  |   take lock
>  | schedule
> drop lock| 
>   schedule |
>|   get pckt from hw
>  |   drop lock
> 
> So reducing the time a lock is taken reduces the chances that the lock
> is contended for another thread which results in extra context switches.
> 
Thanks a lot for explanation. So, this is not exactly rt-latency reduction,
but it might improve net performance on -RT. correct?

Thanks.
-- 
regards,
-grygorii


Re: [PATCH 0/2] net: davinci_cpdma: reduce latency on -rt

2016-07-28 Thread Grygorii Strashko
On 07/27/2016 05:38 PM, Uwe Kleine-König wrote:
> Hello,
> 
> On Wed, Jul 27, 2016 at 05:11:54PM +0300, Grygorii Strashko wrote:
>> On 07/27/2016 10:03 AM, Uwe Kleine-König wrote:
>>> On Tue, Jul 26, 2016 at 05:36:49PM +0300, Grygorii Strashko wrote:
>>>> On 07/26/2016 03:02 PM, Uwe Kleine-König wrote:
>>>>> Hello,
>>>>>
>>>>> these patches are based on next-20160726. I didn't check yet how latency
>>>>> improves by using these patches, but even if the improvment is small,
>>>>> it's still a good idea to have them.
>>>>
>>>> Sry, but how this will affect on -RT? This is not a raw locks, so
>>>> they will be converted to rt-mutexes which are sleepable.
>>>> Or I've missed smth?
>>>
>>> They are still locks after all. On -rt I saw for the relevant
>>> application:
>>>
>>>   send package |
>>> take lock  |
>>> write pckt to hw   |
>>>| rcv irq
>>>|   take lock
>>>| schedule
>>> drop lock  | 
>>>   schedule |
>>>|   get pckt from hw
>>>|   drop lock
>>>
>>> So reducing the time a lock is taken reduces the chances that the lock
>>> is contended for another thread which results in extra context switches.
>>>
>> Thanks a lot for explanation. So, this is not exactly rt-latency reduction,
>> but it might improve net performance on -RT. correct?
> 
> Well, it's not really rt related, but if you hit a locked lock on rt it
> hurts more than on !rt. And this results in increased latency.
> 

Thanks. I've just wanted to have clear understanding of the [possible] issue.
And I'd be appreciated if you could share and measurement results if you have.

-- 
regards,
-grygorii


Re: [PATCH 1/3] net: ethernet: ti: cpdma: fix lockup in cpdma_ctlr_destroy()

2016-07-28 Thread Grygorii Strashko
On 07/26/2016 11:54 PM, ivan.khoronzhuk wrote:
> 
> 
> On 26.07.16 19:02, Grygorii Strashko wrote:
>> On 07/23/2016 09:24 AM, Ivan Khoronzhuk wrote:
>>>
>>>
>>> On 22.07.16 16:58, Grygorii Strashko wrote:
>>>> Fix deadlock in cpdma_ctlr_destroy() which is triggered now on
>>>> cpsw module removal:
>>>>  cpsw_remove()
>>>>  - cpdma_ctlr_destroy()
>>>>- spin_lock_irqsave(&ctlr->lock, flags)
>>>>- cpdma_ctlr_stop()
>>>>  - spin_lock_irqsave(&ctlr->lock, flags); <- deadlock
>>>>- cpdma_chan_destroy()
>>>>  - spin_lock_irqsave(&ctlr->lock, flags); <- deadlock
>>>>
>>>> The issue has not been observed before because CPDMA channels have
>>>> been destroyed manually by CPSW until commit d941ebe88a41 ("net:
>>>> ethernet: ti: cpsw: use destroy ctlr to destroy channels") was merged.
>>>>
>>>> Signed-off-by: Grygorii Strashko 
>>>> ---
>>>>  drivers/net/ethernet/ti/davinci_cpdma.c | 2 --
>>>>  1 file changed, 2 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> b/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> index a68652a..89242e9 100644
>>>> --- a/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> +++ b/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> @@ -436,7 +436,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
>>>>  if (!ctlr)
>>>>  return -EINVAL;
>>>>
>>>> -spin_lock_irqsave(&ctlr->lock, flags);
>>> Should ctlr->state be checked under lock?
>>> Seems like here should be used unlocked static versions of
>>> cpdma_ctlr_stop() and cpdma_chan_destroy() instead.
>>
>> As per my understanding it's not expected the ctlr->state will be
>> changed at this
>> moment as all net devices has been unregistered already.
> Seems yes, the race can be only in case of incorrect usage, stop while
> destroy,
> destroy while start...etc..all they are mostly unreal use-cases, you are
> right,
> but such check w/o lock always under eyes control, that always makes you
> think
> that smth wrong.
> 
>>
>>>
>>>>  if (ctlr->state != CPDMA_STATE_IDLE)
>>
>> May be I can move above check in cpdma_ctlr_stop() instead.
>> What do you think?
> Yes, it be more clear.
> I was thinking about lock deletion also, as under this destroy function the
> ctlr destroys it's resources one by one, ok, the channels are destroyed
> under lock,
> but pool (it's good that it's destroyed after channels). I see that
> it should never
> happen, but ctrl is external structure, who knows as it can be used
> while destroying.
> That was my paranoiac point, so don't pay a lot attention to it. In case
> of normal usage,
> as it's currently is and should be, the lock can be removed.


I'm going to keep it as is after some thinking and code checking -
I don't see any reasons for races here and I can't simply move this check in 
cpdma_ctlr_stop() 
as it might break ndo_open failure handling (and this is not smth. I'd like to 
fix within this series).

I'll resend v2 with build issue fixed and with fix for new issue I've found.
> 
>>
>>>>  cpdma_ctlr_stop(ctlr);
>>>>
>>>> @@ -444,7 +443,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
>>>>  cpdma_chan_destroy(ctlr->channels[i]);
>>>>
>>>>  cpdma_desc_pool_destroy(ctlr->pool);
>>>> -spin_unlock_irqrestore(&ctlr->lock, flags);
>>>>  return ret;
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
>>>>
>>>
>>
>>


-- 
regards,
-grygorii


[PATCH v2 3/4] drivers: net: cpsw: use of_platform_depopulate()

2016-07-28 Thread Grygorii Strashko
Use of_platform_depopulate() in cpsw_remove() instead of
of_device_unregister(), because CSPW child devices will not be
recreated otherwise on next insmod. of_platform_depopulate() is
correct way now as it will ensure that all steps done in
of_platform_populate() are reverted, including cleaning up of
OF_POPULATED flag.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index ec6f473..f0ed470 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2566,15 +2566,6 @@ clean_ndev_ret:
return ret;
 }
 
-static int cpsw_remove_child_device(struct device *dev, void *c)
-{
-   struct platform_device *pdev = to_platform_device(dev);
-
-   of_device_unregister(pdev);
-
-   return 0;
-}
-
 static int cpsw_remove(struct platform_device *pdev)
 {
struct net_device *ndev = platform_get_drvdata(pdev);
@@ -2593,7 +2584,7 @@ static int cpsw_remove(struct platform_device *pdev)
 
cpsw_ale_destroy(priv->ale);
cpdma_ctlr_destroy(priv->dma);
-   device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
+   of_platform_depopulate(&pdev->dev);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (priv->data.dual_emac)
-- 
2.9.2



[PATCH v2 2/4] drivers: net: cpsw: fix wrong regs access in cpsw_remove

2016-07-28 Thread Grygorii Strashko
The L3 error will be generated and system will crash during unloading
of CPSW driver if CPSW is used as module and ethX devices are down.
This happens because CPSW can be power off by PM runtime now when ethX
devices are down.

Hence, ensure that CPSW powered up by PM runtime before performing any
deinitialization actions which require CPSW registers access. In case
of PM runtime error just leave cpsw_remove() as we can't do anything
anymore.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 8f1eab9..ec6f473 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2579,6 +2579,13 @@ static int cpsw_remove(struct platform_device *pdev)
 {
struct net_device *ndev = platform_get_drvdata(pdev);
struct cpsw_priv *priv = netdev_priv(ndev);
+   int ret;
+
+   ret = pm_runtime_get_sync(&pdev->dev);
+   if (ret < 0) {
+   pm_runtime_put_noidle(&pdev->dev);
+   return ret;
+   }
 
if (priv->data.dual_emac)
unregister_netdev(cpsw_get_slave_ndev(priv, 1));
@@ -2586,8 +2593,9 @@ static int cpsw_remove(struct platform_device *pdev)
 
cpsw_ale_destroy(priv->ale);
cpdma_ctlr_destroy(priv->dma);
-   pm_runtime_disable(&pdev->dev);
device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
+   pm_runtime_put_sync(&pdev->dev);
+   pm_runtime_disable(&pdev->dev);
if (priv->data.dual_emac)
free_netdev(cpsw_get_slave_ndev(priv, 1));
free_netdev(ndev);
-- 
2.9.2



[PATCH v2 1/4] net: ethernet: ti: cpdma: fix lockup in cpdma_ctlr_destroy()

2016-07-28 Thread Grygorii Strashko
Fix deadlock in cpdma_ctlr_destroy() which is triggered now on
cpsw module removal:
 cpsw_remove()
 - cpdma_ctlr_destroy()
   - spin_lock_irqsave(&ctlr->lock, flags)
   - cpdma_ctlr_stop()
 - spin_lock_irqsave(&ctlr->lock, flags);
   - cpdma_chan_destroy()
 - spin_lock_irqsave(&ctlr->lock, flags);

The issue has not been observed before because CPDMA channels have
been destroyed manually by CPSW until commit d941ebe88a41 ("net:
ethernet: ti: cpsw: use destroy ctlr to destroy channels") was merged.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index c8154ff..fdc0f4f 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -357,13 +357,11 @@ EXPORT_SYMBOL_GPL(cpdma_ctlr_stop);
 
 int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
 {
-   unsigned long flags;
int ret = 0, i;
 
if (!ctlr)
return -EINVAL;
 
-   spin_lock_irqsave(&ctlr->lock, flags);
if (ctlr->state != CPDMA_STATE_IDLE)
cpdma_ctlr_stop(ctlr);
 
@@ -371,7 +369,6 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
cpdma_chan_destroy(ctlr->channels[i]);
 
cpdma_desc_pool_destroy(ctlr->pool);
-   spin_unlock_irqrestore(&ctlr->lock, flags);
return ret;
 }
 EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
-- 
2.9.2



[PATCH v2 0/4] drivers: net: cpsw: fix driver loading/unloading

2016-07-28 Thread Grygorii Strashko
This series fixes set of isssues observed when CPSW driver module is 
unloaded/loaded:
1) rmmod: deadlock in cpdma_ctlr_destroy
2) rmmod: L3 back-trace and crash if all net interfaces are down, because CPSW
can be powerred down by PM runtime in this case.
3) insmod: mdio device is not recreated on next insmod
 - need to use of_platform_depopulate() in cpsw_remove().
4) rmmod: system crash on omap_device removal

Tested on: am437x-idk, am57xx-beagle-x15

Changes in v2:
- build warning fixed
- added fix for correct omap_device removal

Link on v1:
 https://lkml.org/lkml/2016/7/22/240

Grygorii Strashko (4):
  net: ethernet: ti: cpdma: fix lockup in cpdma_ctlr_destroy()
  drivers: net: cpsw: fix wrong regs access in cpsw_remove
  drivers: net: cpsw: use of_platform_depopulate()
  ARM: OMAP2+: omap_device: fix crash on omap_device removal

 arch/arm/mach-omap2/omap_device.c   |  2 +-
 drivers/net/ethernet/ti/cpsw.c  | 19 +--
 drivers/net/ethernet/ti/davinci_cpdma.c |  3 ---
 3 files changed, 10 insertions(+), 14 deletions(-)

-- 
2.9.2



[PATCH v2 4/4] ARM: OMAP2+: omap_device: fix crash on omap_device removal

2016-07-28 Thread Grygorii Strashko
Below call chain causes system crash when OMAP device is
removed by calling of_platform_depopulate()/device_del():

device_del()
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 BUS_NOTIFY_DEL_DEVICE, dev);
  - _omap_device_notifier_call()
- omap_device_delete()
  - od->pdev->archdata.od = NULL;
kfree(od->hwmods);
kfree(od);
  - bus_remove_device()
- device_release_driver()
  - __device_release_driver()
- pm_runtime_get_sync()
   - _od_runtime_resume()
 - omap_hwmod_enable() <- OOPS od's delted already

Backtrace:
Unable to handle kernel NULL pointer dereference at virtual address 000d
pgd = eb10
[000d] *pgd=ad6e1831, *pte=, *ppte=
Internal error: Oops: 17 [#1] PREEMPT SMP ARM
CPU: 1 PID: 1273 Comm: modprobe Not tainted 4.4.15-rt19-00115-ge4d3cd3-dirty #68
Hardware name: Generic DRA74X (Flattened Device Tree)
task: eb1ee800 ti: ec962000 task.ti: ec962000
PC is at omap_device_enable+0x10/0x90
LR is at _od_runtime_resume+0x10/0x24
[...]
[] (omap_device_enable) from [] 
(_od_runtime_resume+0x10/0x24)
[] (_od_runtime_resume) from [] (__rpm_callback+0x20/0x34)
[] (__rpm_callback) from [] (rpm_callback+0x20/0x80)
[] (rpm_callback) from [] (rpm_resume+0x48c/0x964)
[] (rpm_resume) from [] (__pm_runtime_resume+0x60/0x88)
[] (__pm_runtime_resume) from [] 
(__device_release_driver+0x30/0x100)
[] (__device_release_driver) from [] 
(device_release_driver+0x1c/0x28)
[] (device_release_driver) from [] 
(bus_remove_device+0xec/0x144)
[] (bus_remove_device) from [] (device_del+0x10c/0x210)
[] (device_del) from [] (platform_device_del+0x18/0x84)
[] (platform_device_del) from [] 
(platform_device_unregister+0xc/0x20)
[] (platform_device_unregister) from [] 
(of_platform_device_destroy+0x8c/0x90)
[] (of_platform_device_destroy) from [] 
(device_for_each_child+0x4c/0x78)
[] (device_for_each_child) from [] 
(of_platform_depopulate+0x30/0x44)
[] (of_platform_depopulate) from [] (cpsw_remove+0x68/0xf4 
[ti_cpsw])
[] (cpsw_remove [ti_cpsw]) from [] 
(platform_drv_remove+0x24/0x3c)
[] (platform_drv_remove) from [] 
(__device_release_driver+0x84/0x100)
[] (__device_release_driver) from [] 
(driver_detach+0xac/0xb0)
[] (driver_detach) from [] (bus_remove_driver+0x60/0xd4)
[] (bus_remove_driver) from [] 
(SyS_delete_module+0x184/0x20c)
[] (SyS_delete_module) from [] (ret_fast_syscall+0x0/0x1c)
Code: e350 e92d4070 1590630c 01a06000 (e5d6300d)

Hence, fix it by using BUS_NOTIFY_REMOVED_DEVICE event for OMAP device
deletion which is sent when DD has finished processing of device
deletion.

Cc: Tony Lindgren 
Cc: Tero Kristo 
Signed-off-by: Grygorii Strashko 
---
 arch/arm/mach-omap2/omap_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/omap_device.c 
b/arch/arm/mach-omap2/omap_device.c
index f7ff3b9..208f115 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -194,7 +194,7 @@ static int _omap_device_notifier_call(struct notifier_block 
*nb,
int err;
 
switch (event) {
-   case BUS_NOTIFY_DEL_DEVICE:
+   case BUS_NOTIFY_REMOVED_DEVICE:
if (pdev->archdata.od)
omap_device_delete(pdev->archdata.od);
break;
-- 
2.9.2



Re: [PATCH v2 4/4] ARM: OMAP2+: omap_device: fix crash on omap_device removal

2016-07-29 Thread Grygorii Strashko

On 07/29/2016 08:15 AM, Peter Ujfalusi wrote:

On 07/28/16 20:50, Grygorii Strashko wrote:

Below call chain causes system crash when OMAP device is
removed by calling of_platform_depopulate()/device_del():


Should you swap 3 <-> 4 in the series?
Currently patch 3 will introduce the crash you are fixing in patch 4...


No. The key function here is device_del() - of_device_unregister(), 
which was used previously has the same issue:

  of_device_unregister() -> device_unregister() ->device_del()

In general all these patches are independent and they were created just 
in bugs detection order.






device_del()
- blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 BUS_NOTIFY_DEL_DEVICE, dev);
  - _omap_device_notifier_call()
- omap_device_delete()
  - od->pdev->archdata.od = NULL;
kfree(od->hwmods);
kfree(od);
  - bus_remove_device()
- device_release_driver()
  - __device_release_driver()
- pm_runtime_get_sync()
   - _od_runtime_resume()
 - omap_hwmod_enable() <- OOPS od's delted already

Backtrace:
Unable to handle kernel NULL pointer dereference at virtual address 000d
pgd = eb10
[000d] *pgd=ad6e1831, *pte=, *ppte=
Internal error: Oops: 17 [#1] PREEMPT SMP ARM
CPU: 1 PID: 1273 Comm: modprobe Not tainted 4.4.15-rt19-00115-ge4d3cd3-dirty #68
Hardware name: Generic DRA74X (Flattened Device Tree)
task: eb1ee800 ti: ec962000 task.ti: ec962000
PC is at omap_device_enable+0x10/0x90
LR is at _od_runtime_resume+0x10/0x24
[...]
[] (omap_device_enable) from [] 
(_od_runtime_resume+0x10/0x24)
[] (_od_runtime_resume) from [] (__rpm_callback+0x20/0x34)
[] (__rpm_callback) from [] (rpm_callback+0x20/0x80)
[] (rpm_callback) from [] (rpm_resume+0x48c/0x964)
[] (rpm_resume) from [] (__pm_runtime_resume+0x60/0x88)
[] (__pm_runtime_resume) from [] 
(__device_release_driver+0x30/0x100)
[] (__device_release_driver) from [] 
(device_release_driver+0x1c/0x28)
[] (device_release_driver) from [] 
(bus_remove_device+0xec/0x144)
[] (bus_remove_device) from [] (device_del+0x10c/0x210)
[] (device_del) from [] (platform_device_del+0x18/0x84)
[] (platform_device_del) from [] 
(platform_device_unregister+0xc/0x20)
[] (platform_device_unregister) from [] 
(of_platform_device_destroy+0x8c/0x90)
[] (of_platform_device_destroy) from [] 
(device_for_each_child+0x4c/0x78)
[] (device_for_each_child) from [] 
(of_platform_depopulate+0x30/0x44)
[] (of_platform_depopulate) from [] (cpsw_remove+0x68/0xf4 
[ti_cpsw])
[] (cpsw_remove [ti_cpsw]) from [] 
(platform_drv_remove+0x24/0x3c)
[] (platform_drv_remove) from [] 
(__device_release_driver+0x84/0x100)
[] (__device_release_driver) from [] 
(driver_detach+0xac/0xb0)
[] (driver_detach) from [] (bus_remove_driver+0x60/0xd4)
[] (bus_remove_driver) from [] 
(SyS_delete_module+0x184/0x20c)
[] (SyS_delete_module) from [] (ret_fast_syscall+0x0/0x1c)
Code: e350 e92d4070 1590630c 01a06000 (e5d6300d)

Hence, fix it by using BUS_NOTIFY_REMOVED_DEVICE event for OMAP device
deletion which is sent when DD has finished processing of device
deletion.

Cc: Tony Lindgren 
Cc: Tero Kristo 
Signed-off-by: Grygorii Strashko 
---
 arch/arm/mach-omap2/omap_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/omap_device.c 
b/arch/arm/mach-omap2/omap_device.c
index f7ff3b9..208f115 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -194,7 +194,7 @@ static int _omap_device_notifier_call(struct notifier_block 
*nb,
int err;

switch (event) {
-   case BUS_NOTIFY_DEL_DEVICE:
+   case BUS_NOTIFY_REMOVED_DEVICE:
if (pdev->archdata.od)
omap_device_delete(pdev->archdata.od);
break;







--
regards,
-grygorii


Re: [PATCH 1/7] net: ethernet: ti: cpdma: am437x: allow descs to be plased in ddr

2016-12-05 Thread Grygorii Strashko


On 12/03/2016 02:34 PM, David Miller wrote:
> From: Grygorii Strashko 
> Date: Thu, 1 Dec 2016 17:34:26 -0600
> 
>> @@ -167,10 +167,10 @@ static struct cpdma_control_info controls[] = {
>>  
>>  /* various accessors */
>>  #define dma_reg_read(ctlr, ofs) __raw_readl((ctlr)->dmaregs + 
>> (ofs))
>> -#define chan_read(chan, fld)__raw_readl((chan)->fld)
>> +#define chan_read(chan, fld)readl((chan)->fld)
>>  #define desc_read(desc, fld)__raw_readl(&(desc)->fld)
>>  #define dma_reg_write(ctlr, ofs, v) __raw_writel(v, (ctlr)->dmaregs + (ofs))
>> -#define chan_write(chan, fld, v)__raw_writel(v, (chan)->fld)
>> +#define chan_write(chan, fld, v)writel(v, (chan)->fld)
>>  #define desc_write(desc, fld, v)__raw_writel((u32)(v), &(desc)->fld)
> 
> Unless you want to keep running into subtle errors all over the
> place wrt. register vs. memory write ordering, I strong suggest
> you use strongly ordered readl/writel for all register accesses.
> 
> I see no tangible, worthwhile, advantage to using these relaxed
> ordering primitives.  The only result is potential bugs.
> 
> People who use the relaxed ordering primitives properly are only
> doing so in extremely carefully coded sequences where a series
> of writes has no dependency on main memory operations and is
> explicitly completed with a barrier operation such as a read
> back of a register in the same device.
> 
> That's not at all what is going on here, instead the driver is wildly
> using relaxed ordered register accesses for basically everything.
> This is extremely unwise and it's why you ran into this bug in the
> first place.
> 
> Therefore, I absolutely require that you fix this by eliminating
> any and all usese of relaxed ordering I/O accessors in this driver.

Thanks for your comments - that's what I've tried first, but that decided
to find out minimal change which still works.

I'll update it. 

-- 
regards,
-grygorii


Re: [PATCH 2/7] net: ethernet: ti: cpdma: fix desc re-queuing

2016-12-05 Thread Grygorii Strashko


On 12/02/2016 05:28 PM, Ivan Khoronzhuk wrote:
> On Fri, Dec 02, 2016 at 10:45:07AM -0600, Grygorii Strashko wrote:
>>
>>
>> On 12/02/2016 05:03 AM, Ivan Khoronzhuk wrote:
>>> On Thu, Dec 01, 2016 at 05:34:27PM -0600, Grygorii Strashko wrote:
>>>> The currently processing cpdma descriptor with EOQ flag set may
>>>> contain two values in Next Descriptor Pointer field:
>>>> - valid pointer: means CPDMA missed addition of new desc in queue;
>>> It shouldn't happen in normal circumstances, right?
>>
>> it might happen, because desc push compete with desc pop.
>> You can check stats values:
>> chan->stats.misqueued
>> chan->stats.requeue
>>  under different types of net-loads.
> I've done this, of-course.
> By whole logic the misqueued counter has to cover all cases.
> But that's not true.
> 
>>
>> TRM:
>> "
>> If the pNext pointer is initially NULL, and more packets need to be queued 
>> for transmit, the software
>> application may alter this pointer to point to a newly appended descriptor. 
>> The EMAC will use the new
>> pointer value and proceed to the next descriptor unless the pNext value has 
>> already been read. In this
>> latter case, the transmitter will halt on the transmit channel in question, 
>> and the software application may
>> restart it at that time. The software can detect this case by checking for 
>> an end of queue (EOQ) condition
>> flag on the updated packet descriptor when it is returned by the EMAC.
>> "
> That's true. No issues in desc.
> In the code no any place to update next_desc except submit function.
> 
> And this case is supposed to be caught here:
> For submit:
> cpdma_chan_submit()
> spin_lock_irqsave(&chan->lock);
> ...
> --->__cpdma_chan_submit()
> ...
> --> desc_write(prev, hw_next, desc_dma); // here next pointer is updated, 
> it can be not in time
> ...
> --> mode = desc_read(prev, hw_mode); // pay attention, it's read after 
> updating next pointer
> --> if ((mode & CPDMA_DESC_EOQ) &&
> --> (chan->state == CPDMA_STATE_ACTIVE)) { // here checked if it was late 
> update
> -> chan_write(chan, hdp, desc_dma); // here transmit is restarted, if 
> needed

You've forgot about CPPI hw itself - it's not sync by sw, and so continue run in
background - as result, CPPI might read "next" already, but flags are not 
updated yet.

> 
> For process it only caught the fact of late update, but it has to be caught in
> submit() already:
> __cpdma_chan_process()
> spin_lock_irqsave(&chan->lock);
> --> if (mode & CPDMA_DESC_EOQ) // here transmit is restarted, if needed
> -> chan_write(chan, hdp, desc_dma); // but w/o updating next pointer
> 
> Seems there is no place where hw_next is updated w/o updating hdp :-| in case
> of late hw_next set. And that is strange. I know it happens, I've checked it
> before of-course. Then I thought, maybe there is some problem with write 
> order,
> thus out of sync, nothing more.
> 
>>
>>
>>> So, why it happens only for egress channels? And Does that mean
>>> there is some resynchronization between submit and process function,
>>> or this is h/w issue?
>>
>> no hw issues. this patch just removes one unnecessary I/O access 
> No objections against patch. Anyway it's better then before.
> Just want to know the real reason why it happens, maybe there is smth else.
> 
>>
>>>
>>>> - null: no more descriptors in queue.
>>>> In the later case, it's not required to write to HDP register, but now
>>>> CPDMA does it.
>>>>
>>>> Hence, add additional check for Next Descriptor Pointer != null in
>>>> cpdma_chan_process() function before writing in HDP register.
>>>>
>>>> Signed-off-by: Grygorii Strashko 
>>>> ---
>>>>  drivers/net/ethernet/ti/davinci_cpdma.c | 2 +-
>>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
>>>> b/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> index 0924014..379314f 100644
>>>> --- a/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> +++ b/drivers/net/ethernet/ti/davinci_cpdma.c
>>>> @@ -1152,7 +1152,7 @@ static int __cpdma_chan_process(struct cpdma_chan 
>>>> *chan)
>>>>chan->count--;
>>>>chan->stats.good_dequeue++;
>>>>  
>>>> -  if (status & CPDMA_DESC_EOQ) {
>>>> +  if ((status & CPDMA_DESC_EOQ) && chan->head) {
>>>>chan->stats.requeue++;
>>>>chan_write(chan, hdp, desc_phys(pool, chan->head));
>>>>}
>>>> -- 
>>>> 2.10.1
>>>>
>>
>> -- 
>> regards,
>> -grygorii

-- 
regards,
-grygorii


Re: [PATCH 1/6] net: ethernet: ti: netcp: add support of cpts

2016-12-05 Thread Grygorii Strashko


On 12/05/2016 08:49 AM, Rob Herring wrote:
> On Mon, Nov 28, 2016 at 05:04:23PM -0600, Grygorii Strashko wrote:
>> From: WingMan Kwok 
>>
>> This patch adds support of the cpts device found in the
>> gbe and 10gbe ethernet switches on the keystone 2 SoCs
>> (66AK2E/L/Hx, 66AK2Gx).
>>
>> Signed-off-by: WingMan Kwok 
>> Signed-off-by: Grygorii Strashko 
>> ---
>>  .../devicetree/bindings/net/keystone-netcp.txt |   9 +
>>  drivers/net/ethernet/ti/Kconfig|   7 +-
>>  drivers/net/ethernet/ti/netcp.h|   2 +-
>>  drivers/net/ethernet/ti/netcp_core.c   |  18 +-
>>  drivers/net/ethernet/ti/netcp_ethss.c  | 437 
>> -
>>  5 files changed, 459 insertions(+), 14 deletions(-)
>>
>> diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt 
>> b/Documentation/devicetree/bindings/net/keystone-netcp.txt
>> index 04ba1dc..c37b54e 100644
>> --- a/Documentation/devicetree/bindings/net/keystone-netcp.txt
>> +++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
>> @@ -113,6 +113,15 @@ Optional properties:
>>  will only initialize these ports and attach PHY
>>  driver to them if needed.
>>  
>> + Properties related to cpts configurations.
>> +- cpts_clock_mult/cpts_clock_shift:
> 
> Needs vendor prefix. Don't use '_'.

This module is used as part of OMAP and Keystone SoCs, so names for
this props is ABI already :(

> 
>> +used for converting time counter cycles to ns as in
>> +
>> +ns = (cycles * clock_mult) >> _shift
>> +
>> +Defaults: clock_mult, clock_shift = calculated from
>> +CPTS refclk
> 
> What does this mean?
> 

I'll add more description here.

>> +
>>  NetCP interface properties: Interface specification for NetCP sub-modules.
>>  Required properties:
>>  - rx-channel:   the navigator packet dma channel name for rx.

-- 
regards,
-grygorii


Re: [PATCH v3 00/13] net: ethernet: ti: cpts: update and fixes

2016-12-05 Thread Grygorii Strashko



On 12/03/2016 03:22 AM, Richard Cochran wrote:

On Fri, Dec 02, 2016 at 02:30:10PM -0600, Grygorii Strashko wrote:

It is preparation series intended to clean up and optimize TI CPTS driver to
facilitate further integration with other TI's SoCs like Keystone 2.

Changes in v3:
- patches reordered: fixes and small updates moved first
- added comments in code about cpts->cc_mult
- conversation range (maxsec) limited to 10sec


On net-next:

$ git am ~/grygorii.strashko
Applying: net: ethernet: ti: cpts: switch to readl/writel_relaxed()
Applying: net: ethernet: ti: allow cpts to be built separately
error: patch failed: drivers/net/ethernet/ti/cpsw.c:1963
error: drivers/net/ethernet/ti/cpsw.c: patch does not apply
Patch failed at 0002 net: ethernet: ti: allow cpts to be built separately


Sorry for that, also there build error due to patch reordering :(



Also, you have the order of the SOB tags wrong.  The author's SOB goes
first.


Will fix and resend, sorry again.


--
regards,
-grygorii


Re: [PATCH] net: ethernet: ti: cpdma: use desc_read in chan_process instead of raw read

2016-12-05 Thread Grygorii Strashko



On 12/02/2016 08:05 PM, Ivan Khoronzhuk wrote:

There is desc_read() macros to read desc fields, so no need to
use __raw_readl();

Signed-off-by: Ivan Khoronzhuk 



I'm going to update it all at once as part of [1].

[1] https://lkml.org/lkml/2016/12/1/781


---
Based on net-next/master

 drivers/net/ethernet/ti/davinci_cpdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c 
b/drivers/net/ethernet/ti/davinci_cpdma.c
index c776e45..d96dca5 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1132,7 +1132,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
}
desc_dma = desc_phys(pool, desc);

-   status  = __raw_readl(&desc->hw_mode);
+   status = desc_read(desc, hw_mode);
outlen  = status & 0x7ff;
if (status & CPDMA_DESC_OWNER) {
chan->stats.busy_dequeue++;



--
regards,
-grygorii


[PATCH v4 09/13] net: ethernet: ti: cpts: rework initialization/deinitialization

2016-12-05 Thread Grygorii Strashko
The current implementation CPTS initialization and deinitialization
(represented by cpts_register/unregister()) does too many static
initialization from .ndo_open(), which is reasonable to do once at probe
time instead, and also require caller to allocate memory for struct cpts,
which is internal for CPTS driver in general.

This patch splits CPTS initialization and deinitialization on two parts:

- static initializtion cpts_create()/cpts_release() which expected to be
executed when parent driver is probed/removed;

- dynamic part cpts_register/unregister() which expected to be executed
when network device is opened/closed.

As result, current code of CPTS parent driver - CPSW - will be simplified
(and it also will allow simplify adding support for Keystone 2 devices in
the future), plus more initialization errors will be catched earlier. In
addition, this change allows to clean up cpts.h for the case when CPTS is
disabled.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c |  24 +-
 drivers/net/ethernet/ti/cpts.c | 102 -
 drivers/net/ethernet/ti/cpts.h |  26 +--
 3 files changed, 95 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index ec05e20..deb008a 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1486,9 +1486,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
if (ret < 0)
goto err_cleanup;
 
-   if (cpts_register(cpsw->dev, cpsw->cpts,
- cpsw->data.cpts_clock_mult,
- cpsw->data.cpts_clock_shift))
+   if (cpts_register(cpsw->cpts))
dev_err(priv->dev, "error registering cpts device\n");
 
}
@@ -2796,6 +2794,7 @@ static int cpsw_probe(struct platform_device *pdev)
struct cpdma_params dma_params;
struct cpsw_ale_params  ale_params;
void __iomem*ss_regs;
+   void __iomem*cpts_regs;
struct resource *res, *ss_res;
const struct of_device_id   *of_id;
struct gpio_descs   *mode;
@@ -2823,12 +2822,6 @@ static int cpsw_probe(struct platform_device *pdev)
priv->dev  = &ndev->dev;
priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
cpsw->rx_packet_max = max(rx_packet_max, 128);
-   cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-   if (!cpsw->cpts) {
-   dev_err(&pdev->dev, "error allocating cpts\n");
-   ret = -ENOMEM;
-   goto clean_ndev_ret;
-   }
 
mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
if (IS_ERR(mode)) {
@@ -2916,7 +2909,7 @@ static int cpsw_probe(struct platform_device *pdev)
switch (cpsw->version) {
case CPSW_VERSION_1:
cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-   cpsw->cpts->reg  = ss_regs + CPSW1_CPTS_OFFSET;
+   cpts_regs   = ss_regs + CPSW1_CPTS_OFFSET;
cpsw->hw_stats   = ss_regs + CPSW1_HW_STATS;
dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
dma_params.txhdp = ss_regs + CPSW1_STATERAM_OFFSET;
@@ -2930,7 +2923,7 @@ static int cpsw_probe(struct platform_device *pdev)
case CPSW_VERSION_3:
case CPSW_VERSION_4:
cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-   cpsw->cpts->reg  = ss_regs + CPSW2_CPTS_OFFSET;
+   cpts_regs   = ss_regs + CPSW2_CPTS_OFFSET;
cpsw->hw_stats   = ss_regs + CPSW2_HW_STATS;
dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
dma_params.txhdp = ss_regs + CPSW2_STATERAM_OFFSET;
@@ -2997,6 +2990,14 @@ static int cpsw_probe(struct platform_device *pdev)
goto clean_dma_ret;
}
 
+   cpsw->cpts = cpts_create(cpsw->dev, cpts_regs,
+cpsw->data.cpts_clock_mult,
+cpsw->data.cpts_clock_shift);
+   if (IS_ERR(cpsw->cpts)) {
+   ret = PTR_ERR(cpsw->cpts);
+   goto clean_ale_ret;
+   }
+
ndev->irq = platform_get_irq(pdev, 1);
if (ndev->irq < 0) {
dev_err(priv->dev, "error getting irq resource\n");
@@ -3112,6 +3113,7 @@ static int cpsw_remove(struct platform_device *pdev)
unregister_netdev(cpsw->slaves[1].ndev);
unregister_netdev(ndev);
 
+   cpts_release(cpsw->cpts);
cpsw_ale_destroy(cpsw->ale);
cpdma_ctlr_destroy(cpsw->

[PATCH v4 04/13] net: ethernet: ti: cpts: fix unbalanced clk api usage in cpts_register/unregister

2016-12-05 Thread Grygorii Strashko
There are two issues with TI CPTS code which are reproducible when TI
CPSW ethX device passes few up/down iterations:
- cpts refclk prepare counter continuously incremented after each
up/down iteration;
- devm_clk_get(dev, "cpts") is called many times.

Hence, fix these issues by using clk_disable_unprepare() in
cpts_clk_release() and skipping devm_clk_get() if cpts refclk has been
acquired already.

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 8cb0369..61198f1 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -230,18 +230,20 @@ static void cpts_overflow_check(struct work_struct *work)
 
 static void cpts_clk_init(struct device *dev, struct cpts *cpts)
 {
-   cpts->refclk = devm_clk_get(dev, "cpts");
-   if (IS_ERR(cpts->refclk)) {
-   dev_err(dev, "Failed to get cpts refclk\n");
-   cpts->refclk = NULL;
-   return;
+   if (!cpts->refclk) {
+   cpts->refclk = devm_clk_get(dev, "cpts");
+   if (IS_ERR(cpts->refclk)) {
+   dev_err(dev, "Failed to get cpts refclk\n");
+   cpts->refclk = NULL;
+   return;
+   }
}
clk_prepare_enable(cpts->refclk);
 }
 
 static void cpts_clk_release(struct cpts *cpts)
 {
-   clk_disable(cpts->refclk);
+   clk_disable_unprepare(cpts->refclk);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
-- 
2.10.1



[PATCH v4 08/13] net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs

2016-12-05 Thread Grygorii Strashko
CPTS module and IRQs are always enabled when CPTS is registered,
before starting overflow check work, and disabled during
deregistration, when overflow check work has been canceled already.
So, It doesn't require to (re)enable CPTS module and IRQs in
cpts_overflow_check().

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 7ab1fa7..fe1bb7f 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -243,8 +243,6 @@ static void cpts_overflow_check(struct work_struct *work)
struct timespec64 ts;
struct cpts *cpts = container_of(work, struct cpts, overflow_work.work);
 
-   cpts_write32(cpts, CPTS_EN, control);
-   cpts_write32(cpts, TS_PEND_EN, int_enable);
cpts_ptp_gettime(&cpts->info, &ts);
pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-- 
2.10.1



[PATCH v4 13/13] net: ethernet: ti: cpts: fix overflow check period

2016-12-05 Thread Grygorii Strashko
The CPTS drivers uses 8sec period for overflow checking with
assumption that CPTS retclk will not exceed 500MHz. But that's not
true on some TI platforms (Kesytone 2). As result, it is possible that
CPTS counter will overflow more than once between two readings.

Hence, fix it by selecting overflow check period dynamically as
max_sec_before_overflow/2, where
 max_sec_before_overflow = max_counter_val / rftclk_freq.

Cc: John Stultz 
Cc: Thomas Gleixner 
Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 10 +++---
 drivers/net/ethernet/ti/cpts.h |  4 +---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 361d13a..a60d837 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -245,7 +245,7 @@ static void cpts_overflow_check(struct work_struct *work)
 
cpts_ptp_gettime(&cpts->info, &ts);
pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+   schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -382,8 +382,7 @@ int cpts_register(struct cpts *cpts)
}
cpts->phc_index = ptp_clock_index(cpts->clock);
 
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-
+   schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
return 0;
 
 err_ptp:
@@ -427,6 +426,11 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
if (maxsec > 10)
maxsec = 10;
 
+   /* Calc overflow check period (maxsec / 2) */
+   cpts->ov_check_period = (HZ * maxsec) / 2;
+   dev_info(cpts->dev, "cpts: overflow check period %lu (jiffies)\n",
+cpts->ov_check_period);
+
if (cpts->cc_mult || cpts->cc.shift)
return;
 
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 5da23af..c96eca2 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -97,9 +97,6 @@ enum {
CPTS_EV_TX,   /* Ethernet Transmit Event */
 };
 
-/* This covers any input clock up to about 500 MHz. */
-#define CPTS_OVERFLOW_PERIOD (HZ * 8)
-
 #define CPTS_FIFO_DEPTH 16
 #define CPTS_MAX_EVENTS 32
 
@@ -127,6 +124,7 @@ struct cpts {
struct list_head events;
struct list_head pool;
struct cpts_event pool_data[CPTS_MAX_EVENTS];
+   unsigned long ov_check_period;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
2.10.1



[PATCH v4 06/13] net: ethernet: ti: cpts: disable cpts when unregistered

2016-12-05 Thread Grygorii Strashko
The cpts now is left enabled after unregistration.
Hence, disable it in cpts_unregister().

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 3dda6d5..d3c1ac5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -404,6 +404,10 @@ void cpts_unregister(struct cpts *cpts)
ptp_clock_unregister(cpts->clock);
cancel_delayed_work_sync(&cpts->overflow_work);
}
+
+   cpts_write32(cpts, 0, int_enable);
+   cpts_write32(cpts, 0, control);
+
if (cpts->refclk)
cpts_clk_release(cpts);
 }
-- 
2.10.1



[PATCH v4 07/13] net: ethernet: ti: cpts: clean up event list if event pool is empty

2016-12-05 Thread Grygorii Strashko
From: WingMan Kwok 

When a CPTS user does not exit gracefully by disabling cpts
timestamping and leaving a joined multicast group, the system
continues to receive and timestamps the ptp packets which eventually
occupy all the event list entries.  When this happns, the added code
tries to remove some list entries which are expired.

Signed-off-by: WingMan Kwok 
Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpts.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index d3c1ac5..7ab1fa7 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -57,6 +57,26 @@ static int cpts_fifo_pop(struct cpts *cpts, u32 *high, u32 
*low)
return -1;
 }
 
+static int cpts_purge_events(struct cpts *cpts)
+{
+   struct list_head *this, *next;
+   struct cpts_event *event;
+   int removed = 0;
+
+   list_for_each_safe(this, next, &cpts->events) {
+   event = list_entry(this, struct cpts_event, list);
+   if (event_expired(event)) {
+   list_del_init(&event->list);
+   list_add(&event->list, &cpts->pool);
+   ++removed;
+   }
+   }
+
+   if (removed)
+   pr_debug("cpts: event pool cleaned up %d\n", removed);
+   return removed ? 0 : -1;
+}
+
 /*
  * Returns zero if matching event type was found.
  */
@@ -69,10 +89,12 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
for (i = 0; i < CPTS_FIFO_DEPTH; i++) {
if (cpts_fifo_pop(cpts, &hi, &lo))
break;
-   if (list_empty(&cpts->pool)) {
-   pr_err("cpts: event pool is empty\n");
+
+   if (list_empty(&cpts->pool) && cpts_purge_events(cpts)) {
+   pr_err("cpts: event pool empty\n");
return -1;
}
+
event = list_first_entry(&cpts->pool, struct cpts_event, list);
event->tmo = jiffies + 2;
event->high = hi;
-- 
2.10.1



[PATCH v4 01/13] net: ethernet: ti: cpts: switch to readl/writel_relaxed()

2016-12-05 Thread Grygorii Strashko
Switch to readl/writel_relaxed() APIs, because this is recommended
API and the CPTS IP is reused on Keystone 2 SoCs
where LE/BE modes are supported.

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 85a55b4..a42c449 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -33,8 +33,8 @@
 
 #ifdef CONFIG_TI_CPTS
 
-#define cpts_read32(c, r)  __raw_readl(&c->reg->r)
-#define cpts_write32(c, v, r)  __raw_writel(v, &c->reg->r)
+#define cpts_read32(c, r)  readl_relaxed(&c->reg->r)
+#define cpts_write32(c, v, r)  writel_relaxed(v, &c->reg->r)
 
 static int event_expired(struct cpts_event *event)
 {
-- 
2.10.1



[PATCH v4 03/13] net: ethernet: ti: cpsw: minimize direct access to struct cpts

2016-12-05 Thread Grygorii Strashko
This will provide more flexibility in changing CPTS internals and also
required for further changes.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 28 +++-
 drivers/net/ethernet/ti/cpts.h | 39 +++
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 323174d..ec05e20 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1562,7 +1562,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
}
 
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-   cpsw->cpts->tx_enable)
+   cpts_is_tx_enabled(cpsw->cpts))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
skb_tx_timestamp(skb);
@@ -1601,7 +1601,8 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
u32 ts_en, seq_id;
 
-   if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
+   if (!cpts_is_tx_enabled(cpsw->cpts) &&
+   !cpts_is_rx_enabled(cpsw->cpts)) {
slave_write(slave, 0, CPSW1_TS_CTL);
return;
}
@@ -1609,10 +1610,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ts_en |= CPSW_V1_TS_TX_EN;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ts_en |= CPSW_V1_TS_RX_EN;
 
slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1635,20 +1636,20 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
case CPSW_VERSION_2:
ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ctrl |= CTRL_V2_TX_TS_BITS;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ctrl |= CTRL_V2_RX_TS_BITS;
break;
case CPSW_VERSION_3:
default:
ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ctrl |= CTRL_V3_TX_TS_BITS;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ctrl |= CTRL_V3_RX_TS_BITS;
break;
}
@@ -1684,7 +1685,7 @@ static int cpsw_hwtstamp_set(struct net_device *dev, 
struct ifreq *ifr)
 
switch (cfg.rx_filter) {
case HWTSTAMP_FILTER_NONE:
-   cpts->rx_enable = 0;
+   cpts_rx_enable(cpts, 0);
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
@@ -1700,14 +1701,14 @@ static int cpsw_hwtstamp_set(struct net_device *dev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-   cpts->rx_enable = 1;
+   cpts_rx_enable(cpts, 1);
cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
break;
default:
return -ERANGE;
}
 
-   cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
+   cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
 
switch (cpsw->version) {
case CPSW_VERSION_1:
@@ -1736,8 +1737,9 @@ static int cpsw_hwtstamp_get(struct net_device *dev, 
struct ifreq *ifr)
return -EOPNOTSUPP;
 
cfg.flags = 0;
-   cfg.tx_type = cpts->tx_enable ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-   cfg.rx_filter = (cpts->rx_enable ?
+   cfg.tx_type = cpts_is_tx_enabled(cpts) ?
+ HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+   cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
 HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE);
 
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 416ba2c..29a1e80c 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -132,6 +132,27 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff 
*skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
 void cpts_unregister(struct cpts *cpts);
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+   cpts->rx_enable = enab

[PATCH v4 11/13] clocksource: export the clocks_calc_mult_shift to use by timestamp code

2016-12-05 Thread Grygorii Strashko
From: Murali Karicheri 

The CPSW CPTS driver is capable of doing timestamping on tx/rx packets and
requires to know mult and shift factors for timestamp conversion from raw
value to nanoseconds (ptp clock). Now these mult and shift factors are
calculated manually and provided through DT, which makes very hard to
support of a lot number of platforms, especially if CPTS refclk is not the
same for some kind of boards and depends on efuse settings (Keystone 2
platforms). Hence, export clocks_calc_mult_shift() to allow drivers like
CPSW CPTS (and other ptp drivesr) to benefit from automaitc calculation of
mult and shift factors.

Cc: John Stultz 
Signed-off-by: Murali Karicheri 
Signed-off-by: Grygorii Strashko 
Acked-by: Thomas Gleixner 
---
 kernel/time/clocksource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7e4fad7..150242c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -89,6 +89,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 
to, u32 maxsec)
*mult = tmp;
*shift = sft;
 }
+EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
 
 /*[Clocksource internal variables]-
  * curr_clocksource:
-- 
2.10.1



[PATCH v4 00/13] net: ethernet: ti: cpts: update and fixes

2016-12-05 Thread Grygorii Strashko
It is preparation series intended to clean up and optimize TI CPTS driver to
facilitate further integration with other TI's SoCs like Keystone 2.

Changes in v4:
- fixed build error in patch
  "net: ethernet: ti: cpts: clean up event list if event pool is empty"
- rebased on top of net-next
 
Changes in v3:
- patches reordered: fixes and small updates moved first
- added comments in code about cpts->cc_mult
- conversation range (maxsec) limited to 10sec

Changes in v2:
- patch "net: ethernet: ti: cpts: rework initialization/deinitialization"
  was split on 4 patches
- applied comments from Richard Cochran
- dropped patch
  "net: ethernet: ti: cpts: add return value to tx and rx timestamp funcitons"
- new patches added:
  "net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs"
  and "clocksource: export the clocks_calc_mult_shift to use by timestamp code"

Links on prev versions:
v3: https://www.spinics.net/lists/devicetree/msg153474.html
v2: http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1282034.html
v1: http://www.spinics.net/lists/linux-omap/msg131925.html

Grygorii Strashko (11):
  net: ethernet: ti: cpts: switch to readl/writel_relaxed()
  net: ethernet: ti: allow cpts to be built separately
  net: ethernet: ti: cpsw: minimize direct access to struct cpts
  net: ethernet: ti: cpts: fix unbalanced clk api usage in 
cpts_register/unregister
  net: ethernet: ti: cpts: fix registration order
  net: ethernet: ti: cpts: disable cpts when unregistered
  net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs
  net: ethernet: ti: cpts: rework initialization/deinitialization
  net: ethernet: ti: cpts: move dt props parsing to cpts driver
  net: ethernet: ti: cpts: calc mult and shift from refclk freq
  net: ethernet: ti: cpts: fix overflow check period

Murali Karicheri (1):
  clocksource: export the clocks_calc_mult_shift to use by timestamp code

WingMan Kwok (1):
  net: ethernet: ti: cpts: clean up event list if event pool is empty

 Documentation/devicetree/bindings/net/cpsw.txt |   8 +-
 drivers/net/ethernet/ti/Kconfig|   2 +-
 drivers/net/ethernet/ti/Makefile   |   3 +-
 drivers/net/ethernet/ti/cpsw.c |  84 -
 drivers/net/ethernet/ti/cpsw.h |   2 -
 drivers/net/ethernet/ti/cpts.c | 239 +++--
 drivers/net/ethernet/ti/cpts.h |  80 -
 kernel/time/clocksource.c  |   1 +
 8 files changed, 304 insertions(+), 115 deletions(-)

-- 
2.10.1



[PATCH v4 02/13] net: ethernet: ti: allow cpts to be built separately

2016-12-05 Thread Grygorii Strashko
TI CPTS IP is used as part of TI OMAP CPSW driver, but it's also
present as part of NETCP on TI Keystone 2 SoCs. So, It's required
to enable build of CPTS for both this drivers and this can be
achieved by allowing CPTS to be built separately.

Hence, allow cpts to be built separately and convert it to be
a module as both CPSW and NETCP drives can be built as modules.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/Kconfig  |  2 +-
 drivers/net/ethernet/ti/Makefile |  3 ++-
 drivers/net/ethernet/ti/cpsw.c   | 22 +-
 drivers/net/ethernet/ti/cpts.c   | 16 
 drivers/net/ethernet/ti/cpts.h   | 18 ++
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9904d74..ff7f518 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -74,7 +74,7 @@ config TI_CPSW
  will be called cpsw.
 
 config TI_CPTS
-   bool "TI Common Platform Time Sync (CPTS) Support"
+   tristate "TI Common Platform Time Sync (CPTS) Support"
depends on TI_CPSW
select PTP_1588_CLOCK
---help---
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index d420d94..1e7c10b 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -12,8 +12,9 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
 obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
 obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
+obj-$(CONFIG_TI_CPTS) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
-ti_cpsw-y := cpsw.o cpts.o
+ti_cpsw-y := cpsw.o
 
 obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o
 keystone_netcp-y := netcp_core.o
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 3f96c57..323174d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1594,7 +1594,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
return NETDEV_TX_BUSY;
 }
 
-#ifdef CONFIG_TI_CPTS
+#if IS_ENABLED(CONFIG_TI_CPTS)
 
 static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
@@ -1742,7 +1742,16 @@ static int cpsw_hwtstamp_get(struct net_device *dev, 
struct ifreq *ifr)
 
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
 }
+#else
+static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+   return -EOPNOTSUPP;
+}
 
+static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+   return -EOPNOTSUPP;
+}
 #endif /*CONFIG_TI_CPTS*/
 
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
@@ -1755,12 +1764,10 @@ static int cpsw_ndo_ioctl(struct net_device *dev, 
struct ifreq *req, int cmd)
return -EINVAL;
 
switch (cmd) {
-#ifdef CONFIG_TI_CPTS
case SIOCSHWTSTAMP:
return cpsw_hwtstamp_set(dev, req);
case SIOCGHWTSTAMP:
return cpsw_hwtstamp_get(dev, req);
-#endif
}
 
if (!cpsw->slaves[slave_no].phy)
@@ -2100,10 +2107,10 @@ static void cpsw_set_msglevel(struct net_device *ndev, 
u32 value)
priv->msg_enable = value;
 }
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
 static int cpsw_get_ts_info(struct net_device *ndev,
struct ethtool_ts_info *info)
 {
-#ifdef CONFIG_TI_CPTS
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
info->so_timestamping =
@@ -2120,7 +2127,12 @@ static int cpsw_get_ts_info(struct net_device *ndev,
info->rx_filters =
(1 << HWTSTAMP_FILTER_NONE) |
(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+   return 0;
+}
 #else
+static int cpsw_get_ts_info(struct net_device *ndev,
+   struct ethtool_ts_info *info)
+{
info->so_timestamping =
SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE |
@@ -2128,9 +2140,9 @@ static int cpsw_get_ts_info(struct net_device *ndev,
info->phc_index = -1;
info->tx_types = 0;
info->rx_filters = 0;
-#endif
return 0;
 }
+#endif
 
 static int cpsw_get_link_ksettings(struct net_device *ndev,
   struct ethtool_link_ksettings *ecmd)
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index a42c449..8cb0369 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,8 +31,6 @@
 
 #include "cpts.h"
 
-#ifdef CONFIG_TI_CPTS
-
 #define cpts_read32(c, r)  readl_relaxed(&c->reg->r)
 #define cpts_write32(c, v, r)  writel_relaxed(v, &c->reg->r)
 
@@ -334,6 +332,7 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff 
*skb)
memset(ssh, 0, sizeof(*ssh));
ssh->hwtstamp = ns_to_ktime(ns);
 }
+EXPORT_SYMBOL_GPL(cpts_rx_t

[PATCH v4 12/13] net: ethernet: ti: cpts: calc mult and shift from refclk freq

2016-12-05 Thread Grygorii Strashko
The cyclecounter mult and shift values can be calculated based on the
CPTS rfclk frequency and timekeepnig framework provides required algos
and API's.

Hence, calc mult and shift basing on CPTS rfclk frequency if both
cpts_clock_shift and cpts_clock_mult properties are not provided in DT (the
basis of calculation algorithm is borrowed from
__clocksource_update_freq_scale() commit 7d2f944a2b83 ("clocksource:
Provide a generic mult/shift factor calculation")). After this change
cpts_clock_shift and cpts_clock_mult DT properties will become optional.

Cc: John Stultz 
Cc: Thomas Gleixner 
Signed-off-by: Grygorii Strashko 
---
 Documentation/devicetree/bindings/net/cpsw.txt |  8 ++--
 drivers/net/ethernet/ti/cpts.c | 53 +++---
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
b/Documentation/devicetree/bindings/net/cpsw.txt
index 5ad439f..ebda7c9 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -20,8 +20,6 @@ Required properties:
 - slaves   : Specifies number for slaves
 - active_slave : Specifies the slave to use for time stamping,
  ethtool and SIOCGMIIPHY
-- cpts_clock_mult  : Numerator to convert input clock ticks into 
nanoseconds
-- cpts_clock_shift : Denominator to convert input clock ticks into 
nanoseconds
 
 Optional properties:
 - ti,hwmods: Must be "cpgmac0"
@@ -35,7 +33,11 @@ Optional properties:
  For example in dra72x-evm, pcf gpio has to be
  driven low so that cpsw slave 0 and phy data
  lines are connected via mux.
-
+- cpts_clock_mult  : Numerator to convert input clock ticks into 
nanoseconds
+- cpts_clock_shift : Denominator to convert input clock ticks into 
nanoseconds
+ Mult and shift will be calculated basing on CPTS
+ rftclk frequency if both cpts_clock_shift and
+ cpts_clock_mult properties are not provided.
 
 Slave Properties:
 Required properties:
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 59c09a4..361d13a 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -409,21 +409,60 @@ void cpts_unregister(struct cpts *cpts)
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+static void cpts_calc_mult_shift(struct cpts *cpts)
+{
+   u64 frac, maxsec, ns;
+   u32 freq, mult, shift;
+
+   freq = clk_get_rate(cpts->refclk);
+
+   /* Calc the maximum number of seconds which we can run before
+* wrapping around.
+*/
+   maxsec = cpts->cc.mask;
+   do_div(maxsec, freq);
+   /* limit conversation rate to 10 sec as higher values will produce
+* too small mult factors and so reduce the conversion accuracy
+*/
+   if (maxsec > 10)
+   maxsec = 10;
+
+   if (cpts->cc_mult || cpts->cc.shift)
+   return;
+
+   clocks_calc_mult_shift(&mult, &shift, freq, NSEC_PER_SEC, maxsec);
+
+   cpts->cc_mult = mult;
+   cpts->cc.mult = mult;
+   cpts->cc.shift = shift;
+
+   frac = 0;
+   ns = cyclecounter_cyc2ns(&cpts->cc, freq, cpts->cc.mask, &frac);
+
+   dev_info(cpts->dev,
+"CPTS: ref_clk_freq:%u calc_mult:%u calc_shift:%u error:%lld 
nsec/sec\n",
+freq, cpts->cc_mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
+}
+
 static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
 {
int ret = -EINVAL;
u32 prop;
 
-   if (of_property_read_u32(node, "cpts_clock_mult", &prop))
-   goto  of_error;
/* save cc.mult original value as it can be modified
 * by cpts_ptp_adjfreq().
 */
-   cpts->cc_mult = prop;
+   cpts->cc_mult = 0;
+   if (!of_property_read_u32(node, "cpts_clock_mult", &prop))
+   cpts->cc_mult = prop;
+
+   cpts->cc.shift = 0;
+   if (!of_property_read_u32(node, "cpts_clock_shift", &prop))
+   cpts->cc.shift = prop;
 
-   if (of_property_read_u32(node, "cpts_clock_shift", &prop))
-   goto  of_error;
-   cpts->cc.shift = prop;
+   if ((cpts->cc_mult && !cpts->cc.shift) ||
+   (!cpts->cc_mult && cpts->cc.shift))
+   goto of_error;
 
return 0;
 
@@ -463,6 +502,8 @@ struct cpts *cpts_create(struct device *dev, void __iomem 
*regs,
cpts->cc.mask = CLOCKSOURCE_MASK(32);
cpts->info = cpts_info;
 
+   cpts_calc_mult_shift(cpts);
+
return cpts;
 }
 EXPORT_SYMBOL_GPL(cpts_create);
-- 
2.10.1



[PATCH v4 10/13] net: ethernet: ti: cpts: move dt props parsing to cpts driver

2016-12-05 Thread Grygorii Strashko
Move DT properties parsing into CPTS driver to simplify CPSW
code and CPTS driver porting on other SoC in the future
(like Keystone 2) - with this change it will not be required
to add the same DT parsing code in Keystone 2 NETCP driver.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 16 +---
 drivers/net/ethernet/ti/cpsw.h |  2 --
 drivers/net/ethernet/ti/cpts.c | 32 +---
 drivers/net/ethernet/ti/cpts.h |  5 +++--
 4 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index deb008a..259c717 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2524,18 +2524,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
}
data->active_slave = prop;
 
-   if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
-   dev_err(&pdev->dev, "Missing cpts_clock_mult property in the 
DT.\n");
-   return -EINVAL;
-   }
-   data->cpts_clock_mult = prop;
-
-   if (of_property_read_u32(node, "cpts_clock_shift", &prop)) {
-   dev_err(&pdev->dev, "Missing cpts_clock_shift property in the 
DT.\n");
-   return -EINVAL;
-   }
-   data->cpts_clock_shift = prop;
-
data->slave_data = devm_kzalloc(&pdev->dev, data->slaves
* sizeof(struct cpsw_slave_data),
GFP_KERNEL);
@@ -2990,9 +2978,7 @@ static int cpsw_probe(struct platform_device *pdev)
goto clean_dma_ret;
}
 
-   cpsw->cpts = cpts_create(cpsw->dev, cpts_regs,
-cpsw->data.cpts_clock_mult,
-cpsw->data.cpts_clock_shift);
+   cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
if (IS_ERR(cpsw->cpts)) {
ret = PTR_ERR(cpsw->cpts);
goto clean_ale_ret;
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index 16b54c6..6c3037a 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -31,8 +31,6 @@ struct cpsw_platform_data {
u32 channels;   /* number of cpdma channels (symmetric) */
u32 slaves; /* number of slave cpgmac ports */
u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
-   u32 cpts_clock_mult;  /* convert input clock ticks to nanoseconds */
-   u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */
u32 ale_entries;/* ale table size */
u32 bd_ram_size;  /*buffer descriptor ram size */
u32 mac_control;/* Mac control register */
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 9356803..59c09a4 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -409,10 +409,34 @@ void cpts_unregister(struct cpts *cpts)
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
+{
+   int ret = -EINVAL;
+   u32 prop;
+
+   if (of_property_read_u32(node, "cpts_clock_mult", &prop))
+   goto  of_error;
+   /* save cc.mult original value as it can be modified
+* by cpts_ptp_adjfreq().
+*/
+   cpts->cc_mult = prop;
+
+   if (of_property_read_u32(node, "cpts_clock_shift", &prop))
+   goto  of_error;
+   cpts->cc.shift = prop;
+
+   return 0;
+
+of_error:
+   dev_err(cpts->dev, "CPTS: Missing property in the DT.\n");
+   return ret;
+}
+
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-u32 mult, u32 shift)
+struct device_node *node)
 {
struct cpts *cpts;
+   int ret;
 
cpts = devm_kzalloc(dev, sizeof(*cpts), GFP_KERNEL);
if (!cpts)
@@ -423,6 +447,10 @@ struct cpts *cpts_create(struct device *dev, void __iomem 
*regs,
spin_lock_init(&cpts->lock);
INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
 
+   ret = cpts_of_parse(cpts, node);
+   if (ret)
+   return ERR_PTR(ret);
+
cpts->refclk = devm_clk_get(dev, "cpts");
if (IS_ERR(cpts->refclk)) {
dev_err(dev, "Failed to get cpts refclk\n");
@@ -433,8 +461,6 @@ struct cpts *cpts_create(struct device *dev, void __iomem 
*regs,
 
cpts->cc.read = cpts_systim_read;
cpts->cc.mask = CLOCKSOURCE_MASK(32);
-   cpts->cc.shift = shift;
-   cpts->cc_mult = mult;
cpts->info = cpts_info;
 
return cpts;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/

[PATCH v4 05/13] net: ethernet: ti: cpts: fix registration order

2016-12-05 Thread Grygorii Strashko
The ptp clock registered before spinlock, which is protecting it, and
before timecounter and cyclecounter initialization in cpts_register().

So, ensure that ptp clock is registered the last, after everything
else is done.

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 61198f1..3dda6d5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -356,15 +356,8 @@ int cpts_register(struct device *dev, struct cpts *cpts,
  u32 mult, u32 shift)
 {
int err, i;
-   unsigned long flags;
 
cpts->info = cpts_info;
-   cpts->clock = ptp_clock_register(&cpts->info, dev);
-   if (IS_ERR(cpts->clock)) {
-   err = PTR_ERR(cpts->clock);
-   cpts->clock = NULL;
-   return err;
-   }
spin_lock_init(&cpts->lock);
 
cpts->cc.read = cpts_systim_read;
@@ -382,15 +375,26 @@ int cpts_register(struct device *dev, struct cpts *cpts,
cpts_write32(cpts, CPTS_EN, control);
cpts_write32(cpts, TS_PEND_EN, int_enable);
 
-   spin_lock_irqsave(&cpts->lock, flags);
timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real()));
-   spin_unlock_irqrestore(&cpts->lock, flags);
 
INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 
+   cpts->clock = ptp_clock_register(&cpts->info, dev);
+   if (IS_ERR(cpts->clock)) {
+   err = PTR_ERR(cpts->clock);
+   cpts->clock = NULL;
+   goto err_ptp;
+   }
cpts->phc_index = ptp_clock_index(cpts->clock);
+
+   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+
return 0;
+
+err_ptp:
+   if (cpts->refclk)
+   cpts_clk_release(cpts);
+   return err;
 }
 EXPORT_SYMBOL_GPL(cpts_register);
 
-- 
2.10.1



Re: [PATCH 0/5] cpsw: add per channel shaper configuration

2016-12-05 Thread Grygorii Strashko
Hi Ivan,

On 11/29/2016 09:00 AM, Ivan Khoronzhuk wrote:
> This series is intended to allow user to set rate for per channel
> shapers at cpdma level. This patchset doesn't have impact on performance.
> The rate can be set with:
> 
> echo 100 > /sys/class/net/ethX/queues/tx-0/tx_maxrate
> 
> Tested on am572xx
> Based on net-next/master
> 
> Ivan Khoronzhuk (5):
>   net: ethernet: ti: davinci_cpdma: add weight function for channels
>   net: ethernet: ti: davinci_cpdma: add set rate for a channel
>   net: ethernet: ti: cpsw: add .ndo to set per-queue rate
>   net: ethernet: ti: cpsw: optimize end of poll cycle
>   net: ethernet: ti: cpsw: split tx budget according between channels
> 
>  drivers/net/ethernet/ti/cpsw.c  | 264 +++
>  drivers/net/ethernet/ti/davinci_cpdma.c | 453 
> 
>  drivers/net/ethernet/ti/davinci_cpdma.h |   6 +
>  3 files changed, 624 insertions(+), 99 deletions(-)
> 


I've just tried net-next on BBB and got below back-trace:
INIT: Entering runlevel: 5
Configuring network interfaces... [   15.018356] net eth0: initializing cpsw 
version 1.12 (0)
[   15.120153] SMSC LAN8710/LAN8720 4a101000.mdio:00: attached PHY driver [SMSC 
LAN8710/LAN8720] (mii_bus:phy_addr=4a101000.mdio:00, irq=-1)
[   15.138578] Division by zero in kernel.
[   15.142667] CPU: 0 PID: 755 Comm: ifconfig Not tainted 
4.9.0-rc7-01617-g6ea3f00 #5
[   15.150277] Hardware name: Generic AM33XX (Flattened Device Tree)
[   15.156399] Backtrace: 
[   15.158898] [] (dump_backtrace) from [] 
(show_stack+0x18/0x1c)
[   15.166508]  r7: r6:600f0013 r5: r4:c0d395d0
[   15.172200] [] (show_stack) from [] 
(dump_stack+0x8c/0xa0)
[   15.179460] [] (dump_stack) from [] (__div0+0x1c/0x20)
[   15.186368]  r7: r6:ddf1c010 r5:0001 r4:0001
[   15.192068] [] (__div0) from [] (Ldiv0+0x8/0x14)
[   15.198503] [] (cpsw_split_budget [ti_cpsw]) from [] 
(cpsw_ndo_open+0x4b8/0x5e4 [ti_cpsw])
[   15.208554]  r10:ddf1c010 r9: r8: r7:ddf1c010 r6:dcc88000 
r5:dcc88500
[   15.216418]  r4:ddf1c0b0
[   15.218985] [] (cpsw_ndo_open [ti_cpsw]) from [] 
(__dev_open+0xb0/0x114)
[   15.227466]  r10: r9: r8: r7:dcc88030 r6:bf080364 
r5:
[   15.235330]  r4:dcc88000
[   15.237880] [] (__dev_open) from [] 
(__dev_change_flags+0x9c/0x14c)
[   15.245923]  r7:1002 r6:0001 r5:1043 r4:dcc88000
[   15.251613] [] (__dev_change_flags) from [] 
(dev_change_flags+0x20/0x50)
[   15.260093]  r9: r8: r7:dcbabf0c r6:1002 r5:dcc8813c 
r4:dcc88000
[   15.267886] [] (dev_change_flags) from [] 
(devinet_ioctl+0x6d4/0x794)
[   15.276105]  r9: r8:bee8cc64 r7:dcbabf0c r6: r5:dc921e80 
r4:
[   15.283891] [] (devinet_ioctl) from [] 
(inet_ioctl+0x19c/0x1c8)
[   15.291587]  r10: r9:dc92 r8:bee8cc64 r7:c0d64bc0 r6:bee8cc64 
r5:dd2728e0
[   15.299450]  r4:8914
[   15.302002] [] (inet_ioctl) from [] 
(sock_ioctl+0x14c/0x300)
[   15.309443] [] (sock_ioctl) from [] 
(do_vfs_ioctl+0xa8/0x98c)
[   15.316962]  r7:0003 r6:ddf0a780 r5:dd2728e0 r4:bee8cc64
[   15.322653] [] (do_vfs_ioctl) from [] 
(SyS_ioctl+0x3c/0x64)
[   15.33]  r10: r9:dc92 r8:bee8cc64 r7:8914 r6:ddf0a780 
r5:0003
[   15.337864]  r4:ddf0a780
[   15.340416] [] (SyS_ioctl) from [] 
(ret_fast_syscall+0x0/0x3c)
[   15.348024]  r9:dc92 r8:c0107f24 r7:0036 r6:bee8cf4d r5:bee8ce4c 
r4:000949f0
[   15.361174] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready
udhcpc (v1.23.1) started


-- 
regards,
-grygorii


Re: [PATCH v4 09/13] net: ethernet: ti: cpts: rework initialization/deinitialization

2016-12-06 Thread Grygorii Strashko


On 12/06/2016 07:40 AM, Richard Cochran wrote:
> On Mon, Dec 05, 2016 at 02:05:21PM -0600, Grygorii Strashko wrote:
>> @@ -372,34 +354,27 @@ void cpts_tx_timestamp(struct cpts *cpts, struct 
>> sk_buff *skb)
>>  }
>>  EXPORT_SYMBOL_GPL(cpts_tx_timestamp);
>>  
>> -int cpts_register(struct device *dev, struct cpts *cpts,
>> -  u32 mult, u32 shift)
>> +int cpts_register(struct cpts *cpts)
>>  {
>>  int err, i;
>>  
>> -cpts->info = cpts_info;
>> -spin_lock_init(&cpts->lock);
>> -
>> -cpts->cc.read = cpts_systim_read;
>> -cpts->cc.mask = CLOCKSOURCE_MASK(32);
>> -cpts->cc_mult = mult;
>> -cpts->cc.mult = mult;
>> -cpts->cc.shift = shift;
>> -
>>  INIT_LIST_HEAD(&cpts->events);
>>  INIT_LIST_HEAD(&cpts->pool);
>>  for (i = 0; i < CPTS_MAX_EVENTS; i++)
>>  list_add(&cpts->pool_data[i].list, &cpts->pool);
>>  
>> -cpts_clk_init(dev, cpts);
>> +clk_enable(cpts->refclk);
>> +
>>  cpts_write32(cpts, CPTS_EN, control);
>>  cpts_write32(cpts, TS_PEND_EN, int_enable);
>>  
>> +/* reinitialize cc.mult to original value as it can be modified
>> + * by cpts_ptp_adjfreq().
>> + */
>> +cpts->cc.mult = cpts->cc_mult;
> 
> This still isn't quite right.  First of all, you shouldn't clobber the
> learned cc.mult value in cpts_register().  Presumably, if PTP had been
> run on this port before, then the learned frequency is approximately
> correct, and it should be left alone.
> 
> [ BTW, resetting the timecounter here makes no sense either.  Why
>   reset the clock just because the interface goes down?  ]
> 

Huh. This is how it works now (even before my changes) - this is just 
refactoring!
(really new thing is the only cpts_calc_mult_shift()).

Also, this is how cpts is supported now as part of cpsw (and keystone):
configure cpsw (cpts)
- ifup
   cpsw (*soft_reset*, full reconfiguration of cpsw)
  (start cpts) - cpts/ptp active

- ifdown
   if last netdev - shutdown/poweroff cpsw (cpts)

in other words, cpts/ptp is expected to work once and until at least one cpsw 
netdev is active.

Also there are additional questions such as:
- is there guarantee that cpsw port will be connected to the same network after 
ifup?
- should there be possibility to reset cc.mult if it's value will be kept from 
the previous run?

> Secondly, you have made the initialization order of these fields hard
> to follow.  With the whole series applied:
> 
> probe()
>   cpts_create()
>   cpts_of_parse()
>   {
>   /* Set cc_mult but not cc.mult! */
>   set cc_mult
>   set cc.shift
>   }
>   cpts_calc_mult_shift()
>   {
>   /* Set them both. */
>   cpts->cc_mult = mult;
>   cpts->cc.mult = mult; 

^^ this assignment of cpts->cc.mult not required.

>   cpts->cc.shift = shift;


only in case there were not set in DT before
(I have a requirement to support both - DT and cpts_calc_mult_shift and
 that introduces a bit of complexity)

Also, I've tried not to add more fields in struct cpts.

>   }
> /* later on */
> cpts_register()
>   cpts->cc.mult = cpts->cc_mult;
> 
> There is no need for such complexity.  Simply set cc.mult in
> cpts_create() _once_, immediately after the call to
> cpts_calc_mult_shift().
> 
> You can remove the assignment from cpts_calc_mult_shift() and
> cpts_register().

Just to clarify: do you propose to get rid of cpts->cc_mult at all?

static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
{
...
if (ppb < 0) {
neg_adj = 1;
ppb = -ppb;
}
mult = cpts->cc_mult;
^^
adj = mult;
adj *= ppb;
diff = div_u64(adj, 10ULL);
...
cpts->cc.mult = neg_adj ? mult - diff : mult + diff;

Honestly, i'd not prefer to change functional behavior of ptp clock as part of
this series.

-- 
regards,
-grygorii


Re: [PATCH v4 09/13] net: ethernet: ti: cpts: rework initialization/deinitialization

2016-12-06 Thread Grygorii Strashko
Hi Richard,

On 12/06/2016 11:18 AM, Richard Cochran wrote:
> On Tue, Dec 06, 2016 at 10:45:55AM -0600, Grygorii Strashko wrote:
>> On 12/06/2016 07:40 AM, Richard Cochran wrote:
>>> [ BTW, resetting the timecounter here makes no sense either.  Why
>>>   reset the clock just because the interface goes down?  ]
>>>
>>
>> Huh. This is how it works now (even before my changes) - this is just 
>> refactoring!
>> (really new thing is the only cpts_calc_mult_shift()).
> 
> The cpts_register() used to be called from probe(), but this changed
> without any review in f280e89ad.  That wasn't your fault, but still...
>  
>> Also there are additional questions such as:
>> - is there guarantee that cpsw port will be connected to the same network 
>> after ifup?
> 
> The network is not relevant.  PTP time is a global standard, just like
> with NTP.  We don't reset the NTP clock with ifup/down, do we?

But we do reset whole cpsw :( and that's required to support PM use cases as
suspend/resume.

There are also PM requirement to shutdown cpsw in case all interfaces are down.

More over, there are requirement to minimize cpsw power consumption in case all 
links are
disconnected (and cpts is special case here).

So, at least resetting of the timecounter still required.



> 
>> - should there be possibility to reset cc.mult if it's value will be kept 
>> from the previous run?
> 
> cc.mult will change naturally according to the operation of the user
> space PTP stack.  There is no need to reset it that I can see.
>  
>>> Secondly, you have made the initialization order of these fields hard
>>> to follow.  With the whole series applied:
>>>
>>> probe()
>>> cpts_create()
>>> cpts_of_parse()
>>> {
>>> /* Set cc_mult but not cc.mult! */
>>> set cc_mult
>>> set cc.shift
>>> }
>>> cpts_calc_mult_shift()
>>> {
>>> /* Set them both. */
>>> cpts->cc_mult = mult;
>>> cpts->cc.mult = mult; 
>>
>> ^^ this assignment of cpts->cc.mult not required.
> 
> You wrote the code, not me.

Sry, I meant, thank for catching this.

>  
>>> cpts->cc.shift = shift;
>>  
>>
>> only in case there were not set in DT before
>> (I have a requirement to support both - DT and cpts_calc_mult_shift and
>>  that introduces a bit of complexity)
>>
>> Also, I've tried not to add more fields in struct cpts.
>>
>>> }
>>> /* later on */
>>> cpts_register()
>>> cpts->cc.mult = cpts->cc_mult;
>>>
>>> There is no need for such complexity.  Simply set cc.mult in
>>> cpts_create() _once_, immediately after the call to
>>> cpts_calc_mult_shift().
>>>
>>> You can remove the assignment from cpts_calc_mult_shift() and
>>> cpts_register().
>>
>> Just to clarify: do you propose to get rid of cpts->cc_mult at all?
> 
> No.  Read what I said before:
> 
>There is no need for such complexity.  Simply set cc.mult in
>cpts_create() _once_, immediately after the call to
>cpts_calc_mult_shift().
> 
>> Honestly, i'd not prefer to change functional behavior of ptp clock as part 
>> of
>> this series.
> 
> Fair enough.  The bogus clock reset existed before your series.
> 
> But please don't obfuscate simple initialization routines.  The way
> you set cc.mult and cc_mult is illogical and convoluted.
> 

Ok. I'll try to optimize it following your directions.

Thanks.

-- 
regards,
-grygorii


Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection

2016-12-06 Thread Grygorii Strashko


On 11/30/2016 11:35 AM, Grygorii Strashko wrote:
>
>
> On 11/30/2016 03:56 AM, Richard Cochran wrote:
>> On Mon, Nov 28, 2016 at 05:04:24PM -0600, Grygorii Strashko wrote:
>>> Some CPTS instances, which can be found on KeyStone 2 1/10G Ethernet
>>> Switch Subsystems, can control an external multiplexer that selects
>>> one of up to 32 clocks for time sync reference (RFTCLK). This feature
>>> can be configured through CPTS_RFTCLK_SEL register (offset: x08).
>>>
>>> Hence, introduce optional DT cpts_rftclk_sel poperty wich, if present,
>>> will specify CPTS reference clock. The cpts_rftclk_sel should be
>>> omitted in DT if HW doesn't support this feature. The external fixed
>>> rate clocks can be defined in board files as "fixed-clock".
>>
>> Can't you implement this using the clock tree, rather than an ad-hoc
>> DT property?
>>
>
> I've thought about this, but decided to move forward with this impl
>  which is pretty simple. I will try.
>
>

I come with below RFC patch. if no objection I'll move forward with it.

According to Keystone 2 66AK2e DM there are 7 possible ref clocks:
 = SYSCLK2
0001 = SYSCLK3
0010 = TIMI0
0011 = TIMI1
0100 = TSIPCLKA
1000 = TSREFCLK
1100 = TSIPCLKB
Others = Reserved

2 from above clocks are internal SYSCLK2 and SYSCLK3 - other external
(board specific). So default definition of cpts_mux will have only two parents.
If ext clock is going to be use as cpts rftclk then it should be
defined in board file and  cpts_refclk_mux definition updated to support 
this ext clock:

timi1clk: timi1clk {
#clock-cells = <0>;
compatible = "fixed-clock";
clock-frequency = ;
clock-output-names = "timi1";
};

&cpts_mux {
clocks = <&chipclk12>, <&chipclk13>, ;
    cpts-mux-tbl = <0>, <1>, <3>;
assigned-clocks = <&cpts_mux>;
    assigned-clock-parents = <&timi1clk>;
};



>From ec5c7bed0e021c2ca7e9392173bf67bb9a45d0f4 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko 
Date: Mon, 5 Dec 2016 12:34:45 -0600
Subject: [PATCH] cpts refclk sel

Signed-off-by: Grygorii Strashko 
---
 arch/arm/boot/dts/keystone-k2e-netcp.dtsi | 10 +-
 drivers/net/ethernet/ti/cpts.c| 52 ++-
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi 
b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
index 919e655..b27aa22 100644
--- a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
@@ -138,7 +138,7 @@ netcp: netcp@2400 {
/* NetCP address range */
ranges = <0 0x2400 0x100>;

-   clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
+   clocks = <&clkpa>, <&clkcpgmac>, <&cpts_mux>;
clock-names = "pa_clk", "ethss_clk", "cpts";
dma-coherent;

@@ -162,6 +162,14 @@ netcp: netcp@2400 {
cpts-ext-ts-inputs = <6>;
cpts-ts-comp-length;

+   cpts_mux: cpts_refclk_mux {
+   #clock-cells = <0>;
+   clocks = <&chipclk12>, <&chipclk13>;
+   cpts-mux-tbl = <0>, <1>;
+   assigned-clocks = <&cpts_mux>;
+   assigned-clock-parents = <&chipclk12>;
+   };
+
interfaces {
gbe0: interface-0 {
slave-port = <0>;
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 938de22..ef94316 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -17,6 +17,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
  */
+#include 
 #include 
 #include 
 #include 
@@ -672,6 +673,7 @@ int cpts_register(struct cpts *cpts)
cpts->phc_index = ptp_clock_index(cpts->clock);

schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
+
return 0;

 err_ptp:
@@ -741,6 +743,54 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
 freq, cpts->cc_mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
 }

+static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
+{
+   unsigned int num_parents;
+   const char **parent_names;
+   struct device_node *refclk_np;
+   void __iomem *reg;
+   struct clk *clk;
+   u32 *mux_table;
+   int ret;
+
+

Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection

2016-12-06 Thread Grygorii Strashko


On 12/06/2016 02:25 PM, Richard Cochran wrote:
> On Tue, Dec 06, 2016 at 01:39:40PM -0600, Grygorii Strashko wrote:
>> I come with below RFC patch. if no objection I'll move forward with it.
> 
> Thanks for following through with this!
> 
> The am335x will also need the MUX in its clock tree, won't it?
> 

Not exactly. I do not see CPTS_RFTCLK_SEL register in trm, but looks like
it's already implemented in am335 clock tree:
cpsw_cpts_rft_clk: cpsw_cpts_rft_clk@520 {
#clock-cells = <0>;
compatible = "ti,mux-clock";
clocks = <&dpll_core_m5_ck>, <&dpll_core_m4_ck>;
reg = <0x0520>;
};

and ssigned-clock-xx can be used to change parent in board file:

&cpsw_cpts_rft_clk {
assigned-clocks = <&cpsw_cpts_rft_clk>;
assigned-clock-parents = <&dpll_core_m4_ck>;
};


-- 
regards,
-grygorii


Re: [PATCH 4/6] net: ethernet: ti: cpts: add ptp pps support

2016-12-06 Thread Grygorii Strashko



On 12/06/2016 12:08 PM, Richard Cochran wrote:

On Wed, Nov 30, 2016 at 11:05:19AM +0100, Richard Cochran wrote:

Can you adjust the frequency of the keystone devices in hardware?  If
so, then please implement it, and just disable PPS for the CPSW.

The only reason I used the timecounter for frequency adjustment was
because the am335x HW is broken.  But this shouldn't hold back other
newer HW without the same silicon flaws.


I am talking here about the ADPLLLJ units.  Are they usable on the
keystone?

If so, please implement the frequency adjustment with them.



No, I think it's not impossible (at least as I know now).
i'll drop this patch for now.

By the way, I've tested am335 (BBB)+ HW_TS_PUSH + PWM.
Seems works.

--
regards,
-grygorii


[PATCH v5 11/13] clocksource: export the clocks_calc_mult_shift to use by timestamp code

2016-12-06 Thread Grygorii Strashko
From: Murali Karicheri 

The CPSW CPTS driver is capable of doing timestamping on tx/rx packets and
requires to know mult and shift factors for timestamp conversion from raw
value to nanoseconds (ptp clock). Now these mult and shift factors are
calculated manually and provided through DT, which makes very hard to
support of a lot number of platforms, especially if CPTS refclk is not the
same for some kind of boards and depends on efuse settings (Keystone 2
platforms). Hence, export clocks_calc_mult_shift() to allow drivers like
CPSW CPTS (and other ptp drivesr) to benefit from automaitc calculation of
mult and shift factors.

Cc: John Stultz 
Signed-off-by: Murali Karicheri 
Signed-off-by: Grygorii Strashko 
Acked-by: Thomas Gleixner 
---
 kernel/time/clocksource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7e4fad7..150242c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -89,6 +89,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 
to, u32 maxsec)
*mult = tmp;
*shift = sft;
 }
+EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
 
 /*[Clocksource internal variables]-
  * curr_clocksource:
-- 
2.10.1



[PATCH v5 04/13] net: ethernet: ti: cpts: fix unbalanced clk api usage in cpts_register/unregister

2016-12-06 Thread Grygorii Strashko
There are two issues with TI CPTS code which are reproducible when TI
CPSW ethX device passes few up/down iterations:
- cpts refclk prepare counter continuously incremented after each
up/down iteration;
- devm_clk_get(dev, "cpts") is called many times.

Hence, fix these issues by using clk_disable_unprepare() in
cpts_clk_release() and skipping devm_clk_get() if cpts refclk has been
acquired already.

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 8cb0369..61198f1 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -230,18 +230,20 @@ static void cpts_overflow_check(struct work_struct *work)
 
 static void cpts_clk_init(struct device *dev, struct cpts *cpts)
 {
-   cpts->refclk = devm_clk_get(dev, "cpts");
-   if (IS_ERR(cpts->refclk)) {
-   dev_err(dev, "Failed to get cpts refclk\n");
-   cpts->refclk = NULL;
-   return;
+   if (!cpts->refclk) {
+   cpts->refclk = devm_clk_get(dev, "cpts");
+   if (IS_ERR(cpts->refclk)) {
+   dev_err(dev, "Failed to get cpts refclk\n");
+   cpts->refclk = NULL;
+   return;
+   }
}
clk_prepare_enable(cpts->refclk);
 }
 
 static void cpts_clk_release(struct cpts *cpts)
 {
-   clk_disable(cpts->refclk);
+   clk_disable_unprepare(cpts->refclk);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
-- 
2.10.1



[PATCH v5 12/13] net: ethernet: ti: cpts: calc mult and shift from refclk freq

2016-12-06 Thread Grygorii Strashko
The cyclecounter mult and shift values can be calculated based on the
CPTS rfclk frequency and timekeepnig framework provides required algos
and API's.

Hence, calc mult and shift basing on CPTS rfclk frequency if both
cpts_clock_shift and cpts_clock_mult properties are not provided in DT (the
basis of calculation algorithm is borrowed from
__clocksource_update_freq_scale() commit 7d2f944a2b83 ("clocksource:
Provide a generic mult/shift factor calculation")). After this change
cpts_clock_shift and cpts_clock_mult DT properties will become optional.

Cc: John Stultz 
Cc: Thomas Gleixner 
Signed-off-by: Grygorii Strashko 
---
 Documentation/devicetree/bindings/net/cpsw.txt |  8 +++--
 drivers/net/ethernet/ti/cpts.c | 50 ++
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
b/Documentation/devicetree/bindings/net/cpsw.txt
index 5ad439f..ebda7c9 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -20,8 +20,6 @@ Required properties:
 - slaves   : Specifies number for slaves
 - active_slave : Specifies the slave to use for time stamping,
  ethtool and SIOCGMIIPHY
-- cpts_clock_mult  : Numerator to convert input clock ticks into 
nanoseconds
-- cpts_clock_shift : Denominator to convert input clock ticks into 
nanoseconds
 
 Optional properties:
 - ti,hwmods: Must be "cpgmac0"
@@ -35,7 +33,11 @@ Optional properties:
  For example in dra72x-evm, pcf gpio has to be
  driven low so that cpsw slave 0 and phy data
  lines are connected via mux.
-
+- cpts_clock_mult  : Numerator to convert input clock ticks into 
nanoseconds
+- cpts_clock_shift : Denominator to convert input clock ticks into 
nanoseconds
+ Mult and shift will be calculated basing on CPTS
+ rftclk frequency if both cpts_clock_shift and
+ cpts_clock_mult properties are not provided.
 
 Slave Properties:
 Required properties:
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index cb844ed..ebd413a 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -405,18 +405,52 @@ void cpts_unregister(struct cpts *cpts)
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+static void cpts_calc_mult_shift(struct cpts *cpts)
+{
+   u64 frac, maxsec, ns;
+   u32 freq;
+
+   freq = clk_get_rate(cpts->refclk);
+
+   /* Calc the maximum number of seconds which we can run before
+* wrapping around.
+*/
+   maxsec = cpts->cc.mask;
+   do_div(maxsec, freq);
+   /* limit conversation rate to 10 sec as higher values will produce
+* too small mult factors and so reduce the conversion accuracy
+*/
+   if (maxsec > 10)
+   maxsec = 10;
+
+   if (cpts->cc.mult || cpts->cc.shift)
+   return;
+
+   clocks_calc_mult_shift(&cpts->cc.mult, &cpts->cc.shift,
+  freq, NSEC_PER_SEC, maxsec);
+
+   frac = 0;
+   ns = cyclecounter_cyc2ns(&cpts->cc, freq, cpts->cc.mask, &frac);
+
+   dev_info(cpts->dev,
+"CPTS: ref_clk_freq:%u calc_mult:%u calc_shift:%u error:%lld 
nsec/sec\n",
+freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
+}
+
 static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
 {
int ret = -EINVAL;
u32 prop;
 
-   if (of_property_read_u32(node, "cpts_clock_mult", &prop))
-   goto  of_error;
-   cpts->cc.mult = prop;
+   if (!of_property_read_u32(node, "cpts_clock_mult", &prop))
+   cpts->cc.mult = prop;
 
-   if (of_property_read_u32(node, "cpts_clock_shift", &prop))
-   goto  of_error;
-   cpts->cc.shift = prop;
+   if (!of_property_read_u32(node, "cpts_clock_shift", &prop))
+   cpts->cc.shift = prop;
+
+   if ((cpts->cc.mult && !cpts->cc.shift) ||
+   (!cpts->cc.mult && cpts->cc.shift))
+   goto of_error;
 
return 0;
 
@@ -454,11 +488,13 @@ struct cpts *cpts_create(struct device *dev, void __iomem 
*regs,
 
cpts->cc.read = cpts_systim_read;
cpts->cc.mask = CLOCKSOURCE_MASK(32);
+   cpts->info = cpts_info;
+
+   cpts_calc_mult_shift(cpts);
/* save cc.mult original value as it can be modified
 * by cpts_ptp_adjfreq().
 */
cpts->cc_mult = cpts->cc.mult;
-   cpts->info = cpts_info;
 
return cpts;
 }
-- 
2.10.1



[PATCH v5 07/13] net: ethernet: ti: cpts: clean up event list if event pool is empty

2016-12-06 Thread Grygorii Strashko
From: WingMan Kwok 

When a CPTS user does not exit gracefully by disabling cpts
timestamping and leaving a joined multicast group, the system
continues to receive and timestamps the ptp packets which eventually
occupy all the event list entries.  When this happns, the added code
tries to remove some list entries which are expired.

Signed-off-by: WingMan Kwok 
Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpts.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index d3c1ac5..7ab1fa7 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -57,6 +57,26 @@ static int cpts_fifo_pop(struct cpts *cpts, u32 *high, u32 
*low)
return -1;
 }
 
+static int cpts_purge_events(struct cpts *cpts)
+{
+   struct list_head *this, *next;
+   struct cpts_event *event;
+   int removed = 0;
+
+   list_for_each_safe(this, next, &cpts->events) {
+   event = list_entry(this, struct cpts_event, list);
+   if (event_expired(event)) {
+   list_del_init(&event->list);
+   list_add(&event->list, &cpts->pool);
+   ++removed;
+   }
+   }
+
+   if (removed)
+   pr_debug("cpts: event pool cleaned up %d\n", removed);
+   return removed ? 0 : -1;
+}
+
 /*
  * Returns zero if matching event type was found.
  */
@@ -69,10 +89,12 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
for (i = 0; i < CPTS_FIFO_DEPTH; i++) {
if (cpts_fifo_pop(cpts, &hi, &lo))
break;
-   if (list_empty(&cpts->pool)) {
-   pr_err("cpts: event pool is empty\n");
+
+   if (list_empty(&cpts->pool) && cpts_purge_events(cpts)) {
+   pr_err("cpts: event pool empty\n");
return -1;
}
+
event = list_first_entry(&cpts->pool, struct cpts_event, list);
event->tmo = jiffies + 2;
event->high = hi;
-- 
2.10.1



[PATCH v5 13/13] net: ethernet: ti: cpts: fix overflow check period

2016-12-06 Thread Grygorii Strashko
The CPTS drivers uses 8sec period for overflow checking with
assumption that CPTS retclk will not exceed 500MHz. But that's not
true on some TI platforms (Kesytone 2). As result, it is possible that
CPTS counter will overflow more than once between two readings.

Hence, fix it by selecting overflow check period dynamically as
max_sec_before_overflow/2, where
 max_sec_before_overflow = max_counter_val / rftclk_freq.

Cc: John Stultz 
Cc: Thomas Gleixner 
Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 10 +++---
 drivers/net/ethernet/ti/cpts.h |  4 +---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index ebd413a..0c0d48e 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -245,7 +245,7 @@ static void cpts_overflow_check(struct work_struct *work)
 
cpts_ptp_gettime(&cpts->info, &ts);
pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+   schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -378,8 +378,7 @@ int cpts_register(struct cpts *cpts)
}
cpts->phc_index = ptp_clock_index(cpts->clock);
 
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-
+   schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
return 0;
 
 err_ptp:
@@ -423,6 +422,11 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
if (maxsec > 10)
maxsec = 10;
 
+   /* Calc overflow check period (maxsec / 2) */
+   cpts->ov_check_period = (HZ * maxsec) / 2;
+   dev_info(cpts->dev, "cpts: overflow check period %lu (jiffies)\n",
+cpts->ov_check_period);
+
if (cpts->cc.mult || cpts->cc.shift)
return;
 
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 5da23af..c96eca2 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -97,9 +97,6 @@ enum {
CPTS_EV_TX,   /* Ethernet Transmit Event */
 };
 
-/* This covers any input clock up to about 500 MHz. */
-#define CPTS_OVERFLOW_PERIOD (HZ * 8)
-
 #define CPTS_FIFO_DEPTH 16
 #define CPTS_MAX_EVENTS 32
 
@@ -127,6 +124,7 @@ struct cpts {
struct list_head events;
struct list_head pool;
struct cpts_event pool_data[CPTS_MAX_EVENTS];
+   unsigned long ov_check_period;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
2.10.1



[PATCH v5 01/13] net: ethernet: ti: cpts: switch to readl/writel_relaxed()

2016-12-06 Thread Grygorii Strashko
Switch to readl/writel_relaxed() APIs, because this is recommended
API and the CPTS IP is reused on Keystone 2 SoCs
where LE/BE modes are supported.

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 85a55b4..a42c449 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -33,8 +33,8 @@
 
 #ifdef CONFIG_TI_CPTS
 
-#define cpts_read32(c, r)  __raw_readl(&c->reg->r)
-#define cpts_write32(c, v, r)  __raw_writel(v, &c->reg->r)
+#define cpts_read32(c, r)  readl_relaxed(&c->reg->r)
+#define cpts_write32(c, v, r)  writel_relaxed(v, &c->reg->r)
 
 static int event_expired(struct cpts_event *event)
 {
-- 
2.10.1



[PATCH v5 08/13] net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs

2016-12-06 Thread Grygorii Strashko
CPTS module and IRQs are always enabled when CPTS is registered,
before starting overflow check work, and disabled during
deregistration, when overflow check work has been canceled already.
So, It doesn't require to (re)enable CPTS module and IRQs in
cpts_overflow_check().

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 7ab1fa7..fe1bb7f 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -243,8 +243,6 @@ static void cpts_overflow_check(struct work_struct *work)
struct timespec64 ts;
struct cpts *cpts = container_of(work, struct cpts, overflow_work.work);
 
-   cpts_write32(cpts, CPTS_EN, control);
-   cpts_write32(cpts, TS_PEND_EN, int_enable);
cpts_ptp_gettime(&cpts->info, &ts);
pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-- 
2.10.1



[PATCH v5 02/13] net: ethernet: ti: allow cpts to be built separately

2016-12-06 Thread Grygorii Strashko
TI CPTS IP is used as part of TI OMAP CPSW driver, but it's also
present as part of NETCP on TI Keystone 2 SoCs. So, It's required
to enable build of CPTS for both this drivers and this can be
achieved by allowing CPTS to be built separately.

Hence, allow cpts to be built separately and convert it to be
a module as both CPSW and NETCP drives can be built as modules.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/Kconfig  |  2 +-
 drivers/net/ethernet/ti/Makefile |  3 ++-
 drivers/net/ethernet/ti/cpsw.c   | 22 +-
 drivers/net/ethernet/ti/cpts.c   | 16 
 drivers/net/ethernet/ti/cpts.h   | 18 ++
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9904d74..ff7f518 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -74,7 +74,7 @@ config TI_CPSW
  will be called cpsw.
 
 config TI_CPTS
-   bool "TI Common Platform Time Sync (CPTS) Support"
+   tristate "TI Common Platform Time Sync (CPTS) Support"
depends on TI_CPSW
select PTP_1588_CLOCK
---help---
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index d420d94..1e7c10b 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -12,8 +12,9 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
 obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
 obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
+obj-$(CONFIG_TI_CPTS) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
-ti_cpsw-y := cpsw.o cpts.o
+ti_cpsw-y := cpsw.o
 
 obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o
 keystone_netcp-y := netcp_core.o
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index f373a4b..8fdb274 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1594,7 +1594,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
return NETDEV_TX_BUSY;
 }
 
-#ifdef CONFIG_TI_CPTS
+#if IS_ENABLED(CONFIG_TI_CPTS)
 
 static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
@@ -1742,7 +1742,16 @@ static int cpsw_hwtstamp_get(struct net_device *dev, 
struct ifreq *ifr)
 
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
 }
+#else
+static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+   return -EOPNOTSUPP;
+}
 
+static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+   return -EOPNOTSUPP;
+}
 #endif /*CONFIG_TI_CPTS*/
 
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
@@ -1755,12 +1764,10 @@ static int cpsw_ndo_ioctl(struct net_device *dev, 
struct ifreq *req, int cmd)
return -EINVAL;
 
switch (cmd) {
-#ifdef CONFIG_TI_CPTS
case SIOCSHWTSTAMP:
return cpsw_hwtstamp_set(dev, req);
case SIOCGHWTSTAMP:
return cpsw_hwtstamp_get(dev, req);
-#endif
}
 
if (!cpsw->slaves[slave_no].phy)
@@ -2100,10 +2107,10 @@ static void cpsw_set_msglevel(struct net_device *ndev, 
u32 value)
priv->msg_enable = value;
 }
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
 static int cpsw_get_ts_info(struct net_device *ndev,
struct ethtool_ts_info *info)
 {
-#ifdef CONFIG_TI_CPTS
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
info->so_timestamping =
@@ -2120,7 +2127,12 @@ static int cpsw_get_ts_info(struct net_device *ndev,
info->rx_filters =
(1 << HWTSTAMP_FILTER_NONE) |
(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+   return 0;
+}
 #else
+static int cpsw_get_ts_info(struct net_device *ndev,
+   struct ethtool_ts_info *info)
+{
info->so_timestamping =
SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE |
@@ -2128,9 +2140,9 @@ static int cpsw_get_ts_info(struct net_device *ndev,
info->phc_index = -1;
info->tx_types = 0;
info->rx_filters = 0;
-#endif
return 0;
 }
+#endif
 
 static int cpsw_get_link_ksettings(struct net_device *ndev,
   struct ethtool_link_ksettings *ecmd)
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index a42c449..8cb0369 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,8 +31,6 @@
 
 #include "cpts.h"
 
-#ifdef CONFIG_TI_CPTS
-
 #define cpts_read32(c, r)  readl_relaxed(&c->reg->r)
 #define cpts_write32(c, v, r)  writel_relaxed(v, &c->reg->r)
 
@@ -334,6 +332,7 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff 
*skb)
memset(ssh, 0, sizeof(*ssh));
ssh->hwtstamp = ns_to_ktime(ns);
 }
+EXPORT_SYMBOL_GPL(cpts_rx_t

[PATCH v5 09/13] net: ethernet: ti: cpts: rework initialization/deinitialization

2016-12-06 Thread Grygorii Strashko
The current implementation CPTS initialization and deinitialization
(represented by cpts_register/unregister()) does too many static
initialization from .ndo_open(), which is reasonable to do once at probe
time instead, and also require caller to allocate memory for struct cpts,
which is internal for CPTS driver in general.

This patch splits CPTS initialization and deinitialization on two parts:

- static initializtion cpts_create()/cpts_release() which expected to be
executed when parent driver is probed/removed;

- dynamic part cpts_register/unregister() which expected to be executed
when network device is opened/closed.

As result, current code of CPTS parent driver - CPSW - will be simplified
(and it also will allow simplify adding support for Keystone 2 devices in
the future), plus more initialization errors will be catched earlier. In
addition, this change allows to clean up cpts.h for the case when CPTS is
disabled.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 24 +-
 drivers/net/ethernet/ti/cpts.c | 99 +-
 drivers/net/ethernet/ti/cpts.h | 26 ---
 3 files changed, 92 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 7599895..a9a8354 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1487,9 +1487,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
if (ret < 0)
goto err_cleanup;
 
-   if (cpts_register(cpsw->dev, cpsw->cpts,
- cpsw->data.cpts_clock_mult,
- cpsw->data.cpts_clock_shift))
+   if (cpts_register(cpsw->cpts))
dev_err(priv->dev, "error registering cpts device\n");
 
}
@@ -2796,6 +2794,7 @@ static int cpsw_probe(struct platform_device *pdev)
struct cpdma_params dma_params;
struct cpsw_ale_params  ale_params;
void __iomem*ss_regs;
+   void __iomem*cpts_regs;
struct resource *res, *ss_res;
const struct of_device_id   *of_id;
struct gpio_descs   *mode;
@@ -2823,12 +2822,6 @@ static int cpsw_probe(struct platform_device *pdev)
priv->dev  = &ndev->dev;
priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
cpsw->rx_packet_max = max(rx_packet_max, 128);
-   cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-   if (!cpsw->cpts) {
-   dev_err(&pdev->dev, "error allocating cpts\n");
-   ret = -ENOMEM;
-   goto clean_ndev_ret;
-   }
 
mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
if (IS_ERR(mode)) {
@@ -2916,7 +2909,7 @@ static int cpsw_probe(struct platform_device *pdev)
switch (cpsw->version) {
case CPSW_VERSION_1:
cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-   cpsw->cpts->reg  = ss_regs + CPSW1_CPTS_OFFSET;
+   cpts_regs   = ss_regs + CPSW1_CPTS_OFFSET;
cpsw->hw_stats   = ss_regs + CPSW1_HW_STATS;
dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
dma_params.txhdp = ss_regs + CPSW1_STATERAM_OFFSET;
@@ -2930,7 +2923,7 @@ static int cpsw_probe(struct platform_device *pdev)
case CPSW_VERSION_3:
case CPSW_VERSION_4:
cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-   cpsw->cpts->reg  = ss_regs + CPSW2_CPTS_OFFSET;
+   cpts_regs   = ss_regs + CPSW2_CPTS_OFFSET;
cpsw->hw_stats   = ss_regs + CPSW2_HW_STATS;
dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
dma_params.txhdp = ss_regs + CPSW2_STATERAM_OFFSET;
@@ -2997,6 +2990,14 @@ static int cpsw_probe(struct platform_device *pdev)
goto clean_dma_ret;
}
 
+   cpsw->cpts = cpts_create(cpsw->dev, cpts_regs,
+cpsw->data.cpts_clock_mult,
+cpsw->data.cpts_clock_shift);
+   if (IS_ERR(cpsw->cpts)) {
+   ret = PTR_ERR(cpsw->cpts);
+   goto clean_ale_ret;
+   }
+
ndev->irq = platform_get_irq(pdev, 1);
if (ndev->irq < 0) {
dev_err(priv->dev, "error getting irq resource\n");
@@ -3112,6 +3113,7 @@ static int cpsw_remove(struct platform_device *pdev)
unregister_netdev(cpsw->slaves[1].ndev);
unregister_netdev(ndev);
 
+   cpts_release(cpsw->cpts);
cpsw_ale_destroy(cpsw->ale);
cpdma_ctlr_destroy(cpsw->

[PATCH v5 05/13] net: ethernet: ti: cpts: fix registration order

2016-12-06 Thread Grygorii Strashko
The ptp clock registered before spinlock, which is protecting it, and
before timecounter and cyclecounter initialization in cpts_register().

So, ensure that ptp clock is registered the last, after everything
else is done.

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 61198f1..3dda6d5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -356,15 +356,8 @@ int cpts_register(struct device *dev, struct cpts *cpts,
  u32 mult, u32 shift)
 {
int err, i;
-   unsigned long flags;
 
cpts->info = cpts_info;
-   cpts->clock = ptp_clock_register(&cpts->info, dev);
-   if (IS_ERR(cpts->clock)) {
-   err = PTR_ERR(cpts->clock);
-   cpts->clock = NULL;
-   return err;
-   }
spin_lock_init(&cpts->lock);
 
cpts->cc.read = cpts_systim_read;
@@ -382,15 +375,26 @@ int cpts_register(struct device *dev, struct cpts *cpts,
cpts_write32(cpts, CPTS_EN, control);
cpts_write32(cpts, TS_PEND_EN, int_enable);
 
-   spin_lock_irqsave(&cpts->lock, flags);
timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real()));
-   spin_unlock_irqrestore(&cpts->lock, flags);
 
INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 
+   cpts->clock = ptp_clock_register(&cpts->info, dev);
+   if (IS_ERR(cpts->clock)) {
+   err = PTR_ERR(cpts->clock);
+   cpts->clock = NULL;
+   goto err_ptp;
+   }
cpts->phc_index = ptp_clock_index(cpts->clock);
+
+   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+
return 0;
+
+err_ptp:
+   if (cpts->refclk)
+   cpts_clk_release(cpts);
+   return err;
 }
 EXPORT_SYMBOL_GPL(cpts_register);
 
-- 
2.10.1



[PATCH v5 10/13] net: ethernet: ti: cpts: move dt props parsing to cpts driver

2016-12-06 Thread Grygorii Strashko
Move DT properties parsing into CPTS driver to simplify CPSW
code and CPTS driver porting on other SoC in the future
(like Keystone 2) - with this change it will not be required
to add the same DT parsing code in Keystone 2 NETCP driver.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 16 +---
 drivers/net/ethernet/ti/cpsw.h |  2 --
 drivers/net/ethernet/ti/cpts.c | 34 ++
 drivers/net/ethernet/ti/cpts.h |  5 +++--
 4 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index a9a8354..b62d958 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2524,18 +2524,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
}
data->active_slave = prop;
 
-   if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
-   dev_err(&pdev->dev, "Missing cpts_clock_mult property in the 
DT.\n");
-   return -EINVAL;
-   }
-   data->cpts_clock_mult = prop;
-
-   if (of_property_read_u32(node, "cpts_clock_shift", &prop)) {
-   dev_err(&pdev->dev, "Missing cpts_clock_shift property in the 
DT.\n");
-   return -EINVAL;
-   }
-   data->cpts_clock_shift = prop;
-
data->slave_data = devm_kzalloc(&pdev->dev, data->slaves
* sizeof(struct cpsw_slave_data),
GFP_KERNEL);
@@ -2990,9 +2978,7 @@ static int cpsw_probe(struct platform_device *pdev)
goto clean_dma_ret;
}
 
-   cpsw->cpts = cpts_create(cpsw->dev, cpts_regs,
-cpsw->data.cpts_clock_mult,
-cpsw->data.cpts_clock_shift);
+   cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
if (IS_ERR(cpsw->cpts)) {
ret = PTR_ERR(cpsw->cpts);
goto clean_ale_ret;
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index 16b54c6..6c3037a 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -31,8 +31,6 @@ struct cpsw_platform_data {
u32 channels;   /* number of cpdma channels (symmetric) */
u32 slaves; /* number of slave cpgmac ports */
u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
-   u32 cpts_clock_mult;  /* convert input clock ticks to nanoseconds */
-   u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */
u32 ale_entries;/* ale table size */
u32 bd_ram_size;  /*buffer descriptor ram size */
u32 mac_control;/* Mac control register */
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 31cd83f..cb844ed 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -405,10 +405,31 @@ void cpts_unregister(struct cpts *cpts)
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
+{
+   int ret = -EINVAL;
+   u32 prop;
+
+   if (of_property_read_u32(node, "cpts_clock_mult", &prop))
+   goto  of_error;
+   cpts->cc.mult = prop;
+
+   if (of_property_read_u32(node, "cpts_clock_shift", &prop))
+   goto  of_error;
+   cpts->cc.shift = prop;
+
+   return 0;
+
+of_error:
+   dev_err(cpts->dev, "CPTS: Missing property in the DT.\n");
+   return ret;
+}
+
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-u32 mult, u32 shift)
+struct device_node *node)
 {
struct cpts *cpts;
+   int ret;
 
cpts = devm_kzalloc(dev, sizeof(*cpts), GFP_KERNEL);
if (!cpts)
@@ -419,6 +440,10 @@ struct cpts *cpts_create(struct device *dev, void __iomem 
*regs,
spin_lock_init(&cpts->lock);
INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
 
+   ret = cpts_of_parse(cpts, node);
+   if (ret)
+   return ERR_PTR(ret);
+
cpts->refclk = devm_clk_get(dev, "cpts");
if (IS_ERR(cpts->refclk)) {
dev_err(dev, "Failed to get cpts refclk\n");
@@ -429,9 +454,10 @@ struct cpts *cpts_create(struct device *dev, void __iomem 
*regs,
 
cpts->cc.read = cpts_systim_read;
cpts->cc.mask = CLOCKSOURCE_MASK(32);
-   cpts->cc.shift = shift;
-   cpts->cc_mult = mult;
-   cpts->cc.mult = mult;
+   /* save cc.mult original value as it can be modified
+* by cpts_ptp_adjfreq().
+*/
+   cpts->cc_mult = cpts->cc.mult;
cpts->info = cpts_info;
 

[PATCH v5 06/13] net: ethernet: ti: cpts: disable cpts when unregistered

2016-12-06 Thread Grygorii Strashko
The cpts now is left enabled after unregistration.
Hence, disable it in cpts_unregister().

Signed-off-by: Grygorii Strashko 
Acked-by: Richard Cochran 
---
 drivers/net/ethernet/ti/cpts.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 3dda6d5..d3c1ac5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -404,6 +404,10 @@ void cpts_unregister(struct cpts *cpts)
ptp_clock_unregister(cpts->clock);
cancel_delayed_work_sync(&cpts->overflow_work);
}
+
+   cpts_write32(cpts, 0, int_enable);
+   cpts_write32(cpts, 0, control);
+
if (cpts->refclk)
cpts_clk_release(cpts);
 }
-- 
2.10.1



[PATCH v5 03/13] net: ethernet: ti: cpsw: minimize direct access to struct cpts

2016-12-06 Thread Grygorii Strashko
This will provide more flexibility in changing CPTS internals and also
required for further changes.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 28 +++-
 drivers/net/ethernet/ti/cpts.h | 39 +++
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 8fdb274..7599895 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1562,7 +1562,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
}
 
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-   cpsw->cpts->tx_enable)
+   cpts_is_tx_enabled(cpsw->cpts))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
skb_tx_timestamp(skb);
@@ -1601,7 +1601,8 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
u32 ts_en, seq_id;
 
-   if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
+   if (!cpts_is_tx_enabled(cpsw->cpts) &&
+   !cpts_is_rx_enabled(cpsw->cpts)) {
slave_write(slave, 0, CPSW1_TS_CTL);
return;
}
@@ -1609,10 +1610,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ts_en |= CPSW_V1_TS_TX_EN;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ts_en |= CPSW_V1_TS_RX_EN;
 
slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1635,20 +1636,20 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
case CPSW_VERSION_2:
ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ctrl |= CTRL_V2_TX_TS_BITS;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ctrl |= CTRL_V2_RX_TS_BITS;
break;
case CPSW_VERSION_3:
default:
ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ctrl |= CTRL_V3_TX_TS_BITS;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ctrl |= CTRL_V3_RX_TS_BITS;
break;
}
@@ -1684,7 +1685,7 @@ static int cpsw_hwtstamp_set(struct net_device *dev, 
struct ifreq *ifr)
 
switch (cfg.rx_filter) {
case HWTSTAMP_FILTER_NONE:
-   cpts->rx_enable = 0;
+   cpts_rx_enable(cpts, 0);
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
@@ -1700,14 +1701,14 @@ static int cpsw_hwtstamp_set(struct net_device *dev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-   cpts->rx_enable = 1;
+   cpts_rx_enable(cpts, 1);
cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
break;
default:
return -ERANGE;
}
 
-   cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
+   cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
 
switch (cpsw->version) {
case CPSW_VERSION_1:
@@ -1736,8 +1737,9 @@ static int cpsw_hwtstamp_get(struct net_device *dev, 
struct ifreq *ifr)
return -EOPNOTSUPP;
 
cfg.flags = 0;
-   cfg.tx_type = cpts->tx_enable ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-   cfg.rx_filter = (cpts->rx_enable ?
+   cfg.tx_type = cpts_is_tx_enabled(cpts) ?
+ HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+   cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
 HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE);
 
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 416ba2c..29a1e80c 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -132,6 +132,27 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff 
*skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
 void cpts_unregister(struct cpts *cpts);
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+   cpts->rx_enable = enab

[PATCH v5 00/13] net: ethernet: ti: cpts: update and fixes

2016-12-06 Thread Grygorii Strashko
It is preparation series intended to clean up and optimize TI CPTS driver to
facilitate further integration with other TI's SoCs like Keystone 2.

Changes in v5:
- fixed copy paste error in cpts_release
- reworked cc.mult/shift and cc_mult initialization 

Changes in v4:
- fixed build error in patch
  "net: ethernet: ti: cpts: clean up event list if event pool is empty"
- rebased on top of net-next
 
Changes in v3:
- patches reordered: fixes and small updates moved first
- added comments in code about cpts->cc_mult
- conversation range (maxsec) limited to 10sec

Changes in v2:
- patch "net: ethernet: ti: cpts: rework initialization/deinitialization"
  was split on 4 patches
- applied comments from Richard Cochran
- dropped patch
  "net: ethernet: ti: cpts: add return value to tx and rx timestamp funcitons"
- new patches added:
  "net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs"
  and "clocksource: export the clocks_calc_mult_shift to use by timestamp code"

Links on prev versions:
v4: https://lkml.org/lkml/2016/12/6/496
v3: https://www.spinics.net/lists/devicetree/msg153474.html
v2: http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1282034.html
v1: http://www.spinics.net/lists/linux-omap/msg131925.html

Grygorii Strashko (11):
  net: ethernet: ti: cpts: switch to readl/writel_relaxed()
  net: ethernet: ti: allow cpts to be built separately
  net: ethernet: ti: cpsw: minimize direct access to struct cpts
  net: ethernet: ti: cpts: fix unbalanced clk api usage in 
cpts_register/unregister
  net: ethernet: ti: cpts: fix registration order
  net: ethernet: ti: cpts: disable cpts when unregistered
  net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs
  net: ethernet: ti: cpts: rework initialization/deinitialization
  net: ethernet: ti: cpts: move dt props parsing to cpts driver
  net: ethernet: ti: cpts: calc mult and shift from refclk freq
  net: ethernet: ti: cpts: fix overflow check period

Murali Karicheri (1):
  clocksource: export the clocks_calc_mult_shift to use by timestamp code

WingMan Kwok (1):
  net: ethernet: ti: cpts: clean up event list if event pool is empty

 Documentation/devicetree/bindings/net/cpsw.txt |   8 +-
 drivers/net/ethernet/ti/Kconfig|   2 +-
 drivers/net/ethernet/ti/Makefile   |   3 +-
 drivers/net/ethernet/ti/cpsw.c |  84 -
 drivers/net/ethernet/ti/cpsw.h |   2 -
 drivers/net/ethernet/ti/cpts.c | 233 ++---
 drivers/net/ethernet/ti/cpts.h |  80 -
 kernel/time/clocksource.c  |   1 +
 8 files changed, 297 insertions(+), 116 deletions(-)

-- 
2.10.1



Re: [PATCH 5/7] Documentation: DT: net: cpsw: allow to specify descriptors pool size

2016-12-07 Thread Grygorii Strashko


On 12/02/2016 11:21 AM, Grygorii Strashko wrote:
> 
> 
> On 12/02/2016 05:28 AM, Ivan Khoronzhuk wrote:
>> On Thu, Dec 01, 2016 at 05:34:30PM -0600, Grygorii Strashko wrote:
>>> Add optional property "descs_pool_size" to specify buffer descriptor's
>>> pool size. The "descs_pool_size" should define total number of CPDMA
>>> CPPI descriptors to be used for both ingress/egress packets
>>> processing. If not specified - the default value 256 will be used
>>> which will allow to place descriptor's pool into the internal CPPI
>>> RAM on most of TI SoC.
>>>
>>> Signed-off-by: Grygorii Strashko 
>>> ---
>>>  Documentation/devicetree/bindings/net/cpsw.txt | 5 +
>>>  1 file changed, 5 insertions(+)
>>>
>>> diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
>>> b/Documentation/devicetree/bindings/net/cpsw.txt
>>> index 5ad439f..b99d196 100644
>>> --- a/Documentation/devicetree/bindings/net/cpsw.txt
>>> +++ b/Documentation/devicetree/bindings/net/cpsw.txt
>>> @@ -35,6 +35,11 @@ Optional properties:
>>>   For example in dra72x-evm, pcf gpio has to be
>>>   driven low so that cpsw slave 0 and phy data
>>>   lines are connected via mux.
>>> +- descs_pool_size  : total number of CPDMA CPPI descriptors to be used for
>>> + both ingress/egress packets processing. if not
>>> + specified the default value 256 will be used which
>>> + will allow to place descriptors pool into the
>>> + internal CPPI RAM.
>> Does it describe h/w? Why now module parameter? or even smth like ethtool num
>> ring entries?
>>
> 
> It can be module parameter too. for the use cases i'm aware of -
> this is one-time boot setting only.  
> 
> - OR
> So, do you propose to use 
>ethtool -g ethX
> 
>ethtool -G ethX [rx N] [tx N]
> ?
> 
> Now cpdma has one pool for all RX/TX channels, so changing this settings
> by ethtool will require: pause interfaces, reallocate cpdma pool, 
> re-arrange buffers between channels, resume interface. Correct?
> 
> How do you think - we can move forward with one pool or better to have two 
> (Rx and Tx)?
> 
> Wouldn't it be reasonable to still have DT (or module) parameter to avoid 
> cpdma reconfiguration on system startup (pause/resume interfaces) (faster 
> boot)?
> 
> How about cpdma re-allocation policy (with expectation that is shouldn't 
> happen too often)?
> - increasing of Rx, Tx will grow total number of physically allocated buffers 
> (total_desc_num)
> - decreasing of Rx, Tx will just change number of available buffers (no 
> memory re-allocation)
> 
> - OR 
> Can we move forward with current patch (total number of CPDMA CPPI 
> descriptors defined in DT) 
> and add ethtool -G ethX [rx N] [tx N] which will allow to re-split descs 
> between RX and TX?
> 
> 

if no comments here, I'll rework patches to use module parameter for 
descs_pool_size and
will add possibility to re-split RX/TX buffers using  ethtool -G ethX [rx N] 
[tx N]

-- 
regards,
-grygorii


Re: [PATCH 3/6] net: ethernet: ti: cpts: add support of cpts HW_TS_PUSH

2016-12-08 Thread Grygorii Strashko


On 12/03/2016 05:21 PM, Richard Cochran wrote:
> On Mon, Nov 28, 2016 at 05:04:25PM -0600, Grygorii Strashko wrote:
>> This also change overflow polling period when HW_TS_PUSH feature is
>> enabled - overflow check work will be scheduled more often (every
>> 200ms) for proper HW_TS_PUSH events reporting.
> 
> For proper reporting, you should make use of the interrupt.  The small
> fifo (16 iirc) could very well overflow in 200 ms.  The interrupt
> handler should read out the entire fifo at each interrupt.
> 

huh. Seems this is not really good idea, because MISC Irq will be 
triggered for *any* CPTS event and there is no way to enable it just for
HW_TS_PUSH. So, this doesn't work will with current code for RX/TX timestamping
(which uses polling mode). + runtime overhead in net RX/TX caused by 
triggering more interrupts. 

May be, overflow check/polling timeout can be made configurable (module 
parameter). 

-- 
regards,
-grygorii


[PATCH v2] net: ethernet: ti: netcp: add support of cpts

2016-12-08 Thread Grygorii Strashko
From: WingMan Kwok 

This patch adds support of the cpts device found in the
gbe and 10gbe ethernet switches on the keystone 2 SoCs
(66AK2E/L/Hx, 66AK2Gx).

Cc: Richard Cochran 
Signed-off-by: WingMan Kwok 
Signed-off-by: Grygorii Strashko 
---
changes in v2:
 - dropped bindings changes

link on v1:
 https://lkml.org/lkml/2016/11/28/781

 drivers/net/ethernet/ti/Kconfig   |   7 +-
 drivers/net/ethernet/ti/netcp.h   |   2 +-
 drivers/net/ethernet/ti/netcp_core.c  |  20 +-
 drivers/net/ethernet/ti/netcp_ethss.c | 437 +-
 4 files changed, 452 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index ff7f518..dc217fd 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -75,12 +75,13 @@ config TI_CPSW
 
 config TI_CPTS
tristate "TI Common Platform Time Sync (CPTS) Support"
-   depends on TI_CPSW
+   depends on TI_CPSW || TI_KEYSTONE_NETCP
select PTP_1588_CLOCK
---help---
  This driver supports the Common Platform Time Sync unit of
- the CPSW Ethernet Switch. The unit can time stamp PTP UDP/IPv4
- and Layer 2 packets, and the driver offers a PTP Hardware Clock.
+ the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem.
+ The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the
+ driver offers a PTP Hardware Clock.
 
 config TI_KEYSTONE_NETCP
tristate "TI Keystone NETCP Core Support"
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index 17a26a4..0f58c58 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -121,7 +121,7 @@ struct netcp_packet {
boolrxtstamp_complete;
void*ts_context;
 
-   int (*txtstamp_complete)(void *ctx, struct netcp_packet *pkt);
+   void (*txtstamp)(void *ctx, struct sk_buff *skb);
 };
 
 static inline u32 *netcp_push_psdata(struct netcp_packet *p_info,
diff --git a/drivers/net/ethernet/ti/netcp_core.c 
b/drivers/net/ethernet/ti/netcp_core.c
index 7981b99..c243335 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -100,6 +100,11 @@ struct netcp_intf_modpriv {
void*module_priv;
 };
 
+struct netcp_tx_cb {
+   void*ts_context;
+   void(*txtstamp)(void *context, struct sk_buff *skb);
+};
+
 static LIST_HEAD(netcp_devices);
 static LIST_HEAD(netcp_modules);
 static DEFINE_MUTEX(netcp_modules_lock);
@@ -544,6 +549,7 @@ int netcp_register_rxhook(struct netcp_intf *netcp_priv, 
int order,
 
return 0;
 }
+EXPORT_SYMBOL_GPL(netcp_register_rxhook);
 
 int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order,
netcp_hook_rtn *hook_rtn, void *hook_data)
@@ -566,6 +572,7 @@ int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, 
int order,
 
return -ENOENT;
 }
+EXPORT_SYMBOL_GPL(netcp_unregister_rxhook);
 
 static void netcp_frag_free(bool is_frag, void *ptr)
 {
@@ -730,6 +737,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf 
*netcp)
 
/* Call each of the RX hooks */
p_info.skb = skb;
+   skb->dev = netcp->ndev;
p_info.rxtstamp_complete = false;
list_for_each_entry(rx_hook, &netcp->rxhook_list_head, list) {
int ret;
@@ -987,6 +995,7 @@ static int netcp_process_tx_compl_packets(struct netcp_intf 
*netcp,
  unsigned int budget)
 {
struct knav_dma_desc *desc;
+   struct netcp_tx_cb *tx_cb;
struct sk_buff *skb;
unsigned int dma_sz;
dma_addr_t dma;
@@ -1014,6 +1023,10 @@ static int netcp_process_tx_compl_packets(struct 
netcp_intf *netcp,
continue;
}
 
+   tx_cb = (struct netcp_tx_cb *)skb->cb;
+   if (tx_cb->txtstamp)
+   tx_cb->txtstamp(tx_cb->ts_context, skb);
+
if (netif_subqueue_stopped(netcp->ndev, skb) &&
netif_running(netcp->ndev) &&
(knav_pool_count(netcp->tx_pool) >
@@ -1154,6 +1167,7 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
struct netcp_tx_pipe *tx_pipe = NULL;
struct netcp_hook_list *tx_hook;
struct netcp_packet p_info;
+   struct netcp_tx_cb *tx_cb;
unsigned int dma_sz;
dma_addr_t dma;
u32 tmp = 0;
@@ -1164,7 +1178,7 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
p_info.tx_pipe = NULL;
p_info.psdata_len = 0;
p_info.ts_context = NULL;
-   p_info.txtstamp_complete = NULL;
+   p_info.txtstamp = NULL;
p_info.epib = desc->epib;
p_info.psdata = (u32 __force *)desc->psdata;
memset(p_info.epib, 

Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection

2016-12-09 Thread Grygorii Strashko


On 12/08/2016 06:47 PM, Stephen Boyd wrote:
> On 12/06, Grygorii Strashko wrote:
>> Subject: [PATCH] cpts refclk sel
>>
>> Signed-off-by: Grygorii Strashko 
>> ---
>>  arch/arm/boot/dts/keystone-k2e-netcp.dtsi | 10 +-
>>  drivers/net/ethernet/ti/cpts.c| 52 
>> ++-
>>  2 files changed, 60 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi 
>> b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
>> index 919e655..b27aa22 100644
>> --- a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
>> +++ b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
>> @@ -138,7 +138,7 @@ netcp: netcp@2400 {
>>  /* NetCP address range */
>>  ranges = <0 0x2400 0x100>;
>>
>> -clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
>> +clocks = <&clkpa>, <&clkcpgmac>, <&cpts_mux>;

^^ mux clock used here

>>  clock-names = "pa_clk", "ethss_clk", "cpts";
>>  dma-coherent;
>>
>> @@ -162,6 +162,14 @@ netcp: netcp@2400 {
>>  cpts-ext-ts-inputs = <6>;
>>  cpts-ts-comp-length;
>>
>> +cpts_mux: cpts_refclk_mux {
>> +#clock-cells = <0>;
>> +clocks = <&chipclk12>, <&chipclk13>;
>> +cpts-mux-tbl = <0>, <1>;
>> +assigned-clocks = <&cpts_mux>;
>> +assigned-clock-parents = <&chipclk12>;
> 
> Is there a binding update?
 
this was pure RFC-DEV patch just to check the possibility of modeling 
CPTS_RFTCLK_SEL register as mux clock. 
Original patch:
https://lkml.org/lkml/2016/11/28/780

I've plan to resend it using clk framework.

 Why the subnode? 

Sry, I did not get this question - is there another way to pas phandle on clock
in clocks list property? Am I missing smth.?

Sry, this is my first clock :)

> Why not have it as part of the netcp node?

cpts is part of gbe ethss, which is part of netcp.

Only netcp is modeled as DD - cpts and gbe ethss implemented without using DD 
model,
so generic resources acquired by netcp and then passed to cpts and gbe ethss.

CPTS has register to control an external multiplexer that selects
one of up to 32 clocks for time sync reference (RFTCLK)

> Does the cpts-mux-tbl property change?

On Keystone 2 66AK2e (as example) the following list of clocks can be selected 
as ref clocks (list is different for other SoCs):
 = SYSCLK2
0001 = SYSCLK3
0010 = TIMI0
0011 = TIMI1
0100 = TSIPCLKA
1000 = TSREFCLK
1100 = TSIPCLKB
Others = Reserved

and only 0 and 1 are internal, other external and board specific
(parameters unknown and corresponding inputs can be used for other purposes),
so I can't define all parent clocks, only internal:

clocks = <&chipclk12>, <&chipclk13>;
cpts-mux-tbl = <0>, <1>;

to use another, external, clock - it should be explicitly defined in board file 
the board file 

timi1clk: timi1clk {
#clock-cells = <0>;
compatible = "fixed-clock";
...

&cpts_mux {
clocks = <&chipclk12>, <&chipclk13>, ;
^^^ i can't predict value here
cpts-mux-tbl = <0>, <1>, <3>;
^^i can't predict value here
assigned-clocks = <&cpts_mux>;
assigned-clock-parents = <&timi1clk>;
};

or I understood your question wrongly?

> 
>> +};
>> +
>>  interfaces {
>>  gbe0: interface-0 {
>>  slave-port = <0>;
>> diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
>> index 938de22..ef94316 100644
>> --- a/drivers/net/ethernet/ti/cpts.c
>> +++ b/drivers/net/ethernet/ti/cpts.c
>> @@ -17,6 +17,7 @@
>>   * along with this program; if not, write to the Free Software
>>   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
>>   */
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -672,6 +673,7 @@ int cpts_register(struct cpts *cpts)
>>  cpts->phc_index = ptp_clock_index(cpts->clock);
>>
>>  schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
>> +
> 
> Maybe in another patch.
> 

sure

>>  return 0;
>>
>>  err_ptp:
>> @@ -741,6 +743,54 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
>>   freq, cpts->cc_mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
>>  }
>>

...

>> +
>> +reg = &cpts->reg->rftclk_sel;
>> +
>> +clk = clk_register_mux_table(cpts->dev, refclk_np->name,
>> + parent_names, num_parents,
>> + 0, reg, 0, 0x1F, 0, mux_table, NULL);
>> +if (IS_ERR(clk))
>> +return PTR_ERR(clk);
>> +
>> +return of_clk_add_provider(refclk_np, of_clk_src_simple_get, clk);
> 
> Can you please use the clk_hw APIs instead?
> 

ok

-- 
regards,
-grygorii


Re: [4.4-RT PATCH RFC/RFT] drivers: net: cpsw: mark rx/tx irq as IRQF_NO_THREAD

2016-09-08 Thread Grygorii Strashko
On 09/08/2016 05:28 PM, Sebastian Andrzej Siewior wrote:
> On 2016-08-12 18:58:21 [+0300], Grygorii Strashko wrote:
>> Hi Sebastian,
> Hi Grygorii,
> 
>> Thankds for comment. You're right:
>> irq_thread()->irq_forced_thread_fn()->local_bh_enable()
>>
>> but wouldn't here two wake_up_process() calls any way,
>> plus preempt_check_resched_rt() in napi_schedule().
> 
> Usually you prefer BH handling in the IRQ-thread because it runs at
> higher priority and is not interrupted by a SCHED_OTHER process. And you
> can assign it a higher priority if it should be preferred over an other
> interrupt. However, if the processing of the interrupt is taking too
> much time (like that ping flood, a lot of network traffic) then we push
> it to the softirq thread. If you do this now unconditionally in the
> SCHED_OTHER softirq thread then you take away all the `good' things we
> had (like processing important packets at higher priority as long as
> nobody floods us). Plus you share this thread with everything else that
> runs in there.

That's i understand, but effect from this patch on network throughput
is pretty amazing :)

> 
>>>> And, as result, get benefits from the following improvements (tested
>>>> on am57xx-evm):
>>>>
>>>> 1) "[ 78.348599] NOHZ: local_softirq_pending 80" message will not be
>>>>seen any more. Now these warnings can be seen once iperf is started.
>>>># iperf -c $IPERFHOST -w 128K  -d -t 60
>>>
>>> Do you also see "sched: RT throttling activated"? Because I don't see
>>> otherwise why this should pop up.
>>
>> I've reverted my patch an did requested experiments (some additional info 
>> below).
>>
>> I do not see "sched: RT throttling activated" :(
> 
> That is okay. However if aim for throughput you might want to switch
> away from NO_HZ (and deactivate the software watchdog wich runs at
> prio 99 if enabled).
> 
>> root@am57xx-evm:~# ./net_perf.sh & cyclictest -m -Sp98 -q  -D4m
>> [1] 1301
>> # /dev/cpu_dma_latency set to 0us
>> Linux am57xx-evm 4.4.16-rt23-00321-ga195e6a-dirty #92 SMP PREEMPT RT Fri Aug 
>> 12 14:03:59 EEST 2016 armv7l GNU/Linux
>>
> …
>> [1]+  Done./net_perf.sh
> 
> I can't parse this. But that local_softirq_pending() warning might
> contribute to lower numbers.
> 
>> === before, no net load:
>> cyclictest -m -Sp98 -q  -D4m -i250 -d0
>> # /dev/cpu_dma_latency set to 0us
>> T: 0 ( 1288) P:98 I:250 C: 96 Min:  8 Act:9 Avg:8 Max:  
>> 33
>> T: 1 ( 1289) P:98 I:250 C: 959929 Min:  7 Act:   11 Avg:9 Max:  
>> 26
> 
>> === after, no net load:
>> cyclictest -m -Sp98 -q  -D4m -i250 -d0
>> T: 0 ( 1301) P:98 I:250 C: 96 Min:  7 Act:9 Avg:8 Max:  
>> 22
>> T: 1 ( 1302) P:98 I:250 C: 959914 Min:  7 Act:   11 Avg:8 Max:  
>> 28
> 
> I think those two should be equal more or less since the change should
> have no impact on "no net load" or do I miss something?

Correct I see no differences in this case, as per above.

> 
>> === before, with net load:
>> cyclictest -m -Sp98 -q -D4m -i250 -d0
>> T: 0 ( 1400) P:98 I:250 C: 96 Min:  8 Act:   25 Avg:   18 Max:  
>> 83
>> T: 1 ( 1401) P:98 I:250 C: 959801 Min:  7 Act:   27 Avg:   17 Max:  
>> 48
>>
>>
>> === after, with net load:
>> cyclictest -m -Sp98 -q  -D4m -i250 -d0
>> T: 0 ( 1358) P:98 I:250 C: 96 Min:  8 Act:   11 Avg:   14 Max:  
>> 42
>> T: 1 ( 1359) P:98 I:250 C: 959743 Min:  7 Act:   18 Avg:   15 Max:  
>> 36
> 
> So the max value dropped by ~50% with your patch. Interesting. What I
> remember from testing is that once you had, say, one hour of hackbench
> running then after that, the extra network traffic didn't contribute
> much (if at all) to the max value.
> That said it is hard to believe that one extra context switch
> contributes about 40us to the max value on CPU0.

Yup. but short time testing provides very stable results.
This patch is going to be tested more intensively shortly.

> 
>>> What happens if s/__raise_softirq_irqoff_ksoft/__raise_softirq_irqoff/
>>> in net/core/dev.c and chrt the priority of you network interrupt
>>> handlers to SCHED_OTHER priority?
>>
>> = without this patch + __raise_softirq_irqoff + ne

Re: [4.4-RT PATCH RFC/RFT] drivers: net: cpsw: mark rx/tx irq as IRQF_NO_THREAD

2016-09-09 Thread Grygorii Strashko
On 09/08/2016 07:00 PM, Sebastian Andrzej Siewior wrote:
> On 2016-08-19 17:29:16 [+0300], Grygorii Strashko wrote:
>> I've collected trace before first occurrence of  "NOHZ: 
>> local_softirq_pending 80"
>>

> 
>>  irq/354-4848400-85[000]90.642393: softirq_exit: vec=3 
>> [action=NET_RX]
>>  irq/354-4848400-85[000]90.642419: sched_switch: 
>> irq/354-4848400:85 [49] S ==> rcuc/0:11 [98]
> 
> We don't serve TIMER & SCHED because those two are pushed to the
> ksoftirq thread(s). So we keep mostly doing NET_RX and now we switch to
> the next best thing which is RCU.
> 
>>   rcuc/0-11[000]90.642430: irq_handler_entry:irq=354 
>> name=48484000.ethernet
> but not for long.
> 
>>   rcuc/0-11[000]90.642432: irq_handler_exit: irq=354 
>> ret=handled
>>   rcuc/0-11[000]90.642435: sched_waking: 
>> comm=irq/354-4848400 pid=85 prio=49 target_cpu=000
>>   rcuc/0-11[000]90.642442: sched_migrate_task:   
>> comm=irq/354-4848400 pid=85 prio=49 orig_cpu=0 dest_cpu=1
>>   rcuc/0-11[000]90.642453: sched_wakeup: 
>> irq/354-4848400:85 [49] success=1 CPU:001
>>iperf-1284  [001]90.642462: sched_stat_runtime:   comm=iperf 
>> pid=1284 runtime=113053 [ns] vruntime=2106997666 [ns]
>>   rcuc/0-11[000]90.642464: irq_handler_entry:irq=355 
>> name=48484000.ethernet
>>   rcuc/0-11[000]90.642466: irq_handler_exit: irq=355 
>> ret=handled
>>   rcuc/0-11[000]90.642469: sched_waking: 
>> comm=irq/355-4848400 pid=86 prio=49 target_cpu=001
>>iperf-1284  [001]90.642473: sched_switch: iperf:1284 
>> [120] R ==> irq/354-4848400:85 [49]
>>  irq/354-4848400-85[001]90.642481: softirq_raise:vec=3 
>> [action=NET_RX]
>>   rcuc/0-11[000]90.642483: sched_wakeup: 
>> irq/355-4848400:86 [49] success=1 CPU:001
>>  irq/354-4848400-85[001]90.642493: softirq_entry:vec=3 
>> [action=NET_RX]
>>   rcuc/0-11[000]90.642497: sched_migrate_task:   
>> comm=irq/355-4848400 pid=86 prio=49 orig_cpu=1 dest_cpu=0
> ach that IRQ thread no pinned. Good. We migrate.
> 

It looks like scheduler playing ping-pong between CPUs with threaded irqs 
irq/354-355.
And seems this might be the case - if I pin both threaded IRQ handlers to CPU0
I can see better latency and netperf improvement
cyclictest -m -Sp98 -q  -D4m
T: 0 ( 1318) P:98 I:1000 C: 24 Min:  9 Act:   14 Avg:   15 Max:  42
T: 1 ( 1319) P:98 I:1500 C: 159909 Min:  9 Act:   14 Avg:   16 Max:  39

if I arrange hwirqs  and pin pin both threaded IRQ handlers on CPU1 
I can observe more less similar results as with this patch. 


>>   rcuc/0-11[000]90.642515: irq_handler_entry:irq=354 
>> name=48484000.ethernet
>>   rcuc/0-11[000]90.642516: irq_handler_exit: irq=354 
>> ret=handled

> 
> As you see ksoftirqd left the CPU with a D so I would assume it is
> blocked on a lock and waits. 
> NET_RX is in progress but scheduled out due to RCUC which is also
> scheduled out.
> 
> I assume we got to softirq because nothing else can run. It will see
> that NET_RX is pending and tries it but blocks on the lock
> (lock_softirq()). It schedules out. Nothing left -> idle.
> 
> The idle code checks to see if a softirq is pending and in fact there is
> SCHED on the list and ksoftirq was about to handle it but due to
> ordering complication (NET_RX before SCHED) it can't. And we have the
> warning.
> 
> This 
> 
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -105,6 +105,7 @@ void softirq_check_pending_idle(void)
>  {
>   static int rate_limit;
>   struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
> + struct task_struct *ksoft_tsk = __this_cpu_read(ksoftirqd);
>   u32 warnpending;
>   int i;
>  
> @@ -112,7 +113,7 @@ void softirq_check_pending_idle(void)
>   return;
>  
>   warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
> - for (i = 0; i < NR_SOFTIRQS; i++) {
> + for (i = 0; (i < NR_SOFTIRQS) && warnpending; i++) {
>   struct task_struct *tsk = sr->runner[i];
>  
>   /*
> @@ -132,6 +133,15 @@ void softirq_check_pending_idle(void)
>   }
>   }
>  
> + if (warnpending && ksoft_tsk) {
> + raw_spin_lock(&ksoft_tsk->pi_lock);
> + if (ksoft_tsk->pi_blocked_on || ksoft

[PATCH 9/9] net: ethernet: ti: cpts: switch to readl/writel_relaxed()

2016-09-14 Thread Grygorii Strashko
Switch to readl/writel_relaxed() APIs, because The CPTS IP is reused
on Keystone 2 SoCs where LE/BE modes are supported.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index cbe0974..0226582 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,8 +31,8 @@
 
 #include "cpts.h"
 
-#define cpts_read32(c, r)  __raw_readl(&c->reg->r)
-#define cpts_write32(c, v, r)  __raw_writel(v, &c->reg->r)
+#define cpts_read32(c, r)  readl_relaxed(&c->reg->r)
+#define cpts_write32(c, v, r)  writel_relaxed(v, &c->reg->r)
 
 static int event_expired(struct cpts_event *event)
 {
-- 
2.9.3



[PATCH 8/9] net: ethernet: ti: cpts: fix overflow check period

2016-09-14 Thread Grygorii Strashko
The CPTS drivers uses 8sec period for overflow checking with
assumption that CPTS rftclk will not exceed 500MHz. But that's not
true on some TI's platforms (Kesytone 2). As result, it is possible that
CPTS counter will overflow more than once between two readings.

Hence, fix it by selecting overflow check period dynamically as
max_sec_before_overflow/2, where
 max_sec_before_overflow = max_counter_val / rftclk_freq.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpts.c | 16 +++-
 drivers/net/ethernet/ti/cpts.h |  4 +---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 8046a21..cbe0974 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -251,7 +251,7 @@ static void cpts_overflow_check(struct work_struct *work)
cpts_write32(cpts, TS_PEND_EN, int_enable);
cpts_ptp_gettime(&cpts->info, &ts);
pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+   schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -391,7 +391,7 @@ int cpts_register(struct cpts *cpts)
}
cpts->phc_index = ptp_clock_index(cpts->clock);
 
-   schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+   schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
return 0;
 
 err_ptp:
@@ -427,9 +427,6 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
u64 ns;
u64 frac;
 
-   if (cpts->cc_mult || cpts->cc.shift)
-   return;
-
freq = clk_get_rate(cpts->refclk);
 
/* Calc the maximum number of seconds which we can run before
@@ -442,11 +439,20 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
else if (maxsec > 600 && cpts->cc.mask > UINT_MAX)
maxsec = 600;
 
+   /* Calc overflow check period (maxsec / 2) */
+   cpts->ov_check_period = (HZ * maxsec) / 2;
+   dev_info(cpts->dev, "cpts: overflow check period %lu\n",
+cpts->ov_check_period);
+
+   if (cpts->cc_mult || cpts->cc.shift)
+   return;
+
clocks_calc_mult_shift(&mult, &shift, freq, NSEC_PER_SEC, maxsec);
 
cpts->cc_mult = mult;
cpts->cc.mult = mult;
cpts->cc.shift = shift;
+
/* Check calculations and inform if not precise */
frac = 0;
ns = cyclecounter_cyc2ns(&cpts->cc, freq, cpts->cc.mask, &frac);
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 47026ec..e0e4a62b 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -97,9 +97,6 @@ enum {
CPTS_EV_TX,   /* Ethernet Transmit Event */
 };
 
-/* This covers any input clock up to about 500 MHz. */
-#define CPTS_OVERFLOW_PERIOD (HZ * 8)
-
 #define CPTS_FIFO_DEPTH 16
 #define CPTS_MAX_EVENTS 32
 
@@ -127,6 +124,7 @@ struct cpts {
struct list_head events;
struct list_head pool;
struct cpts_event pool_data[CPTS_MAX_EVENTS];
+   unsigned long ov_check_period;
 };
 
 int cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
2.9.3



[PATCH 2/9] net: ethernet: ti: cpsw: minimize direct access to struct cpts

2016-09-14 Thread Grygorii Strashko
This will provide more flexibility in changing CPTS internals and also
required for further changes.

Signed-off-by: Grygorii Strashko 
---
 drivers/net/ethernet/ti/cpsw.c | 28 +++-
 drivers/net/ethernet/ti/cpts.h | 41 +
 2 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b743bb1d..9b900f0 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1481,7 +1481,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
}
 
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-   cpsw->cpts->tx_enable)
+   cpts_is_tx_enabled(cpsw->cpts))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
skb_tx_timestamp(skb);
@@ -1519,7 +1519,8 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
u32 ts_en, seq_id;
 
-   if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
+   if (!cpts_is_tx_enabled(cpsw->cpts) &&
+   !cpts_is_rx_enabled(cpsw->cpts)) {
slave_write(slave, 0, CPSW1_TS_CTL);
return;
}
@@ -1527,10 +1528,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ts_en |= CPSW_V1_TS_TX_EN;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ts_en |= CPSW_V1_TS_RX_EN;
 
slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1553,20 +1554,20 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
case CPSW_VERSION_2:
ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ctrl |= CTRL_V2_TX_TS_BITS;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ctrl |= CTRL_V2_RX_TS_BITS;
break;
case CPSW_VERSION_3:
default:
ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-   if (cpsw->cpts->tx_enable)
+   if (cpts_is_tx_enabled(cpsw->cpts))
ctrl |= CTRL_V3_TX_TS_BITS;
 
-   if (cpsw->cpts->rx_enable)
+   if (cpts_is_rx_enabled(cpsw->cpts))
ctrl |= CTRL_V3_RX_TS_BITS;
break;
}
@@ -1602,7 +1603,7 @@ static int cpsw_hwtstamp_set(struct net_device *dev, 
struct ifreq *ifr)
 
switch (cfg.rx_filter) {
case HWTSTAMP_FILTER_NONE:
-   cpts->rx_enable = 0;
+   cpts_rx_enable(cpts, 0);
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
@@ -1618,14 +1619,14 @@ static int cpsw_hwtstamp_set(struct net_device *dev, 
struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-   cpts->rx_enable = 1;
+   cpts_rx_enable(cpts, 1);
cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
break;
default:
return -ERANGE;
}
 
-   cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
+   cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
 
switch (cpsw->version) {
case CPSW_VERSION_1:
@@ -1654,8 +1655,9 @@ static int cpsw_hwtstamp_get(struct net_device *dev, 
struct ifreq *ifr)
return -EOPNOTSUPP;
 
cfg.flags = 0;
-   cfg.tx_type = cpts->tx_enable ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-   cfg.rx_filter = (cpts->rx_enable ?
+   cfg.tx_type = cpts_is_tx_enabled(cpts) ?
+ HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+   cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
 HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE);
 
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index a68780d..fec753c 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -132,6 +132,29 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff 
*skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
 void cpts_unregister(struct cpts *cpts);
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+   if (cpts)
+   cpts->r

<    1   2   3   4   5   6   7   >