On 13/09/2018 13:30, Alexandre Belloni wrote:
> Add a driver for the Atmel Timer Counter Blocks. This driver provides a
> clocksource and two clockevent devices.
> 
> One of the clockevent device is linked to the clocksource counter and so it
> will run at the same frequency. This will be used when there is only on TCB
> channel available for timers.
> 
> The other clockevent device runs on a separate TCB channel when available.
> 
> This driver uses regmap and syscon to be able to probe early in the boot
> and avoid having to switch on the TCB clocksource later. 

Sorry, I don't get it. Can you elaborate?

> Using regmap also
> means that unused TCB channels may be used by other drivers (PWM for
> example). read/writel are still used to access channel specific registers
> to avoid the performance impact of regmap (mainly locking).

I don't get the regmap reasoning here.

My main concern with this driver is the 16-bit chained-channel support.
See the comments below.


> Tested-by: Alexander Dahl <a...@thorsis.com>
> Tested-by: Andras Szemzo <szemzo.and...@gmail.com>
> Signed-off-by: Alexandre Belloni <alexandre.bell...@bootlin.com>
> ---
>  drivers/clocksource/Kconfig           |   8 +
>  drivers/clocksource/Makefile          |   3 +-
>  drivers/clocksource/timer-atmel-tcb.c | 410 ++++++++++++++++++++++++++
>  3 files changed, 420 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/clocksource/timer-atmel-tcb.c
> 
> diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
> index a11f4ba98b05..8c7324e409f6 100644
> --- a/drivers/clocksource/Kconfig
> +++ b/drivers/clocksource/Kconfig
> @@ -403,6 +403,14 @@ config ATMEL_ST
>       help
>         Support for the Atmel ST timer.
>  
> +config ATMEL_ARM_TCB_CLKSRC
> +     bool "Microchip ARM TC Block" if COMPILE_TEST
> +     select REGMAP_MMIO
> +     depends on GENERIC_CLOCKEVENTS
> +     help
> +       This enables build of clocksource and clockevent driver for
> +       the integrated Timer Counter Blocks in Microchip ARM SoCs.
> +
>  config CLKSRC_EXYNOS_MCT
>       bool "Exynos multi core timer driver" if COMPILE_TEST
>       depends on ARM || ARM64
> diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
> index db51b2427e8a..0df9384a1230 100644
> --- a/drivers/clocksource/Makefile
> +++ b/drivers/clocksource/Makefile
> @@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF)                += timer-of.o
>  obj-$(CONFIG_TIMER_PROBE)    += timer-probe.o
>  obj-$(CONFIG_ATMEL_PIT)              += timer-atmel-pit.o
>  obj-$(CONFIG_ATMEL_ST)               += timer-atmel-st.o
> -obj-$(CONFIG_ATMEL_TCB_CLKSRC)       += tcb_clksrc.o
> +obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
> +obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC)   += timer-atmel-tcb.o
>  obj-$(CONFIG_X86_PM_TIMER)   += acpi_pm.o
>  obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
>  obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
> diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c
> new file mode 100644
> index 000000000000..21fbe430f91b
> --- /dev/null
> +++ b/drivers/clocksource/timer-atmel-tcb.c
> @@ -0,0 +1,410 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/clk.h>
> +#include <linux/clockchips.h>
> +#include <linux/clocksource.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/mfd/syscon.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/regmap.h>
> +#include <linux/sched_clock.h>
> +#include <soc/at91/atmel_tcb.h>
> +
> +struct atmel_tcb_clksrc {
> +     struct clocksource clksrc;
> +     struct clock_event_device clkevt;
> +     struct regmap *regmap;
> +     void __iomem *base;
> +     struct clk *clk[2];
> +     char name[20];

You can reasonably remove this field and directly use the name fields in
the clocksource/clockevent structures.

> +     int channels[2];
> +     int bits;
> +     int irq;

After removing the request_irq/free_irq calls below (see comment), this
field can be removed.

> +     struct {
> +             u32 cmr;
> +             u32 imr;
> +             u32 rc;
> +             bool clken;

Not sure clken is needed, 16/32 is enough information.

> +     } cache[2];
> +     u32 bmr_cache;

Are you sure you should save the bmr content ?

> +     bool registered;
> +     bool clk_enabled;

Not used.

> +};
> +
> +static struct atmel_tcb_clksrc tc;
> +
> +static struct clk *tcb_clk_get(struct device_node *node, int channel)
> +{
> +     struct clk *clk;
> +     char clk_name[] = "t0_clk";
> +
> +     clk_name[1] += channel;
> +     clk = of_clk_get_by_name(node->parent, clk_name);
> +     if (!IS_ERR(clk))
> +             return clk;
> +
> +     return of_clk_get_by_name(node->parent, "t0_clk");

Can you explain why returning "t0_clk" is better than returning an error?

> +}
> +
> +/*
> + * Clocksource and clockevent using the same channel(s)
> + */
> +static u64 tc_get_cycles(struct clocksource *cs)
> +{
> +     u32 lower, upper;
> +
> +     do {
> +             upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
> +             lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
> +     } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
> +
> +     return (upper << 16) | lower;
> +}
> +
> +static u64 tc_get_cycles32(struct clocksource *cs)
> +{
> +     return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
> +}
> +
> +static u64 notrace tc_sched_clock_read(void)
> +{
> +     return tc_get_cycles(&tc.clksrc);
> +}
> +
> +static u64 notrace tc_sched_clock_read32(void)
> +{
> +     return tc_get_cycles32(&tc.clksrc);
> +}
> +
> +static int tcb_clkevt_next_event(unsigned long delta,
> +                              struct clock_event_device *d)
> +{
> +     u32 old, next, cur;
> +
> +     old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
> +     next = old + delta;
> +     writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
> +     cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
> +
> +     /* check whether the delta elapsed while setting the register */
> +     if ((next < old && cur < old && cur > next) ||
> +         (next > old && (cur < old || cur > next))) {
> +             /*
> +              * Clear the CPCS bit in the status register to avoid
> +              * generating a spurious interrupt next time a valid
> +              * timer event is configured.
> +              */
> +             old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
> +             return -ETIME;
> +     }
> +     writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));


How is this compatible with 16 bits as defined in the init function?

> +     return 0;
> +}
> +
> +static irqreturn_t tc_clkevt_irq(int irq, void *handle)
> +{
> +     unsigned int sr;
> +
> +     sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
> +     if (sr & ATMEL_TC_CPCS) {
> +             tc.clkevt.event_handler(&tc.clkevt);
> +             return IRQ_HANDLED;
> +     }
> +
> +     return IRQ_NONE;
> +}
> +
> +static int tcb_clkevt_oneshot(struct clock_event_device *dev)
> +{
> +     if (clockevent_state_oneshot(dev))
> +             return 0;
> +
> +     /*
> +      * Because both clockevent devices may share the same IRQ, we don't want
> +      * the less likely one to stay requested
> +      */
> +     return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
> +                        tc.name, &tc);
> +}
> +
> +static int tcb_clkevt_shutdown(struct clock_event_device *dev)
> +{
> +     writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
> +     if (tc.bits == 16)
> +             writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
> +
> +     if (!clockevent_state_detached(dev))
> +             free_irq(tc.irq, &tc);

Why are you requesting and freeing the irq instead of using the
disable/enable register operations ?

> +     return 0;
> +}
> +
> +static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
> +                                    int mck_divisor_idx)
> +{
> +     /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
> +     writel(mck_divisor_idx                  /* likely divide-by-8 */
> +            | ATMEL_TC_CMR_WAVE
> +            | ATMEL_TC_CMR_WAVESEL_UP        /* free-run */
> +            | ATMEL_TC_CMR_ACPA(SET)         /* TIOA rises at 0 */
> +            | ATMEL_TC_CMR_ACPC(CLEAR),      /* (duty cycle 50%) */
> +            tc->base + ATMEL_TC_CMR(tc->channels[0]));
> +     writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
> +     writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
> +     writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
> +     writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
> +
> +     /* second channel: waveform mode, input TIOA */
> +     writel(ATMEL_TC_CMR_XC(tc->channels[1])         /* input: TIOA */
> +            | ATMEL_TC_CMR_WAVE
> +            | ATMEL_TC_CMR_WAVESEL_UP,               /* free-run */
> +            tc->base + ATMEL_TC_CMR(tc->channels[1]));
> +     writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
> +     writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
> +
> +     /* chain both channel, we assume the previous channel */
> +     regmap_write(tc->regmap, ATMEL_TC_BMR,
> +                  ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
> +     /* then reset all the timers */
> +     regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
> +}
> +
> +static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
> +                                      int mck_divisor_idx)
> +{
> +     /* channel 0:  waveform mode, input mclk/8 */
> +     writel(mck_divisor_idx                  /* likely divide-by-8 */
> +            | ATMEL_TC_CMR_WAVE
> +            | ATMEL_TC_CMR_WAVESEL_UP,       /* free-run */
> +            tc->base + ATMEL_TC_CMR(tc->channels[0]));
> +     writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
> +     writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
> +
> +     /* then reset all the timers */
> +     regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
> +}
> +
> +static void tc_clksrc_suspend(struct clocksource *cs)
> +{
> +     int i;
> +
> +     for (i = 0; i < 1 + (tc.bits == 16); i++) {
> +             tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
> +             tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
> +             tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
> +             tc.cache[i].clken = !!(readl(tc.base +
> +                                          ATMEL_TC_SR(tc.channels[i])) &
> +                                    ATMEL_TC_CLKSTA);
> +     }
> +
> +     if (tc.bits == 16)
> +             regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
> +}
> +
> +static void tc_clksrc_resume(struct clocksource *cs)
> +{
> +     int i;
> +
> +     for (i = 0; i < 1 + (tc.bits == 16); i++) {
> +             /* Restore registers for the channel, RA and RB are not used  */
> +             writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
> +             writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
> +             writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
> +             writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
> +             /* Disable all the interrupts */
> +             writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
> +             /* Reenable interrupts that were enabled before suspending */
> +             writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
> +
> +             /* Start the clock if it was used */
> +             if (tc.cache[i].clken)
> +                     writel(ATMEL_TC_CCR_CLKEN, tc.base +
> +                            ATMEL_TC_CCR(tc.channels[i]));
> +     }
> +
> +     /* in case of dual channel, chain channels */
> +     if (tc.bits == 16)
> +             regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
> +     /* Finally, trigger all the channels*/
> +     regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
> +}
> +
> +static int __init tcb_clksrc_register(struct device_node *node,
> +                                   struct regmap *regmap, void __iomem *base,
> +                                   int channel, int channel1, int irq,
> +                                   int bits)
> +{
> +     u32 rate, divided_rate = 0;
> +     int best_divisor_idx = -1;
> +     int i, err = -1;
> +     u64 (*tc_sched_clock)(void);
> +
> +     tc.regmap = regmap;
> +     tc.base = base;
> +     tc.channels[0] = channel;
> +     tc.channels[1] = channel1;
> +     tc.irq = irq;
> +     tc.bits = bits;
> +
> +     tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
> +     if (IS_ERR(tc.clk[0]))
> +             return PTR_ERR(tc.clk[0]);
> +     err = clk_prepare_enable(tc.clk[0]);
> +     if (err) {
> +             pr_debug("can't enable T0 clk\n");
> +             goto err_clk;
> +     }
> +
> +     /* How fast will we be counting?  Pick something over 5 MHz.  */
> +     rate = (u32)clk_get_rate(tc.clk[0]);
> +     for (i = 0; i < 5; i++) {
> +             unsigned int divisor = atmel_tc_divisors[i];
> +             unsigned int tmp;
> +
> +             if (!divisor)
> +                     continue;

I suppose you meant 'break' here? Use atmel_tc_divisors[] = { 2, 8, 32,
128 }; and then the ARRAY_SIZE macro.

> +             tmp = rate / divisor;
> +             pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
> +             if (best_divisor_idx > 0) {
> +                     if (tmp < 5 * 1000 * 1000)
> +                             continue;
> +             }
> +             divided_rate = tmp;
> +             best_divisor_idx = i;

What is the best divisor? The highest one, or the one closest to 5 MHz?

> +     }
> +
> +     if (tc.bits == 32) {
> +             tc.clksrc.read = tc_get_cycles32;
> +             tcb_setup_single_chan(&tc, best_divisor_idx);
> +             tc_sched_clock = tc_sched_clock_read32;
> +             snprintf(tc.name, sizeof(tc.name), "%s:%d",
> +                      kbasename(node->parent->full_name), tc.channels[0]);
> +     } else {
> +             tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
> +             if (IS_ERR(tc.clk[1]))
> +                     goto err_disable_t0;

This is very confusing. If the function tcb_clk_get() fails with this
channel, it will return "t0_clk", which will then be used here? Why?

> +             err = clk_prepare_enable(tc.clk[1]);
> +             if (err) {
> +                     pr_debug("can't enable T1 clk\n");
> +                     goto err_clk1;
> +             }
> +             tc.clksrc.read = tc_get_cycles,
> +             tcb_setup_dual_chan(&tc, best_divisor_idx);
> +             tc_sched_clock = tc_sched_clock_read;
> +             snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
> +                      kbasename(node->parent->full_name), tc.channels[0],
> +                      tc.channels[1]);
> +     }
> +
> +     pr_debug("%s at %d.%03d MHz\n", tc.name,
> +              divided_rate / 1000000,
> +              ((divided_rate + 500000) % 1000000) / 1000);

Using two channels to emulate a 32-bit timer has a significant cost,
especially in the sched_clock function, which is part of the hot kernel
path. In addition, it makes the code less maintainable and readable.

Why don't you just stick to a specific rate with the prescaler value and
reduce the rating of the timer? (See the stm32 timer for an example:
stm32_timer_set_prescaler and the init function.)

It will be less precise (hence the lower rating) but will make the system
faster by avoiding multiple register reads in sched_clock.

Is it an acceptable trade-off ?

> +     tc.clksrc.name = tc.name;
> +     tc.clksrc.suspend = tc_clksrc_suspend;
> +     tc.clksrc.resume = tc_clksrc_resume;
> +     tc.clksrc.rating = 200;
> +     tc.clksrc.mask = CLOCKSOURCE_MASK(32);
> +     tc.clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
> +
> +     err = clocksource_register_hz(&tc.clksrc, divided_rate);
> +     if (err)
> +             goto err_disable_t1;
> +
> +     sched_clock_register(tc_sched_clock, 32, divided_rate);
> +
> +     tc.registered = true;
> +
> +     /* Set up and register clockevents */
> +     tc.clkevt.name = tc.name;
> +     tc.clkevt.cpumask = cpumask_of(0);
> +     tc.clkevt.set_next_event = tcb_clkevt_next_event;
> +     tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
> +     tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
> +     tc.clkevt.features = CLOCK_EVT_FEAT_ONESHOT;
> +     tc.clkevt.rating = 125;
> +
> +     clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
> +                                     BIT(tc.bits) - 1);
> +
> +     return 0;
> +
> +err_disable_t1:
> +     if (tc.bits == 16)
> +             clk_disable_unprepare(tc.clk[1]);
> +
> +err_clk1:
> +     if (tc.bits == 16)
> +             clk_put(tc.clk[1]);
> +
> +err_disable_t0:
> +     clk_disable_unprepare(tc.clk[0]);
> +
> +err_clk:
> +     clk_put(tc.clk[0]);
> +
> +     pr_err("%s: unable to register clocksource/clockevent\n",
> +            tc.clksrc.name);
> +
> +     return err;
> +}
> +
> +static int __init tcb_clksrc_init(struct device_node *node)
> +{
> +     const struct of_device_id *match;
> +     struct regmap *regmap;
> +     void __iomem *tcb_base;
> +     u32 channel;
> +     int irq, err, chan1 = -1;
> +     unsigned bits;
> +
> +     if (tc.registered)
> +             return -ENODEV;
> +
> +     /*
> +      * The regmap has to be used to access registers that are shared
> +      * between channels on the same TCB but we keep direct IO access for
> +      * the counters to avoid the impact on performance
> +      */
> +     regmap = syscon_node_to_regmap(node->parent);
> +     if (IS_ERR(regmap))
> +             return PTR_ERR(regmap);
> +
> +     tcb_base = of_iomap(node->parent, 0);
> +     if (!tcb_base) {
> +             pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);

Remove this debug information and replace it with a proper error message.

> +             return -ENXIO;
> +     }
> +
> +     match = of_match_node(atmel_tcb_dt_ids, node->parent);
> +     bits = (uintptr_t)match->data;
> +
> +     err = of_property_read_u32_index(node, "reg", 0, &channel);
> +     if (err)
> +             return err;
> +
> +     irq = of_irq_get(node->parent, channel);
> +     if (irq < 0) {

if (irq <= 0) {

> +             irq = of_irq_get(node->parent, 0);

Why ?

> +             if (irq < 0)

if (irq <= 0) {

> +                     return irq;
> +     }
> +
> +     if (bits == 16) {
> +             of_property_read_u32_index(node, "reg", 1, &chan1);
> +             if (chan1 == -1) {
> +                     pr_err("%s: clocksource needs two channels\n",
> +                            node->parent->full_name);

Think about it. The code is giving up at this point in the boot process.
So it is one of two things: either you consider that there is an alternate
clocksource/clockevent, or the system hangs:

 - If there is an alternate clocksource, why support 32 bits by chaining
the channels, with the cost it introduces, instead of using the alternate
one?

 - If there is no alternate clocksource, why not support a less precise
16-bit timer and give up on the 32-bit emulation and the complexity
it introduces in this driver?

> +                     return -EINVAL;
> +             }
> +     }
> +     return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
> +                                bits);
> +}
> +TIMER_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer", tcb_clksrc_init);
> 


-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

Reply via email to