Grant,

On 07/04/2011 10:55 PM, Grant Likely wrote:
> On Mon, Jul 04, 2011 at 03:15:56PM -0500, Rob Herring wrote:
>> From: Rob Herring <[email protected]>
>>
>> This adds probing for ARM L2x0 cache controllers via device tree. Support
>> includes the L210, L220, and PL310 controllers. The binding allows setting
>> up cache RAM latencies and filter addresses (PL310 only).
>>
>> Signed-off-by: Rob Herring <[email protected]>
>> ---
>> I've tested this version and fixed some issues from the one I sent to the
>> CSR platform thread.
>>
>> Changes in v3:
>> - Allow platforms to set aux ctrl reg with aux_value and aux_mask.
>> - Add RAM latency and filter address bindings based on CSR's platform needs.
>>
>>  Documentation/devicetree/bindings/arm/l2cc.txt |   40 ++++++++
>>  arch/arm/include/asm/hardware/cache-l2x0.h     |   17 ++++
>>  arch/arm/mm/cache-l2x0.c                       |  120 
>> ++++++++++++++++++++++++
>>  3 files changed, 177 insertions(+), 0 deletions(-)
>>  create mode 100644 Documentation/devicetree/bindings/arm/l2cc.txt
>>
>> diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt 
>> b/Documentation/devicetree/bindings/arm/l2cc.txt
>> new file mode 100644
>> index 0000000..79e66fb
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/arm/l2cc.txt
>> @@ -0,0 +1,40 @@
>> +* ARM L2 Cache Controller
>> +
>> +ARM cores often have a separate level 2 cache controller. There are various
>> +implementations of the L2 cache controller with compatible programming 
>> models.
>> +The ARM L2 cache representation in the device tree should be done as under:-
> 
> Damaged sentence?
> 
>> +
>> +Required properties:
>> +
>> +- compatible : should be one of
>> +    "arm,pl310-cache"
>> +    "arm,l220-cache"
>> +    "arm,l210-cache"
>> +- cache-unified : Specifies the cache is a unified cache.
>> +- cache-level : Should be set to 2 for a level 2 cache.
>> +- reg : Physical base address and size of cache controller's memory mapped
>> +  registers.
>> +
>> +Optional properties:
>> +
>> +- data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells 
>> of
>> +  read, write and setup latencies. Controllers without setup latency control
>> +  should use 0.
>> +- tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of
>> +  read, write and setup latencies. Controllers without setup latency control
>> +  should use 0.
>> +- dirty-latency : Cycles of latency for reads of Dirty RAMs. This is a 
>> single
>> +  cell.t
>> +- filter-ranges : <start end> Address range the  
> 
> Incomplete sentence?
> 
> Typically address ranges in the DT are <start size> pairs.  Does the
> filter-ranges property deviate from this?
> 
> Personally, I'd suggest prefixing these custom properties with "arm,"
> to avoid any potential namespace conflict.
> 
>> +
>> +Example:
>> +
>> +L2: l2-cache {
>> +        compatible = "arm,pl310-cache", "cache";
> 
> Drop "cache".  It isn't useful.
> 
>> +        reg = <0xfff12000 0x1000>;
>> +        data-latency = <1 1 1>;
>> +        tag-latency = <2 2 2>;
>> +        cache-unified;
>> +        cache-level = <2>;
>> +};
>> +
>> diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h 
>> b/arch/arm/include/asm/hardware/cache-l2x0.h
>> index 16bd480..8fe149f 100644
>> --- a/arch/arm/include/asm/hardware/cache-l2x0.h
>> +++ b/arch/arm/include/asm/hardware/cache-l2x0.h
>> @@ -47,6 +47,8 @@
>>  #define L2X0_CLEAN_INV_WAY          0x7FC
>>  #define L2X0_LOCKDOWN_WAY_D         0x900
>>  #define L2X0_LOCKDOWN_WAY_I         0x904
>> +#define L2X0_ADDR_FILTER_START              0xC00
>> +#define L2X0_ADDR_FILTER_END                0xC04
>>  #define L2X0_TEST_OPERATION         0xF00
>>  #define L2X0_LINE_DATA                      0xF10
>>  #define L2X0_LINE_TAG                       0xF30
>> @@ -62,6 +64,14 @@
>>  #define L2X0_CACHE_ID_PART_L310             (3 << 6)
>>  
>>  #define L2X0_AUX_CTRL_MASK                  0xc0000fff
>> +#define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT 0
>> +#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK  0x7
>> +#define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT 3
>> +#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK  (0x7 << 3)
>> +#define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT             6
>> +#define L2X0_AUX_CTRL_TAG_LATENCY_MASK              (0x7 << 6)
>> +#define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT   9
>> +#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK    (0x7 << 9)
>>  #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT   16
>>  #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT                17
>>  #define L2X0_AUX_CTRL_WAY_SIZE_MASK         (0x3 << 17)
>> @@ -72,8 +82,15 @@
>>  #define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT  29
>>  #define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT             30
>>  
>> +#define L2X0_LATENCY_CTRL_SETUP_SHIFT       0
>> +#define L2X0_LATENCY_CTRL_RD_SHIFT  4
>> +#define L2X0_LATENCY_CTRL_WR_SHIFT  8
>> +
>> +#define L2X0_ADDR_FILTER_EN         1
>> +
>>  #ifndef __ASSEMBLY__
>>  extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 
>> aux_mask);
>> +extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask);
>>  #endif
>>  
>>  #endif
>> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
>> index ef59099..649be84 100644
>> --- a/arch/arm/mm/cache-l2x0.c
>> +++ b/arch/arm/mm/cache-l2x0.c
>> @@ -16,9 +16,12 @@
>>   * along with this program; if not, write to the Free Software
>>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>>   */
>> +#include <linux/err.h>
>>  #include <linux/init.h>
>>  #include <linux/spinlock.h>
>>  #include <linux/io.h>
>> +#include <linux/of.h>
>> +#include <linux/of_address.h>
>>  
>>  #include <asm/cacheflush.h>
>>  #include <asm/hardware/cache-l2x0.h>
>> @@ -344,3 +347,120 @@ void __init l2x0_init(void __iomem *base, __u32 
>> aux_val, __u32 aux_mask)
>>      printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, 
>> Cache size: %d B\n",
>>                      ways, cache_id, aux, l2x0_size);
>>  }
>> +
>> +#ifdef CONFIG_OF
>> +static const struct of_device_id l2x0_ids[] __initconst = {
>> +    { .compatible = "arm,pl310-cache" },
>> +    { .compatible = "arm,l220-cache" },
>> +    { .compatible = "arm,l210-cache" },
>> +    {}
>> +};
>> +
>> +static void __init l2x0_of_set_address_filter(const struct device_node *np)
>> +{
>> +    u32 start, end;
>> +    const u32 *prop;
>> +    int len;
>> +    int is_pl310 = of_device_is_compatible(np, "arm,pl310-cache");
>> +
>> +    if (!is_pl310 || (readl_relaxed(l2x0_base + L2X0_CTRL) & 1))
>> +            return;
>> +
>> +    prop = of_get_property(np, "filter-ranges", &len);
>> +    if (!prop || (len != (2 * sizeof(prop))))
>> +            return;
>> +
>> +    start = be32_to_cpup(prop++) | L2X0_ADDR_FILTER_EN;
>> +    end = be32_to_cpup(prop++);
>> +    writel_relaxed(end, l2x0_base + L2X0_ADDR_FILTER_END);
>> +    writel_relaxed(start, l2x0_base + L2X0_ADDR_FILTER_START);
>> +}
>> +
>> +static void __init l2x0_of_set_ram_timings(const struct device_node *np,
>> +                                      __u32 *aux_val, __u32 *aux_mask)
>> +{
>> +    u32 data_rd = 0, data_wr = 0, data_setup = 0;
>> +    u32 tag_rd = 0, tag_wr = 0, tag_setup = 0;
>> +    u32 dirty = 0;
>> +    const u32 *prop;
> 
> const __be32 *prop;
> 
>> +    int len;
>> +    int is_pl310 = of_device_is_compatible(np, "arm,pl310-cache");
>> +
>> +    if (readl_relaxed(l2x0_base + L2X0_CTRL) & 1)
>> +            return;
>> +
>> +    prop = of_get_property(np, "data-latency", &len);
>> +    if (prop && (len == (3 * sizeof(prop)))) {
>> +            data_rd = be32_to_cpup(prop++);
>> +            data_wr = be32_to_cpup(prop++);
>> +            data_setup = be32_to_cpup(prop);
>> +    }
> 
> I wonder if it would be useful to have an of_property_read_u32array() helper?
> 
>> +
>> +    prop = of_get_property(np, "tag-latency", &len);
>> +    if (prop && (len == (3 * sizeof(prop)))) {
>> +            tag_rd = be32_to_cpup(prop++);
>> +            tag_wr = be32_to_cpup(prop++);
>> +            tag_setup = be32_to_cpup(prop);
>> +    }
>> +
>> +    prop = of_get_property(np, "dirty-latency", &len);
>> +    if (prop && (len == sizeof(prop)))
>> +            dirty = be32_to_cpup(prop);
> 
> of_property_read_u32()
> 
>> +
>> +    if (is_pl310 && tag_wr && tag_rd && tag_setup)
>> +            writel_relaxed(
>> +                    (--tag_wr << L2X0_LATENCY_CTRL_WR_SHIFT) |
>> +                    (--tag_rd << L2X0_LATENCY_CTRL_RD_SHIFT) |
>> +                    (--tag_setup << L2X0_LATENCY_CTRL_SETUP_SHIFT),
> 
> tag_wr, tag_rd and tag_setup are only used once, so the self decrement
> is confusing.  I'd rather see simply '(tag_rw - 1) << ...'
> 
> 
>> +                    l2x0_base + L2X0_TAG_LATENCY_CTRL);
>> +
>> +    if (is_pl310 && data_wr && data_rd && data_setup)
>> +            writel_relaxed(
>> +                    (--data_wr << L2X0_LATENCY_CTRL_WR_SHIFT) |
>> +                    (--data_rd << L2X0_LATENCY_CTRL_RD_SHIFT) |
>> +                    (--data_setup << L2X0_LATENCY_CTRL_SETUP_SHIFT),
>> +                    l2x0_base + L2X0_TAG_LATENCY_CTRL);
> 
> Hmmm, there are 2 sets of if() blocks here.  One for is_pl310, and one
> for !is_pl310.  Instead of testing is_pl310 over and over, it would
> make more sense to me to do:
> 
>       if (is_pl310) {
>               if (tag_wr && tag_rd && tag_setup)
>                       ...
>               if (data_wr && data_wr && data_setup)
>                       ...
>       } else {
>               if (tag_rd)
>                       ...
>               if (data_rd)
>                       ...
>               ...
>       }

It was really just to avoid another level of indentation.

>> +
>> +    if (!is_pl310 && tag_rd) {
>> +            *aux_val &= ~L2X0_AUX_CTRL_TAG_LATENCY_MASK;
>> +            *aux_val |= --tag_rd << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
>> +            *aux_mask &= ~L2X0_AUX_CTRL_TAG_LATENCY_MASK;
>> +    }
>> +
>> +    if (!is_pl310 && data_rd) {
>> +            *aux_val &= ~L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK;
>> +            *aux_val |= --data_rd << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT;
>> +            *aux_mask &= ~L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK;
>> +    }
>> +
>> +    if (!is_pl310 && data_wr) {
>> +            *aux_val &= ~L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
>> +            *aux_val |= --data_wr << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT;
>> +            *aux_mask &= ~L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
>> +    }
>> +
>> +    if (!is_pl310 && dirty) {
>> +            *aux_val &= ~L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
>> +            *aux_val |= --dirty << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
>> +            *aux_mask &= ~L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
>> +    }
> 
> Something about this just feels suboptimal.  It's essentially the
> exact same block of code 4 times with different values, masks and
> shifts.  It may be best the way it is, but I do wonder if it could be
> made to look nicer.

How about something like this:

>       if (is_pl310) {
>               if (tag[0] && tag[1] && tag[2])
>                       writel_relaxed(
>                               ((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
>                               ((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
>                               ((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
>                               l2x0_base + L2X0_TAG_LATENCY_CTRL);
> 
>               if (data[0] && data[1] && data[2])
>                       writel_relaxed(
>                               ((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
>                               ((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
>                               ((data[2] - 1) << 
> L2X0_LATENCY_CTRL_SETUP_SHIFT),
>                               l2x0_base + L2X0_DATA_LATENCY_CTRL);
> 
>               return;
>       }
> 
>       if (tag[0]) {
>               mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
>               val |= (tag[0] - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
>       }
> 
>       if (data[0] && data[1]) {
>               mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
>                       L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
>               val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
>                      ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
>       }
> 
>       if (dirty) {
>               mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
>               val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
>       }
> 
>       *aux_val &= ~mask;
>       *aux_val |= val;
>       *aux_mask &= ~mask;
> }

Rob
_______________________________________________
devicetree-discuss mailing list
[email protected]
https://lists.ozlabs.org/listinfo/devicetree-discuss

Reply via email to