On Tue, Feb 09, 2016 at 04:53:55PM -0600, Suravee Suthikulpanit wrote:
> The current amd_iommu_pc_get_set_reg_val() does not support muli-IOMMU
> system. This patch replace amd_iommu_pc_get_set_reg_val() with
> amd_iommu_pc_set_reg_val() and amd_iommu_pc_[set|get]_cnt_vals().
> 
> Also, the current struct hw_perf_event.prev_count can only store the
> previous counter value only from one IOMMU. So, this patch introduces
> a new data structure, perf_amd_iommu.prev_cnts, to track previous value
> of each performance counter of each bank of each IOMMU.
> 
> Last, this patch introduce perf_iommu_cnts to temporary hold counter
> values from each IOMMU for internal use when manages counters among
> multiple IOMMUs.
> 
> Note that this implementation makes an assumption that the counters
> on all IOMMUs will be programmed the same way (i.e with the same events).
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
> ---
>  arch/x86/kernel/cpu/perf_event_amd_iommu.c | 125 
> +++++++++++++++++++++--------
>  drivers/iommu/amd_iommu_init.c             |  87 +++++++++++++++++---
>  include/linux/perf/perf_event_amd_iommu.h  |   8 +-
>  3 files changed, 174 insertions(+), 46 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c 
> b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
> index 791bbcf..ce6ba3f 100644
> --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
> +++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
> @@ -42,6 +42,12 @@ struct perf_amd_iommu {
>       u64 cntr_assign_mask;
>       raw_spinlock_t lock;
>       const struct attribute_group *attr_groups[4];
> +
> +     /* This is a 3D array used to store the previous count values
> +      * from each performance counter of each bank of each IOMMU.
> +      * I.E. size of array = (num iommus * num banks * num counters)
> +      */
> +     local64_t *prev_cnts;
>  };
>  
>  #define format_group attr_groups[0]
> @@ -121,6 +127,11 @@ static struct amd_iommu_event_desc 
> amd_iommu_v2_event_descs[] = {
>       { /* end: all zeroes */ },
>  };
>  
> +/* This is an array used to temporary hold the current values
> + * read from a particular perf counter from each IOMMU.
> + */
> +static u64 *perf_iommu_cnts;
> +
>  /*---------------------------------------------
>   * sysfs cpumask attributes
>   *---------------------------------------------*/
> @@ -256,44 +267,46 @@ static void perf_iommu_enable_event(struct perf_event 
> *ev)
>       u64 reg = 0ULL;
>  
>       reg = csource;
> -     amd_iommu_pc_get_set_reg_val(devid,
> +     amd_iommu_pc_set_reg_val(devid,
>                       _GET_BANK(ev), _GET_CNTR(ev) ,
> -                      IOMMU_PC_COUNTER_SRC_REG, &reg, true);
> +                      IOMMU_PC_COUNTER_SRC_REG, &reg);

Read-impairing indentation. It should be aligned at the opening brace:

        function_name(param1, param2,
                      param3, ...

Ditto for the calls below.

>  
>       reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);

0ULL?

Is this supposed to clear the old value from the previous read above?

What's wrong with

        reg = devid | (_GET_DEVID_MASK(ev) << 32);

?

Same for the rest.

>       if (reg)
>               reg |= (1UL << 31);

All those can be turned into

                reg |= BIT(31);

> -     amd_iommu_pc_get_set_reg_val(devid,
> +     amd_iommu_pc_set_reg_val(devid,
>                       _GET_BANK(ev), _GET_CNTR(ev) ,
> -                      IOMMU_PC_DEVID_MATCH_REG, &reg, true);
> +                      IOMMU_PC_DEVID_MATCH_REG, &reg);
>  
>       reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
>       if (reg)
>               reg |= (1UL << 31);
> -     amd_iommu_pc_get_set_reg_val(devid,
> +     amd_iommu_pc_set_reg_val(devid,
>                       _GET_BANK(ev), _GET_CNTR(ev) ,
> -                      IOMMU_PC_PASID_MATCH_REG, &reg, true);
> +                      IOMMU_PC_PASID_MATCH_REG, &reg);
>  
>       reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
>       if (reg)
>               reg |= (1UL << 31);
> -     amd_iommu_pc_get_set_reg_val(devid,
> +     amd_iommu_pc_set_reg_val(devid,
>                       _GET_BANK(ev), _GET_CNTR(ev) ,
> -                      IOMMU_PC_DOMID_MATCH_REG, &reg, true);
> +                      IOMMU_PC_DOMID_MATCH_REG, &reg);
>  }
>  
>  static void perf_iommu_disable_event(struct perf_event *event)
>  {
>       u64 reg = 0ULL;
>  
> -     amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
> +     amd_iommu_pc_set_reg_val(_GET_DEVID(event),
>                       _GET_BANK(event), _GET_CNTR(event),
> -                     IOMMU_PC_COUNTER_SRC_REG, &reg, true);
> +                     IOMMU_PC_COUNTER_SRC_REG, &reg);
>  }
>  
>  static void perf_iommu_start(struct perf_event *event, int flags)
>  {
>       struct hw_perf_event *hwc = &event->hw;
> +     struct perf_amd_iommu *perf_iommu =
> +                     container_of(event->pmu, struct perf_amd_iommu, pmu);

There's no need to make it less readable and still fit within the 80
cols rule. Feel free to relax it a little.

>       pr_debug("perf: amd_iommu:perf_iommu_start\n");
>       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
> @@ -303,10 +316,19 @@ static void perf_iommu_start(struct perf_event *event, 
> int flags)
>       hwc->state = 0;
>  
>       if (flags & PERF_EF_RELOAD) {
> -             u64 prev_raw_count =  local64_read(&hwc->prev_count);
> -             amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
> -                             _GET_BANK(event), _GET_CNTR(event),
> -                             IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
> +             int i;
> +
> +             for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
> +                     int index = get_iommu_bnk_cnt_evt_idx(perf_iommu, i,
> +                                       _GET_BANK(event), _GET_CNTR(event));

No need for the line break.

> +
> +                     perf_iommu_cnts[i] = local64_read(
> +                                             &perf_iommu->prev_cnts[index]);

Yuck. What's wrong with:

                        perf_iommu_cnts[i] = 
local64_read(&perf_iommu->prev_cnts[index]);

?

It looks much better to me.

> +             }
> +
> +             amd_iommu_pc_set_cnt_vals(_GET_BANK(event), _GET_CNTR(event),
> +                                       amd_iommu_get_num_iommus(),
> +                                       perf_iommu_cnts);

This one looks good.

>       }
>  
>       perf_iommu_enable_event(event);
> @@ -316,29 +338,47 @@ static void perf_iommu_start(struct perf_event *event, 
> int flags)
>  
>  static void perf_iommu_read(struct perf_event *event)
>  {
> -     u64 count = 0ULL;
> -     u64 prev_raw_count = 0ULL;
> +     int i;
>       u64 delta = 0ULL;
>       struct hw_perf_event *hwc = &event->hw;
> -     pr_debug("perf: amd_iommu:perf_iommu_read\n");
> -
> -     amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
> -                             _GET_BANK(event), _GET_CNTR(event),
> -                             IOMMU_PC_COUNTER_REG, &count, false);
> +     struct perf_amd_iommu *perf_iommu =
> +                     container_of(event->pmu, struct perf_amd_iommu, pmu);

Indentation.

>  
> -     /* IOMMU pc counter register is only 48 bits */
> -     count &= 0xFFFFFFFFFFFFULL;
> +     pr_debug("perf: amd_iommu:perf_iommu_read\n");

What is that debug statement good for? Can it go?

>From looking at that file, it has a bunch more which are completely
useless and can be deleted. In a separate patch.

> -     prev_raw_count =  local64_read(&hwc->prev_count);
> -     if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
> -                                     count) != prev_raw_count)
> +     if (amd_iommu_pc_get_cnt_vals(_GET_BANK(event), _GET_CNTR(event),
> +                                   amd_iommu_get_num_iommus(),
> +                                   perf_iommu_cnts))
>               return;
>  
> -     /* Handling 48-bit counter overflowing */
> -     delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
> -     delta >>= COUNTER_SHIFT;
> -     local64_add(delta, &event->count);
> -
> +     /* Now we re-aggregating event counts and prev-counts
> +      * from all IOMMUs
> +      */

Kernel comment style:

        /*
         * Start of first sentence...
         * Second sentence...
         * ...
         */

> +     local64_set(&hwc->prev_count, 0);
> +
> +     for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
> +             int indx = get_iommu_bnk_cnt_evt_idx(perf_iommu, i,
> +                                       _GET_BANK(event), _GET_CNTR(event));

Indentation.

> +             u64 prev_raw_count = local64_read(&perf_iommu->prev_cnts[indx]);
> +
> +             /* IOMMU pc counter register is only 48 bits */
> +             perf_iommu_cnts[i] &= 0xFFFFFFFFFFFFULL;

                                   &= GENMASK_ULL(48, 0)

> +
> +             /*
> +              * Since we do not enable counter overflow interrupts,
> +              * we do not have to worry about prev_count changing on us
> +              */
> +             local64_set(&perf_iommu->prev_cnts[indx],
> +                             perf_iommu_cnts[i]);

No need to break it.

> +
> +             local64_add(prev_raw_count, &hwc->prev_count);
> +
> +             /* Handling 48-bit counter overflowing */

                /* Handle 48-bit counter overflow: */

reads better.

> +             delta = (perf_iommu_cnts[i] << COUNTER_SHIFT) -
> +                             (prev_raw_count << COUNTER_SHIFT);

No need for the linebreak.

> +             delta >>= COUNTER_SHIFT;
> +             local64_add(delta, &event->count);
> +     }
>  }
>  
>  static void perf_iommu_stop(struct perf_event *event, int flags)
> @@ -428,10 +468,14 @@ static __init int _init_events_attrs(struct 
> perf_amd_iommu *perf_iommu)
>  
>  static __init void amd_iommu_pc_exit(void)
>  {
> -     if (__perf_iommu.events_group != NULL) {
> -             kfree(__perf_iommu.events_group);
> -             __perf_iommu.events_group = NULL;
> -     }
> +     kfree(__perf_iommu.events_group);
> +     __perf_iommu.events_group = NULL;
> +
> +     kfree(__perf_iommu.prev_cnts);
> +     __perf_iommu.prev_cnts = NULL;
> +
> +     kfree(perf_iommu_cnts);
> +     perf_iommu_cnts = NULL;
>  }
>  
>  static __init int _init_perf_amd_iommu(
> @@ -461,6 +505,17 @@ static __init int _init_perf_amd_iommu(
>       perf_iommu->null_group = NULL;
>       perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
>  
> +     perf_iommu->prev_cnts = kzalloc(sizeof(*perf_iommu->prev_cnts) *
> +             (amd_iommu_get_num_iommus() * perf_iommu->max_banks *
> +             perf_iommu->max_counters), GFP_KERNEL);

Indentation.

        perf_iommu->prev_cnts = kzalloc(sizeof(*perf_iommu->prev_cnts) *
                                        amd_iommu_get_num_iommus() *
                                        perf_iommu->max_banks *
                                        perf_iommu->max_counters), GFP_KERNEL);

looks more readable to me.

> +     if (!perf_iommu->prev_cnts)
> +             return -ENOMEM;
> +
> +     perf_iommu_cnts = kzalloc(sizeof(*perf_iommu_cnts) *
> +                               amd_iommu_get_num_iommus(), GFP_KERNEL);

No need for the linebreak.

> +     if (!perf_iommu_cnts)
> +             return -ENOMEM;
> +
>       ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
>       if (ret) {
>               pr_err("perf: amd_iommu: Failed to initialized.\n");

You can define pr_fmt to "perf/amd_iommu: " at the beginning of this
file and then you don't need to supply that prefix each time you call
pr_*

And that sentence above it wrong. It should be:

        pr_err("Error initializing AMD IOMMU perf counters.\n");

or something like that.

> diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> index 531b2e1..7b1b561 100644
> --- a/drivers/iommu/amd_iommu_init.c
> +++ b/drivers/iommu/amd_iommu_init.c
> @@ -1133,6 +1133,9 @@ static int __init init_iommu_all(struct 
> acpi_table_header *table)
>       return 0;
>  }
>  
> +static int _amd_iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
> +                                      u8 bank, u8 cntr, u8 fxn,
> +                                      u64 *value, bool is_write);

static int _amd_iommu_this_func_name_is_def_too_long

>  
>  static void init_iommu_perf_ctr(struct amd_iommu *iommu)
>  {
> @@ -1144,8 +1147,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
>       amd_iommu_pc_present = true;
>  
>       /* Check if the performance counters can be written to */
> -     if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) ||
> -         (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) ||
> +     if ((_amd_iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
> +         (_amd_iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
>           (val != val2)) {
>               pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
>               amd_iommu_pc_present = false;
> @@ -2294,10 +2297,10 @@ u8 amd_iommu_pc_get_max_counters(void)
>  }
>  EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
>  
> -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
> -                                 u64 *value, bool is_write)
> +static int _amd_iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
> +                                      u8 bank, u8 cntr, u8 fxn,
> +                                      u64 *value, bool is_write)

Ditto.

>  {
> -     struct amd_iommu *iommu;
>       u32 offset;
>       u32 max_offset_lim;
>  
> @@ -2305,9 +2308,6 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 
> cntr, u8 fxn,
>       if (!amd_iommu_pc_present)
>               return -ENODEV;
>  
> -     /* Locate the iommu associated with the device ID */
> -     iommu = amd_iommu_rlookup_table[devid];
> -
>       /* Check for valid iommu and pc register indexing */
>       if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7)))
>               return -ENODEV;
> @@ -2332,4 +2332,73 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, 
> u8 cntr, u8 fxn,
>  
>       return 0;
>  }
> -EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
> +
> +int amd_iommu_pc_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, u64 *value)
> +{
> +     struct amd_iommu *iommu;
> +
> +     for_each_iommu(iommu) {
> +             int ret = _amd_iommu_pc_get_set_reg_val(iommu, bank, cntr,
> +                                                     fxn, value, true);
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     return 0;
> +}
> +EXPORT_SYMBOL(amd_iommu_pc_set_reg_val);

Why isn't this EXPORT_SYMBOL_GPL?

> +
> +int amd_iommu_pc_set_cnt_vals(u8 bank, u8 cntr, int num, u64 *value)

That whole naming is hard to read. What's wrong with

amd_iommu_set_counters()

?

> +{
> +     struct amd_iommu *iommu;
> +     int i = 0;
> +
> +     if (num > amd_iommus_present)
> +             return -EINVAL;
> +
> +     for_each_iommu(iommu) {
> +             int ret = _amd_iommu_pc_get_set_reg_val(iommu, bank, cntr,
> +                                                     IOMMU_PC_COUNTER_REG,
> +                                                     &value[i], true);
> +             if (ret)
> +                     return ret;
> +             if (i++ == amd_iommus_present)
> +                     break;
> +     }
> +
> +     return 0;
> +}
> +EXPORT_SYMBOL(amd_iommu_pc_set_cnt_vals);

Ditto, why not EXPORT_SYMBOL_GPL ?

> +
> +int amd_iommu_pc_get_cnt_vals(u8 bank, u8 cntr, int num, u64 *value)
> +{
> +     struct amd_iommu *iommu;
> +     int i = 0, ret;
> +
> +     if (!num)
> +             return -EINVAL;
> +
> +     /*
> +      * Here, we read the specified counters on all IOMMU,

Plural is IOMMUs ?

> +      * which should have been programmed the same way.
> +      * and aggregate the counter values.

That comment is a sentence with a fullstop in the middle.

> +      */
> +     for_each_iommu(iommu) {
> +             u64 tmp;
> +
> +             if (i >= num)
> +                     return -EINVAL;
> +
> +             ret = _amd_iommu_pc_get_set_reg_val(iommu, bank, cntr,
> +                                                     IOMMU_PC_COUNTER_REG,
> +                                                     &tmp, false);
> +             if (ret)
> +                     return ret;

So if one of those intermediary calls of _amd_iommu_pc_get_set_reg_val()
fails, we return here not having read all counters. Unwind and handle
that error properly maybe?

Same issue above.

> +
> +             /* IOMMU pc counter register is only 48 bits */
> +             value[i] = tmp & 0xFFFFFFFFFFFFULL;
> +     }
> +
> +     return 0;
> +}
> +EXPORT_SYMBOL(amd_iommu_pc_get_cnt_vals);
> diff --git a/include/linux/perf/perf_event_amd_iommu.h 
> b/include/linux/perf/perf_event_amd_iommu.h
> index cb820c2..be1a17d 100644
> --- a/include/linux/perf/perf_event_amd_iommu.h
> +++ b/include/linux/perf/perf_event_amd_iommu.h
> @@ -33,7 +33,11 @@ extern u8 amd_iommu_pc_get_max_banks(void);
>  
>  extern u8 amd_iommu_pc_get_max_counters(void);
>  
> -extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
> -                     u8 fxn, u64 *value, bool is_write);
> +extern int amd_iommu_pc_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
> +                                 u64 *value);
> +
> +extern int amd_iommu_pc_set_cnt_vals(u8 bank, u8 cntr, int num, u64 *value);
> +
> +extern int amd_iommu_pc_get_cnt_vals(u8 bank, u8 cntr, int num, u64 *value);

No need for that "extern"

>  #endif /*_PERF_EVENT_AMD_IOMMU_H_*/
> -- 
> 2.5.0
> 
> 

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

Reply via email to