[PATCH 4/4] KVM: x86: Hyper-V tsc page setup
Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. Signed-off-by: Andrey Smetanin Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Radim Krčmář CC: Roman Kagan CC: Denis V. Lunev [Computation of TSC page parameters rewritten to use the Linux timekeeper parameters. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/hyperv.c | 162 arch/x86/kvm/hyperv.h | 3 + arch/x86/kvm/x86.c | 8 +- 4 files changed, 155 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 32a43a25d415..4b20f7304b9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -702,6 +702,8 @@ struct kvm_hv { /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; + + HV_REFERENCE_TSC_PAGE tsc_ref; }; struct kvm_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed5b77f39ffb..555951625350 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -386,7 +386,21 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic) static u64 get_time_ref_counter(struct kvm *kvm) { - return div_u64(get_kvmclock_ns(kvm), 100); + struct kvm_hv *hv = &kvm->arch.hyperv; + struct kvm_vcpu *vcpu; + u64 tsc; + + /* +* The guest has not set up the TSC page or the clock isn't +* stable, fall back to get_kvmclock_ns. 
+*/ + if (!hv->tsc_ref.tsc_sequence) + return div_u64(get_kvmclock_ns(kvm), 100); + + vcpu = kvm_get_vcpu(kvm, 0); + tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) + + hv->tsc_ref.tsc_offset; } static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, @@ -756,6 +774,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +/* + * The kvmclock and Hyper-V TSC page use similar formulas, and converting + * between them is possible: + * + * kvmclock formula: + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * + * Hyper-V formula: + *nsec/100 = ticks * scale / 2^64 + offset + * + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. + * By dividing the kvmclock formula by 100 and equating what's left we get: + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100 + * + * Now expand the kvmclock formula and divide by 100: + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * + system_time / 100 + * + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: + *nsec/100 = ticks * scale / 2^64 + * - tsc_timestamp * scale / 2^64 + * + system_time / 100 + * + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 + * + * These two equivalencies are implemented in this function. 
+ */ +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, + HV_REFERENCE_TSC_PAGE *tsc_ref) +{ + u64 max_mul; + + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) + return false; + + /* +* check if scale would overflow, if so we use the time ref counter +*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 +*tsc_to_system_mul / 100 >= 2^(32-tsc_shift) +*tsc_to_system_mul >= 100 * 2^(32-tsc_shift) +*/ + max_mul = 100ull << (32 - hv_clock->tsc_shift); + if (hv_clock->tsc_to_system_mul >= max_mul) + return false; + + /* +* Otherwise compute the scale and offset according to the formulas +* derived above. +*/ + tsc_ref->tsc_scale = + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), + hv_clock->tsc_to_system_mul, + 100); + + tsc_ref->tsc_offset = hv_clock->system_time; + do_div(tsc_ref->tsc_offset, 100); + tsc_ref->tsc_offset -= + mul_u
Re: Cannot load linux after recent efi-related changes
On Mon, 19 Sep, at 02:36:36PM, Mike Krinkin wrote: > On Mon, Sep 19, 2016 at 12:14:24PM +0100, Matt Fleming wrote: > > On Sun, 18 Sep, at 04:14:45AM, Mike Krinkin wrote: > > > > > > diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c > > > index cd96086..34322d1 100644 > > > --- a/drivers/firmware/efi/memmap.c > > > +++ b/drivers/firmware/efi/memmap.c > > > @@ -221,8 +221,8 @@ void __init efi_memmap_insert(struct efi_memory_map > > > *old_memmap, void *buf, > > > void *old, *new; > > > > > > /* modifying range */ > > > - m_start = mem->range.start; > > > - m_end = mem->range.end; > > > + m_start = mem->range.start & ~(u64)EFI_PAGE_SIZE; > > > + m_end = ALIGN(mem->range.end, EFI_PAGE_SIZE) - 1; > > > m_attr = mem->attribute; > > > > > > for (old = old_memmap->map, new = buf; > > > > Thanks for the analysis and patch Mike, but this needs fixing further > > up the call stack so that we don't map things the caller didn't > > expect. > > > > This bug was also reported in this thread, > > > > https://lkml.kernel.org/r/1474005912.3930.10.ca...@gmail.com > > Sorry, i haven't seen that. No problem. > > > > Could you try this patch? > > Works fine for me. Thanks, I've added your Tested-by.
[PATCH v4 0/3] usb: chipidea: imx: Add USB configuration for imx53
Changes in V2: - Patches sent too early with bad contents Changes in V3: - Change subject - Split "configure imx for ULPI phy" for disable-oc code Changes in V4: - Fix "Change switch order" commit message - Indent switch/case (set case on the same column as switch) - Remove useless test in "Change switch order" Fabien Lahoudere (3): usb: chipidea: imx: Change switch order usb: chipidea: imx: configure imx for ULPI phy usb: chipidea: imx: Add binding to disable USB 60Mhz clock drivers/usb/chipidea/ci_hdrc_imx.c | 7 +++ drivers/usb/chipidea/ci_hdrc_imx.h | 2 + drivers/usb/chipidea/usbmisc_imx.c | 88 -- 3 files changed, 83 insertions(+), 14 deletions(-) -- 2.1.4
Re: [PATCH][v10] PM / hibernate: Verify the consistency of e820 memory map by md5 digest
On Fri, Sep 9, 2016 at 2:21 PM, Chen Yu wrote: > On some platforms, there is occasional panic triggered when trying to > resume from hibernation, a typical panic looks like: [cut] > @@ -211,10 +292,15 @@ int arch_hibernation_header_save(void *addr, unsigned > int max_size) > */ > int arch_hibernation_header_restore(void *addr) > { > + bool e820_mismatch = false; The extra local variable can be avoided if you structure the code slightly differently. > struct restore_data_record *rdr = addr; > > restore_jump_address = rdr->jump_address; > jump_address_phys = rdr->jump_address_phys; > restore_cr3 = rdr->cr3; > - return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; > + > + e820_mismatch = hibernation_e820_mismatch(rdr->e820_digest); Also calling hibernation_e820_mismatch() before checking rdr->magic may not be useful at all. > + > + return (rdr->magic == RESTORE_MAGIC) ? > + (e820_mismatch ? -ENODEV : 0) : -EINVAL; So what about: if (rdr->magic != RESTORE_MAGIC) return -EINVAL; if (hibernation_e820_mismatch(rdr->e820_digest)) return -ENODEV; return 0; > } > -- Thanks, Rafael
[PATCH v4 3/3] usb: chipidea: imx: Add binding to disable USB 60Mhz clock
This binding allow to disable the internal 60Mhz clock for USB host2 and host3. Signed-off-by: Fabien Lahoudere --- drivers/usb/chipidea/ci_hdrc_imx.c | 2 ++ drivers/usb/chipidea/ci_hdrc_imx.h | 1 + drivers/usb/chipidea/usbmisc_imx.c | 13 + 3 files changed, 16 insertions(+) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 96c0e33..89a9d98 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -147,6 +147,8 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) if (of_find_property(np, "external-vbus-divider", NULL)) data->evdo = 1; + if (of_find_property(np, "disable-int60ck", NULL)) + data->disable_int60ck = 1; if (of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) data->ulpi = 1; diff --git a/drivers/usb/chipidea/ci_hdrc_imx.h b/drivers/usb/chipidea/ci_hdrc_imx.h index d666c9f..43bafae 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.h +++ b/drivers/usb/chipidea/ci_hdrc_imx.h @@ -20,6 +20,7 @@ struct imx_usbmisc_data { unsigned int oc_polarity:1; /* over current polarity if oc enabled */ unsigned int evdo:1; /* set external vbus divider option */ unsigned int ulpi:1; /* connected to an ULPI phy */ + unsigned int disable_int60ck:1; /* disable 60 MHZ clock */ }; int imx_usbmisc_init(struct imx_usbmisc_data *); diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 11f51bd..a781f87 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -53,6 +53,9 @@ #define MX53_USB_CTRL_1_H3_XCVR_CLK_SEL_ULPI BIT(6) #define MX53_USB_UH2_CTRL_OFFSET 0x14 #define MX53_USB_UH3_CTRL_OFFSET 0x18 +#define MX53_USB_CLKONOFF_CTRL_OFFSET 0x24 +#define MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF BIT(21) +#define MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF BIT(22) #define MX53_BM_OVER_CUR_DIS_H1BIT(5) #define MX53_BM_OVER_CUR_DIS_OTG BIT(8) #define MX53_BM_OVER_CUR_DIS_UHx BIT(30) @@ -240,6 +243,11 @@ static int 
usbmisc_imx53_init(struct imx_usbmisc_data *data) | MX53_USB_UHx_CTRL_ULPI_INT_EN; writel(val, reg); } + if (data->disable_int60ck) { + reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET; + val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF; + writel(val, reg); + } if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_UHx; @@ -261,6 +269,11 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data) | MX53_USB_UHx_CTRL_ULPI_INT_EN; writel(val, reg); } + if (data->disable_int60ck) { + reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET; + val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF; + writel(val, reg); + } if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_UHx; -- 2.1.4
[PATCH v4 2/3] usb: chipidea: imx: configure imx for ULPI phy
In order to use ULPI phy with usb host 2 and 3, we need to configure controller register to enable ULPI features. Signed-off-by: Fabien Lahoudere --- drivers/usb/chipidea/ci_hdrc_imx.c | 5 + drivers/usb/chipidea/ci_hdrc_imx.h | 1 + drivers/usb/chipidea/usbmisc_imx.c | 37 + 3 files changed, 43 insertions(+) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 0991794..96c0e33 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "ci.h" @@ -146,6 +147,10 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) if (of_find_property(np, "external-vbus-divider", NULL)) data->evdo = 1; + + if (of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) + data->ulpi = 1; + return data; } diff --git a/drivers/usb/chipidea/ci_hdrc_imx.h b/drivers/usb/chipidea/ci_hdrc_imx.h index 409aa5ca8..d666c9f 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.h +++ b/drivers/usb/chipidea/ci_hdrc_imx.h @@ -19,6 +19,7 @@ struct imx_usbmisc_data { unsigned int disable_oc:1; /* over current detect disabled */ unsigned int oc_polarity:1; /* over current polarity if oc enabled */ unsigned int evdo:1; /* set external vbus divider option */ + unsigned int ulpi:1; /* connected to an ULPI phy */ }; int imx_usbmisc_init(struct imx_usbmisc_data *); diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 9549821..11f51bd 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -46,11 +46,20 @@ #define MX53_USB_OTG_PHY_CTRL_0_OFFSET 0x08 #define MX53_USB_OTG_PHY_CTRL_1_OFFSET 0x0c +#define MX53_USB_CTRL_1_OFFSET 0x10 +#define MX53_USB_CTRL_1_H2_XCVR_CLK_SEL_MASK (0x11 << 2) +#define MX53_USB_CTRL_1_H2_XCVR_CLK_SEL_ULPI BIT(2) +#define MX53_USB_CTRL_1_H3_XCVR_CLK_SEL_MASK (0x11 << 6) +#define MX53_USB_CTRL_1_H3_XCVR_CLK_SEL_ULPI BIT(6) #define MX53_USB_UH2_CTRL_OFFSET 0x14 
#define MX53_USB_UH3_CTRL_OFFSET 0x18 #define MX53_BM_OVER_CUR_DIS_H1BIT(5) #define MX53_BM_OVER_CUR_DIS_OTG BIT(8) #define MX53_BM_OVER_CUR_DIS_UHx BIT(30) +#define MX53_USB_CTRL_1_UH2_ULPI_ENBIT(26) +#define MX53_USB_CTRL_1_UH3_ULPI_ENBIT(27) +#define MX53_USB_UHx_CTRL_WAKE_UP_EN BIT(7) +#define MX53_USB_UHx_CTRL_ULPI_INT_EN BIT(8) #define MX53_USB_PHYCTRL1_PLLDIV_MASK 0x3 #define MX53_USB_PLL_DIV_24_MHZ0x01 @@ -217,6 +226,20 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data) } break; case 2: + if (data->ulpi) { + /* set USBH2 into ULPI-mode. */ + reg = usbmisc->base + MX53_USB_CTRL_1_OFFSET; + val = readl(reg) | MX53_USB_CTRL_1_UH2_ULPI_EN; + /* select ULPI clock */ + val &= ~MX53_USB_CTRL_1_H2_XCVR_CLK_SEL_MASK; + val |= MX53_USB_CTRL_1_H2_XCVR_CLK_SEL_ULPI; + writel(val, reg); + /* Set interrupt wake up enable */ + reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET; + val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN + | MX53_USB_UHx_CTRL_ULPI_INT_EN; + writel(val, reg); + } if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_UHx; @@ -224,6 +247,20 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data) } break; case 3: + if (data->ulpi) { + /* set USBH3 into ULPI-mode. */ + reg = usbmisc->base + MX53_USB_CTRL_1_OFFSET; + val = readl(reg) | MX53_USB_CTRL_1_UH3_ULPI_EN; + /* select ULPI clock */ + val &= ~MX53_USB_CTRL_1_H3_XCVR_CLK_SEL_MASK; + val |= MX53_USB_CTRL_1_H3_XCVR_CLK_SEL_ULPI; + writel(val, reg); + /* Set interrupt wake up enable */ + reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET; + val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN + | MX53_USB_UHx_CTRL_ULPI_INT_EN; + writel(val, reg); + } if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_UHx; -- 2.1.4
[PATCH v4 1/3] usb: chipidea: imx: Change switch order
Each USB controller has different behaviour, so in order to avoid having several "switch(data->index)" statements and lock/unlock pairs, we prefer to switch on the index once and then test for the features that exist for this index. This patch also removes the useless test of reg and val. Those two values cannot be NULL. Signed-off-by: Fabien Lahoudere --- drivers/usb/chipidea/usbmisc_imx.c | 38 -- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 20d02a5..9549821 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -199,31 +199,41 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data) val |= MX53_USB_PLL_DIV_24_MHZ; writel(val, usbmisc->base + MX53_USB_OTG_PHY_CTRL_1_OFFSET); - if (data->disable_oc) { - spin_lock_irqsave(&usbmisc->lock, flags); - switch (data->index) { - case 0: + spin_lock_irqsave(&usbmisc->lock, flags); + + switch (data->index) { + case 0: + if (data->disable_oc) { reg = usbmisc->base + MX53_USB_OTG_PHY_CTRL_0_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_OTG; - break; - case 1: + writel(val, reg); + } + break; + case 1: + if (data->disable_oc) { reg = usbmisc->base + MX53_USB_OTG_PHY_CTRL_0_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_H1; - break; - case 2: + writel(val, reg); + } + break; + case 2: + if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_UHx; - break; - case 3: + writel(val, reg); + } + break; + case 3: + if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET; val = readl(reg) | MX53_BM_OVER_CUR_DIS_UHx; - break; - } - if (reg && val) writel(val, reg); - spin_unlock_irqrestore(&usbmisc->lock, flags); + } + break; } + spin_unlock_irqrestore(&usbmisc->lock, flags); + return 0; } -- 2.1.4
Re: [PATCH] mm/mempolicy.c: forbid static or relative flags for local NUMA mode
On Sun 18-09-16 13:29:43, Piotr Kwapulinski wrote: > The MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES flags are irrelevant > when setting them for MPOL_LOCAL NUMA memory policy via set_mempolicy. > Return the "invalid argument" from set_mempolicy whenever > any of these flags is passed along with MPOL_LOCAL. man 2 set_mempolicy doesn't list this as invalid option. Maybe this is a documentation bug but is it possible that somebody will see this as an unexpected error? > It is consistent with MPOL_PREFERRED passed with empty nodemask. > It also slightly shortens the execution time in paths where these flags > are used e.g. when trying to rebind the NUMA nodes for changes in > cgroups cpuset mems (mpol_rebind_preferred()) or when just printing > the mempolicy structure (/proc/PID/numa_maps). I am not sure I understand this argument. What does this patch actually fix? If this is about the execution time then why not just bail out early when MPOL_LOCAL && (MPOL_F_STATIC_NODES || MPOL_F_RELATIVE_NODES) > Isolated tests done. > > Signed-off-by: Piotr Kwapulinski > --- > mm/mempolicy.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > index 2da72a5..27b07d1 100644 > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -276,7 +276,9 @@ static struct mempolicy *mpol_new(unsigned short mode, > unsigned short flags, > return ERR_PTR(-EINVAL); > } > } else if (mode == MPOL_LOCAL) { > - if (!nodes_empty(*nodes)) > + if (!nodes_empty(*nodes) || > + (flags & MPOL_F_STATIC_NODES) || > + (flags & MPOL_F_RELATIVE_NODES)) > return ERR_PTR(-EINVAL); > mode = MPOL_PREFERRED; > } else if (nodes_empty(*nodes)) > -- > 2.9.2 -- Michal Hocko SUSE Labs
[PATCH v3 2/2] usb: dwc3: Wait for control transfer completed when stopping gadget
When we change the USB function with configfs dynamically, we possibly met this situation: one core is doing the control transfer, another core is trying to unregister the USB gadget from userspace, we must wait for completing this control tranfer, or it will hang the controller to set the DEVCTRLHLT flag. Signed-off-by: Baolin Wang --- drivers/usb/dwc3/core.h |2 ++ drivers/usb/dwc3/ep0.c|2 ++ drivers/usb/dwc3/gadget.c | 23 +++ 3 files changed, 27 insertions(+) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b2317e7..01a6fbd 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -745,6 +745,7 @@ struct dwc3_scratchpad_array { * @ep0_usb_req: dummy req used while handling STD USB requests * @ep0_bounce_addr: dma address of ep0_bounce * @scratch_addr: dma address of scratchbuf + * @ep0_in_setup: One control tranfer is completed and enter setup phase * @lock: for synchronizing * @dev: pointer to our struct device * @xhci: pointer to our xHCI child @@ -843,6 +844,7 @@ struct dwc3 { dma_addr_t ep0_bounce_addr; dma_addr_t scratch_addr; struct dwc3_request ep0_usb_req; + struct completion ep0_in_setup; /* device lock */ spinlock_t lock; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index fe79d77..06c167a 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -311,6 +311,8 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) ret = dwc3_ep0_start_trans(dwc, 0, dwc->ctrl_req_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); WARN_ON(ret < 0); + + complete(&dwc->ep0_in_setup); } static struct dwc3_ep *dwc3_wIndex_to_dep(struct dwc3 *dwc, __le16 wIndex_le) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ca2ae5b..3a30d51 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1437,6 +1437,15 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) if (pm_runtime_suspended(dwc->dev)) return 0; + /* +* Per databook, when we want to stop the gadget, if a control 
transfer +* is still in process, complete it and get the core into setup phase. +*/ + if (!is_on && dwc->ep0state != EP0_SETUP_PHASE) { + reinit_completion(&dwc->ep0_in_setup); + return -EBUSY; + } + reg = dwc3_readl(dwc->regs, DWC3_DCTL); if (is_on) { if (dwc->revision <= DWC3_REVISION_187A) { @@ -1487,10 +1496,22 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) is_on = !!is_on; +try_again: spin_lock_irqsave(&dwc->lock, flags); ret = dwc3_gadget_run_stop(dwc, is_on, false); spin_unlock_irqrestore(&dwc->lock, flags); + if (ret == -EBUSY) { + ret = wait_for_completion_timeout(&dwc->ep0_in_setup, + msecs_to_jiffies(500)); + if (ret == 0) { + dev_err(dwc->dev, "timeout to stop gadget.\n"); + return -ETIMEDOUT; + } else { + goto try_again; + } + } + return ret; } @@ -2914,6 +2935,8 @@ int dwc3_gadget_init(struct dwc3 *dwc) goto err4; } + init_completion(&dwc->ep0_in_setup); + dwc->gadget.ops = &dwc3_gadget_ops; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->gadget.sg_supported= true; -- 1.7.9.5
[PATCH v3 1/2] usb: dwc3: gadget: Add disconnect checking when changing function dynamically
When the system has stopped the gadget, we should avoid queuing any requests, since doing so will cause the transfer to fail. Thus add some disconnect checking to avoid this situation. Signed-off-by: Baolin Wang --- Changes since v2: - Move disconnect checking into dwc3_send_gadget_ep_cmd(). - Rename completion name and issue complete() at one place. - Move completion initialization into dwc3_gadget_init(). Changes since v1: - Split into 2 separate patches. - Choose complete mechanism instead of polling. --- drivers/usb/dwc3/gadget.c |3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1783406..ca2ae5b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -241,6 +241,9 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd, int susphy = false; int ret = -EINVAL; + if (!dwc->pullups_connected) + return -ESHUTDOWN; + /* * Synopsys Databook 2.60a states, on section 6.3.2.5.[1-8], that if * we're issuing an endpoint command, we must check if -- 1.7.9.5
[PATCH] MFD: do not assign already assigned compatible of_nodes
If 2 similar cells have the same of_compatible (2 instances of the same functionality), they both are assigned the first found of_node with this compatible. In the below example, the pdev of both cells get the child@0 of_node. parent@0 { /* MFD devices with 2 cells reg = <0>; child@0 { reg = <0>; compatible = "child-driver"; }; child@1 { reg = <1>; compatible = "child-driver"; }; }; To avoid this, the found of_nodes are checked to see if they are already assigned in the children of the parent dev and are only assigned if still "available" (not assigned to a child). This allows the 2nd cell's pdev to get the child@1 of_node. Signed-off-by: Valentin Longchamp --- drivers/mfd/mfd-core.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 3ac486a..11c5ded 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -137,6 +137,19 @@ static inline void mfd_acpi_add_device(const struct mfd_cell *cell, } #endif +static inline int is_my_of_node(struct device *dev, void *data) +{ + struct device_node *np = data; + + return dev->of_node == np; +} + +static inline int of_node_already_in_children(struct device *dev, + struct device_node *node) +{ + return device_for_each_child(dev, node, is_my_of_node); +} + static int mfd_add_device(struct device *parent, int id, const struct mfd_cell *cell, atomic_t *usage_count, struct resource *mem_base, @@ -178,8 +191,10 @@ static int mfd_add_device(struct device *parent, int id, if (parent->of_node && cell->of_compatible) { for_each_child_of_node(parent->of_node, np) { if (of_device_is_compatible(np, cell->of_compatible)) { - pdev->dev.of_node = np; - break; + if (!of_node_already_in_children(parent, np)) { + pdev->dev.of_node = np; + break; + } } } } -- 1.8.3.1
Re: [PATCH v2 2/3] powerpc/mm: allow memory hotplug into a memoryless node
On 15/09/16 06:06, Reza Arbab wrote: > Remove the check which prevents us from hotplugging into an empty node. > > This limitation has been questioned before [1], and judging by the > response, there doesn't seem to be a reason we can't remove it. No issues > have been found in light testing. > > [1] > http://lkml.kernel.org/r/cagzkibrmksa1yyhbf5hwgxubcjse5smksmy4tpanerme2ug...@mail.gmail.com > > http://lkml.kernel.org/r/20160511215051.gf22...@arbab-laptop.austin.ibm.com > > Signed-off-by: Reza Arbab > Acked-by: Balbir Singh > Cc: Nathan Fontenot > Cc: Bharata B Rao > --- > arch/powerpc/mm/numa.c | 13 + > 1 file changed, 1 insertion(+), 12 deletions(-) > I presume you've tested with CONFIG_NODES_SHIFT of 8 (255 nodes?) Balbir Singh.
"CodingStyle: Clarify and complete chapter 7" in docs-next (was Re: [PATCH 03/47] block-rbd: Adjust the position of a jump label in rbd_header_from_disk())
On Mon, Sep 19, 2016 at 11:37 AM, Jean Delvare wrote: > Hi Ilya, > > Sorry for the late answer. > > On Tue, 13 Sep 2016 20:31:57 +0200, Ilya Dryomov wrote: >> Sorry, navigating lkml.org archive is a pain, and I was expecting to >> see patch. Your points >> >> "The acceptance of an optional single space before labels dates back to >> at least June 2007, as supported by the very first incarnation of >> checkpatch.pl. So nothing really new here, except for a preference >> (my preference, admittedly, but I'm know I'm not alone) being expressed >> in the coding style document." >> >> "Recommendations are not meant to document what people are currently >> doing but what we think they should be doing." >> >> are valid, but note that there is a world of difference between an >> acceptance and a preference. The *only* point of whitespace guidelines >> is to keep the code base consistent. > > Consistency is half of the reason, the other half is readability. This > is why the CodingStyle document has a number of rationales explained. > This is also why we put whitespace in the first place, while the C > language doesn't require any ;-) > > The sense of my proposal was to address a readability (or usability) > issue. > >> You don't go changing whitespace >> preferences in such a huge project, not unless you have a *very* good >> rationale and existing code base is swayed (which it isn't, given the >> 9/10 ratio). > > I did consider the reason to be good enough to warrant a "change", > actually. Or more exactly from "one space is allowed" to "one space is > recommended." Which is quite different from changing all the code > actively. I can understand how you don't like it, but again, this > "inconsistency" has been accepted for almost a decade now, so I find it > strange to see so much resistance when someone finally tries to sort it > out. 
Yeah, I guess that's where our disagreement lies - the "so that `diff -p` does not confuse labels with functions" in the age of git, hg and others, all of which can be customized to your heart's content is not a good enough reason to go from "allowed" to "advised". > >> >> If I wanted to clarify the >> >> situation, I'd have gone with "one space indented labels are also >> >> acceptable" or so. The example you've re-indented dates back to 2.6.4 >> >> times... >> > >> > I can't see how this is relevant. >> >> That was a 12 year old example, codifying an existing style used in >> ~90% cases, serving as a guideline for new contributors. > > OK, I get your point now. But the CodingStyle document isn't carved > into stone. I see 43 changes to that file in recent history (since > April 2005), some of which are actual changes or clarifications of our > coding style. This very section of the document was updated in December > 2014, so not so long ago. > > In the end I suppose it boils down to how problematic you consider the > current situation to be. Apparently you and several other maintainers > think it's just fine, while me (and a few others apparently) think it > is not. > >> >> git diff also works on regular files, BTW. >> > >> > I have no idea what you mean here, sorry. >> >> Oh, just that it works outside of git repos too, so you aren't stuck >> with diffutils if you want to diff two random .c files. > > Oh, I had never thought of that. Thanks for the hint :-) > >> > (...) >> > http://marc.info/?l=linux-kernel&m=147325166209844&w=2 >> > >> > It uses the git diff xfuncname feature you mentioned above. To be >> > honest I'm surprised it isn't the git default, it seems odd to have so >> > many diff drivers included in git and not enable them on obvious file >> > extensions. Oh well. >> >> This came up before: http://www.spinics.net/lists/git/msg164216.html, >> Linus didn't like it. I suggest you add him to the CC on this patch to >> see if he changed his mind. 
> > Thanks for the pointer. It is interesting to see many people had been > bothered by the same problem for many years and even proposed solution > for it. But also sad to see that nothing happened :-( > > Well Linus suggested to improve the default, he was not opposed to the > change per se I think. But it was 5 years ago and nothing happened > since then, so I'd rather go with what is available today. Which means > either one space before labels, or drivers in .gitattributes. Choose > your poison ;-) Jon, ping? My points upthread aside, both CodingStyle and .gitattributes patches seem to be queued... Thanks, Ilya
[PATCH] IB/rxe: avoid putting a large struct rxe_qp on stack
A race condition fix added an rxe_qp structure to the stack in order to be able to perform rollback in rxe_requester(), but the structure is large enough to trigger the warning for possible stack overflow: drivers/infiniband/sw/rxe/rxe_req.c: In function 'rxe_requester': drivers/infiniband/sw/rxe/rxe_req.c:757:1: error: the frame size of 2064 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] This changes the rollback function to only save the psn inside the qp, which is the only field we access in the rollback_qp anyway. Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer") Signed-off-by: Arnd Bergmann --- drivers/infiniband/sw/rxe/rxe_req.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 13a848a518e8..cf1ffac25585 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn= qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn= rollback_qp->req.psn; + qp->req.psn= rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; next_wqe: if (unlikely(!qp->valid || qp->req.state == 
QP_STATE_ERROR)) @@ -717,7 +717,7 @@ int rxe_requester(void *arg) * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -725,7 +725,7 @@ int rxe_requester(void *arg) qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); -- 2.9.0
Re: [PATCH 1/3] mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting
[Fixup Vladimir's email] On Wed 14-09-16 15:48:44, Johannes Weiner wrote: > From: Johannes Weiner > > During cgroup2 rollout into production, we started encountering css > refcount underflows and css access crashes in the memory controller. > Splitting the heavily shared css reference counter into logical users > narrowed the imbalance down to the cgroup2 socket memory accounting. > > The problem turns out to be the per-cpu charge cache. Cgroup1 had a > separate socket counter, but the new cgroup2 socket accounting goes > through the common charge path that uses a shared per-cpu cache for > all memory that is being tracked. Those caches are safe against > scheduling preemption, but not against interrupts - such as the newly > added packet receive path. When cache draining is interrupted by > network RX taking pages out of the cache, the resuming drain operation > will put references of in-use pages, thus causing the imbalance. > > Disable IRQs during all per-cpu charge cache operations. 
> > Fixes: f7e1cb6ec51b ("mm: memcontrol: account socket memory in unified > hierarchy memory controller") > Cc: # 4.5+ > Signed-off-by: Johannes Weiner Acked-by: Michal Hocko > --- > mm/memcontrol.c | 31 ++- > 1 file changed, 22 insertions(+), 9 deletions(-) > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 7a8d6624758a..60bb830abc34 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -1710,17 +1710,22 @@ static DEFINE_MUTEX(percpu_charge_mutex); > static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) > { > struct memcg_stock_pcp *stock; > + unsigned long flags; > bool ret = false; > > if (nr_pages > CHARGE_BATCH) > return ret; > > - stock = &get_cpu_var(memcg_stock); > + local_irq_save(flags); > + > + stock = this_cpu_ptr(&memcg_stock); > if (memcg == stock->cached && stock->nr_pages >= nr_pages) { > stock->nr_pages -= nr_pages; > ret = true; > } > - put_cpu_var(memcg_stock); > + > + local_irq_restore(flags); > + > return ret; > } > > @@ -1741,15 +1746,18 @@ static void drain_stock(struct memcg_stock_pcp *stock) > stock->cached = NULL; > } > > -/* > - * This must be called under preempt disabled or must be called by > - * a thread which is pinned to local cpu. 
> - */ > static void drain_local_stock(struct work_struct *dummy) > { > - struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock); > + struct memcg_stock_pcp *stock; > + unsigned long flags; > + > + local_irq_save(flags); > + > + stock = this_cpu_ptr(&memcg_stock); > drain_stock(stock); > clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); > + > + local_irq_restore(flags); > } > > /* > @@ -1758,14 +1766,19 @@ static void drain_local_stock(struct work_struct > *dummy) > */ > static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) > { > - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); > + struct memcg_stock_pcp *stock; > + unsigned long flags; > + > + local_irq_save(flags); > > + stock = this_cpu_ptr(&memcg_stock); > if (stock->cached != memcg) { /* reset if necessary */ > drain_stock(stock); > stock->cached = memcg; > } > stock->nr_pages += nr_pages; > - put_cpu_var(memcg_stock); > + > + local_irq_restore(flags); > } > > /* > -- > 2.9.3 -- Michal Hocko SUSE Labs
Re: [PATCH 2/3] cgroup: duplicate cgroup reference when cloning sockets
[Fixup Vladimir's email] I am not familiar enough with this code path to give my ack, unfortunately. On Wed 14-09-16 15:48:45, Johannes Weiner wrote: > From: Johannes Weiner > > When a socket is cloned, the associated sock_cgroup_data is duplicated > but not its reference on the cgroup. As a result, the cgroup reference > count will underflow when both sockets are destroyed later on. > > Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") > Cc: # 4.5+ > Signed-off-by: Johannes Weiner > --- > kernel/cgroup.c | 6 ++ > net/core/sock.c | 5 - > 2 files changed, 10 insertions(+), 1 deletion(-) > > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index 0c4db7908264..b0d727d26fc7 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -6297,6 +6297,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) > if (cgroup_sk_alloc_disabled) > return; > > + /* Socket clone path */ > + if (skcd->val) { > + cgroup_get(sock_cgroup_ptr(skcd)); > + return; > + } > + > rcu_read_lock(); > > while (true) { > diff --git a/net/core/sock.c b/net/core/sock.c > index 51a730485649..038e660ef844 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -1340,7 +1340,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, > gfp_t priority, > if (!try_module_get(prot->owner)) > goto out_free_sec; > sk_tx_queue_clear(sk); > - cgroup_sk_alloc(&sk->sk_cgrp_data); > } > > return sk; > @@ -1400,6 +1399,7 @@ struct sock *sk_alloc(struct net *net, int family, > gfp_t priority, > sock_net_set(sk, net); > atomic_set(&sk->sk_wmem_alloc, 1); > > + cgroup_sk_alloc(&sk->sk_cgrp_data); > sock_update_classid(&sk->sk_cgrp_data); > sock_update_netprioidx(&sk->sk_cgrp_data); > } > @@ -1544,6 +1544,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const > gfp_t priority) > newsk->sk_priority = 0; > newsk->sk_incoming_cpu = raw_smp_processor_id(); > atomic64_set(&newsk->sk_cookie, 0); > + > + cgroup_sk_alloc(&newsk->sk_cgrp_data); > + > /* >* Before updating sk_refcnt, we must commit prior changes to 
> memory >* (Documentation/RCU/rculist_nulls.txt for details) > -- > 2.9.3 -- Michal Hocko SUSE Labs
Re: [PATCH 3/3] mm: memcontrol: consolidate cgroup socket tracking
[Fixup Vladimir's email] same here I do not feel familiar with the code enough to give my ack but Vladimir might be in a better position On Wed 14-09-16 15:48:46, Johannes Weiner wrote: > The cgroup core and the memory controller need to track socket > ownership for different purposes, but the tracking sites being > entirely different is kind of ugly. > > Be a better citizen and rename the memory controller callbacks to > match the cgroup core callbacks, then move them to the same place. > > Signed-off-by: Johannes Weiner > --- > include/linux/memcontrol.h | 4 ++-- > mm/memcontrol.c| 19 +++ > net/core/sock.c| 6 +++--- > net/ipv4/tcp.c | 2 -- > net/ipv4/tcp_ipv4.c| 3 --- > 5 files changed, 16 insertions(+), 18 deletions(-) > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 0710143723bc..ca11b3e6dd65 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -773,8 +773,8 @@ static inline void mem_cgroup_wb_stats(struct > bdi_writeback *wb, > #endif /* CONFIG_CGROUP_WRITEBACK */ > > struct sock; > -void sock_update_memcg(struct sock *sk); > -void sock_release_memcg(struct sock *sk); > +void mem_cgroup_sk_alloc(struct sock *sk); > +void mem_cgroup_sk_free(struct sock *sk); > bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int > nr_pages); > void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int > nr_pages); > #ifdef CONFIG_MEMCG > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 60bb830abc34..2caf1ee86e78 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -2939,7 +2939,7 @@ static int memcg_update_tcp_limit(struct mem_cgroup > *memcg, unsigned long limit) > /* >* The active flag needs to be written after the static_key >* update. This is what guarantees that the socket activation > - * function is the last one to run. See sock_update_memcg() for > + * function is the last one to run. 
See mem_cgroup_sk_alloc() > for >* details, and note that we don't mark any socket as belonging >* to this memcg until that flag is up. >* > @@ -2948,7 +2948,7 @@ static int memcg_update_tcp_limit(struct mem_cgroup > *memcg, unsigned long limit) >* as accounted, but the accounting functions are not patched in >* yet, we'll lose accounting. >* > - * We never race with the readers in sock_update_memcg(), > + * We never race with the readers in mem_cgroup_sk_alloc(), >* because when this value change, the code to process it is not >* patched in yet. >*/ > @@ -5651,11 +5651,15 @@ void mem_cgroup_migrate(struct page *oldpage, struct > page *newpage) > DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); > EXPORT_SYMBOL(memcg_sockets_enabled_key); > > -void sock_update_memcg(struct sock *sk) > +void mem_cgroup_sk_alloc(struct sock *sk) > { > struct mem_cgroup *memcg; > > - /* Socket cloning can throw us here with sk_cgrp already > + if (!mem_cgroup_sockets_enabled) > + return; > + > + /* > + * Socket cloning can throw us here with sk_memcg already >* filled. It won't however, necessarily happen from >* process context. So the test for root memcg given >* the current task's memcg won't help us in this case. 
> @@ -5680,12 +5684,11 @@ void sock_update_memcg(struct sock *sk) > out: > rcu_read_unlock(); > } > -EXPORT_SYMBOL(sock_update_memcg); > > -void sock_release_memcg(struct sock *sk) > +void mem_cgroup_sk_free(struct sock *sk) > { > - WARN_ON(!sk->sk_memcg); > - css_put(&sk->sk_memcg->css); > + if (sk->sk_memcg) > + css_put(&sk->sk_memcg->css); > } > > /** > diff --git a/net/core/sock.c b/net/core/sock.c > index 038e660ef844..c73e28fc9c2a 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -1363,6 +1363,7 @@ static void sk_prot_free(struct proto *prot, struct > sock *sk) > slab = prot->slab; > > cgroup_sk_free(&sk->sk_cgrp_data); > + mem_cgroup_sk_free(sk); > security_sk_free(sk); > if (slab != NULL) > kmem_cache_free(slab, sk); > @@ -1399,6 +1400,7 @@ struct sock *sk_alloc(struct net *net, int family, > gfp_t priority, > sock_net_set(sk, net); > atomic_set(&sk->sk_wmem_alloc, 1); > > + mem_cgroup_sk_alloc(sk); > cgroup_sk_alloc(&sk->sk_cgrp_data); > sock_update_classid(&sk->sk_cgrp_data); > sock_update_netprioidx(&sk->sk_cgrp_data); > @@ -1545,6 +1547,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const > gfp_t priority) > newsk->sk_incoming_cpu = raw_smp_processor_id(); > atomic64_set(&newsk->sk_cookie, 0); > > + mem_
Re: Cherryview wake up events
On Mon, Sep 19, 2016 at 01:21:17PM +0200, Johannes Stezenbach wrote: > Hi, > > Mika, I've been reading the thread about pinctrl-cherryview > interrupts, but I have some basic questions in understanding > the hardware and the relationship between ACPI and Linux drivers, > so I decided to start a new thread. > https://lkml.kernel.org/g/20160909085832.gk15...@lahna.fi.intel.com > > I have one Asus E200HA (Atom x5-Z8300) where the power button > doesn't generate any ACPI events (no SCI), instead it causes > a Thermal Event irq: > > TRM: 3 3 3 4 Thermal event interrupts > > [ 51.825488] CPU0: Core temperature above threshold, cpu clock throttled > (total events = 1) > [ 51.826933] CPU1: Core temperature above threshold, cpu clock throttled > (total events = 1) > [ 51.826965] mce: [Hardware Error]: Machine check events logged > [ 51.841180] mce: [Hardware Error]: Machine check events logged > > (These events are logged only sometimes, usually a power button > press only increments the TRM count.) Hmm, that's weird. > I would like to understand how this is possible, when I boot > with apic=debug I can't see anything claiming vector 0xfa. > > The LID causes a gpio irq: > 158: 2 0 0 0 chv-gpio 43 ACPI:Event > > However, neither LID nor power button can wake up the > device from "echo freeze >/sys/power/state". :-( The cherryview pinctrl driver does not (yet) support wake up events. It currently just sets IRQCHIP_SKIP_SET_WAKE for the irqchip. > "grep . /sys/firmware/acpi/interrupts/*" shows only zeros. > > I put the DSDT and some other tables at: > https://linuxtv.org/~js/e200ha/ > > During the last weeks I read what I could about the hardware > and ACPI, and poked at it with acpidbg, devmem, ioport > and in kernel source, but to no avail. > > On Thu, Sep 15, 2016 at 06:52:10PM +0300, Mika Westerberg wrote: > > It turns out that for north and southwest communities, they can only > > generate GPIO interrupts for lower 8 interrupts (IntSel value). 
The upper > > part (8-15) can only generate GPEs (General Purpose Events). > > I got the Atom Z8000 series datasheet from > http://www.intel.com/content/www/us/en/processors/atom/atom-technical-resources.html > and tried to find the source for this. The closest I > could find is the GPIO_ROUT PMC register? > However, the datasheet doesn't tell about the other > interrupts not covered by GPIO_ROUT, if they are fixed > IRQ or SCI or "no effect". Source for this information is coming from an internal documentation for the SoC. For some reason it is not included in the datasheet. > I also don't get the mapping from intsel irq to IO-APIC pin > number. And also not the mapping between the pin numbers used > on DSDT GpioInt to the pin numbers in pinctrl-cherryview.c. > Could you shed a light on this? Or point out where I can > find information? IntSel field is RO and filled in by the BIOS. The mapping from IntSel and I/O-APIC pins seems also missing from the datasheet but for example for north community, IntSel 0-7 maps to I/O-APIC pins 51-58 (and OR of those is 48). > It seems to imply BIOS sets up IntSel. I'm generally confused > about the responsibility of BIOS vs. drivers making use of the > information from DSDT, e.g. Device (GPO1) has a list of > GpioIo Connections, other devices like PMI2 use GpioInt > from GPO1. My E200HA has the INT33F5 TI PMIC > Controller, which according to Windows driver strings > seems to be the SND9039. > Does it mean I need a PMIC driver that reads the _CRS and > configures the GPIO? The GPIOs under GPIO controller are generally used for either ACPI GPIO events or GPIO Operation Region. Those are used by the AML code to access the GPIO hardware with the help of OS. The INT33F5 PMIC from your DSDT table seems to have one GpioInt() resource which it uses as an interrupt. This is handled already by the I2C core. In order to use that PMIC you need a driver and I'm not sure if there is one for that particular part. 
> BTW, the datasheet talks about 4 seconds for power button > override, but it takes 10 seconds. Maybe it means the > power button is connected to the TI PMIC, not to the > Cherryview SoC? Or it may be handled by the embedded controller itself. The FADT ACPI table should tell you if it has "fixed power button" or is it using some other mechanism (like control method power button). > Another question is about the virtual GPIO device that exists > in hardware and is used by DSDT. How does that work and > why does pinctrl-cherryview.c exclude it? Because it is "virtual" and does not expose any hardware. The pinctrl-cherryview deals only with the GPIO block found on Braswell/Cherryview. IIRC that virtual GPIO thing was used to fix USB device wakeup or something like that. > Sorry for so many questions, any info is appreciated, > and any suggestion what to try to get the thing to > wake up from freeze. I can make you a test
Re: [PATCH] x86/vdso: Add ARCH_MAP_VDSO_X32 if CONFIG_X86_X32_ABI.
On 09/17/2016 03:51 AM, Vinson Lee wrote: vdso_image_x32 is generated if CONFIG_X86_X32_ABI. This patch fixes this build error on CentOS 6.8 with gcc 4.4. LD init/built-in.o arch/x86/built-in.o: In function `do_arch_prctl': (.text+0x27466): undefined reference to `vdso_image_x32' arch/x86/built-in.o: In function `do_arch_prctl': (.text+0x27477): undefined reference to `vdso_image_x32' Fixes: 2eefd8789698 ("x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_*") Signed-off-by: Vinson Lee Thanks! Reviewed-by: Dmitry Safonov Ifdeffery in arch/x86/include/asm/vdso.h dazzled my eyes. -- Dmitry
[PATCH] nfs: add missing CONFIG_MIGRATION for nfs_migrate_page
We should use CONFIG_MIGRATION to guard nfs_migrate_page; otherwise, when CONFIG_MIGRATION is not defined, the unused nfs_migrate_page will still be compiled into the kernel. Signed-off-by: Chao Yu --- fs/nfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7d62097..6cfb83e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -543,7 +543,9 @@ const struct address_space_operations nfs_file_aops = { .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, .direct_IO = nfs_direct_IO, +#ifdef CONFIG_MIGRATION .migratepage = nfs_migrate_page, +#endif .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -- 2.8.2.311.gee88674
Re: [PATCH] dma-buf/sync_file: fix documentation error
Hi Emilio, 2016-09-19 Emilio López : > The ioctl name and description on the documentation block don't > match the ioctl being defined. This was probably overlooked while > renaming the ioctls during the sync file destaging. This patch > provides a more accurate description of what the ioctl actually does. > > Signed-off-by: Emilio López > --- > > This is something I saw while refreshing my kselftest patches. Hopefully > this patch describes the new ioctl well enough, let me know if you > think it doesn't :) > > Cheers, > Emilio > > include/uapi/linux/sync_file.h | 13 + > 1 file changed, 5 insertions(+), 8 deletions(-) > > diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h > index 413303d..cdf8ec2 100644 > --- a/include/uapi/linux/sync_file.h > +++ b/include/uapi/linux/sync_file.h > @@ -85,15 +85,12 @@ struct sync_file_info { > #define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct > sync_merge_data) > > /** > - * DOC: SYNC_IOC_FENCE_INFO - get detailed information on a fence > + * DOC: SYNC_IOC_FILE_INFO - get detailed information on a sync_file > * > - * Takes a struct sync_file_info_data with extra space allocated for pt_info. > - * Caller should write the size of the buffer into len. On return, len is > - * updated to reflect the total size of the sync_file_info_data including > - * pt_info. > - * > - * pt_info is a buffer containing sync_pt_infos for every sync_pt in the > fence. > - * To iterate over the sync_pt_infos, use the sync_pt_info.len field. > + * Takes a struct sync_file_info. If num_fences is 0, the field is updated > + * with the actual number of fences. If num_fences is > 0, the system will > + * use the pointer provided on sync_fence_info to return up to num_fences of > + * struct sync_fence_info, with detailed fence information. > */ Reviewed-by: Gustavo Padovan Gustavo
RE: [PATCH v6 0/9] Replay Protected Memory Block (RPMB) subsystem
\ > Subject: [PATCH v6 0/9] Replay Protected Memory Block (RPMB) subsystem > > > Few storage technologies such is EMMC, UFS, and NVMe support RPMB > hardware partition with common protocol and frame layout. > The RPMB partition cannot be accessed via standard block layer, but by a set > of specific commands: WRITE, READ, GET_WRITE_COUNTER, and > PROGRAM_KEY. > Such a partition provides authenticated and replay protected access, hence > suitable as a secure storage. > > The RPMB layer aims to provide in-kernel API for Trusted Execution > Environment (TEE) devices that are capable to securely compute block frame > signature. In case a TEE device wish to store a replay protected data, it > creates an RPMB frame with requested data and computes HMAC of the > frame, then it requests the storage device via RPMB layer to store the data. > > The layer provides two APIs, for rpmb_req_cmd() for issuing one of RPMB > specific commands and rpmb_seq_cmd() for issuing of raw RPMB protocol > frames, which is close to the functionality provided by emmc multi ioctl > interface. > > A TEE driver can claim the RPMB interface, for example, via > class_interface_register (). > > A storage device registers its RPMB hardware (eMMC) partition or RPMB W- > LUN (UFS) with the RPMB layer providing an implementation for > rpmb_seq_cmd() handler. The interface enables sending sequence of RPMB > standard frames. > > A parallel user space API is provided via /dev/rpmbX character device with > two IOCTL commands. > Simplified one, RPMB_IOC_REQ_CMD, were read result cycles is performed > by the framework on behalf the user and second, RPMB_IOC_SEQ_CMD > where the whole RPMB sequence, including RESULT_READ is supplied by the > caller. 
> The latter is intended for easier adjusting of the applications that use > MMC_IOC_MULTI_CMD ioctl, such as > https://android.googlesource.com/trusty/app/storage/ > > There is also a sample tool under tools/rpmb/ directory that exercises these > interfaces and a simulation device that implements the device part. > > The code is also available from: > > https://github.com/tomasbw/linux-mei.git rpmb > Greg, can you please check if this series has addressed all your comments? Are there any more items preventing it from being merged? Thanks Tomas
Re: [PATCH] iommu/vt-d: Fix the size calculation of pasid table
[Cc'ing David] On Mon, Sep 12, 2016 at 10:49:11AM +0800, Xunlei Pang wrote: > According to the vt-d spec, the size of pasid (state) entry is 8B > which equals 3 in power of 2, the number of pasid (state) entries > is (ecap_pss + 1) in power of 2. > > Thus the right size of pasid (state) table in power of 2 should be > ecap_pss(iommu->ecap) plus "1+3=4" other than 7. > > Signed-off-by: Xunlei Pang > --- > drivers/iommu/intel-svm.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c > index 8ebb353..cfa75c2 100644 > --- a/drivers/iommu/intel-svm.c > +++ b/drivers/iommu/intel-svm.c > @@ -39,7 +39,7 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) > struct page *pages; > int order; > > - order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; > + order = ecap_pss(iommu->ecap) + 4 - PAGE_SHIFT; > if (order < 0) > order = 0; The patch seems to be correct, but I'll let David comment on it first. Joerg
Re: [PATCH 1/2] ptp_clock: allow for it to be optional
On Sun, 18 Sep 2016 23:51:09 -0400, Nicolas Pitre wrote: > And to make it possible for PTP to be configured out, the select statement > in the Kconfig entry for those ethernet drivers is changed from selecting > PTP_1588_CLOCK to PTP_1588_CLOCK_SELECTED whose purpose is to indicate the > default Kconfig value for the PTP subsystem. With this patch applied, the user is free to set a NIC driver as built in and PTP_1588_CLOCK as a module, right? If so, that would lead to non-functional PTP without any warning due to the use of IS_REACHABLE. That doesn't sound right. Could easily cause hours of headache to someone. Or is this handled somehow? Thanks, Jiri
Re: [PATCH 1/2] ptp_clock: allow for it to be optional
Reviewed-by: Eugenia Emantayev
Re: [PATCH 1/2] HID: input: ignore System Control application usages if not System Controls
On Tue, 13 Sep 2016, Michel Hermier wrote: > I'm the original author of the 2011 bug, and I still own one of the > Microsoft keyboard with the issue. But not one that has a fix, but one > that have the exact same broken report descriptor. When I'll get some > time to test the patch, on success I think it would be safe to remove > one of the 2 fixup available, since they seems to have reused the exact > same descriptor for the whole family. I'd be interested in the result of this test. In any case, to get as much testing coverage as possible, I've now applied this one (1/2) to hid.git#for-4.9. Thanks, -- Jiri Kosina SUSE Labs
Re: [PATCH] arm64/efi: efi_init error handling fix
On 2016/9/13 19:18, Matt Fleming wrote: > On Wed, 07 Sep, at 07:15:48PM, Yisheng Xie wrote: >> >> >> On 2016/9/5 21:57, Matt Fleming wrote: >>> On Fri, 02 Sep, at 11:26:18AM, Will Deacon wrote: On Fri, Sep 02, 2016 at 06:18:39PM +0800, Xie Yisheng wrote: > From: Yisheng Xie > >>> >>> This should be a call to efi_memmap_unmap() because the EFI_MEMMAP >>> flag also needs clearing. >>> >> Hi Matt, >> Thanks for your reply. >> You mean call the function efi_unmap_memmap() like x86? >> >> however, it seems no need to clear EFI_MEMMAP here. For arm*, the flag >> EFI_MEMMAP is set in function reserve_regions(), which is called >> only when uefi_init() return 0: >> >> efi_init() >>-> reserve_regions() >>-> set_bit(EFI_MEMMAP, &efi.flags); > > This patch should be against the 'next' branch here, > > https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/log/?h=next > > My comments were made with that branch in mind. > Hi Matt, Sorry for my misunderstanding, I will try to rebase to next branch. Thanks. Yisheng Xie. > . >
[PATCH] sbitmap: avoid maybe-uninitialized warning
The sbitmap code that has just been turned into a library module returns uninitialized data for sbitmap_weight(), as pointed out by gcc when building with -Wmaybe-uninitialized: lib/sbitmap.c: In function 'sbitmap_weight': lib/sbitmap.c:179:9: error: 'weight' may be used uninitialized in this function [-Werror=maybe-uninitialized] Note that the value is never initialized, we just add data on top, so it is wrong regardless of sb->map_nr. This adds the missing initialization. Fixes: 88459642cba4 ("blk-mq: abstract tag allocation out into sbitmap library") Signed-off-by: Arnd Bergmann --- lib/sbitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sbitmap.c b/lib/sbitmap.c index e40808921544..2cecf05c82fd 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -169,7 +169,7 @@ EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); unsigned int sbitmap_weight(const struct sbitmap *sb) { - unsigned int i, weight; + unsigned int i, weight = 0; for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; -- 2.9.0
Re: [PATCH] HID: alps: fix stick device not working after resume
On Mon, 19 Sep 2016, Kai-Heng Feng wrote: > The stick device does not work after resume, add U1_SP_ABS_MODE flag can > make the device work after resume. Do you happen to have any more details on why it doesn't work without U1_SP_ABS_MODE? Or was this a pure guesswork? > > Signed-off-by: Kai-Heng Feng > --- > drivers/hid/hid-alps.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c > index 048befd..390f8d3 100644 > --- a/drivers/hid/hid-alps.c > +++ b/drivers/hid/hid-alps.c > @@ -244,13 +244,13 @@ static int alps_raw_event(struct hid_device *hdev, > static int alps_post_reset(struct hid_device *hdev) > { > return u1_read_write_register(hdev, ADDRESS_U1_DEV_CTRL_1, > - NULL, U1_TP_ABS_MODE, false); > + NULL, U1_TP_ABS_MODE | U1_SP_ABS_MODE, false); > } > > static int alps_post_resume(struct hid_device *hdev) > { > return u1_read_write_register(hdev, ADDRESS_U1_DEV_CTRL_1, > - NULL, U1_TP_ABS_MODE, false); > + NULL, U1_TP_ABS_MODE | U1_SP_ABS_MODE, false); > } > #endif /* CONFIG_PM */ -- Jiri Kosina SUSE Labs
[PATCH] ovl: avoid setting uninitialized creds
If the call to ovl_copy_up() fails, we now call revert_creds on an uninitialized structure after a recent patch, as found by "gcc -Wmaybe-uninitialized": fs/overlayfs/inode.c: In function 'ovl_open_maybe_copy_up': fs/overlayfs/inode.c:39:2: error: 'old_cred' may be used uninitialized in this function [-Werror=maybe-uninitialized] fs/overlayfs/inode.c:22:21: note: 'old_cred' was declared here This changes the code back to not call revert_creds unless we have already called ovl_override_creds(). Fixes: 54249cd03956 ("ovl: during copy up, switch to mounter's creds early") Signed-off-by: Arnd Bergmann --- fs/overlayfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index af87c7c109b7..65375f9c5563 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -30,13 +30,14 @@ static int ovl_copy_up_truncate(struct dentry *dentry) old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getattr(&lowerpath, &stat); if (err) - goto out_dput_parent; + goto out_revert; stat.size = 0; err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); -out_dput_parent: +out_revert: revert_creds(old_cred); +out_dput_parent: dput(parent); return err; } -- 2.9.0
Re: [PATCH] random: Fix kernel panic due to system_wq use before init
On Sun, 18 Sep, at 11:09:08PM, Waiman Long wrote: > On 09/14/2016 03:19 PM, Linus Torvalds wrote: > >On Wed, Sep 14, 2016 at 12:14 PM, Waiman Long wrote: > >>In the stack backtrace above, the kernel hadn't even reached SMP boot after > >>about 50s. That was extremely slow. I tried the 4.7.3 kernel and it booted > >>up fine. So I suspect that there may be too many interrupts going on and it > >>consumes most of the CPU cycles. The prime suspect is the random driver, I > >>think. > >Any chance of bisecting it at least partially? The random driver > >doesn't do interrupts itself, it just gets called by other drivers > >doing interrupts. So if there are too many of them, that would be > >something else.. > > > >Linus > > I have finally finished bisecting the problem. I was wrong in saying that > the 4.7.3 kernel had no problem. It did have. There were some slight > differences between the 4.8 and 4.7 kernel config files that I used. After > some further testing, it was found that the bootup problem only happened > when the following kernel config option was defined: > > CONFIG_EFI_MIXED=y Could you try this patch? It won't be the final version, because it doesn't address the root cause of the crash, which looks like page table corruption of some kind, but it should at least confirm that this is the buggy code, --- diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 677e29e29473..8dd3784eb075 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -245,7 +245,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED)) + if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) return 0; /*
[PATCH] [media] platform: pxa_camera: add VIDEO_V4L2 dependency
Moving the pxa_camera driver from soc_camera lost the implied VIDEO_V4L2 Kconfig dependency, and building the driver without V4L2 results in a kernel that cannot link: drivers/media/platform/pxa_camera.o: In function `pxa_camera_remove': pxa_camera.c:(.text.pxa_camera_remove+0x10): undefined reference to `v4l2_clk_unregister' pxa_camera.c:(.text.pxa_camera_remove+0x18): undefined reference to `v4l2_device_unregister' drivers/media/platform/pxa_camera.o: In function `pxa_camera_probe': pxa_camera.c:(.text.pxa_camera_probe+0x458): undefined reference to `v4l2_of_parse_endpoint' drivers/media/v4l2-core/videobuf2-core.o: In function `__enqueue_in_driver': drivers/media/v4l2-core/videobuf2-core.o: In function `vb2_core_streamon': videobuf2-core.c:(.text.vb2_core_streamon+0x1b4): undefined reference to `v4l_vb2q_enable_media_source' drivers/media/v4l2-core/videobuf2-v4l2.o: In function `vb2_ioctl_reqbufs': videobuf2-v4l2.c:(.text.vb2_ioctl_reqbufs+0xc): undefined reference to `video_devdata' This adds back an explicit dependency. Fixes: 3050b9985024 ("[media] media: platform: pxa_camera: move pxa_camera out of soc_camera") Signed-off-by: Arnd Bergmann --- drivers/media/platform/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index ce4a96fccc43..5ff803efdc03 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -93,7 +93,7 @@ config VIDEO_OMAP3_DEBUG config VIDEO_PXA27x tristate "PXA27x Quick Capture Interface driver" - depends on VIDEO_DEV && HAS_DMA + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA depends on PXA27x || COMPILE_TEST select VIDEOBUF2_DMA_SG select SG_SPLIT -- 2.9.0
Re: [PATCH 1/1 v6] ARM: imx: Added perf functionality to mmdc driver
Hi, This is generally looking good now. There are just a few issues remaining which I've noted below. On Wed, Sep 14, 2016 at 09:48:20AM -0500, Frank Li wrote: > From: Zhengyu Shen > +static bool mmdc_pmu_group_is_valid(struct perf_event *event) As a general note, you prefix other functions with mmdc_ rather than mmdc_pmu. For consistency, it would be better for all the perf-specific functions to be called mmdc_pmu_*. Likewise for attr_groups. > +{ > + struct pmu *pmu = event->pmu; > + struct perf_event *leader = event->group_leader; > + struct perf_event *sibling; > + > + int cfg = leader->attr.config; > + int counter_mask = 0; > + > + if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS) > + return false; For any event managed by this PMU, this is checked in event_init, so this check is unnecessary. > + > + if (leader->pmu == pmu) > + counter_mask |= 1 << cfg; > + else if (!is_software_event(leader)) > + return false; > + > + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { > + if (sibling->pmu == pmu) { > + cfg = sibling->attr.config; > + if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS) > + return false; Likewise. > + counter_mask |= 1 << cfg; > + } else if (!is_software_event(sibling)) { > + return false; > + } > + } > + > + if (event == leader) > + return true; > + > + cfg = event->attr.config; > + if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS) > + return false; > + > + return !(counter_mask & (1 << cfg)); > +} I think this would be far clearer if written something like the below: bool mmdc_pmu_group_event_is_valid(struct perf_event *event, struct pmu *pmu, unsigned long *used_counters) { int counter = event->attr.config; if (is_software_event(event)) return true; if (event->pmu != pmu) return false; return !test_and_set_bit(counter, &used_counters); } /* * Each event has a single fixed-purpose counter, so we can only have a * single active event for each at any point in time. 
Here we just check * for duplicates, and rely on mmdc_pmu_event_init to verify that the HW * event numbers are valid. */ bool mmdc_pmu_group_is_valid(struct perf_event *event) { struct pmu *pmu = event->pmu; struct perf_event *leader = event->group_leader; struct perf_event *sibling; unsigned long used_counters = 0; set_bit(counter(event), &used_counters); if (event != leader) { if (!mmdc_pmu_event_is_valid(event, pmu, &counter_mask)) return false; } list_for_each_entry(sibling, &leader->sibling_list, group_entry) { if (!mmdc_pmu_group_event_is_valid(event, pmu, &used_counters)) return false; } return true; } > +static int mmdc_event_add(struct perf_event *event, int flags) > +{ > + struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu); > + struct hw_perf_event *hwc = &event->hw; > + > + int cfg = event->attr.config; > + > + if (WARN_ONCE((cfg < 0 || cfg >= MMDC_NUM_COUNTERS), > + "invalid configuration %d for mmdc", cfg)) > + return -1; This should never happen, as you checked this at event_init time. If you must check this here, please use a real error code mnemonic rather than -1 (which happens to be -EPERM). > + > + if (flags & PERF_EF_START) > + mmdc_event_start(event, flags); > + > + pmu_mmdc->mmdc_events[cfg] = event; > + pmu_mmdc->active_events++; > + > + local64_set(&hwc->prev_count, mmdc_read_counter(pmu_mmdc, cfg)); > + > + return 0; > +} You must verify at pmu::add() time that the counter isn't in use already, as you can have separate events (i.e. that aren't in the same group) trying to reuse the same counter. If you don't check that, then stopping/starting/resetting the counter will not work as expected, and you will get erroneous results. I think that here all you need to do is to check that pmu_mmdc->mmdc_events[cfg] is NULL before you assign to it. If it has a non-NULL value, return -EAGAIN.
> + > +static void mmdc_event_stop(struct perf_event *event, int flags) > +{ > + struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu); > + void __iomem *mmdc_base, *reg; > + int cfg = (int)event->attr.config; > + > + mmdc_base = pmu_mmdc->mmdc_base; > + reg = mmdc_base + MMDC_MADPCR0; > + > + if (WARN_ONCE((cfg < 0 || cfg >= MMDC_NUM_COUNTERS), > + "invalid configuration %d for mmdc counter", > cfg)) > + return; You checked this at event_init time. Is it really necessary to check again? > +static void mmdc
[PATCH 0/2 v2] x86/tsc: Update tsc crystal_khz whitelist
native_calibrate_tsc() contains a whitelist of models that have a known TSC frequency that differs from the CPU frequency. This patchset switches the code from using magic numbers to using the defines in asm/intel-family.h and adds new processors to the whitelist. v2: peterz, fixed typo in patch 0002. There was some question if the value was supposed to be 25000 or 24000. Experimenting on an 0x55 processor shows that 25000 is correct. Signed-off-by: Prarit Bhargava Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org Cc: Rafael Aquini Cc: "Peter Zijlstra (Intel)" Cc: Andy Lutomirski Cc: Len Brown Cc: l...@kernel.org Prarit Bhargava (2): x86,tsc: Use cpu id defines from intel-family.h x86/tsc: Add additional Intel CPU models to crystal_khz whitelist arch/x86/kernel/tsc.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) -- 1.7.9.3
[PATCH 1/2 v2] x86,tsc: Use cpu id defines from intel-family.h
asm/intel-family.h contains defines for cpu ids which should be used in the native_calibrate_tsc() function. Signed-off-by: Prarit Bhargava Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org Cc: Rafael Aquini Cc: "Peter Zijlstra (Intel)" Cc: Andy Lutomirski Cc: Len Brown Cc: l...@kernel.org --- arch/x86/kernel/tsc.c |7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 78b9cb5a26af..2344758ba8a3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -23,6 +23,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz;/* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -686,11 +687,11 @@ unsigned long native_calibrate_tsc(void) if (crystal_khz == 0) { switch (boot_cpu_data.x86_model) { - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: crystal_khz = 24000;/* 24.0 MHz */ break; - case 0x5C: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT: crystal_khz = 19200;/* 19.2 MHz */ break; } -- 1.7.9.3
[PATCH 2/2 v2] x86/tsc: Add additional Intel CPU models to crystal_khz whitelist
In commit aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID"), the kernel added support for Intel processors which had a different CPU base-frequency and TSC frequency. The turbostat utility has been updated with KBL and SKX processors, and they should also be added to the crystal_khz white list. For example, on INTEL_FAM6_KABYLAKE_MOBILE native_calibrate_tsc() returns 0 MHz for tsc_khz and after this patch native_calibrate_tsc() returns 1608 MHz. v2: peterz, fix typo for SKX should be 25000 Signed-off-by: Prarit Bhargava Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org Cc: Rafael Aquini Cc: "Peter Zijlstra (Intel)" Cc: Andy Lutomirski Cc: Len Brown Cc: l...@kernel.org --- arch/x86/kernel/tsc.c |7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2344758ba8a3..cd0c9ece25de 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -689,11 +689,18 @@ unsigned long native_calibrate_tsc(void) switch (boot_cpu_data.x86_model) { case INTEL_FAM6_SKYLAKE_MOBILE: case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000;/* 24.0 MHz */ break; + case INTEL_FAM6_SKYLAKE_X: + crystal_khz = 25000;/* 25.0 MHz */ + break; case INTEL_FAM6_ATOM_GOLDMONT: crystal_khz = 19200;/* 19.2 MHz */ break; + default: + crystal_khz = 0; } } -- 1.7.9.3
[PATCH] drm: include linux/seq_file.h as needed
The addition of the debugfs info created references to seq_puts() and seq_printf(), but relied on the debugfs header to be included implicitly, which apparently doesn't happen all the time, as seen from this randconfig build output: drivers/gpu/drm/drm_dp_helper.c: In function 'drm_dp_downstream_debug': drivers/gpu/drm/drm_dp_helper.c:552:2: error: implicit declaration of function 'seq_printf' [-Werror=implicit-function-declaration] drivers/gpu/drm/drm_dp_helper.c:560:3: error: implicit declaration of function 'seq_puts' [-Werror=implicit-function-declaration] Adding the extra include brings back a clean build. Fixes: 80209e5f2c42 ("drm: Add DP branch device info on debugfs") Signed-off-by: Arnd Bergmann --- drivers/gpu/drm/drm_dp_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index a07adf0a07db..3e6fe82c6d64 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include -- 2.9.0
Re: TRIM/UNMAP/DISCARD via ATA Passthrough
On 2016-09-17 01:14, James Bottomley wrote: On Fri, 2016-09-16 at 13:06 -0400, Austin S. Hemmelgarn wrote: On 2016-09-16 12:21, James Bottomley wrote: On Fri, 2016-09-16 at 11:53 -0400, Austin S. Hemmelgarn wrote: On 2016-09-16 07:16, Hannes Reinecke wrote: On 09/15/2016 10:52 PM, Jason A. Donenfeld wrote: Hi Martin, On Thu, Sep 15, 2016 at 6:07 PM, Martin K. Petersen But how do they signal that ATA passthrough is possible? Is there an ATA Information VPD page? Is REPORT SUPPORTED OPERATION CODES supported? We need really solid discovery data before we can entertain enabling something like this. `sg_opcodes` said invalid request, so I think there isn't REPORT SUPPORTED OPERATION CODES, and `sg_vpd -p ai` came up illegal too. However, sg_sat_identify worked reliably, which means a solid way of probing this would be to send IDENTIFY DEVICE ATA via SG_ATA_16 or SG_ATA_12. Let me know and I can give you access to the hardware if you're curious. Sadly, that's not sufficient. linux is not the only provider of an SATL (mpt3sas being the most prominent other one). And while they might support ATA_12/ATA_16, there is no indication that you can pass DSM TRIM that way. So it's better to not support it at all than to support it on hardware we can reliably identify? I get that having feature parity is a good thing, but the discussion isn't about providing support for all SATL devices, it's specifically about UAS connected SATL devices. Last I checked, mpt3sas doesn't do anything with UAS, which means it's kind of irrelevant WRT supporting this for UAS devices. We're getting a bit off topic on mptsas and its eccentric SATL. The point is, you're asking for UAS devices which each have an internal SATL which you say potentially doesn't support discard. The three problems we have are 1. How do we identify if the UAS SATL doesn't support discard. If it does, we really don't want to cause further SATL related issues by bypassing it, so we need a way of telling this. 2. 
If the SATL doesn't support discard, will it reliably support the ATA_12 or ATA_16 pass through (and which one) .. we need a way of checking this because there are known SATLs that don't do pass through. 3. How do we actually configure it? Presumably if the SATL doesn't support discard, it also doesn't give us the useful mode page indications we use to configure TRIM, so we're going to have to do some pass through discovery as well. I assume by 'discard' here you're referring to SCSI UNMAP, as anything that supports ATA_12 or ATA_16 pass through correctly will support ATA TRIM/DISCARD on drives that support it. discard is the block layer terminology it's mapped per transport to UNMAP or WRITE SAME on SCSI and TRIM on ATA. I actually didn't know this. I'm not quite as knowledgeable about the block layer as I probably should be, and definitely not as up-to-date as I could be on the ATA and SCSI specs. If that's the case, then: 1. If SCSI UNMAP fails, it doesn't support UNMAP. This is of course non-trivial to verify safely (we pretty much have to assume it is supported if we have no clear indication it isn't, and then switch based on what happens the first time we try to use it). It's not quite that simple: to get us to configure discard in the first place, you have to indicate support in READ CAPACITY (16): the LBPME bit. The chances are your UAS SATL isn't setting this. OK, that makes sense. Given that though, is it known how something like that may react if you tried to issue an UNMAP or WRITE SAME command when it's not supported? 2. Unless there are SATL's out there that write garbage to the device or die when sent an ATA_12 or ATA_16 pass through command Yes, there are; the problems with USB devices that fail to speak standard versions of SCSI are legion. encapsulating an ATA DEVICE IDENTIFY command, this isn't an issue. Even if such SATL's exist, they can easily be blacklisted. 3. 
This isn't hard, a SATL which actually supports ATA pass through will almost always pass through the mode page unmodified. You mean the ATA Information VPD page? Yes, that's feasible because we already queried the supported VPD pages, so we can tell if this one's there. I kind of got my terminology confused here, and didn't proof-read properly. I'm not sure exactly what I was trying to refer to originally, but what I meant was that pretty much all UAS SATL's I've seen that support ATA pass through either have a proper ATA Information VPD page, or properly pass through ATA DEVICE IDENTIFY and related commands. On the note of UAS SATL's, all of them that I've seen fall into one of four categories: 1. Supports one or both of ATA_12 or ATA_16 pass through, and supports passing through ATA TRIM/DISCARD, but not SCSI UNMAP. 2. Supports one of ATA_12 or ATA_16 pass through, and does not support passing through ATA TRIM/DISCARD or translating SCSI UNMAP. All devices
[patch v2] i2c: add master driver for mellanox systems
From: Vadim Pasternak Device driver for Mellanox I2C controller logic, implemented in Lattice CPLD device. Device supports: - Master mode - One physical bus - Polling mode The Kconfig currently controlling compilation of this code is: drivers/i2c/busses/Kconfig:config I2C_MLXCPLD Signed-off-by: Michael Shych Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko v1->v2 Fixes added by Vadim: - Put new record in Makefile in alphabetic order; - Remove http://www.mellanox.com from MAINTAINERS record; --- Documentation/i2c/busses/i2c-mlxcpld | 47 +++ MAINTAINERS | 8 + drivers/i2c/busses/Kconfig | 12 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-mlxcpld.c | 597 +++ 5 files changed, 665 insertions(+) create mode 100644 Documentation/i2c/busses/i2c-mlxcpld create mode 100644 drivers/i2c/busses/i2c-mlxcpld.c diff --git a/Documentation/i2c/busses/i2c-mlxcpld b/Documentation/i2c/busses/i2c-mlxcpld new file mode 100644 index 000..0f8678a --- /dev/null +++ b/Documentation/i2c/busses/i2c-mlxcpld @@ -0,0 +1,47 @@ +Driver i2c-mlxcpld + +Author: Michael Shych + +This is a for Mellanox I2C controller logic, implemented in Lattice CPLD +device. +Device supports: + - Master mode. + - One physical bus. + - Polling mode. + +This controller is equipped within the next Mellanox systems: +"msx6710", "msx6720", "msb7700", "msn2700", "msx1410", "msn2410", "msb7800", +"msn2740", "msn2100". + +The next transaction types are supported: + - Receive Byte/Block. + - Send Byte/Block. + - Read Byte/Block. + - Write Byte/Block. + +Registers: +CTRL 0x1 - control reg. + Resets all the registers. +HALF_CYC 0x4 - cycle reg. + Configure the width of I2C SCL half clock cycle (in 4 LPC_CLK + units). +I2C_HOLD 0x5 - hold reg. + OE (output enable) is delayed by value set to this register + (in LPC_CLK units) +CMD0x6 - command reg. + Bit 7(lsb), 0 = write, 1 = read. + Bits [6:0] - the 7bit Address of the I2C device. + It should be written last as it triggers an I2C transaction. 
+NUM_DATA 0x7 - data size reg. + Number of address bytes to write in read transaction +NUM_ADDR 0x8 - address reg. + Number of address bytes to write in read transaction. +STATUS 0x9 - status reg. + Bit 0 - transaction is completed. + Bit 4 - ACK/NACK. +DATAx 0xa - 0x54 - 68 bytes data buffer regs. + For write transaction address is specified in four first bytes + (DATA1 - DATA4), data starting from DATA4. + For read transactions address is send in separate transaction and + specified in four first bytes (DATA0 - DATA3). Data is reading + starting from DATA0. diff --git a/MAINTAINERS b/MAINTAINERS index 6781a3f..dc31231 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7667,6 +7667,14 @@ W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlxsw/ +MELLANOX MLXCPLD I2C DRIVER +M: Vadim Pasternak +M: Michael Shych +L: linux-...@vger.kernel.org +S: Supported +F: drivers/i2c/busses/i2c-mlxcpld.c +F: Documentation/i2c/busses/i2c-mlxcpld + SOFT-ROCE DRIVER (rxe) M: Moni Shoua L: linux-r...@vger.kernel.org diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 5c3993b..1126142a 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1203,4 +1203,16 @@ config I2C_OPAL This driver can also be built as a module. If so, the module will be called as i2c-opal. +config I2C_MLXCPLD +tristate "Mellanox I2C driver" +depends on X86_64 +default y +help + This exposes the Mellanox platform I2C busses to the linux I2C layer + for X86 based systems. + Controller is implemented as CPLD logic. + + This driver can also be built as a module. If so, the module will be + called as i2c-mlxcpld. 
+ endmenu diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 37f2819..4df3578 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -118,5 +118,6 @@ obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o obj-$(CONFIG_I2C_XGENE_SLIMPRO) += i2c-xgene-slimpro.o obj-$(CONFIG_SCx200_ACB) += scx200_acb.o +obj-$(CONFIG_I2C_MLXCPLD) += i2c-mlxcpld.o ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c new file mode 100644 index 000..dd62
[PATCH 01/61] perf symbols: Do not open device files again
Moving the regular file check into the entry of the dso__read_binary_type_filename function. This way we can eliminate some calls and extend the file check for all cases. Link: http://lkml.kernel.org/n/tip-np802m7jwzd7fu09vx2tp...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/dso.c| 8 +++- tools/perf/util/symbol.c | 3 --- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 774f6ec884d5..9a027a0cc037 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -43,6 +43,9 @@ int dso__read_binary_type_filename(const struct dso *dso, int ret = 0; size_t len; + if (!is_regular_file(filename)) + return -1; + switch (type) { case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; @@ -53,11 +56,6 @@ int dso__read_binary_type_filename(const struct dso *dso, debuglink--; if (*debuglink == '/') debuglink++; - - ret = -1; - if (!is_regular_file(filename)) - break; - ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 19c9c558454f..827a58ce29f0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1466,9 +1466,6 @@ int dso__load(struct dso *dso, struct map *map) root_dir, name, PATH_MAX)) continue; - if (!is_regular_file(name)) - continue; - /* Name is now the name of the next image to try */ if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; -- 2.7.4
[PATCHv3 00/61] perf c2c: Add new tool to analyze cacheline contention on NUMA systems
hi, sending new version of c2c patches (v3) originally posted in here: http://lwn.net/Articles/588866/ I took the old set and reworked it to fit into current upstream code. It follows the same logic as original patch and provides (almost) the same stdio interface. In addition new TUI interface was added. The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows you to track down the cacheline contentions. The tool is based on x86's load latency and precise store facility events provided by Intel CPUs. The tool was tested by Joe Mario and has proven to be useful and found some cachelines contentions. Joe also wrote a blog about c2c tool with examples located in here: https://joemario.github.io/blog/2016/09/01/c2c-blog/ Code is also available in: git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git perf/c2c Testing: $ perf c2c record -a [workload] $ perf c2c report [--stdio] $ man perf-c2c It's most likely you won't generate any remote HITMs on common laptops, so to get results for local HITMs please use: $ perf c2c report -d lcl [--stdio] thanks, jirka Cc: "Michael Trapp" Cc: "Long, Wai Man" --- Jiri Olsa (61): perf symbols: Do not open device files again perf tools: Remove superfluous initialization of weight perf tools: Make hist_entry__snprintf work over struct perf_hpp_list perf tools: Use bigger buffer for stdio headers perf tools: Introduce c2c_decode_stats function perf tools: Introduce c2c_add_stats function perf tools: Make reset_dimensions global perf tools: Make output_field_add and sort_dimension__add global perf tools: Make several sorting functions global perf tools: Make several display functions global perf tools: Make hist_entry__snprintf function global perf tools: Make hists__fprintf_headers function global perf c2c: Add c2c command perf c2c: Add record subcommand perf c2c: Add report subcommand perf c2c report: Add dimension support perf c2c report: Add sort_entry dimension support perf c2c report: Fallback to standard 
dimensions perf c2c report: Add sample processing perf c2c report: Add cacheline hists processing perf c2c report: Decode c2c_stats for hist entries perf c2c report: Add header macros perf c2c report: Add dcacheline dimension key perf c2c report: Add offset dimension key perf c2c report: Add iaddr dimension key perf c2c report: Add hitm related dimension keys perf c2c report: Add stores related dimension keys perf c2c report: Add loads related dimension keys perf c2c report: Add llc and remote loads related dimension keys perf c2c report: Add llc load miss dimension key perf c2c report: Add total record sort key perf c2c report: Add total loads sort key perf c2c report: Add hitm percent sort key perf c2c report: Add hitm/store percent related sort keys perf c2c report: Add dram related sort keys perf c2c report: Add pid sort key perf c2c report: Add tid sort key perf c2c report: Add symbol and dso sort keys perf c2c report: Add node sort key perf c2c report: Add stats related sort keys perf c2c report: Add cpu cnt sort key perf c2c report: Add src line sort key perf c2c report: Setup number of header lines for hists perf c2c report: Set final resort fields perf c2c report: Add stdio output support perf c2c report: Add main browser perf c2c report: Add cacheline browser perf c2c report: Add global stats stdio output perf c2c report: Add shared cachelines stats stdio output perf c2c report: Add c2c related stats stdio output perf c2c report: Allow to report callchains perf c2c report: Limit the cachelines table entries perf c2c report: Add support to choose local HITMs perf c2c report: Allow to set cacheline sort fields perf c2c report: Recalc width of global sort entries perf c2c report: Add cacheline index entry perf c2c report: Add support to manage symbol name length perf c2c report: Iterate node display in browser perf c2c report: Add help windows perf c2c: Add man page and credits perf tools: Fix width computation for srcline sort entry tools/perf/Build |1 + 
tools/perf/Documentation/perf-c2c.txt | 276 tools/perf/builtin-c2c.c | 2732 + tools/perf/builtin.h |1 + tools/perf/perf.c |1 + tools/perf/ui/browsers/hists.c|4 +- tools/perf/ui/browsers/hists.h|1 + tools/perf/ui/hist.c |2 +- tools/perf/ui/stdio/hist.c| 11 +- tools/perf/util/dso.c |8 +- tools/perf/util/evsel.c |2 - tools/perf/util/hist.c|7 +- tools/
[PATCH 21/61] perf c2c report: Decode c2c_stats for hist entries
Decoding and storing c2c_stats for each hist entry. Changing related function to work with c2c_* objects. Link: http://lkml.kernel.org/n/tip-obz2fu3801wuayz4rnteg...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 38 ++ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index cd0406ab8b5d..7bf6248dbd75 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -14,10 +14,12 @@ struct c2c_hists { struct histshists; struct perf_hpp_listlist; + struct c2c_statsstats; }; struct c2c_hist_entry { struct c2c_hists*hists; + struct c2c_stats stats; /* * must be at the end, * because of its callchain dynamic entry @@ -64,9 +66,9 @@ static struct hist_entry_ops c2c_entry_ops = { static int c2c_hists__init(struct c2c_hists *hists, const char *sort); -static struct hists* -he__get_hists(struct hist_entry *he, - const char *sort) +static struct c2c_hists* +he__get_c2c_hists(struct hist_entry *he, + const char *sort) { struct c2c_hist_entry *c2c_he; struct c2c_hists *hists; @@ -74,7 +76,7 @@ he__get_hists(struct hist_entry *he, c2c_he = container_of(he, struct c2c_hist_entry, he); if (c2c_he->hists) - return &c2c_he->hists->hists; + return c2c_he->hists; hists = c2c_he->hists = zalloc(sizeof(*hists)); if (!hists) @@ -84,7 +86,7 @@ he__get_hists(struct hist_entry *he, if (ret) free(hists); - return &hists->hists; + return hists; } static int process_sample_event(struct perf_tool *tool __maybe_unused, @@ -93,7 +95,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct perf_evsel *evsel __maybe_unused, struct machine *machine) { - struct hists *hists = &c2c.hists.hists; + struct c2c_hists *c2c_hists = &c2c.hists; + struct c2c_hist_entry *c2c_he; + struct c2c_stats stats = { 0 }; struct hist_entry *he; struct addr_location al; struct mem_info *mi, *mi_dup; @@ -113,13 +117,19 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if 
(!mi_dup) goto free_mi; - he = hists__add_entry_ops(hists, &c2c_entry_ops, + c2c_decode_stats(&stats, mi); + + he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops, &al, NULL, NULL, mi, sample, true); if (he == NULL) goto free_mi_dup; - hists__inc_nr_samples(hists, he->filtered); + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_add_stats(&c2c_he->stats, &stats); + c2c_add_stats(&c2c_hists->stats, &stats); + + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); if (!ret) { @@ -129,17 +139,21 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - hists = he__get_hists(he, "offset"); - if (!hists) + c2c_hists = he__get_c2c_hists(he, "offset"); + if (!c2c_hists) goto free_mi_dup; - he = hists__add_entry_ops(hists, &c2c_entry_ops, + he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops, &al, NULL, NULL, mi, sample, true); if (he == NULL) goto free_mi_dup; - hists__inc_nr_samples(hists, he->filtered); + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_add_stats(&c2c_he->stats, &stats); + c2c_add_stats(&c2c_hists->stats, &stats); + + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); } -- 2.7.4
[PATCH 10/61] perf tools: Make several display functions global
Will be used from external places in following patches. Link: http://lkml.kernel.org/n/tip-w5tpcitxjvufkndq0x5eh...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/ui/hist.c | 2 +- tools/perf/util/hist.h | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 35e44b1879e3..77cf7a80e8d6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1080,7 +1080,7 @@ struct hpp_arg { bool current_entry; }; -static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) +int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) { struct hpp_arg *arg = hpp->ptr; int ret, len; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index b47fafc8ee2a..84ad92ad24be 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -237,7 +237,7 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); } -static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) +int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) { va_list args; ssize_t ssize = hpp->size; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a002c93fe422..ef9985cba1de 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -484,5 +484,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) #define HIERARCHY_INDENT 3 bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); +int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...); +int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); #endif /* __PERF_HIST_H */ -- 2.7.4
[PATCH 19/61] perf c2c report: Add sample processing
Adding basic sample processing specific hist_entry allocation callbacks (via hists__add_entry_ops). Overloading 'struct hist_entry' object with new 'struct c2c_hist_entry'. The new hist entry object will carry specific stats and nested hists objects. Link: http://lkml.kernel.org/n/tip-ksr9smz4o1t040h50z28d...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 108 ++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a3481f86e2ae..29fb9573e292 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -16,6 +16,15 @@ struct c2c_hists { struct perf_hpp_listlist; }; +struct c2c_hist_entry { + struct c2c_hists*hists; + /* +* must be at the end, +* because of its callchain dynamic entry +*/ + struct hist_entry he; +}; + struct perf_c2c { struct perf_tooltool; struct c2c_histshists; @@ -23,6 +32,86 @@ struct perf_c2c { static struct perf_c2c c2c; +static void *c2c_he_zalloc(size_t size) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = zalloc(size + sizeof(*c2c_he)); + if (!c2c_he) + return NULL; + + return &c2c_he->he; +} + +static void c2c_he_free(void *he) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (c2c_he->hists) { + hists__delete_entries(&c2c_he->hists->hists); + free(c2c_he->hists); + } + + free(c2c_he); +} + +static struct hist_entry_ops c2c_entry_ops = { + .new= c2c_he_zalloc, + .free = c2c_he_free, +}; + +static int process_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine) +{ + struct hists *hists = &c2c.hists.hists; + struct hist_entry *he; + struct addr_location al; + struct mem_info *mi; + int ret; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_debug("problem processing %d event, skipping it.\n", +event->header.type); + return -1; + } + + mi = 
sample__resolve_mem(sample, &al); + if (mi == NULL) + return -ENOMEM; + + he = hists__add_entry_ops(hists, &c2c_entry_ops, + &al, NULL, NULL, mi, + sample, true); + if (he == NULL) { + free(mi); + return -ENOMEM; + } + + hists__inc_nr_samples(hists, he->filtered); + ret = hist_entry__append_callchain(he, sample); + + addr_location__put(&al); + return ret; +} + +static struct perf_c2c c2c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, +}; + static const char * const c2c_usage[] = { "perf c2c {record|report}", NULL @@ -314,6 +403,7 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; + struct ui_progress prog; struct perf_data_file file = { .mode = PERF_DATA_MODE_READ, }; @@ -330,9 +420,12 @@ static int perf_c2c__report(int argc, const char **argv) argc = parse_options(argc, argv, c2c_options, report_c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc) + if (argc) usage_with_options(report_c2c_usage, c2c_options); + if (!input_name || !strlen(input_name)) + input_name = "perf.data"; + file.path = input_name; err = c2c_hists__init(&c2c.hists, "dcacheline"); @@ -356,6 +449,19 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } + err = perf_session__process_events(session); + if (err) { + pr_err("failed to process sample\n"); + goto out_session; + } + + ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); + + hists__collapse_resort(&c2c.hists.hists, NULL); + hists__output_resort(&c2c.hists.hists, &prog); + + ui_progress__finish(); + out_session:
[PATCH 09/61] perf tools: Make several sorting functions global
Will be used from external places in following patches. Link: http://lkml.kernel.org/n/tip-4jyvw21cac7yuqsdkzdo5...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/sort.c | 6 +++--- tools/perf/util/sort.h | 6 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9f7c1ea9e3ad..452e15a10dd2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -867,7 +867,7 @@ struct sort_entry sort_cycles = { }; /* --sort daddr_sym */ -static int64_t +int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) { uint64_t l = 0, r = 0; @@ -896,7 +896,7 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, width); } -static int64_t +int64_t sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right) { uint64_t l = 0, r = 0; @@ -1062,7 +1062,7 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*s", width, out); } -static int64_t +int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { u64 l, r; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ac7998048b1e..d4ef567dcd7b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -273,4 +273,10 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct perf_evlist *evlist, int level); int output_field_add(struct perf_hpp_list *list, char *tok); +int64_t +sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); #endif /* __PERF_SORT_H */ -- 2.7.4
[PATCH 03/61] perf tools: Make hist_entry__snprintf work over struct perf_hpp_list
Make hist_entry__snprintf take perf_hpp_list as an argument instead of using he->hists->hpp_list. This way we can display an arbitrary list of entries regardless of the hists setup, which will be useful in following patches. Link: http://lkml.kernel.org/n/tip-j2sizkyglam3narmndlj9...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/stdio/hist.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index a57131e61fe3..cb0371106c21 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -373,7 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) +static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_list *hpp_list) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; @@ -384,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) if (symbol_conf.exclude_other && !he->parent) return 0; - hists__for_each_format(he->hists, fmt) { + perf_hpp_list__for_each_format(hpp_list, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -509,7 +510,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, if (symbol_conf.report_hierarchy) return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); - hist_entry__snprintf(he, &hpp); + hist_entry__snprintf(he, &hpp, hists->hpp_list); ret = fprintf(fp, "%s\n", bf); -- 2.7.4
[PATCH 20/61] perf c2c report: Add cacheline hists processing
Store cacheline related entries in nested hist object for each cacheline data. Nested entries are sorted by 'offset' within related cacheline. We will allow specific sort keys to be configured for nested cacheline data entries in following patches. Link: http://lkml.kernel.org/n/tip-37f751rgqamq9miubmr89...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 90 1 file changed, 84 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 29fb9573e292..cd0406ab8b5d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -61,6 +61,32 @@ static struct hist_entry_ops c2c_entry_ops = { .free = c2c_he_free, }; +static int c2c_hists__init(struct c2c_hists *hists, + const char *sort); + +static struct hists* +he__get_hists(struct hist_entry *he, + const char *sort) +{ + struct c2c_hist_entry *c2c_he; + struct c2c_hists *hists; + int ret; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (c2c_he->hists) + return &c2c_he->hists->hists; + + hists = c2c_he->hists = zalloc(sizeof(*hists)); + if (!hists) + return NULL; + + ret = c2c_hists__init(hists, sort); + if (ret) + free(hists); + + return &hists->hists; +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -70,7 +96,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct hists *hists = &c2c.hists.hists; struct hist_entry *he; struct addr_location al; - struct mem_info *mi; + struct mem_info *mi, *mi_dup; int ret; if (machine__resolve(machine, &al, sample) < 0) { @@ -83,19 +109,50 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (mi == NULL) return -ENOMEM; + mi_dup = memdup(mi, sizeof(*mi)); + if (!mi_dup) + goto free_mi; + he = hists__add_entry_ops(hists, &c2c_entry_ops, &al, NULL, NULL, mi, sample, true); - if (he == NULL) { - free(mi); - return -ENOMEM; - } + if (he == NULL) + goto free_mi_dup; 
hists__inc_nr_samples(hists, he->filtered); ret = hist_entry__append_callchain(he, sample); + if (!ret) { + mi = mi_dup; + + mi_dup = memdup(mi, sizeof(*mi)); + if (!mi_dup) + goto free_mi; + + hists = he__get_hists(he, "offset"); + if (!hists) + goto free_mi_dup; + + he = hists__add_entry_ops(hists, &c2c_entry_ops, + &al, NULL, NULL, mi, + sample, true); + if (he == NULL) + goto free_mi_dup; + + hists__inc_nr_samples(hists, he->filtered); + ret = hist_entry__append_callchain(he, sample); + } + +out: addr_location__put(&al); return ret; + +free_mi_dup: + free(mi_dup); +free_mi: + free(mi); + ret = -ENOMEM; + goto out; } static struct perf_c2c c2c = { @@ -400,6 +457,27 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, return hpp_list__parse(&c2c_hists->list, output, sort); } +static int filter_cb(struct hist_entry *he __maybe_unused) +{ + return 0; +} + +static int resort_cl_cb(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + struct c2c_hists *c2c_hists; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_hists = c2c_he->hists; + + if (c2c_hists) { + hists__collapse_resort(&c2c_hists->hists, NULL); + hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); + } + + return 0; +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -458,7 +536,7 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort(&c2c.hists.hists, &prog); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb); ui_progress__finish(); -- 2.7.4
[PATCH 08/61] perf tools: Make output_field_add and sort_dimension__add global
Will be used from external places in following patches. Link: http://lkml.kernel.org/n/tip-15488tnxcj4rtteksy79y...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/sort.c | 8 tools/perf/util/sort.h | 4 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9e1f6f75a50f..9f7c1ea9e3ad 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2308,9 +2308,9 @@ int hpp_dimension__add_output(unsigned col) return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct perf_evlist *evlist, - int level) +int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist, + int level) { unsigned int i; @@ -2685,7 +2685,7 @@ void sort__setup_elide(FILE *output) } } -static int output_field_add(struct perf_hpp_list *list, char *tok) +int output_field_add(struct perf_hpp_list *list, char *tok) { unsigned int i; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f743bf2acd4..ac7998048b1e 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -269,4 +269,8 @@ bool is_strict_order(const char *order); int hpp_dimension__add_output(unsigned col); void reset_dimensions(void); +int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist, + int level); +int output_field_add(struct perf_hpp_list *list, char *tok); #endif /* __PERF_SORT_H */ -- 2.7.4
[PATCH 11/61] perf tools: Make hist_entry__snprintf function global
Will be used from external places in following patches. Link: http://lkml.kernel.org/n/tip-uip4x9u74t3dcz8sh4mei...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/stdio/hist.c | 4 ++-- tools/perf/util/hist.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 0a32b48eda80..3434d571ddd1 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -373,8 +373,8 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, - struct perf_hpp_list *hpp_list) +int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, +struct perf_hpp_list *hpp_list) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ef9985cba1de..aa5ddfa1fa22 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -486,5 +486,7 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); +int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, +struct perf_hpp_list *hpp_list); #endif /* __PERF_HIST_H */ -- 2.7.4
[PATCH 15/61] perf c2c: Add report subcommand
Adding c2c report subcommand. It reads the perf.data and displays shared data analysis. This patch adds report basic wirings. It gets fully implemented in following patches. Link: http://lkml.kernel.org/n/tip-8smklfkveeyv1pahfxv2r...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 66 +++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 58924c67f818..3fac3a294bdd 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -5,12 +5,74 @@ #include "builtin.h" #include #include "mem-events.h" +#include "session.h" +#include "hist.h" +#include "tool.h" +#include "data.h" + +struct perf_c2c { + struct perf_tool tool; +}; + +static struct perf_c2c c2c; static const char * const c2c_usage[] = { - "perf c2c", + "perf c2c {record|report}", NULL }; +static const char * const __usage_report[] = { + "perf c2c report", + NULL +}; + +static const char * const *report_c2c_usage = __usage_report; + +static int perf_c2c__report(int argc, const char **argv) +{ + struct perf_session *session; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; + const struct option c2c_options[] = { + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_INCR('v', "verbose", &verbose, +"be more verbose (show counter open errors, etc)"), + OPT_STRING('i', "input", &input_name, "file", + "the input file to process"), + OPT_END() + }; + int err = 0; + + argc = parse_options(argc, argv, c2c_options, report_c2c_usage, +PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc) + usage_with_options(report_c2c_usage, c2c_options); + + file.path = input_name; + + session = perf_session__new(&file, 0, &c2c.tool); + if (session == NULL) { + pr_debug("No memory for session\n"); + goto out; + } + + if (symbol__init(&session->header.env) < 0) + goto out_session; + + /* No pipe support at the moment. 
*/ + if (perf_data_file__is_pipe(session->file)) { + pr_debug("No pipe support at the moment.\n"); + goto out_session; + } + +out_session: + perf_session__delete(session); +out: + return err; +} + static int parse_record_events(const struct option *opt __maybe_unused, const char *str, int unset __maybe_unused) { @@ -129,6 +191,8 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) if (!strncmp(argv[0], "rec", 3)) { return perf_c2c__record(argc, argv); + } else if (!strncmp(argv[0], "rep", 3)) { + return perf_c2c__report(argc, argv); } else { usage_with_options(c2c_usage, c2c_options); } -- 2.7.4
[RFC] Arm64 boot fail with numa enable in BIOS
hi all, When I enable NUMA in BIOS for arm64, it failed to boot on v4.8-rc4-162-g071e31e. For the crash log, it seems caused by error number of cpumask. Any ideas about it? Thanks. The related config and detail dmesg can be seen in the attachment. --- crash messages --- [1.279155] [ cut here ] [1.537146] WARNING: CPU: 16 PID: 103 at ./include/linux/cpumask.h:121 try_to_wake_up+0x298/0x300 [1.546112] Modules linked in: [1.549190] [1.550687] CPU: 16 PID: 103 Comm: cpuhp/16 Tainted: GW 4.8.0-rc4-00163-g803ea3a #21 [1.559741] Hardware name: Hisilicon Hi1616 Evaluation Board (DT) [1.565896] task: 8013e9678000 task.stack: 8013e9674000 [1.571874] PC is at try_to_wake_up+0x298/0x300 [1.576446] LR is at try_to_wake_up+0x278/0x300 [1.581019] pc : [] lr : [] pstate: 20c5 [1.588490] sp : 8013e9677b90 [1.591832] x29: 8013e9677b90 x28: 8413eb81a4b0 [1.597196] x27: 008c x26: 08d6e840 [1.602561] x25: 0004 x24: 8013e96e82e0 [1.607925] x23: 0040 x22: 00c0 [1.613289] x21: 8013e96e868c x20: [1.618653] x19: 8013e96e8000 x18: [1.624018] x17: x16: 03010066 [1.629381] x15: 08ca8000 x14: 0003 [1.634745] x13: 0026 x12: 0009 [1.640109] x11: 0009 x10: [1.645472] x9 : x8 : 0014 [1.650837] x7 : 8013e9452e00 x6 : [1.656200] x5 : x4 : [1.661565] x3 : x2 : 0040 [1.666929] x1 : 0001 x0 : 08d63df9 [1.672293] [1.673788] ---[ end trace b58e70f3295a8cd8 ]--- [1.678448] Call trace: [1.680911] Exception stack(0x8013e96779c0 to 0x8013e9677af0) [1.687417] 79c0: 8013e96e8000 0001 8013e9677b90 080df66c [1.695329] 79e0: 0808e1f4 8013e9d30c80 [1.703242] 7a00: 8013e9677a20 0882b6f4 8013e9677a60 080dd384 [1.711153] 7a20: 8013e9677b00 08cbaa00 08d6e000 [1.719065] 7a40: 0001 0080 [1.726977] 7a60: 08d63df9 0001 0040 [1.734889] 7a80: 8013e9452e00 [1.742801] 7aa0: 0014 0009 [1.750713] 7ac0: 0009 0026 0003 08ca8000 [1.758624] 7ae0: 03010066 [1.763548] [] try_to_wake_up+0x298/0x300 [1.769175] [] wake_up_process+0x14/0x1c [1.774716] [] create_worker+0x108/0x194 [1.780255] [] alloc_unbound_pwq+0x1e4/0x398 [1.786146] [] 
wq_update_unbound_numa+0xdc/0x190 [1.792389] [] workqueue_online_cpu+0x254/0x2a8 [1.798545] [] cpuhp_up_callbacks+0x54/0x100 [1.804436] [] cpuhp_thread_fun+0x12c/0x13c [1.810240] [] smpboot_thread_fn+0x1a8/0x1cc [1.816130] [] kthread+0xd4/0xe8 [1.820967] [] ret_from_fork+0x10/0x40 [1.826334] Unable to handle kernel paging request at virtual address fffe841404c71524 [1.834333] pgd = 08dae000 [1.837762] [fffe841404c71524] *pgd=0413fbfee003, *pud= [1.844797] Internal error: Oops: 9604 [#1] SMP [1.849720] Modules linked in: [1.852799] CPU: 16 PID: 103 Comm: cpuhp/16 Tainted: GW 4.8.0-rc4-00163-g803ea3a #21 [1.861853] Hardware name: Hisilicon Hi1616 Evaluation Board (DT) [1.868007] task: 8013e9678000 task.stack: 8013e9674000 [1.873985] PC is at try_to_wake_up+0x148/0x300 [1.878557] LR is at try_to_wake_up+0x11c/0x300 [1.883129] pc : [] lr : [] pstate: 60c5 [1.890602] sp : 8013e9677b90 [1.893943] x29: 8013e9677b90 x28: 8413eb81a4b0 [1.899307] x27: 008c x26: 08d6e840 [1.904670] x25: 08ca5f10 x24: 08c77600 [1.910033] x23: 0040 x22: 00c0 [1.915398] x21: 8013e96e868c x20: 0004 [1.920761] x19: 8013e96e8000 x18: [1.926125] x17: x16: 03010066 [1.931489] x15: 08ca8000 x14: 0003 [1.936853] x13: 0026 x12: 0009 [1.942217] x11: 0009 x10: [1.947581] x9 : x8 : 0014 [1.952945] x7 : 8013e9452e00 x6 : [1.958309] x5 : 8413eb6ca700 x4 : [1.963674] x3 : 8413e2ba3000 x2 : 0010 [1.969037] x1 : 8413fbfffa80 x0 : 08c71aa4 [1.974401] [1.975897] Process cpuhp/16 (pid: 103, stack
[PATCH 24/61] perf c2c report: Add offset dimension key
Adding cacheline offset dimension key support. It displays cacheline offset as hex number. Link: http://lkml.kernel.org/n/tip-m0424ye98lqveg5nopto8...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 35 +++ 1 file changed, 35 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 060ee1050da9..086e337e9d7d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -300,6 +300,32 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr)); } +static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + uint64_t addr = 0; + int width = c2c_width(fmt, hpp, he->hists); + + if (he->mem_info) + addr = cl_offset(he->mem_info->daddr.al_addr); + + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr)); +} + +static int64_t +offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = cl_offset(left->mem_info->daddr.addr); + if (right->mem_info) + r = cl_offset(right->mem_info->daddr.addr); + + return (int64_t)(r - l); +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -343,6 +369,14 @@ static struct c2c_dimension dim_dcacheline = { .width = 18, }; +static struct c2c_dimension dim_offset = { + .header = HEADER_BOTH("Data address", "Offset"), + .name = "offset", + .cmp= offset_cmp, + .entry = offset_entry, + .width = 18, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -350,6 +384,7 @@ static struct c2c_dimension dim_dcacheline = { static struct c2c_dimension *dimensions[] = { &dim_dcacheline, + &dim_offset, NULL, }; -- 2.7.4
[PATCH 26/61] perf c2c report: Add hitm related dimension keys
Adding 5 hitm related dimension key wrappers. First 3 are to be displayed in the main cachelines overall output: tot_hitm, lcl_hitm, rmt_hitm The latter 2 are to be displayed within single cacheline output: cl_rmt_hitm, cl_lcl_hitm They all display bare numbers of remote/local/total HITMs for cacheline or its related offsets. Link: http://lkml.kernel.org/n/tip-iju5239xa5heqqben65g1...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 109 +++ 1 file changed, 109 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a97e6d6c3b9b..a48fcc91e9fd 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -346,6 +346,70 @@ iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return sort__iaddr_cmp(left, right); } +static int +tot_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + unsigned int tot_hitm; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + tot_hitm = c2c_he->stats.lcl_hitm + c2c_he->stats.rmt_hitm; + + return snprintf(hpp->buf, hpp->size, "%*u", width, tot_hitm); +} + +static int64_t +tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, +struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + unsigned int tot_hitm_left; + unsigned int tot_hitm_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + tot_hitm_left = c2c_left->stats.lcl_hitm + c2c_left->stats.rmt_hitm; + tot_hitm_right = c2c_right->stats.lcl_hitm + c2c_right->stats.rmt_hitm; + + return tot_hitm_left - tot_hitm_right; +} + +#define STAT_FN_ENTRY(__f) \ +static int \ +__f ## _entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, \ + struct hist_entry *he)\ +{ \ + struct c2c_hist_entry *c2c_he; \ + int width = c2c_width(fmt, hpp, he->hists); \ + \ + c2c_he = 
container_of(he, struct c2c_hist_entry, he); \ + return snprintf(hpp->buf, hpp->size, "%*u", width, \ + c2c_he->stats.__f); \ +} + +#define STAT_FN_CMP(__f) \ +static int64_t \ +__f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *left, struct hist_entry *right) \ +{ \ + struct c2c_hist_entry *c2c_left, *c2c_right;\ + \ + c2c_left = container_of(left, struct c2c_hist_entry, he); \ + c2c_right = container_of(right, struct c2c_hist_entry, he); \ + return c2c_left->stats.__f - c2c_right->stats.__f; \ +} + +#define STAT_FN(__f) \ + STAT_FN_ENTRY(__f) \ + STAT_FN_CMP(__f) + +STAT_FN(rmt_hitm) +STAT_FN(lcl_hitm) + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -405,6 +469,46 @@ static struct c2c_dimension dim_iaddr = { .width = 18, }; +static struct c2c_dimension dim_tot_hitm = { + .header = HEADER_SPAN("- LLC Load Hitm -", "Total", 2), + .name = "tot_hitm", + .cmp= tot_hitm_cmp, + .entry = tot_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_lcl_hitm = { + .header = HEADER_SPAN_LOW("Lcl"), + .name = "lcl_hitm", + .cmp= lcl_hitm_cmp, + .entry = lcl_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_rmt_hitm = { + .header = HEADER_SPAN_LOW("Rmt"), + .name = "rmt_hitm", + .cmp= rmt_hitm_cmp, + .entry = rmt_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_rmt_hitm = { + .header = HEADER_SPAN("- HITM -", "Rmt", 1), + .name = "cl_rmt_hitm", + .cmp= rmt_hitm_cmp, + .entry = rmt_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_lcl_hitm = { + .header = HEADER_SPAN_LOW("Lcl"), + .name = "cl_lcl_hitm", + .cmp
[PATCH 56/61] perf c2c report: Add cacheline index entry
It's convenient to have an index for each cacheline to help discussions about results over the phone. Add new 'Index' and 'Num' fields in main and single cacheline tables. $ perf c2c report = Shared Data Cache Line Table = # # Total Lcl - LLC Load Hitm - # Index Cacheline records HitmTotal Lcl Rmt ... # . .. ... ... ... ... ... # 0 0x880036233b401 11.11%110 1 0x88009ccb29001 11.11%110 2 0x8800b5b3bc407 11.11%110 ... = Shared Cache Line Distribution Pareto = # #- HITM - -- Store Refs --Data address # Num Rmt Lcl L1 Hit L1 Miss Offset Pid ... # . ... ... ... ... .. ... # - 00100 0x880036233b40 - 0.00% 100.00%0.00%0.00%0x300 - 10100 0x88009ccb2900 - 0.00% 100.00%0.00%0.00%0x28 549 ... Link: http://lkml.kernel.org/n/tip-4dhfagaz57tvrfjbg8nd2...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 64 +--- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c93a766190b1..eb78a73b9230 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -36,6 +36,7 @@ struct c2c_hist_entry { struct c2c_stats stats; unsigned long *cpuset; struct c2c_stats*node_stats; + unsigned int cacheline_idx; struct compute_stats cstats; @@ -1084,6 +1085,29 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, return snprintf(hpp->buf, hpp->size, "%*s", width, buf); } +static int +cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + snprintf(buf, 10, "%u", c2c_he->cacheline_idx); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +static int +cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + + return snprintf(hpp->buf, hpp->size, "%*s", width, ""); +} + 
#define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -1429,6 +1453,30 @@ static struct c2c_dimension dim_srcline = { .se = &sort_srcline, }; +static struct c2c_dimension dim_dcacheline_idx = { + .header = HEADER_LOW("Index"), + .name = "cl_idx", + .cmp= empty_cmp, + .entry = cl_idx_entry, + .width = 5, +}; + +static struct c2c_dimension dim_dcacheline_num = { + .header = HEADER_LOW("Num"), + .name = "cl_num", + .cmp= empty_cmp, + .entry = cl_idx_entry, + .width = 5, +}; + +static struct c2c_dimension dim_dcacheline_num_empty = { + .header = HEADER_LOW("Num"), + .name = "cl_num_empty", + .cmp= empty_cmp, + .entry = cl_idx_empty_entry, + .width = 5, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1473,6 +1521,9 @@ static struct c2c_dimension *dimensions[] = { &dim_mean_load, &dim_cpucnt, &dim_srcline, + &dim_dcacheline_idx, + &dim_dcacheline_num, + &dim_dcacheline_num_empty, NULL, }; @@ -1759,6 +1810,10 @@ static int resort_cl_cb(struct hist_entry *he) calc_width(he); if (display && c2c_hists) { + static unsigned int idx; + + c2c_he->cacheline_idx = idx++; + c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); hists__collapse_resort(&c2c_hists->hists, NULL); @@ -1946,10 +2001,10 @@ static void print_cachelin
[PATCH 57/61] perf c2c report: Add support to manage symbol name length
The width of symbol and source line entries can get really long and be inconvenient to display. Add support to display only part of such strings, with the possibility to switch to full length by using the --full-symbols option or the 's' key in the TUI browser. Link: http://lkml.kernel.org/n/tip-yxf5hfteyfaoi8xrgczqt...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 33 - 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index eb78a73b9230..1adb7fb4866c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -62,6 +62,7 @@ struct perf_c2c { bool show_src; bool use_stdio; bool stats_only; + bool symbol_full; /* HITM shared clines stats */ struct c2c_statshitm_stats; @@ -334,6 +335,21 @@ struct c2c_fmt { struct c2c_dimension*dim; }; +#define SYMBOL_WIDTH 30 + +static struct c2c_dimension dim_symbol; +static struct c2c_dimension dim_srcline; + +static int symbol_width(struct hists *hists, struct sort_entry *se) +{ + int width = hists__col_len(hists, se->se_width_idx); + + if (!c2c.symbol_full) + width = MIN(width, SYMBOL_WIDTH); + + return width; +} + static int c2c_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, struct hists *hists __maybe_unused) @@ -344,6 +360,9 @@ static int c2c_width(struct perf_hpp_fmt *fmt, c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); dim = c2c_fmt->dim; + if (dim == &dim_symbol || dim == &dim_srcline) + return symbol_width(hists, dim->se); + return dim->se ? 
hists__col_len(hists, dim->se->se_width_idx) : c2c_fmt->dim->width; } @@ -1564,9 +1583,13 @@ static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct c2c_dimension *dim = c2c_fmt->dim; size_t len = fmt->user_len; - if (!len) + if (!len) { len = hists__col_len(he->hists, dim->se->se_width_idx); + if (dim == &dim_symbol || dim == &dim_srcline) + len = symbol_width(he->hists, dim->se); + } + return dim->se->se_snprintf(he, hpp->buf, hpp->size, len); } @@ -2156,6 +2179,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct hist_browser *browser; int key = -1; + /* Display compact version first. */ + c2c.symbol_full = false; + c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; @@ -2175,6 +2201,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) key = hist_browser__run(browser, "help"); switch (key) { + case 's': + c2c.symbol_full = !c2c.symbol_full; + break; case 'q': goto out; default: @@ -2430,6 +2459,8 @@ static int perf_c2c__report(int argc, const char **argv) "Use the stdio interface"), OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Use the stdio interface"), + OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, + "Display full length of symbols"), OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, "print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt, -- 2.7.4
[PATCH 27/61] perf c2c report: Add stores related dimension keys
Adding 5 stores related dimension key wrappers. First 3 are to be displayed in the main cachelines overall output: stores, stores_l1hit, stores_l1miss The latter 2 are to be displayed within single cacheline output: cl_stores_l1hit, cl_stores_l1miss They all display bare numbers of stores for cacheline or its related offsets. Link: http://lkml.kernel.org/n/tip-qeml8v53v6q3wl5n8vgbf...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 48 1 file changed, 48 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a48fcc91e9fd..eb8bb158ad8a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -409,6 +409,9 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ STAT_FN(rmt_hitm) STAT_FN(lcl_hitm) +STAT_FN(store) +STAT_FN(st_l1hit) +STAT_FN(st_l1miss) #define HEADER_LOW(__h)\ { \ @@ -509,6 +512,46 @@ static struct c2c_dimension dim_cl_lcl_hitm = { .width = 7, }; +static struct c2c_dimension dim_stores = { + .header = HEADER_SPAN(" Store Reference ", "Total", 2), + .name = "stores", + .cmp= store_cmp, + .entry = store_entry, + .width = 7, +}; + +static struct c2c_dimension dim_stores_l1hit = { + .header = HEADER_SPAN_LOW("L1Hit"), + .name = "stores_l1hit", + .cmp= st_l1hit_cmp, + .entry = st_l1hit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_stores_l1miss = { + .header = HEADER_SPAN_LOW("L1Miss"), + .name = "stores_l1miss", + .cmp= st_l1miss_cmp, + .entry = st_l1miss_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_stores_l1hit = { + .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1), + .name = "cl_stores_l1hit", + .cmp= st_l1hit_cmp, + .entry = st_l1hit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_stores_l1miss = { + .header = HEADER_SPAN_LOW("L1 Miss"), + .name = "cl_stores_l1miss", + .cmp= st_l1miss_cmp, + .entry = st_l1miss_entry, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -523,6 +566,11 @@ static struct 
c2c_dimension *dimensions[] = { &dim_rmt_hitm, &dim_cl_lcl_hitm, &dim_cl_rmt_hitm, + &dim_stores, + &dim_stores_l1hit, + &dim_stores_l1miss, + &dim_cl_stores_l1hit, + &dim_cl_stores_l1miss, NULL, }; -- 2.7.4
[PATCH 52/61] perf c2c report: Limit the cachelines table entries
Add a limit on the number of entries in the cachelines table. By default a cacheline is now displayed only if it accounts for at least a 0.0005 share (DISPLAY_LINE_LIMIT) of all remote HITMs. Also display only cachelines with remote hitm or store data. Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 36 ++-- tools/perf/util/hist.c | 1 + tools/perf/util/hist.h | 1 + 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 913a6b9b4d45..571be80c6d18 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1643,11 +1643,42 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, return hpp_list__parse(&c2c_hists->list, output, sort); } -static int filter_cb(struct hist_entry *he __maybe_unused) +#define DISPLAY_LINE_LIMIT 0.0005 + +static bool he__display(struct hist_entry *he, struct c2c_stats *stats) +{ + struct c2c_hist_entry *c2c_he; + double ld_dist; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + if (stats->rmt_hitm) { + ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm); + if (ld_dist < DISPLAY_LINE_LIMIT) + he->filtered = HIST_FILTER__C2C; + } else { + he->filtered = HIST_FILTER__C2C; + } + + return he->filtered == 0; +} + +static inline int valid_hitm_or_store(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + return c2c_he->stats.rmt_hitm || c2c_he->stats.store; +} + +static int filter_cb(struct hist_entry *he) { if (c2c.show_src && !he->srcline) he->srcline = hist_entry__get_srcline(he); + if (!valid_hitm_or_store(he)) + he->filtered = HIST_FILTER__C2C; + return 0; } @@ -1655,11 +1686,12 @@ static int resort_cl_cb(struct hist_entry *he) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; + bool display = he__display(he, &c2c.hitm_stats); c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; - if (c2c_hists) { + if (display && c2c_hists) 
{ c2c_hists__reinit(c2c_hists, "percent_rmt_hitm," "percent_lcl_hitm," diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 37a08f20730a..020efa9d3d74 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1193,6 +1193,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, case HIST_FILTER__GUEST: case HIST_FILTER__HOST: case HIST_FILTER__SOCKET: + case HIST_FILTER__C2C: default: return; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0e3493e33175..ff6298693227 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -22,6 +22,7 @@ enum hist_filter { HIST_FILTER__GUEST, HIST_FILTER__HOST, HIST_FILTER__SOCKET, + HIST_FILTER__C2C, }; enum hist_column { -- 2.7.4
[PATCH 51/61] perf c2c report: Allow to report callchains
Add --call-graph option to properly setup callchain code. Adding default settings to display callchains whenever they are stored in the perf.data. Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 67 1 file changed, 67 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f4bdef5004c9..913a6b9b4d45 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -17,6 +17,7 @@ #include "evsel.h" #include #include "ui/browsers/hists.h" +#include "evlist.h" struct c2c_hists { struct histshists; @@ -181,6 +182,11 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } + ret = sample__resolve_callchain(sample, &callchain_cursor, NULL, + evsel, &al, sysctl_perf_event_max_stack); + if (ret) + goto out; + mi = sample__resolve_mem(sample, &al); if (mi == NULL) return -ENOMEM; @@ -2102,6 +2108,58 @@ static void ui_quirks(bool stdio) } } +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" + +const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; + +static int +parse_callchain_opt(const struct option *opt, const char *arg, int unset) +{ + struct callchain_param *callchain = opt->value; + + callchain->enabled = !unset; + /* +* --no-call-graph +*/ + if (unset) { + symbol_conf.use_callchain = false; + callchain->mode = CHAIN_NONE; + return 0; + } + + return parse_callchain_report_opt(arg); +} + +static int setup_callchain(struct perf_evlist *evlist) +{ + u64 sample_type = perf_evlist__combined_sample_type(evlist); + enum perf_call_graph_mode mode = CALLCHAIN_NONE; + + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) + mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + mode = CALLCHAIN_LBR; + else if (sample_type & PERF_SAMPLE_CALLCHAIN) + mode = CALLCHAIN_FP; 
+ + if (!callchain_param.enabled && + callchain_param.mode != CHAIN_NONE && + mode != CALLCHAIN_NONE) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + ui__error("Can't register callchain params.\n"); + return -EINVAL; + } + } + + callchain_param.record_mode = mode; + callchain_param.min_percent = 0; + return 0; +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -2109,6 +2167,7 @@ static int perf_c2c__report(int argc, const char **argv) struct perf_data_file file = { .mode = PERF_DATA_MODE_READ, }; + char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const struct option c2c_options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), @@ -2122,6 +2181,10 @@ static int perf_c2c__report(int argc, const char **argv) "Use the stdio interface"), OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Use the stdio interface"), + OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, + "print_type,threshold[,print_limit],order,sort_key[,branch],value", +callchain_help, &parse_callchain_opt, +callchain_default_opt), OPT_END() }; int err = 0; @@ -2166,6 +2229,10 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } + err = setup_callchain(session->evlist); + if (err) + goto out_session; + if (symbol__init(&session->header.env) < 0) goto out_session; -- 2.7.4
[PATCH 49/61] perf c2c report: Add shared cachelines stats stdio output
Display global shared cachelines related stats table as part of the stdio output or when --stats option is specified: $ perf c2c report --stats ... = Global Shared Cache Line Event Information = Total Shared Cache Lines : 1384 Load HITs on shared lines : 5995 Fill Buffer Hits on shared lines : 1726 L1D hits on shared lines : 1943 L2D hits on shared lines : 0 LLC hits on shared lines : 1360 Locked Access on shared lines : 1993 Store HITs on shared lines: 1504 Store L1D hits on shared lines: 1446 Total Merged records : 3527 Original-patch-by: Dick Fowles Original-patch-by: Don Zickus Link: http://lkml.kernel.org/n/tip-p0gty8ctbdzisrniwqxhq...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 62 +++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index aecfe70b2f52..e463da572207 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -56,6 +56,10 @@ struct perf_c2c { bool show_src; bool use_stdio; bool stats_only; + + /* HITM shared clines stats */ + struct c2c_statshitm_stats; + int shared_clines; }; static struct perf_c2c c2c; @@ -1733,6 +1737,39 @@ static int setup_nodes(struct perf_session *session) return 0; } +#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) + +static int resort_hitm_cb(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + c2c_he = container_of(he, struct c2c_hist_entry, he); + + if (HAS_HITMS(c2c_he)) { + c2c.shared_clines++; + c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats); + } + + return 0; +} + +static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) +{ + struct rb_node *next = rb_first(&hists->entries); + int ret = 0; + + while (next) { + struct hist_entry *he; + + he = rb_entry(next, struct hist_entry, rb_node); + ret = cb(he); + if (ret) + break; + next = rb_next(&he->rb_node); + } + + return ret; +} + static void print_c2c__display_stats(FILE *out) { int llc_misses; @@ -1778,6 +1815,26 @@ 
static void print_c2c__display_stats(FILE *out) fprintf(out, " Unable to parse data source : %10d\n", stats->noparse); } +static void print_shared_cacheline_info(FILE *out) +{ + struct c2c_stats *stats = &c2c.hitm_stats; + int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm; + + fprintf(out, "=\n"); + fprintf(out, "Global Shared Cache Line Event Information \n"); + fprintf(out, "=\n"); + fprintf(out, " Total Shared Cache Lines : %10d\n", c2c.shared_clines); + fprintf(out, " Load HITs on shared lines : %10d\n", stats->load); + fprintf(out, " Fill Buffer Hits on shared lines : %10d\n", stats->ld_fbhit); + fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit); + fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit); + fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm); + fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks); + fprintf(out, " Store HITs on shared lines: %10d\n", stats->store); + fprintf(out, " Store L1D hits on shared lines: %10d\n", stats->st_l1hit); + fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store); +} + static void print_cacheline(struct c2c_hists *c2c_hists, struct hist_entry *he_cl, struct perf_hpp_list *hpp_list, @@ -1842,6 +1899,8 @@ static void perf_c2c__hists_fprintf(FILE *out) setup_pager(); print_c2c__display_stats(out); + fprintf(out, "\n"); + print_shared_cacheline_info(out); if (c2c.stats_only) return; @@ -2118,7 +2177,8 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb); + hists__iterate_cb(&c2c.hists.hists, resort_cl_cb); ui_progress__finish(); --
[PATCH 50/61] perf c2c report: Add c2c related stats stdio output
Display c2c related configuration options/setup. So far it's output of monitored events: $ perf c2c report --stats ... = c2c details = Events: cpu/mem-loads,ldlat=50/pp : cpu/mem-stores/pp Link: http://lkml.kernel.org/n/tip-ypz84f3a9fumyttrxurm4...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 25 +++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e463da572207..f4bdef5004c9 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -13,6 +13,8 @@ #include "tool.h" #include "data.h" #include "sort.h" +#include "evlist.h" +#include "evsel.h" #include #include "ui/browsers/hists.h" @@ -1894,13 +1896,32 @@ static void print_pareto(FILE *out) } } -static void perf_c2c__hists_fprintf(FILE *out) +static void print_c2c_info(FILE *out, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + bool first = true; + + fprintf(out, "=\n"); + fprintf(out, " c2c details \n"); + fprintf(out, "=\n"); + + evlist__for_each_entry(evlist, evsel) { + fprintf(out, "%-36s: %s\n", first ? " Events" : "", + perf_evsel__name(evsel)); + first = false; + } +} + +static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) { setup_pager(); print_c2c__display_stats(out); fprintf(out, "\n"); print_shared_cacheline_info(out); + fprintf(out, "\n"); + print_c2c_info(out, session); if (c2c.stats_only) return; @@ -2183,7 +2204,7 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__finish(); if (c2c.use_stdio) - perf_c2c__hists_fprintf(stdout); + perf_c2c__hists_fprintf(stdout, session); else perf_c2c__hists_browse(&c2c.hists.hists); -- 2.7.4
[PATCH 47/61] perf c2c report: Add cacheline browser
Adding single cacheline TUI browser. It triggers when you press 'd' in the main browser on the specific cacheline. It allows to navigate through cacheline's offsets and display callchains (implemented in following patches). Link: http://lkml.kernel.org/n/tip-fovjwgyusv3rz5qxk3hna...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 81 1 file changed, 81 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 47d5408aeff8..b380cdf0e6aa 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1829,6 +1829,84 @@ static void c2c_browser__update_nr_entries(struct hist_browser *hb) hb->nr_non_filtered_entries = nr_entries; } +struct c2c_cacheline_browser { + struct hist_browser hb; + struct hist_entry *he; +}; + +static int +perf_c2c_cacheline_browser__title(struct hist_browser *browser, + char *bf, size_t size) +{ + struct c2c_cacheline_browser *cl_browser; + struct hist_entry *he; + uint64_t addr = 0; + + cl_browser = container_of(browser, struct c2c_cacheline_browser, hb); + he = cl_browser->he; + + if (he->mem_info) + addr = cl_address(he->mem_info->daddr.addr); + + scnprintf(bf, size, "Cacheline 0x%lx", addr); + return 0; +} + +static struct c2c_cacheline_browser* +c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he) +{ + struct c2c_cacheline_browser *browser; + + browser = zalloc(sizeof(*browser)); + if (browser) { + hist_browser__init(&browser->hb, hists); + browser->hb.c2c_filter = true; + browser->hb.title = perf_c2c_cacheline_browser__title; + browser->he = he; + } + + return browser; +} + +static int perf_c2c__browse_cacheline(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + struct c2c_hists *c2c_hists; + struct c2c_cacheline_browser *cl_browser; + struct hist_browser *browser; + int key = -1; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_hists = c2c_he->hists; + + cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he); + if (cl_browser == 
NULL) + return -1; + + browser = &cl_browser->hb; + + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + c2c_browser__update_nr_entries(browser); + + while (1) { + key = hist_browser__run(browser, "help"); + + switch (key) { + case 'q': + goto out; + default: + break; + } + } + +out: + free(cl_browser); + return 0; +} + static int perf_c2c_browser__title(struct hist_browser *browser, char *bf, size_t size) { @@ -1872,6 +1950,9 @@ static int perf_c2c__hists_browse(struct hists *hists) switch (key) { case 'q': goto out; + case 'd': + perf_c2c__browse_cacheline(browser->he_selection); + break; default: break; } -- 2.7.4
[PATCH 55/61] perf c2c report: Recalc width of global sort entries
Using resort callbacks to compute the columns' width. Computing only the global ones, c2c entries have fixed width only. Link: http://lkml.kernel.org/n/tip-zyayvq2u3dzyf3y7i9jza...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 12 1 file changed, 12 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ff8a66ee7092..c93a766190b1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1726,11 +1726,21 @@ static inline int valid_hitm_or_store(struct hist_entry *he) return has_hitm || c2c_he->stats.store; } +static void calc_width(struct hist_entry *he) +{ + struct c2c_hists *c2c_hists; + + c2c_hists = container_of(he->hists, struct c2c_hists, hists); + hists__calc_col_len(&c2c_hists->hists, he); +} + static int filter_cb(struct hist_entry *he) { if (c2c.show_src && !he->srcline) he->srcline = hist_entry__get_srcline(he); + calc_width(he); + if (!valid_hitm_or_store(he)) he->filtered = HIST_FILTER__C2C; @@ -1746,6 +1756,8 @@ static int resort_cl_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; + calc_width(he); + if (display && c2c_hists) { c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); -- 2.7.4
[PATCH 42/61] perf c2c report: Add src line sort key
Adding source line dimension key wrapper. It is to be displayed in the single cacheline output: cl_srcline It displays source line related to the code address that accessed cacheline. It's a wrapper to global srcline sort entry. Link: http://lkml.kernel.org/n/tip-cmnzgm37mjz56ozsg4mnb...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 11 +++ tools/perf/util/sort.c | 2 +- tools/perf/util/sort.h | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a4fea832e677..c540917a70c4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -50,6 +50,8 @@ struct perf_c2c { int cpus_cnt; int *cpu2node; int node_info; + + bool show_src; }; static struct perf_c2c c2c; @@ -1363,6 +1365,11 @@ static struct c2c_dimension dim_cpucnt = { .width = 8, }; +static struct c2c_dimension dim_srcline = { + .name = "cl_srcline", + .se = &sort_srcline, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1406,6 +1413,7 @@ static struct c2c_dimension *dimensions[] = { &dim_mean_lcl, &dim_mean_load, &dim_cpucnt, + &dim_srcline, NULL, }; @@ -1613,6 +1621,9 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, static int filter_cb(struct hist_entry *he __maybe_unused) { + if (c2c.show_src && !he->srcline) + he->srcline = hist_entry__get_srcline(he); + return 0; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 452e15a10dd2..df622f4e301e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -315,7 +315,7 @@ struct sort_entry sort_sym = { /* --sort srcline */ -static char *hist_entry__get_srcline(struct hist_entry *he) +char *hist_entry__get_srcline(struct hist_entry *he) { struct map *map = he->ms.map; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index d4ef567dcd7b..7aff317fc7c4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -40,6 +40,7 @@ extern struct sort_entry sort_dso_from; extern struct 
sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; +extern struct sort_entry sort_srcline; extern enum sort_type sort__first_dimension; extern const char default_mem_sort_order[]; @@ -279,4 +280,5 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); +char *hist_entry__get_srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ -- 2.7.4
[PATCH 39/61] perf c2c report: Add node sort key
Adding node dimension key wrapper. It is to be displayed in the single cacheline output: node It displays nodes hits related to cacheline accesses. The node filed comes in 3 flavors: - node IDs separated by ',' - node IDs with stats for each ID, in following format: Node{cpus %hitms %stores} - node IDs with list of affected CPUs in following format: Node{cpu list} User can switch the flavor with -N option (-NN,-NNN). It will be available in TUI to switch this with 'n' key. Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrg...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 219 +++ 1 file changed, 219 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b3dcd590e97a..6b4224764ae4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "util.h" #include "debug.h" #include "builtin.h" @@ -22,6 +23,8 @@ struct c2c_hists { struct c2c_hist_entry { struct c2c_hists*hists; struct c2c_stats stats; + unsigned long *cpuset; + struct c2c_stats*node_stats; /* * must be at the end, * because of its callchain dynamic entry @@ -32,6 +35,12 @@ struct c2c_hist_entry { struct perf_c2c { struct perf_tooltool; struct c2c_histshists; + + unsigned long **nodes; + int nodes_cnt; + int cpus_cnt; + int *cpu2node; + int node_info; }; static struct perf_c2c c2c; @@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size) if (!c2c_he) return NULL; + c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt); + if (!c2c_he->cpuset) + return NULL; + + c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats)); + if (!c2c_he->node_stats) + return NULL; + return &c2c_he->he; } @@ -57,6 +74,8 @@ static void c2c_he_free(void *he) free(c2c_he->hists); } + free(c2c_he->cpuset); + free(c2c_he->node_stats); free(c2c_he); } @@ -91,6 +110,16 @@ he__get_c2c_hists(struct hist_entry *he, return hists; } +static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, + struct 
perf_sample *sample) +{ + if (WARN_ONCE(sample->cpu == (unsigned int) -1, + "WARNING: no sample cpu value")) + return; + + set_bit(sample->cpu, c2c_he->cpuset); +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -131,10 +160,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_add_stats(&c2c_he->stats, &stats); c2c_add_stats(&c2c_hists->stats, &stats); + c2c_he__set_cpu(c2c_he, sample); + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); if (!ret) { + /* +* There's already been warning about missing +* sample's cpu value. Let's account all to +* node 0 in this case, without any further +* warning. +* +* Doing node stats only for single callchain data. +*/ + int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu; + int node = c2c.cpu2node[cpu]; + mi = mi_dup; mi_dup = memdup(mi, sizeof(*mi)); @@ -154,6 +196,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_add_stats(&c2c_he->stats, &stats); c2c_add_stats(&c2c_hists->stats, &stats); + c2c_add_stats(&c2c_he->node_stats[node], &stats); + + c2c_he__set_cpu(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return left->thread->pid_ - right->thread->pid_; } +static int64_t +empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int +node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + bool first = true; + int node; + int ret = 0; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + for (node = 0;
[PATCH 59/61] perf c2c report: Add help windows
Adding help windows to display key/action mappings for both browsers. Link: http://lkml.kernel.org/n/tip-zni4apopx6a9eyxsosm1e...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 0902aba4cf19..e1e74ed27075 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2178,6 +2178,11 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct c2c_cacheline_browser *cl_browser; struct hist_browser *browser; int key = -1; + const char help[] = + " ENTER Togle callchains (if present) \n" + " n Togle Node details info \n" + " s Togle full lenght of symbol and source line columns \n" + " q Return back to cacheline list \n"; /* Display compact version first. */ c2c.symbol_full = false; @@ -2198,7 +2203,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "help"); + key = hist_browser__run(browser, "? - help"); switch (key) { case 's': @@ -2210,6 +2215,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) break; case 'q': goto out; + case '?': + ui_browser__help_window(&browser->b, help); + break; default: break; } @@ -2248,6 +2256,10 @@ static int perf_c2c__hists_browse(struct hists *hists) { struct hist_browser *browser; int key = -1; + const char help[] = + " d Display cacheline details \n" + " ENTER Togle callchains (if present) \n" + " q Quit \n"; browser = perf_c2c_browser__new(hists); if (browser == NULL) @@ -2260,7 +2272,7 @@ static int perf_c2c__hists_browse(struct hists *hists) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "help"); + key = hist_browser__run(browser, "? 
- help"); switch (key) { case 'q': @@ -2268,6 +2280,9 @@ static int perf_c2c__hists_browse(struct hists *hists) case 'd': perf_c2c__browse_cacheline(browser->he_selection); break; + case '?': + ui_browser__help_window(&browser->b, help); + break; default: break; } -- 2.7.4
[PATCH 22/61] perf c2c report: Add header macros
Adding helping macros to define header objects. It will be used in following patches, that add new dimensions. The c2c report will support 2 line headers, hence we only define line[0/1] in macros. Link: http://lkml.kernel.org/n/tip-tkgrfvlw0m5awb75fk2sv...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 40 1 file changed, 40 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 7bf6248dbd75..c21124e6bb63 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -271,6 +271,46 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, text); } +#define HEADER_LOW(__h)\ + { \ + .line[1] = {\ + .text = __h,\ + }, \ + } + +#define HEADER_BOTH(__h0, __h1)\ + { \ + .line[0] = {\ + .text = __h0, \ + }, \ + .line[1] = {\ + .text = __h1, \ + }, \ + } + +#define HEADER_SPAN(__h0, __h1, __s) \ + { \ + .line[0] = {\ + .text = __h0, \ + .span = __s,\ + }, \ + .line[1] = {\ + .text = __h1, \ + }, \ + } + +#define HEADER_SPAN_LOW(__h) \ + { \ + .line[1] = {\ + .text = __h,\ + }, \ + } + +#undef HEADER_LOW +#undef HEADER_BOTH +#undef HEADER_SPAN +#undef HEADER_SPAN_LOW + static struct c2c_dimension *dimensions[] = { NULL, }; -- 2.7.4
Re: [PATCH] net: phy: Ensure the state machine is called when phy is UP
Hi all, I come back to this thread to re-start the conversation as I still have the issue... Le 16/04/2016 à 00:45, Alexandre Belloni a écrit : > On 16/04/2016 at 00:30:26 +0200, Andrew Lunn wrote : >> On Sat, Apr 16, 2016 at 12:17:11AM +0200, Alexandre Belloni wrote: >>> On 16/04/2016 at 00:05:08 +0200, Andrew Lunn wrote : > Trace without my patch: > libphy: MACB_mii_bus: probed > macb f802.ethernet eth0: Cadence GEM rev 0x00020120 at 0xf802 irq > 27 (fc:c2:3d:0c:6e:05) > Micrel KSZ8081 or KSZ8091 f802.etherne:01: attached PHY driver > [Micrel KSZ8081 or KSZ8091] (mii_bus:phy_addr=f802.etherne:01, > irq=171) > Micrel KSZ8081 or KSZ8091 f802.etherne:01: PHY state change READY -> > READY > [...] > Micrel KSZ8081 or KSZ8091 f802.etherne:01: PHY state change READY -> > READY Are there some state changes before this? How is it getting to state READY? It would expect it to start in DOWN, from when the phy device was created in phy_device_create(). >>> >>> No other changes. I forgot to mention that this is when booting with a >>> cable plugged in. Unplugging and replugging the cable makes the link >>> detection work fine even without the patch. >> >> Are you tftpbooting? I.e. has the boot loader already done an auto >> negotiation? >> > > Yes. Yes indeed: this is my use-case: load the kernel from U-Boot using tftp and having the rootfs in NAND flash so, no NFS rootfs for me. >> I've looked at the code and i still don't see how it gets to READY. >> What i do see is that when you connect the phy to the MAC, the >> interrupt handler is installed. So maybe there are some PHY interrupts >> before the interface is opened? Could you put a print in >> phy_interrupt(). >> > > That is indeed the case, and I'm not sure why because > 99f81afc139c6edd14d77a91ee91685a414a1c66 is trying to disable AN at > boot. 
I don't know what happens to the phy, but this patch does fix the issue for me: Tested-by: Nicolas Ferre The other alternative that I'm seriously considering, as I'm still struggling with this, is to simply remove the phy IRQ from my board DT. Best regards, -- Nicolas Ferre
[PATCH 60/61] perf c2c: Add man page and credits
Adding man page for c2c command and credits to builtin-c2c.c file. Link: http://lkml.kernel.org/n/tip-twbp391v8v9f5idp584hl...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/Documentation/perf-c2c.txt | 276 ++ tools/perf/builtin-c2c.c | 11 ++ 2 files changed, 287 insertions(+) create mode 100644 tools/perf/Documentation/perf-c2c.txt diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt new file mode 100644 index ..ba2f4de399c3 --- /dev/null +++ b/tools/perf/Documentation/perf-c2c.txt @@ -0,0 +1,276 @@ +perf-c2c(1) +=== + +NAME + +perf-c2c - Shared Data C2C/HITM Analyzer. + +SYNOPSIS + +[verse] +'perf c2c record' [] +'perf c2c record' [] -- [] +'perf c2c report' [] + +DESCRIPTION +--- +C2C stands for Cache To Cache. + +The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows +you to track down the cacheline contentions. + +The tool is based on x86's load latency and precise store facility events +provided by Intel CPUs. These events provide: + - memory address of the access + - type of the access (load and store details) + - latency (in cycles) of the load access + +The c2c tool provide means to record this data and report back access details +for cachelines with highest contention - highest number of HITM accesses. + +The basic workflow with this tool follows the standard record/report phase. +User uses the record command to record events data and report command to +display it. + + +RECORD OPTIONS +-- +-e:: +--event=:: + Select the PMU event. Use 'perf mem record -e list' + to list available events. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-l:: +--ldlat:: + Configure mem-loads latency. + +-k:: +--all-kernel:: + Configure all used events to run in kernel space. + +-u:: +--all-user:: + Configure all used events to run in user space. + +REPORT OPTIONS +-- +-k:: +--vmlinux=:: + vmlinux pathname + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). 
+ +-i:: +--input:: + Specify the input file to process. + +-N:: +--node-info:: + Show extra node info in report (see NODE INFO section) + +-c:: +--coalesce:: + Specify sorting fields for single cacheline display. + Following fields are available: tid,pid,iaddr,dso + (see COALESCE) + +-g:: +--call-graph:: + Setup callchains parameters. + Please refer to perf-report man page for details. + +--stdio:: + Force the stdio output (see STDIO OUTPUT) + +--stats:: + Display only statistic tables and force stdio mode. + +--full-symbols:: + Display full length of symbols. + +C2C RECORD +-- +The perf c2c record command sets up options related to HITM cacheline analysis +and calls standard perf record command. + +Following perf record options are configured by default: +(check perf record man page for details) + + -W,-d,--sample-cpu + +Unless specified otherwise with '-e' option, following events are monitored by +default: + + cpu/mem-loads,ldlat=30/P + cpu/mem-stores/P + +User can pass any 'perf record' option behind '--' mark, like (to enable +callchains and system wide monitoring): + + $ perf c2c record -- -g -a + +Please check RECORD OPTIONS section for specific c2c record options. + +C2C REPORT +-- +The perf c2c report command displays shared data analysis. It comes in two +display modes: stdio and tui (default). 
+ +The report command workflow is following: + - sort all the data based on the cacheline address + - store access details for each cacheline + - sort all cachelines based on user settings + - display data + +In general perf report output consists of 2 basic views: + 1) most expensive cachelines list + 2) offsets details for each cacheline + +For each cacheline in the 1) list we display following data: +(Both stdio and TUI modes follow the same fields output) + + Index + - zero based index to identify the cacheline + + Cacheline + - cacheline address (hex number) + + Total records + - sum of all cachelines accesses + + Rmt/Lcl Hitm + - cacheline percentage of all Remote/Local HITM accesses + + LLC Load Hitm - Total, Lcl, Rmt + - count of Total/Local/Remote load HITMs + + Store Reference - Total, L1Hit, L1Miss +Total - all store accesses +L1Hit - store accesses that hit L1 +L1Miss - store accesses that missed L1 + + Load Dram + - count of local and remote DRAM accesses + + LLC Ld Miss + - count of all accesses that missed LLC + + Total Loads + - sum of all load accesses + + Core Load Hit - FB, L1, L2 + - count of load hits in FB (Fill Buffer), L1 and L2 cache + + LLC Load Hit - Llc, Rmt + - count of LLC and Remote load hits + +For each offset in the 2) list we display following data: + + HITM - Rmt, Lcl + - % of Remote/Local HITM accesses for given offset within cachel
Re: [PATCH] kvm: x86: correctly reset dest_map->vector when restoring LAPIC state
On Wed, Sep 14, 2016 at 11:48:32PM +0200, Paolo Bonzini wrote: > When userspace sends KVM_SET_LAPIC, KVM schedules a check between > the vCPU's IRR and ISR and the IOAPIC redirection table, in order > to re-establish the IOAPIC's dest_map (the list of CPUs servicing > the real-time clock interrupt with the corresponding vectors). > > However, __rtc_irq_eoi_tracking_restore_one was forgetting to > set dest_map->vectors. Because of this, the IOAPIC did not process > the real-time clock interrupt EOI, ioapic->rtc_status.pending_eoi > got stuck at a non-zero value, and further RTC interrupts were > reported to userspace as coalesced. > > Fixes: 9e4aabe2bb3454c83dac8139cf9974503ee044db > Fixes: 4d99ba898dd0c521ca6cdfdde55c9b58aea3cb3d > Cc: Joerg Roedel > Cc: David Gilbert > Cc: Radim Krčmář > Signed-off-by: Paolo Bonzini Good catch, thanks for fixing this.
[PATCH 53/61] perf c2c report: Add support to choose local HITMs
Currently we sort and limit displayed data based on the remote HITMs count. Adding support to switch to local HITMs via --display option: --display ... lcl,rmt Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 117 ++- 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 571be80c6d18..3541c94fff02 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -63,6 +63,13 @@ struct perf_c2c { /* HITM shared clines stats */ struct c2c_statshitm_stats; int shared_clines; + + int display; +}; + +enum { + DISPLAY_LCL, + DISPLAY_RMT, }; static struct perf_c2c c2c; @@ -680,15 +687,24 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he) struct c2c_hists *hists; struct c2c_stats *stats; struct c2c_stats *total; - int tot, st; + int tot = 0, st = 0; double p; hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); stats = &c2c_he->stats; total = &hists->stats; - st = stats->rmt_hitm; - tot = total->rmt_hitm; + switch (c2c.display) { + case DISPLAY_RMT: + st = stats->rmt_hitm; + tot = total->rmt_hitm; + break; + case DISPLAY_LCL: + st = stats->lcl_hitm; + tot = total->lcl_hitm; + default: + break; + } p = tot ? 
(double) st / tot : 0; @@ -971,14 +987,26 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); advance_hpp(hpp, ret); + #define DISPLAY_HITM(__h) \ + if (c2c_he->stats.__h> 0) { \ + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",\ + percent(stats->__h, c2c_he->stats.__h));\ + } else { \ + ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");\ + } - if (c2c_he->stats.rmt_hitm > 0) { - ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", - percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm)); - } else { - ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); + switch (c2c.display) { + case DISPLAY_RMT: + DISPLAY_HITM(rmt_hitm); + break; + case DISPLAY_LCL: + DISPLAY_HITM(lcl_hitm); + default: + break; } + #undef DISPLAY_HITM + advance_hpp(hpp, ret); if (c2c_he->stats.store > 0) { @@ -1254,8 +1282,12 @@ static struct c2c_dimension dim_tot_loads = { .width = 7, }; +static struct c2c_header percent_hitm_header[] = { + [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"), + [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"), +}; + static struct c2c_dimension dim_percent_hitm = { - .header = HEADER_LOW("%hitm"), .name = "percent_hitm", .cmp= percent_hitm_cmp, .entry = percent_hitm_entry, @@ -1652,23 +1684,39 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) c2c_he = container_of(he, struct c2c_hist_entry, he); - if (stats->rmt_hitm) { - ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm); - if (ld_dist < DISPLAY_LINE_LIMIT) - he->filtered = HIST_FILTER__C2C; - } else { - he->filtered = HIST_FILTER__C2C; +#define FILTER_HITM(__h) \ + if (stats->__h) { \ + ld_dist = ((double)c2c_he->stats.__h / stats->__h); \ + if (ld_dist < DISPLAY_LINE_LIMIT) \ + he->filtered = HIST_FILTER__C2C;\ + } else {\ + he->filtered = HIST_FILTER__C2C;\ } + switch (c2c.display) { + case DISPLAY_LCL: + FILTER_HITM(lcl_hitm); + break; + case DISPLAY_RMT: +
[PATCH 48/61] perf c2c report: Add global stats stdio output
Display global stats table as part of the stdio output or when --stats option is specified: $ perf c2c report --stats = Trace Event Information = Total records : 41237 Locked Load/Store Operations : 4075 Load Operations : 20526 Loads - uncacheable : 0 Loads - IO: 0 Loads - Miss :552 Loads - no mapping: 31 Load Fill Buffer Hit : 7333 Load L1D hit : 6398 Load L2D hit :144 Load LLC hit : 4889 Load Local HITM : 1185 Load Remote HITM :838 Load Remote HIT : 52 Load Local DRAM :183 Load Remote DRAM :106 Load MESI State Exclusive :289 Load MESI State Shared: 0 Load LLC Misses : 1179 LLC Misses to Local DRAM : 15.5% LLC Misses to Remote DRAM :9.0% LLC Misses to Remote cache (HIT) :4.4% LLC Misses to Remote cache (HITM) : 71.1% Store Operations : 20711 Store - uncacheable : 0 Store - no mapping: 1 Store L1D Hit : 20158 Store L1D Miss:552 No Page Map Rejects : 7 Unable to parse data source : 0 Original-patch-by: Dick Fowles Original-patch-by: Don Zickus Link: http://lkml.kernel.org/n/tip-qkyvao3qsrnwazf0w1jvs...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 56 1 file changed, 56 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b380cdf0e6aa..aecfe70b2f52 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -55,6 +55,7 @@ struct perf_c2c { bool show_src; bool use_stdio; + bool stats_only; }; static struct perf_c2c c2c; @@ -1732,6 +1733,51 @@ static int setup_nodes(struct perf_session *session) return 0; } +static void print_c2c__display_stats(FILE *out) +{ + int llc_misses; + struct c2c_stats *stats = &c2c.hists.stats; + + llc_misses = stats->lcl_dram + +stats->rmt_dram + +stats->rmt_hit + +stats->rmt_hitm; + + fprintf(out, "=\n"); + fprintf(out, "Trace Event Information \n"); + fprintf(out, "=\n"); + fprintf(out, " Total records : %10d\n", stats->nr_entries); + fprintf(out, " Locked Load/Store Operations : %10d\n", stats->locks); + fprintf(out, " Load Operations : %10d\n", stats->load); + 
fprintf(out, " Loads - uncacheable : %10d\n", stats->ld_uncache); + fprintf(out, " Loads - IO: %10d\n", stats->ld_io); + fprintf(out, " Loads - Miss : %10d\n", stats->ld_miss); + fprintf(out, " Loads - no mapping: %10d\n", stats->ld_noadrs); + fprintf(out, " Load Fill Buffer Hit : %10d\n", stats->ld_fbhit); + fprintf(out, " Load L1D hit : %10d\n", stats->ld_l1hit); + fprintf(out, " Load L2D hit : %10d\n", stats->ld_l2hit); + fprintf(out, " Load LLC hit : %10d\n", stats->ld_llchit + stats->lcl_hitm); + fprintf(out, " Load Local HITM : %10d\n", stats->lcl_hitm); + fprintf(out, " Load Remote HITM : %10d\n", stats->rmt_hitm); + fprintf(out, " Load Remote HIT : %10d\n", stats->rmt_hit); + fprintf(out, " Load Local DRAM : %10d\n", stats->lcl_dram); + fprintf(out, " Load Remote DRAM : %10d\n", stats->rmt_dram); + fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl); + fprintf(out, " Load MESI State Shared: %10d\n", stats->ld_shared); + fprintf(out, " Load LLC Misses : %10d\n", llc_misses); + fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.); + fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 1
Re: [REGRESSION? v4.8] i2c-core: acpi_i2c_get_info() touches non-existent devices
On Mon, Sep 19, 2016 at 11:48:07AM +0300, Mika Westerberg wrote: > On Mon, Sep 19, 2016 at 12:30:53AM +0200, Nicolai Stange wrote: > > Hi, > > > > I'm encountering the following: > > > > [ 10.409490] ERROR: Unable to locate IOAPIC for GSI 37 > > > > Note that the system works fine, so it's a "cosmetic" regression, I think. > > > > > > I added a dump_stack() right below the printk() in question and it reads > > as > > > > [ 10.410290] CPU: 6 PID: 710 Comm: systemd-udevd Not tainted 4.7.0-rc4+ > > #348 > > [ 10.410962] Hardware name: Dell Inc. Latitude E6540/0725FP, BIOS A10 > > 06/26/2014 > > [ 10.411772] 0286 b9050627 8800c2e5f590 > > a54161e7 > > [ 10.412569] 0025 0001 8800c2e5f5a0 > > a50465df > > [ 10.413292] 8800c2e5f5d0 a5046ffd > > 0025 > > [ 10.414016] Call Trace: > > [ 10.414713] [] dump_stack+0x68/0xa1 > > [ 10.415406] [] mp_find_ioapic+0x4f/0x60 > > [ 10.416131] [] mp_map_gsi_to_irq+0x1d/0xc0 > > [ 10.416806] [] acpi_register_gsi_ioapic+0x7b/0x170 > > [ 10.417494] [] acpi_register_gsi+0xf/0x20 > > [ 10.418217] [] > > acpi_dev_get_irqresource.part.3+0xd7/0x11d > > [ 10.418871] [] ? > > acpi_dev_resource_address_space+0x31/0x67 > > [ 10.419655] [] acpi_dev_resource_interrupt+0x9b/0xab > > [ 10.420408] [] acpi_dev_process_resource+0xbc/0xf7 > > [ 10.421070] [] ? acpi_dev_resource_memory+0x7c/0x7c > > [ 10.421732] [] acpi_walk_resource_buffer+0x4d/0x85 > > [ 10.422399] [] ? acpi_dev_resource_memory+0x7c/0x7c > > [ 10.423158] [] acpi_walk_resources+0x83/0xb6 > > [ 10.423831] [] acpi_dev_get_resources+0x96/0xd7 > > [ 10.424505] [] acpi_i2c_get_info+0xe4/0x1a0 > > [ 10.425181] [] acpi_i2c_add_device+0x56/0xa0 > > [ 10.425856] [] acpi_ns_walk_namespace+0xe8/0x19d > > [ 10.426564] [] ? acpi_i2c_register_device+0x70/0x70 > > [ 10.427418] [] ? 
acpi_i2c_register_device+0x70/0x70 > > [ 10.428179] [] acpi_walk_namespace+0xa0/0xd5 > > [ 10.428858] [] i2c_register_adapter+0x369/0x500 > > [ 10.429499] [] i2c_add_adapter+0x5c/0x70 > > [ 10.430125] [] i801_probe+0x2bd/0x6a0 [i2c_i801] > > [ 10.431159] [] ? trace_hardirqs_on+0xd/0x10 > > [ 10.432196] [] local_pci_probe+0x42/0xa0 > > [ 10.432826] [] ? pci_match_device+0xca/0x110 > > [ 10.433460] [] pci_device_probe+0x103/0x150 > > [ 10.434083] [] driver_probe_device+0x22c/0x440 > > [ 10.434712] [] __driver_attach+0xd5/0x100 > > [ 10.435341] [] ? driver_probe_device+0x440/0x440 > > [ 10.435963] [] bus_for_each_dev+0x73/0xc0 > > [ 10.436676] [] driver_attach+0x1e/0x20 > > [ 10.437356] [] bus_add_driver+0x1c6/0x290 > > [ 10.437978] [] ? 0xc07e5000 > > [ 10.438699] [] driver_register+0x60/0xe0 > > [ 10.439502] [] ? 0xc07e5000 > > [ 10.440310] [] __pci_register_driver+0x5d/0x60 > > [ 10.440313] [] i2c_i801_init+0xaf/0x1000 [i2c_i801] > > [ 10.440314] [] ? 0xc07e5000 > > [ 10.440316] [] do_one_initcall+0x50/0x180 > > [ 10.440319] [] ? rcu_read_lock_sched_held+0x45/0x80 > > [ 10.440322] [] ? kmem_cache_alloc_trace+0x2d5/0x340 > > [ 10.440325] [] do_init_module+0x5f/0x1da > > [ 10.440329] [] load_module+0x2195/0x2950 > > [ 10.440331] [] ? __symbol_put+0x70/0x70 > > [ 10.440334] [] ? vfs_read+0x11b/0x130 > > [ 10.440337] [] SYSC_finit_module+0xe6/0x120 > > [ 10.440339] [] SyS_finit_module+0xe/0x10 > > [ 10.440340] [] do_syscall_64+0x61/0x170 > > [ 10.440343] [] entry_SYSCALL64_slow_path+0x25/0x25 > > > > > > I bisected this to commit 525e6fabeae2 ("i2c / ACPI: add support for > > ACPI reconfigure notifications"). > > > > The reason for the above message seems to be that acpi_i2c_get_info() > > configures the IRQs for any ACPI devices that have got some > > I2cSerialBus() resource, regardless of the actual adapter those are > > attached to. This behaviour is different from before that commit. 
> > > > My ACPI DSDT has got a PCI I2C adapter that isn't physically present, it > > seems. No clue why. > > > > That non-existent PCI I2C adapter is in turn I2cSerialBus()-referenced > > by some ACPI device that has got exactly this interrupt 37 assigned. > > > > So it looks like an attempt is made to configure this non-existent, > > ACPI-listed I2C slave's IRQs when an actually existing I2C adapter (i801 > > SMBus) gets probed. > > > > > > Let me know if I can provide you with any additional information, > > Can you send me acpidump from that that machine? Can you try if the following patch cures the problem? diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index da3a02ef4a31..4e6c6fde9bdb 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -205,7 +205,7 @@ static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, void *data, void **return_value) { struct i2c_adapter *adapter = dat
[PATCH 61/61] perf tools: Fix width computation for srcline sort entry
Adding header size to width computation for srcline sort entry, because it's possible to get empty data with ':0', which sets a width of 2 that is lower than the width needed to display the column header. Link: http://lkml.kernel.org/n/tip-twbp391v8v9f5idp584hl...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/hist.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 020efa9d3d74..e1be4132054d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -177,8 +177,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); - if (h->srcline) - hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); + if (h->srcline) { + len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); + hists__new_col_len(hists, HISTC_SRCLINE, len); + } if (h->srcfile) hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile)); -- 2.7.4
[PATCH 58/61] perf c2c report: Iterate node display in browser
Adding TUI support to switch between Node entry versions in real time with 'n' key. Link: http://lkml.kernel.org/n/tip-xqbw4h4dxig54wff7fd14...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 4 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 1adb7fb4866c..0902aba4cf19 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2204,6 +2204,10 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) case 's': c2c.symbol_full = !c2c.symbol_full; break; + case 'n': + c2c.node_info = (c2c.node_info + 1) % 3; + setup_nodes_header(); + break; case 'q': goto out; default: -- 2.7.4
[PATCH 46/61] perf c2c report: Add main browser
Adding the main cachelines TUI browser. It allows navigating through cachelines and display their details and callchains (implemented in following patches). Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 99 +- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/ui/browsers/hists.h | 1 + 3 files changed, 99 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 222b1a34c788..47d5408aeff8 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -14,6 +14,7 @@ #include "data.h" #include "sort.h" #include +#include "ui/browsers/hists.h" struct c2c_hists { struct histshists; @@ -53,6 +54,7 @@ struct perf_c2c { int node_info; bool show_src; + bool use_stdio; }; static struct perf_c2c c2c; @@ -1077,6 +1079,8 @@ static struct c2c_dimension dim_dcacheline = { .width = 18, }; +static struct c2c_header header_offset_tui = HEADER_LOW("Off"); + static struct c2c_dimension dim_offset = { .header = HEADER_BOTH("Data address", "Offset"), .name = "offset", @@ -1808,6 +1812,84 @@ static void perf_c2c__hists_fprintf(FILE *out) print_pareto(out); } +static void c2c_browser__update_nr_entries(struct hist_browser *hb) +{ + u64 nr_entries = 0; + struct rb_node *nd = rb_first(&hb->hists->entries); + + do { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + + if (!he->filtered) + nr_entries++; + + nd = rb_next(nd); + } while (nd); + + hb->nr_non_filtered_entries = nr_entries; +} + +static int perf_c2c_browser__title(struct hist_browser *browser, + char *bf, size_t size) +{ + scnprintf(bf, size, + "Shared Data Cache Line Table " + "(%lu entries)", browser->nr_non_filtered_entries); + return 0; +} + +static struct hist_browser* +perf_c2c_browser__new(struct hists *hists) +{ + struct hist_browser *browser = hist_browser__new(hists); + + if (browser) { + browser->title = perf_c2c_browser__title; + browser->c2c_filter = true; + } + + 
return browser; +} + +static int perf_c2c__hists_browse(struct hists *hists) +{ + struct hist_browser *browser; + int key = -1; + + browser = perf_c2c_browser__new(hists); + if (browser == NULL) + return -1; + + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + c2c_browser__update_nr_entries(browser); + + while (1) { + key = hist_browser__run(browser, "help"); + + switch (key) { + case 'q': + goto out; + default: + break; + } + } + +out: + hist_browser__delete(browser); + return 0; +} + +static void ui_quirks(bool stdio) +{ + if (!stdio) { + dim_offset.width = 5; + dim_offset.header = header_offset_tui; + } +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -1824,6 +1906,8 @@ static int perf_c2c__report(int argc, const char **argv) "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, "show extra node info in report (repeat for more info)"), + OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, + "Use the stdio interface"), OPT_END() }; int err = 0; @@ -1833,6 +1917,15 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, c2c_options); + if (c2c.use_stdio) + use_browser = 0; + else + use_browser = 1; + + ui_quirks(c2c.use_stdio); + + setup_browser(false); + if (!input_name || !strlen(input_name)) input_name = "perf.data"; @@ -1892,8 +1985,10 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__finish(); - use_browser = 0; - perf_c2c__hists_fprintf(stdout); + if (c2c.use_stdio) + perf_c2c__hists_fprintf(stdout); + else + perf_c2c__hists_browse(&c2c.hists.hists); out_session: perf_session__delete(session); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 77cf7a80e8d6..83fd2885d78a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -30,7 +30,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, static 
bool hist_browser_
[PATCH 54/61] perf c2c report: Allow to set cacheline sort fields
Allowing user to configure the way the single cacheline data are sorted after being sorted by offset. Adding 'c' option to specify sorting fields for single cacheline: -c, --coalesce coalesce fields: pid,tid,iaddr,dso It's allowed to use following combination of fields: pid - process pid tid - process tid iaddr - code address dso - shared object Link: http://lkml.kernel.org/n/tip-aka8z31umxoq2gqr5mjd8...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 119 --- 1 file changed, 102 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3541c94fff02..ff8a66ee7092 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -46,6 +46,8 @@ struct c2c_hist_entry { struct hist_entry he; }; +static char const *coalesce_default = "pid,tid,iaddr"; + struct perf_c2c { struct perf_tooltool; struct c2c_histshists; @@ -65,6 +67,11 @@ struct perf_c2c { int shared_clines; int display; + + const char *coalesce; + char*cl_sort; + char*cl_resort; + char*cl_output; }; enum { @@ -237,7 +244,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - c2c_hists = he__get_c2c_hists(he, "offset", 2); + c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2); if (!c2c_hists) goto free_mi_dup; @@ -1740,22 +1747,7 @@ static int resort_cl_cb(struct hist_entry *he) c2c_hists = c2c_he->hists; if (display && c2c_hists) { - c2c_hists__reinit(c2c_hists, - "percent_rmt_hitm," - "percent_lcl_hitm," - "percent_stores_l1hit," - "percent_stores_l1miss," - "offset," - "pid," - "tid," - "mean_rmt," - "mean_lcl," - "mean_load," - "cpucnt," - "symbol," - "dso," - "node", - "offset,rmt_hitm,lcl_hitm"); + c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); hists__collapse_resort(&c2c_hists->hists, NULL); hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); @@ -1999,6 +1991,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session) } fprintf(out, " Cachelines sort 
on: %s HITMs\n", c2c.display == DISPLAY_LCL ? "Local" : "Remote"); + fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort); } static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) @@ -2262,6 +2255,89 @@ static int setup_display(const char *str) return 0; } +#define for_each_token(__tok, __buf, __sep, __tmp) \ + for (__tok = strtok_r(__buf, __sep, &__tmp); __tok; \ +__tok = strtok_r(NULL, __sep, &__tmp)) + +static int build_cl_output(char *cl_sort) +{ + char *tok, *tmp, *buf = strdup(cl_sort); + bool add_pid = false; + bool add_tid = false; + bool add_iaddr = false; + bool add_sym = false; + bool add_dso = false; + bool add_src = false; + + if (!buf) + return -ENOMEM; + + for_each_token(tok, buf, ",", tmp) { + if (!strcmp(tok, "tid")) { + add_tid = true; + } else if (!strcmp(tok, "pid")) { + add_pid = true; + } else if (!strcmp(tok, "iaddr")) { + add_iaddr = true; + add_sym = true; + add_dso = true; + add_src = true; + } else if (!strcmp(tok, "dso")) { + add_dso = true; + } else if (strcmp(tok, "offset")) { + pr_err("unrecognized sort token: %s\n", tok); + return -EINVAL; + } + } + + if (asprintf(&c2c.cl_output, + "%s%s%s%s%s%s%s%s%s", + "percent_rmt_hitm," + "percent_lcl_hitm," + "percent_stores_l1hit," + "percent_stores_l1miss," + "offset,", + add_pid ? "pid," : "", + add_tid ? "tid," : "", + add_iaddr ? "iaddr," : "", + "mean_rmt," + "mean_lcl," + "mean_load," + "cpucnt,", + add_sym ? "symbol," : "", + ad
[PATCH 45/61] perf c2c report: Add stdio output support
Adding the --stdio option output support. The output tables are dumped directly to the stdio. $ perf c2c report = Shared Data Cache Line Table = # # Total - LLC Load Hitm - Store Reference --- Load Dram LLCTotal - Core Load Hit - -- LLC Load Hit -- # Cacheline records%hitmTotal Lcl RmtTotal L1Hit L1Miss Lcl Rmt Ld MissLoads FB L1 L2 Llc Rmt # .. ... ... ... ... ... ... ... ... ... ... ... ... ... # 0x88000235f840 170.00%000 17 170 0 000000 0 0 ... = Shared Cache Line Distribution Pareto = # # - HITM - -- Store Refs --Data address -- cycles -- cpu Shared # Rmt Lcl L1 Hit L1 Miss Offset Pid Tid rmt hitm lcl hitm load cntSymbol Object Node # ... ... ... ... .. ... . . # -- 00 170 0x88000235f840 -- 0.00%0.00%5.88%0.00% 0x011474 11474:kworker/u16:5 0 0 0 1 [k] rmap_walk_file [kernel.kallsyms] 0 0.00%0.00%5.88%0.00%0x1011474 11474:kworker/u16:5 0 0 0 1 [k] lock_page_memcg[kernel.kallsyms] 0 0.00%0.00% 11.76%0.00%0x2011474 11474:kworker/u16:5 0 0 0 1 [k] page_mapping [kernel.kallsyms] 0 0.00%0.00% 64.71%0.00%0x2811474 11474:kworker/u16:5 0 0 0 1 [k] __test_set_page_writeback [kernel.kallsyms] 0 0.00%0.00% 11.76%0.00%0x3011474 11474:kworker/u16:5 0 0 0 1 [k] page_mapped [kernel.kallsyms] 0 ... 
Link: http://lkml.kernel.org/n/tip-eorco9r0oeesjve77pkkg...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 83 1 file changed, 83 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d7b47c69aa07..222b1a34c788 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -13,6 +13,7 @@ #include "tool.h" #include "data.h" #include "sort.h" +#include struct c2c_hists { struct histshists; @@ -1727,6 +1728,85 @@ static int setup_nodes(struct perf_session *session) return 0; } +static void print_cacheline(struct c2c_hists *c2c_hists, + struct hist_entry *he_cl, + struct perf_hpp_list *hpp_list, + FILE *out) +{ + char bf[1000]; + struct perf_hpp hpp = { + .buf= bf, + .size = 1000, + }; + static bool once; + + if (!once) { + hists__fprintf_headers(&c2c_hists->hists, out); + once = true; + } else { + fprintf(out, "\n"); + } + + fprintf(out, " --\n"); + hist_entry__snprintf(he_cl, &hpp, hpp_list); + fprintf(out, "%s\n", bf); + fprintf(out, " --\n"); + + hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true); +} + +static void print_pareto(FILE *out) +{ + struct perf_hpp_list hpp_list; + struct rb_node *nd; + int ret; + + perf_hpp_list__init(&hpp_list); + ret = hpp_list__parse(&hpp_list, + "cl_rmt_hitm," + "cl_lcl_hitm," + "cl_stores_l1hit," + "cl_stores_l1miss," + "dcacheline", + NULL); + + if (WARN_ONCE(ret, "failed to setup sort entries\n")) + return; + + nd = rb_first(&c2c.hists.hists.entries); + + for (; nd; nd =
[PATCH 33/61] perf c2c report: Add hitm percent sort key
Adding HITM percent dimension key wrapper. It is to be displayed in the main cachelines overall output: percent_hitm It displays HITMs percentage for cacheline. It counts remote HITMs at the moment, but it is changed later to support local as well, based on the sort configuration. Link: http://lkml.kernel.org/n/tip-czd17qsh5u5z0yc1estz9...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 90 1 file changed, 90 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c5ca6daec2d6..82ad66e71401 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -562,6 +562,86 @@ tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return tot_recs_left - tot_recs_right; } +typedef double (get_percent_cb)(struct c2c_hist_entry *); + +static int +percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, get_percent_cb get_percent) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + double per; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + per = get_percent(c2c_he); + + if (use_browser) + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", width - 1, per); + else + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per); +} + +static double percent_hitm(struct c2c_hist_entry *c2c_he) +{ + struct c2c_hists *hists; + struct c2c_stats *stats; + struct c2c_stats *total; + int tot, st; + double p; + + hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); + stats = &c2c_he->stats; + total = &hists->stats; + + st = stats->rmt_hitm; + tot = total->rmt_hitm; + + p = tot ? 
(double) st / tot : 0; + + return 100 * p; +} + +static int +percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + double per; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + per = percent_hitm(c2c_he); + + snprintf(buf, 10, "%.2F%%", per); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +static int +percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_hitm); +} + +static int64_t +percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, +struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + double per_left; + double per_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + per_left = percent_hitm(c2c_left); + per_right = percent_hitm(c2c_right); + + return per_left - per_right; +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -765,6 +845,15 @@ static struct c2c_dimension dim_tot_loads = { .width = 7, }; +static struct c2c_dimension dim_percent_hitm = { + .header = HEADER_LOW("%hitm"), + .name = "percent_hitm", + .cmp= percent_hitm_cmp, + .entry = percent_hitm_entry, + .color = percent_hitm_color, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -792,6 +881,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_llcmiss, &dim_tot_recs, &dim_tot_loads, + &dim_percent_hitm, NULL, }; -- 2.7.4
Re: [PATCH 5/7] openrisc: Support both old (or32) and new (or1k) toolchain
On 09/19/2016 02:11 AM, Stafford Horne wrote: On Mon, 19 Sep 2016, Guenter Roeck wrote: On 09/18/2016 11:02 PM, Stafford Horne wrote: On Sun, 18 Sep 2016, Guenter Roeck wrote: > Tested-by: Guenter Roeck > > If you plan to handle openrisc going forward, it would be great if you > could > consider updating MAINTAINERS. The web site and git repository have been > unreachable > for a long time. Thank you, Updating maintainers was kind of on my plans, but I figured I need to prove that I kind of know what I am doing. The alternative would be to mark it as Orphaned. Which, for all practical purposes, would be the correct state right now. +CC The openrisc list Understood, I don't think we would want that to happen. Look at the entry today: OPENRISC ARCHITECTURE M: Jonas Bonn W: http://openrisc.net S: Maintained T: git git://openrisc.net/~jonas/linux F: arch/openrisc/ At the very least, W: and T: are incorrect and need to be updated or removed. Plus, apparently there is a L:, and "T: https://github.com/openrisc/linux" might be appropriate. Guenter
[PATCH 31/61] perf c2c report: Add total record sort key
Adding total record dimension key wrapper. It is to be displayed in the main cachelines overall output: tot_recs It displays sum of all cachelines accesses. Link: http://lkml.kernel.org/n/tip-wojujik7zzen770mxn295...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 64 1 file changed, 64 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e7e7890882c4..3f2f348479e3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -456,6 +456,61 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); } +static uint64_t total_records(struct c2c_stats *stats) +{ + uint64_t lclmiss, ldcnt, total; + + lclmiss = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hitm + + stats->rmt_hit; + + ldcnt= lclmiss + + stats->ld_fbhit + + stats->ld_l1hit + + stats->ld_l2hit + + stats->ld_llchit + + stats->lcl_hitm; + + total= ldcnt + + stats->st_l1hit + + stats->st_l1miss; + + return total; +} + +static int +tot_recs_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + uint64_t tot_recs; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + tot_recs = total_records(&c2c_he->stats); + + return snprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs); +} + +static int64_t +tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused, +struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + uint64_t tot_recs_left; + uint64_t tot_recs_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + tot_recs_left = total_records(&c2c_left->stats); + tot_recs_right = total_records(&c2c_right->stats); + + return tot_recs_left - tot_recs_right; +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -643,6 +698,14 @@ 
static struct c2c_dimension dim_ld_llcmiss = { .width = 7, }; +static struct c2c_dimension dim_tot_recs = { + .header = HEADER_BOTH("Total", "records"), + .name = "tot_recs", + .cmp= tot_recs_cmp, + .entry = tot_recs_entry, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -668,6 +731,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_llchit, &dim_ld_rmthit, &dim_ld_llcmiss, + &dim_tot_recs, NULL, }; -- 2.7.4
[PATCH 18/61] perf c2c report: Fallback to standard dimensions
Fallback to standard dimensions in case we don't find the dimension within c2c ones. Link: http://lkml.kernel.org/n/tip-w3yrcawal0dr1w9pcu4gy...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 6b58b537bc9d..a3481f86e2ae 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -213,8 +213,10 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) { struct c2c_fmt *c2c_fmt = get_format(name); - if (!c2c_fmt) - return -1; + if (!c2c_fmt) { + reset_dimensions(); + return output_field_add(hpp_list, name); + } perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt); return 0; @@ -224,8 +226,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) { struct c2c_fmt *c2c_fmt = get_format(name); - if (!c2c_fmt) - return -1; + if (!c2c_fmt) { + reset_dimensions(); + return sort_dimension__add(hpp_list, name, NULL, 0); + } perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt); return 0; -- 2.7.4
[PATCH 28/61] perf c2c report: Add loads related dimension keys
Adding 3 loads related dimension key wrappers. They are to be displayed in the main cachelines overall output: ld_fbhit, ld_l1hit, ld_l2hit They all display bare numbers of loads for FB (Fill Buffer), L1 and L2 cache. Link: http://lkml.kernel.org/n/tip-wxrzhy74zl8fvkvgjae3w...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 30 ++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index eb8bb158ad8a..8279033d9d83 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -412,6 +412,9 @@ STAT_FN(lcl_hitm) STAT_FN(store) STAT_FN(st_l1hit) STAT_FN(st_l1miss) +STAT_FN(ld_fbhit) +STAT_FN(ld_l1hit) +STAT_FN(ld_l2hit) #define HEADER_LOW(__h)\ { \ @@ -552,6 +555,30 @@ static struct c2c_dimension dim_cl_stores_l1miss = { .width = 7, }; +static struct c2c_dimension dim_ld_fbhit = { + .header = HEADER_SPAN("- Core Load Hit -", "FB", 2), + .name = "ld_fbhit", + .cmp= ld_fbhit_cmp, + .entry = ld_fbhit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_ld_l1hit = { + .header = HEADER_SPAN_LOW("L1"), + .name = "ld_l1hit", + .cmp= ld_l1hit_cmp, + .entry = ld_l1hit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_ld_l2hit = { + .header = HEADER_SPAN_LOW("L2"), + .name = "ld_l2hit", + .cmp= ld_l2hit_cmp, + .entry = ld_l2hit_entry, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -571,6 +598,9 @@ static struct c2c_dimension *dimensions[] = { &dim_stores_l1miss, &dim_cl_stores_l1hit, &dim_cl_stores_l1miss, + &dim_ld_fbhit, + &dim_ld_l1hit, + &dim_ld_l2hit, NULL, }; -- 2.7.4
[PATCH 38/61] perf c2c report: Add symbol and dso sort keys
Adding symbol and dso dimension key wrappers. They are to be displayed in the single cacheline output: symbol, dso They are wrappers for global sort_sym and sort_dso sort entries with c2c specific headers. Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrg...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2966a388ce8b..b3dcd590e97a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1104,6 +1104,17 @@ static struct c2c_dimension dim_tid = { .se = &sort_thread, }; +static struct c2c_dimension dim_symbol = { + .name = "symbol", + .se = &sort_sym, +}; + +static struct c2c_dimension dim_dso = { + .header = HEADER_BOTH("Shared", "Object"), + .name = "dso", + .se = &sort_dso, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1140,6 +1151,8 @@ static struct c2c_dimension *dimensions[] = { &dim_dram_rmt, &dim_pid, &dim_tid, + &dim_symbol, + &dim_dso, NULL, }; @@ -1254,12 +1267,17 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) { struct c2c_fmt *c2c_fmt = get_format(name); + struct c2c_dimension *dim; if (!c2c_fmt) { reset_dimensions(); return sort_dimension__add(hpp_list, name, NULL, 0); } + dim = c2c_fmt->dim; + if (dim == &dim_dso) + hpp_list->dso = 1; + perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt); return 0; } -- 2.7.4
[PATCH 34/61] perf c2c report: Add hitm/store percent related sort keys
Adding hitm/store percent dimension key wrappers. They are to be displayed in the single cacheline output: percent_rmt_hitm, percent_lcl_hitm, percent_stores_l1hit, percent_stores_l1miss They display percentage of HITMs/stores for specific offset in the cacheline. Link: http://lkml.kernel.org/n/tip-t365aosxtdut8sgrgn8mf...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 206 +++ 1 file changed, 206 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 82ad66e71401..0613669cd8b4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -642,6 +642,171 @@ percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return per_left - per_right; } +static struct c2c_stats *he_stats(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + return &c2c_he->stats; +} + +static struct c2c_stats *total_stats(struct hist_entry *he) +{ + struct c2c_hists *hists; + + hists = container_of(he->hists, struct c2c_hists, hists); + return &hists->stats; +} + +static double percent(int st, int tot) +{ + return tot ? 100. 
* (double) st / (double) tot : 0; +} + +#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f) + +#define PERCENT_FN(__f) \ +static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \ +{ \ + struct c2c_hists *hists; \ + \ + hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); \ + return percent(c2c_he->stats.__f, hists->stats.__f); \ +} + +PERCENT_FN(rmt_hitm) +PERCENT_FN(lcl_hitm) +PERCENT_FN(st_l1hit) +PERCENT_FN(st_l1miss) + +static int +percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, rmt_hitm); + char buf[10]; + + snprintf(buf, 10, "%.2F%%", per); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +static int +percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_rmt_hitm); +} + +static int64_t +percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, +struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, lcl_hitm); + per_right = PERCENT(right, lcl_hitm); + + return per_left - per_right; +} + +static int +percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, lcl_hitm); + char buf[10]; + + snprintf(buf, 10, "%.2F%%", per); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +static int +percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_lcl_hitm); +} + +static int64_t +percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, +struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, lcl_hitm); + per_right = PERCENT(right, lcl_hitm); + + return 
per_left - per_right; +} + +static int +percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, st_l1hit); + char buf[10]; + + snprintf(buf, 10, "%.2F%%", per); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +static int +percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_st_l1hit); +} + +static int64_t +percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, st_l1hit); + per_right = PERCENT(right, st_l1hit); + + return per_left - per_right; +} + +static int +percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); +
[PATCH 36/61] perf c2c report: Add pid sort key
Adding pid dimension key wrapper. It is to be displayed in the single cacheline output: pid We currently don't have a single 'pid' sort/display entry, which would output just pid number, hence adding it into c2c code. Link: http://lkml.kernel.org/n/tip-3o23qrspxc99b04ci1swl...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 25 + 1 file changed, 25 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 55f8b2fece3d..e17e01056284 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -810,6 +810,22 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, STAT_FN(lcl_dram) STAT_FN(rmt_dram) +static int +pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + + return snprintf(hpp->buf, hpp->size, "%*d", width, he->thread->pid_); +} + +static int64_t +pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return left->thread->pid_ - right->thread->pid_; +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -1074,6 +1090,14 @@ static struct c2c_dimension dim_dram_rmt = { .width = 8, }; +static struct c2c_dimension dim_pid = { + .header = HEADER_LOW("Pid"), + .name = "pid", + .cmp= pid_cmp, + .entry = pid_entry, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1108,6 +1132,7 @@ static struct c2c_dimension *dimensions[] = { &dim_percent_stores_l1miss, &dim_dram_lcl, &dim_dram_rmt, + &dim_pid, NULL, }; -- 2.7.4
[PATCH 37/61] perf c2c report: Add tid sort key
Adding tid dimension key wrapper. It is to be displayed in the single cacheline output: tid It's a wrapper for global sort_thread sort entry with c2c specific header. Link: http://lkml.kernel.org/n/tip-fr0socae5skzvz5qbkl85...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e17e01056284..2966a388ce8b 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1098,6 +1098,12 @@ static struct c2c_dimension dim_pid = { .width = 7, }; +static struct c2c_dimension dim_tid = { + .header = HEADER_LOW("Tid"), + .name = "tid", + .se = &sort_thread, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1133,6 +1139,7 @@ static struct c2c_dimension *dimensions[] = { &dim_dram_lcl, &dim_dram_rmt, &dim_pid, + &dim_tid, NULL, }; -- 2.7.4
Re: [PATCH v2 0/3] Armada 7k/8k CP110 system controller fixes
Hi Stephen, Did you have any chance to take a look at v2? Do you have any remarks? Best regards. Marcin 2016-09-06 19:31 GMT+02:00 Marcin Wojtas : > Hi, > > Here is the second version of the patchset adding fixes to CP110 > system controller clock driver. As requested during review, > an additional patch is submitted, which covers migration of the > driver to clk_hw registration and API. > > Any feedback would be very welcome. > > Best regards, > Marcin > > Changelog: > v1 -> v2 > * replace setting CLK_IS_BASIC flag with clearing init structure fields > with memset > * minor improvements of allocation and error checking > * add migration to clk_hw > > Marcin Wojtas (3): > clk: mvebu: fix setting unwanted flags in CP110 gate clock > clk: mvebu: dynamically allocate resources in Armada CP110 system > controller > clk: mvebu: migrate CP110 system controller to clk_hw API and > registration > > drivers/clk/mvebu/cp110-system-controller.c | 155 > +++- > 1 file changed, 81 insertions(+), 74 deletions(-) > > -- > 1.8.3.1 >
[PATCH 41/61] perf c2c report: Add cpu cnt sort key
Adding cpu count dimension key wrapper. It is to be displayed in the single cacheline output: cpucnt It displays number of distinct cpus that hit cacheline. Link: http://lkml.kernel.org/n/tip-ib2kdwam52fby9u2k3ij6...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 1990c64f18ff..a4fea832e677 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1015,6 +1015,20 @@ MEAN_ENTRY(mean_rmt_entry, rmt_hitm); MEAN_ENTRY(mean_lcl_entry, lcl_hitm); MEAN_ENTRY(mean_load_entry, load); +static int +cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + snprintf(buf, 10, "%d", bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt)); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -1341,6 +1355,14 @@ static struct c2c_dimension dim_mean_load = { .width = 8, }; +static struct c2c_dimension dim_cpucnt = { + .header = HEADER_BOTH("cpu", "cnt"), + .name = "cpucnt", + .cmp= empty_cmp, + .entry = cpucnt_entry, + .width = 8, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1383,6 +1405,7 @@ static struct c2c_dimension *dimensions[] = { &dim_mean_rmt, &dim_mean_lcl, &dim_mean_load, + &dim_cpucnt, NULL, }; -- 2.7.4
[PATCH 23/61] perf c2c report: Add dcacheline dimension key
Adding dcacheline dimension key support. It displays cacheline address as hex number. Using c2c wrapper to standard 'dcacheline' object to define own header and simple (just address) cacheline output. Link: http://lkml.kernel.org/n/tip-j5enppr8e7h27nskqhgq3...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 38 ++ 1 file changed, 38 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c21124e6bb63..060ee1050da9 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1,5 +1,6 @@ #include #include +#include #include "util.h" #include "debug.h" #include "builtin.h" @@ -7,6 +8,7 @@ #include "mem-events.h" #include "session.h" #include "hist.h" +#include "sort.h" #include "tool.h" #include "data.h" #include "sort.h" @@ -271,6 +273,33 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, text); } +static char *hex_str(u64 val) +{ + static char buf[20]; + + snprintf(buf, 20, "0x%" PRIx64, val); + return buf; +} + +static int64_t +dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return sort__dcacheline_cmp(left, right); +} + +static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + uint64_t addr = 0; + int width = c2c_width(fmt, hpp, he->hists); + + if (he->mem_info) + addr = cl_address(he->mem_info->daddr.addr); + + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr)); +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -306,12 +335,21 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, }, \ } +static struct c2c_dimension dim_dcacheline = { + .header = HEADER_LOW("Cacheline"), + .name = "dcacheline", + .cmp= dcacheline_cmp, + .entry = dcacheline_entry, + .width = 18, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN #undef HEADER_SPAN_LOW static struct c2c_dimension 
*dimensions[] = { + &dim_dcacheline, NULL, }; -- 2.7.4
[PATCH 30/61] perf c2c report: Add llc load miss dimension key
Adding LLC load miss dimension key wrapper. It is to be displayed in the main cachelines overall output: ld_llcmiss It displays bare number of LLC misses for cacheline. Link: http://lkml.kernel.org/n/tip-wojujik7zzen770mxn295...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 47 +++ 1 file changed, 47 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2cb5252c0623..e7e7890882c4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -418,6 +418,44 @@ STAT_FN(ld_l2hit) STAT_FN(ld_llchit) STAT_FN(rmt_hit) +static uint64_t llc_miss(struct c2c_stats *stats) +{ + uint64_t llcmiss; + + llcmiss = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hitm + + stats->rmt_hit; + + return llcmiss; +} + +static int +ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, +struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + return snprintf(hpp->buf, hpp->size, "%*lu", width, + llc_miss(&c2c_he->stats)); +} + +static int64_t +ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -597,6 +635,14 @@ static struct c2c_dimension dim_ld_rmthit = { .width = 8, }; +static struct c2c_dimension dim_ld_llcmiss = { + .header = HEADER_BOTH("LLC", "Ld Miss"), + .name = "ld_llcmiss", + .cmp= ld_llcmiss_cmp, + .entry = ld_llcmiss_entry, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -621,6 +667,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_l2hit, &dim_ld_llchit, &dim_ld_rmthit, + 
&dim_ld_llcmiss, NULL, }; -- 2.7.4
[PATCH 43/61] perf c2c report: Setup number of header lines for hists
Allow to setup number of header lines for c2c hists objects. Link: http://lkml.kernel.org/n/tip-4ilsf0ulubrd4y96g7tnp...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c540917a70c4..f0983d2b26e3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -100,11 +100,13 @@ static struct hist_entry_ops c2c_entry_ops = { }; static int c2c_hists__init(struct c2c_hists *hists, - const char *sort); + const char *sort, + int nr_header_lines); static struct c2c_hists* he__get_c2c_hists(struct hist_entry *he, - const char *sort) + const char *sort, + int nr_header_lines) { struct c2c_hist_entry *c2c_he; struct c2c_hists *hists; @@ -118,7 +120,7 @@ he__get_c2c_hists(struct hist_entry *he, if (!hists) return NULL; - ret = c2c_hists__init(hists, sort); + ret = c2c_hists__init(hists, sort, nr_header_lines); if (ret) free(hists); @@ -212,7 +214,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - c2c_hists = he__get_c2c_hists(he, "offset"); + c2c_hists = he__get_c2c_hists(he, "offset", 2); if (!c2c_hists) goto free_mi_dup; @@ -1596,7 +1598,8 @@ static int hpp_list__parse(struct perf_hpp_list *hpp_list, } static int c2c_hists__init(struct c2c_hists *hists, - const char *sort) + const char *sort, + int nr_header_lines) { __hists__init(&hists->hists, &hists->list); @@ -1607,6 +1610,9 @@ static int c2c_hists__init(struct c2c_hists *hists, */ perf_hpp_list__init(&hists->list); + /* Overload number of header lines.*/ + hists->list.nr_header_lines = nr_header_lines; + return hpp_list__parse(&hists->list, NULL, sort); } @@ -1735,7 +1741,8 @@ static int perf_c2c__report(int argc, const char **argv) file.path = input_name; - err = c2c_hists__init(&c2c.hists, "dcacheline"); + + err = c2c_hists__init(&c2c.hists, "dcacheline", 2); if (err) { pr_debug("Failed to initialize 
hists\n"); goto out; -- 2.7.4
[PATCH 29/61] perf c2c report: Add llc and remote loads related dimension keys
Adding 2 LLC load related dimension key wrappers. They are to be displayed in the main cachelines overall output: ld_lclhit, ld_rmthit They display bare numbers of LLC and remote loads for cacheline. Link: http://lkml.kernel.org/n/tip-ahjg0voaufefboemjuj9y...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 20 1 file changed, 20 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 8279033d9d83..2cb5252c0623 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -415,6 +415,8 @@ STAT_FN(st_l1miss) STAT_FN(ld_fbhit) STAT_FN(ld_l1hit) STAT_FN(ld_l2hit) +STAT_FN(ld_llchit) +STAT_FN(rmt_hit) #define HEADER_LOW(__h)\ { \ @@ -579,6 +581,22 @@ static struct c2c_dimension dim_ld_l2hit = { .width = 7, }; +static struct c2c_dimension dim_ld_llchit = { + .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1), + .name = "ld_lclhit", + .cmp= ld_llchit_cmp, + .entry = ld_llchit_entry, + .width = 8, +}; + +static struct c2c_dimension dim_ld_rmthit = { + .header = HEADER_SPAN_LOW("Rmt"), + .name = "ld_rmthit", + .cmp= rmt_hit_cmp, + .entry = rmt_hit_entry, + .width = 8, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -601,6 +619,8 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_fbhit, &dim_ld_l1hit, &dim_ld_l2hit, + &dim_ld_llchit, + &dim_ld_rmthit, NULL, }; -- 2.7.4
[PATCH 40/61] perf c2c report: Add stats related sort keys
Adding statistic dimension key wrapper. It is to be displayed in the single cacheline output: median, mean_rmt, mean_lcl, mean_load, stddev It displays statistics hits related to cacheline accesses. Link: http://lkml.kernel.org/n/tip-m1r4uc9lcykf1jhpvwk2g...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 80 1 file changed, 80 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 6b4224764ae4..1990c64f18ff 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -20,11 +20,20 @@ struct c2c_hists { struct c2c_statsstats; }; +struct compute_stats { + struct stats lcl_hitm; + struct stats rmt_hitm; + struct stats load; +}; + struct c2c_hist_entry { struct c2c_hists*hists; struct c2c_stats stats; unsigned long *cpuset; struct c2c_stats*node_stats; + + struct compute_stats cstats; + /* * must be at the end, * because of its callchain dynamic entry @@ -61,6 +70,10 @@ static void *c2c_he_zalloc(size_t size) if (!c2c_he->node_stats) return NULL; + init_stats(&c2c_he->cstats.lcl_hitm); + init_stats(&c2c_he->cstats.rmt_hitm); + init_stats(&c2c_he->cstats.load); + return &c2c_he->he; } @@ -120,6 +133,20 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, set_bit(sample->cpu, c2c_he->cpuset); } +static void compute_stats(struct c2c_hist_entry *c2c_he, + struct c2c_stats *stats, + u64 weight) +{ + struct compute_stats *cstats = &c2c_he->cstats; + + if (stats->rmt_hitm) + update_stats(&cstats->rmt_hitm, weight); + else if (stats->lcl_hitm) + update_stats(&cstats->lcl_hitm, weight); + else if (stats->load) + update_stats(&cstats->load, weight); +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -198,6 +225,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_add_stats(&c2c_hists->stats, &stats); c2c_add_stats(&c2c_he->node_stats[node], &stats); + compute_stats(c2c_he, &stats, sample->weight); + 
c2c_he__set_cpu(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); @@ -962,6 +991,30 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, return 0; } +static int +mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, double mean) +{ + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + + snprintf(buf, 10, "%6.0f", mean); + return snprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +#define MEAN_ENTRY(__func, __val) \ +static int \ +__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + struct c2c_hist_entry *c2c_he; \ + c2c_he = container_of(he, struct c2c_hist_entry, he); \ + return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val)); \ +} + +MEAN_ENTRY(mean_rmt_entry, rmt_hitm); +MEAN_ENTRY(mean_lcl_entry, lcl_hitm); +MEAN_ENTRY(mean_load_entry, load); + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -1264,6 +1317,30 @@ static struct c2c_dimension dim_node = { .width = 4, }; +static struct c2c_dimension dim_mean_rmt = { + .header = HEADER_SPAN("-- cycles --", "rmt hitm", 2), + .name = "mean_rmt", + .cmp= empty_cmp, + .entry = mean_rmt_entry, + .width = 8, +}; + +static struct c2c_dimension dim_mean_lcl = { + .header = HEADER_SPAN_LOW("lcl hitm"), + .name = "mean_lcl", + .cmp= empty_cmp, + .entry = mean_lcl_entry, + .width = 8, +}; + +static struct c2c_dimension dim_mean_load = { + .header = HEADER_SPAN_LOW("load"), + .name = "mean_load", + .cmp= empty_cmp, + .entry = mean_load_entry, + .width = 8, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1303,6 +1380,9 @@ static struct c2c_dimension *dimensions[] = { &dim_symbol, &dim_dso, &dim_node, +
[PATCH 35/61] perf c2c report: Add dram related sort keys
Adding dram related dimension key wrappers. They are to be displayed in the main cachelines overall output: dram_lcl, dram_rmt They display DRAM rmt/lcl access numbers for specific cacheline. Link: http://lkml.kernel.org/n/tip-tl3qqi9ehk6g1fla4z7y0...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 21 + 1 file changed, 21 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 0613669cd8b4..55f8b2fece3d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -807,6 +807,9 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return per_left - per_right; } +STAT_FN(lcl_dram) +STAT_FN(rmt_dram) + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -1055,6 +1058,22 @@ static struct c2c_dimension dim_percent_stores_l1miss = { .width = 7, }; +static struct c2c_dimension dim_dram_lcl = { + .header = HEADER_SPAN("--- Load Dram ", "Lcl", 1), + .name = "dram_lcl", + .cmp= lcl_dram_cmp, + .entry = lcl_dram_entry, + .width = 8, +}; + +static struct c2c_dimension dim_dram_rmt = { + .header = HEADER_SPAN_LOW("Rmt"), + .name = "dram_rmt", + .cmp= rmt_dram_cmp, + .entry = rmt_dram_entry, + .width = 8, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -1087,6 +1106,8 @@ static struct c2c_dimension *dimensions[] = { &dim_percent_lcl_hitm, &dim_percent_stores_l1hit, &dim_percent_stores_l1miss, + &dim_dram_lcl, + &dim_dram_rmt, NULL, }; -- 2.7.4
[PATCH 25/61] perf c2c report: Add iaddr dimension key
Adding iaddr dimension key support. It displays code address (as hex number) responsible for the accesses. Using c2c wrapper to standard 'symbol_iaddr' object to define own header and simple (just address) code address output. Link: http://lkml.kernel.org/n/tip-rhshygbst6kr75kju0muw...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 29 + 1 file changed, 29 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 086e337e9d7d..a97e6d6c3b9b 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -326,6 +326,26 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return (int64_t)(r - l); } +static int +iaddr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + uint64_t addr = 0; + int width = c2c_width(fmt, hpp, he->hists); + + if (he->mem_info) + addr = he->mem_info->iaddr.addr; + + return snprintf(hpp->buf, hpp->size, "%*s", width, hex_str(addr)); +} + +static int64_t +iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return sort__iaddr_cmp(left, right); +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -377,6 +397,14 @@ static struct c2c_dimension dim_offset = { .width = 18, }; +static struct c2c_dimension dim_iaddr = { + .header = HEADER_LOW("Code address"), + .name = "iaddr", + .cmp= iaddr_cmp, + .entry = iaddr_entry, + .width = 18, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -385,6 +413,7 @@ static struct c2c_dimension dim_offset = { static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, + &dim_iaddr, NULL, }; -- 2.7.4
linux-next: new scheduler messages span: 0-15 (max cpu_capacity = 589) when starting KVM guests
Dietmar, Ingo, Tejun, since commit cd92bfd3b8cb0ec2ee825e55a3aee704cd55aea9 sched/core: Store maximum per-CPU capacity in root domain I get tons of messages from the scheduler like [..] span: 0-15 (max cpu_capacity = 589) span: 0-15 (max cpu_capacity = 589) span: 0-15 (max cpu_capacity = 589) span: 0-15 (max cpu_capacity = 589) [..] whenever I start kvm guests with libvirt. The reason seems to be that libvirt via systemd/machined tries to move all guest vcpus into its cpuset and for whatever reason, the way it is done will always call rebuild_sched_domains from the cgroup code. While the message alone is somewhat of a nuisance, I think rebuilding the scheduling domains for moving kvm vcpus is really expensive. Tejun, do you have an idea what's going on here? Is libvirt using the cgroup interface wrong (e.g. also doing a memory migrate or whatever)? Christian
[PATCH 44/61] perf c2c report: Set final resort fields
Set resort/display fields for both cachelines and single cacheline displays. Cachelines are sorted on: rmt_hitm will be made configurable in following patches. Following fields are displayed for cachelines: dcacheline tot_recs percent_hitm tot_hitm,lcl_hitm,rmt_hitm stores,stores_l1hit,stores_l1miss dram_lcl,dram_rmt ld_llcmiss tot_loads ld_fbhit,ld_l1hit,ld_l2hit ld_lclhit,ld_rmthit The single cacheline is sorted by: offset,rmt_hitm,lcl_hitm will be made configurable in following patches. Following fields are displayed for each cacheline: percent_rmt_hitm percent_lcl_hitm percent_stores_l1hit percent_stores_l1miss offset pid tid mean_rmt mean_lcl mean_load cpucnt symbol dso node Link: http://lkml.kernel.org/n/tip-0rclftliywdq9qr2sjbug...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 31 +++ 1 file changed, 31 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f0983d2b26e3..d7b47c69aa07 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1642,6 +1642,23 @@ static int resort_cl_cb(struct hist_entry *he) c2c_hists = c2c_he->hists; if (c2c_hists) { + c2c_hists__reinit(c2c_hists, + "percent_rmt_hitm," + "percent_lcl_hitm," + "percent_stores_l1hit," + "percent_stores_l1miss," + "offset," + "pid," + "tid," + "mean_rmt," + "mean_lcl," + "mean_load," + "cpucnt," + "symbol," + "dso," + "node", + "offset,rmt_hitm,lcl_hitm"); + hists__collapse_resort(&c2c_hists->hists, NULL); hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); } @@ -1774,6 +1791,20 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } + c2c_hists__reinit(&c2c.hists, + "dcacheline," + "tot_recs," + "percent_hitm," + "tot_hitm,lcl_hitm,rmt_hitm," + "stores,stores_l1hit,stores_l1miss," + "dram_lcl,dram_rmt," + "ld_llcmiss," + "tot_loads," + "ld_fbhit,ld_l1hit,ld_l2hit," + "ld_lclhit,ld_rmthit", + "rmt_hitm" + ); + ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); 
hists__collapse_resort(&c2c.hists.hists, NULL); -- 2.7.4
[PATCH 32/61] perf c2c report: Add total loads sort key
Adding total loads dimension key wrapper. It is to be displayed in the main cachelines overall output: tot_loads It displays sum of all load accesses for cacheline. Link: http://lkml.kernel.org/n/tip-czd17qsh5u5z0yc1estz9...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-c2c.c | 60 1 file changed, 60 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3f2f348479e3..c5ca6daec2d6 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -511,6 +511,57 @@ tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return tot_recs_left - tot_recs_right; } +static uint64_t total_loads(struct c2c_stats *stats) +{ + uint64_t lclmiss, ldcnt; + + lclmiss = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hitm + + stats->rmt_hit; + + ldcnt= lclmiss + + stats->ld_fbhit + + stats->ld_l1hit + + stats->ld_l2hit + + stats->ld_llchit + + stats->lcl_hitm; + + return ldcnt; +} + +static int +tot_loads_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + uint64_t tot_recs; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + tot_recs = total_loads(&c2c_he->stats); + + return snprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs); +} + +static int64_t +tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + uint64_t tot_recs_left; + uint64_t tot_recs_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + tot_recs_left = total_loads(&c2c_left->stats); + tot_recs_right = total_loads(&c2c_right->stats); + + return tot_recs_left - tot_recs_right; +} + #define HEADER_LOW(__h)\ { \ .line[1] = {\ @@ -706,6 +757,14 @@ static struct c2c_dimension dim_tot_recs = { .width = 7, }; +static struct c2c_dimension 
dim_tot_loads = { + .header = HEADER_BOTH("Total", "Loads"), + .name = "tot_loads", + .cmp= tot_loads_cmp, + .entry = tot_loads_entry, + .width = 7, +}; + #undef HEADER_LOW #undef HEADER_BOTH #undef HEADER_SPAN @@ -732,6 +791,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_rmthit, &dim_ld_llcmiss, &dim_tot_recs, + &dim_tot_loads, NULL, }; -- 2.7.4
[PATCH 12/61] perf tools: Make hists__fprintf_headers function global
Will be used from external places in following patches. Link: http://lkml.kernel.org/n/tip-ydj205bfen9fgflnv39hn...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/stdio/hist.c | 2 +- tools/perf/util/hist.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 3434d571ddd1..f6d5ac8772f4 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -697,7 +697,7 @@ hists__fprintf_standard_headers(struct hists *hists, return hpp_list->nr_header_lines + 2; } -static int hists__fprintf_headers(struct hists *hists, FILE *fp) +int hists__fprintf_headers(struct hists *hists, FILE *fp) { char bf[1024]; struct perf_hpp dummy_hpp = { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index aa5ddfa1fa22..0e3493e33175 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -488,5 +488,6 @@ int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_list *hpp_list); +int hists__fprintf_headers(struct hists *hists, FILE *fp); #endif /* __PERF_HIST_H */ -- 2.7.4
[PATCH 07/61] perf tools: Make reset_dimensions global
Will be used from external places in following patches. Link: http://lkml.kernel.org/n/tip-7garqfmx5izaqysde9jik...@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/sort.c | 2 +- tools/perf/util/sort.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1884d7f9b9d2..9e1f6f75a50f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2748,7 +2748,7 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) return ret; } -static void reset_dimensions(void) +void reset_dimensions(void) { unsigned int i; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 28c0524c8702..3f743bf2acd4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -268,4 +268,5 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i bool is_strict_order(const char *order); int hpp_dimension__add_output(unsigned col); +void reset_dimensions(void); #endif /* __PERF_SORT_H */ -- 2.7.4