[PATCH] perf annotate: cross arch annotate support fixes for ARM
For ARM we remove the list that contains non-arm insns, and instead add more maintainable branch instruction regex logic. Signed-off-by: Kim PhillipsAcked-by: Ravi Bangoria Cc: Namhyung Kim --- tools/perf/util/annotate.c | 177 + 1 file changed, 67 insertions(+), 110 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b2c6cf3..52316f3 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,6 +26,7 @@ const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; +static regex_t arm_call_insn, arm_jump_insn; static struct ins *ins__find(const char *name, const char *norm_arch); static int disasm_line__parse(char *line, char **namep, char **rawp); @@ -449,98 +450,7 @@ static struct ins instructions_x86[] = { { .name = "retq", .ops = _ops, }, }; -static struct ins instructions_arm[] = { - { .name = "add", .ops = _ops, }, - { .name = "addl", .ops = _ops, }, - { .name = "addq", .ops = _ops, }, - { .name = "addw", .ops = _ops, }, - { .name = "and", .ops = _ops, }, - { .name = "b", .ops = _ops, }, /* might also be a call */ - { .name = "bcc", .ops = _ops, }, - { .name = "bcs", .ops = _ops, }, - { .name = "beq", .ops = _ops, }, - { .name = "bge", .ops = _ops, }, - { .name = "bgt", .ops = _ops, }, - { .name = "bhi", .ops = _ops, }, - { .name = "bl",.ops = _ops, }, - { .name = "bls", .ops = _ops, }, - { .name = "blt", .ops = _ops, }, - { .name = "blx", .ops = _ops, }, - { .name = "bne", .ops = _ops, }, - { .name = "bts", .ops = _ops, }, - { .name = "call", .ops = _ops, }, - { .name = "callq", .ops = _ops, }, - { .name = "cmp", .ops = _ops, }, - { .name = "cmpb", .ops = _ops, }, - { .name = "cmpl", .ops = _ops, }, - { .name = "cmpq", .ops = _ops, }, - { .name = "cmpw", .ops = _ops, }, - { .name = "cmpxch", .ops = _ops, }, - { .name = "dec", .ops = _ops, }, - { .name = "decl", .ops = _ops, }, - { .name = "imul", .ops = _ops, }, - { .name = "inc", .ops = _ops, }, - { .name = "incl", .ops = _ops, 
}, - { .name = "ja",.ops = _ops, }, - { .name = "jae", .ops = _ops, }, - { .name = "jb",.ops = _ops, }, - { .name = "jbe", .ops = _ops, }, - { .name = "jc",.ops = _ops, }, - { .name = "jcxz", .ops = _ops, }, - { .name = "je",.ops = _ops, }, - { .name = "jecxz", .ops = _ops, }, - { .name = "jg",.ops = _ops, }, - { .name = "jge", .ops = _ops, }, - { .name = "jl",.ops = _ops, }, - { .name = "jle", .ops = _ops, }, - { .name = "jmp", .ops = _ops, }, - { .name = "jmpq", .ops = _ops, }, - { .name = "jna", .ops = _ops, }, - { .name = "jnae", .ops = _ops, }, - { .name = "jnb", .ops = _ops, }, - { .name = "jnbe", .ops = _ops, }, - { .name = "jnc", .ops = _ops, }, - { .name = "jne", .ops = _ops, }, - { .name = "jng", .ops = _ops, }, - { .name = "jnge", .ops = _ops, }, - { .name = "jnl", .ops = _ops, }, - { .name = "jnle", .ops = _ops, }, - { .name = "jno", .ops = _ops, }, - { .name = "jnp", .ops = _ops, }, - { .name = "jns", .ops = _ops, }, - { .name = "jnz", .ops = _ops, }, - { .name = "jo",.ops = _ops, }, - { .name = "jp",.ops = _ops, }, - { .name = "jpe", .ops = _ops, }, - { .name = "jpo", .ops = _ops, }, - { .name = "jrcxz", .ops = _ops, }, - { .name = "js",.ops = _ops, }, - { .name = "jz",.ops = _ops, }, - { .name = "lea", .ops = _ops, }, - { .name = "lock", .ops = _ops, }, - { .name = "mov", .ops = _ops, }, - { .name = "movb", .ops = _ops, }, - { .name = "movdqa",.ops = _ops, }, - { .name = "movl", .ops = _ops, }, - { .name = "movq", .ops = _ops, }, - { .name = "movslq", .ops = _ops, }, - { .name = "movzbl", .ops = _ops, }, - { .name = "movzwl", .ops = _ops, }, - { .name = "nop", .ops = _ops, }, - { .name = "nopl", .ops = _ops, }, - { .name = "nopw", .ops = _ops, }, - { .name = "or",.ops = _ops, }, - { .name = "orl", .ops = _ops, }, - { .name = "test", .ops = _ops, }, - { .name = "testb", .ops = _ops, }, - { .name = "testl", .ops = _ops, }, - { .name = "xadd", .ops = _ops, }, - { .name = "xbeginl", .ops = _ops, }, - { .name = "xbeginq", .ops = _ops, }, - { .name = 
"retq", .ops = _ops, }, -}; - -struct instructions_powerpc { +struct
[PATCH 3/3] powerpc/pseries: Add bitmap to track updated LMBs
Recent updates to the PAPR for memory hotplug has now made the reserved field of the ibm,dynamic-memory property a capabilities field. To support this update we can no longer use the reserved field to track which LMBs have been updated during a DLPAR operation. This patch adds a bitfield to track any LMBs that are updated during a DLPAR operation so that we can roll back to the state proior to the DLPAR operation if an error occurs. Signed-off-by: Nathan Fontenot--- arch/powerpc/platforms/pseries/hotplug-memory.c | 25 ++- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index a0371d1..4be1b61 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -27,6 +27,7 @@ static struct drconf_mem { u32 num_lmbs; struct of_drconf_cell *lmbs; + unsigned long *lmb_update_map; } drmem; #define for_each_lmb_range(lmb, start, end)\ @@ -46,15 +47,15 @@ static void lmb_set_aa_index(u32 lmb, u32 aa_index) { } static bool lmb_updated(u32 lmb) { - return drmem.lmbs[lmb].reserved; + return test_bit(lmb, drmem.lmb_update_map); } static void mark_lmb_updated(u32 lmb) { - drmem.lmbs[lmb].reserved = 1; + set_bit(lmb, drmem.lmb_update_map); } -static void rm_lmb_update(u32 lmb) { - drmem.lmbs[lmb].reserved = 0; +static void clear_lmb_updates(void) { + bitmap_zero(drmem.lmb_update_map, drmem.num_lmbs); } static bool lmb_reserved(u32 lmb) { @@ -99,6 +100,11 @@ static void __init update_drconf_memory(void) p = prop->value; drmem.num_lmbs = be32_to_cpu(*p++); drmem.lmbs = (struct of_drconf_cell *)p; + + if (!drmem.lmb_update_map) { + drmem.lmb_update_map = kmalloc(BITS_TO_LONGS(drmem.num_lmbs), + GFP_KERNEL); + } } unsigned long pseries_memory_block_size(void) @@ -484,8 +490,6 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) if (rc) pr_err("Failed to add LMB back, drc index %x\n", lmb_drc_index(lmb)); - - 
rm_lmb_update(lmb); } rc = -EINVAL; @@ -496,8 +500,6 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) pr_info("Memory at %llx was hot-removed\n", lmb_base_address(lmb)); - - rm_lmb_update(lmb); } rc = 0; } @@ -588,8 +590,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (rc) pr_err("Failed to add LMB, drc index %x\n", lmb_drc_index(lmb)); - - rm_lmb_update(lmb); } rc = -EINVAL; } else { @@ -599,8 +599,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) pr_info("Memory at %llx (drc index %x) was hot-removed\n", lmb_base_address(lmb), lmb_drc_index(lmb)); - - rm_lmb_update(lmb); } } @@ -732,7 +730,6 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) pr_info("Memory at %llx (drc index %x) was hot-added\n", lmb_base_address(lmb), lmb_drc_index(lmb)); - rm_lmb_update(lmb); } } @@ -830,7 +827,6 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) pr_info("Memory at %llx (drc index %x) was hot-added\n", lmb_base_address(lmb), lmb_drc_index(lmb)); - rm_lmb_update(lmb); } } @@ -843,6 +839,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) int rc; lock_device_hotplug(); + clear_lmb_updates(); switch (hp_elog->action) { case PSERIES_HP_ELOG_ACTION_ADD:
[PATCH 2/3] powerpc/pseries: Remove no longer needed rtas_hp_event flag
Remove the use of the rtas_hp_event flag as it is no longer needed. The management of the static dynamic-memory property does not go through of_update_property so we do not need to set this flag. Signed-off-by: Nathan Fontenot --- arch/powerpc/platforms/pseries/hotplug-memory.c |5 - 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5173e49..a0371d1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -24,8 +24,6 @@ #include #include "pseries.h" -static bool rtas_hp_event; - static struct drconf_mem { u32 num_lmbs; struct of_drconf_cell *lmbs; @@ -929,9 +927,6 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr) __be32 *p; int i, rc = -EINVAL; - if (rtas_hp_event) - return 0; - update_drconf_memory(); memblock_size = pseries_memory_block_size();
[PATCH 1/3] powerpc/pseries: maintain single copy of ibm,dynamic-memory property
The ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory property of the device-tree can be fairly big on systems with a large amount of memory. On a system with 1 TB of memory (256 MB LMBs) the property size is 94k; this equates to roughly a 30MB property size for a 32 TB system. This file size is not necessarily huge, but the need to update this property every time we DLPAR add or remove an LMB could be problematic. Every time the property is updated a new copy of the property is made with the previous copy being added to the old_properties list. Due to the lack of reference counting on properties old versions of a property are never freed. On a large 32TB system we could easily do several thousand memory add/remove operations and thus create several thousand copies of this property. This seems a bit wasteful with respect to system resources. This patch changes the pseries hotplug memory code to maintain a static reference to this property instead of creating a new copy for every LMB that we add or remove. In doing this we have to ensure that the property remains in BE format so a set of accessor methods are provided to get/set values from the property in the proper cpu format. This should provide an improvement in kernel resources as we will no longer have un-referenced copies of this property. 
Signed-off-by: Nathan Fontenot--- arch/powerpc/platforms/pseries/hotplug-memory.c | 520 ++- 1 file changed, 225 insertions(+), 295 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index b708c5c..5173e49 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -26,6 +26,83 @@ static bool rtas_hp_event; +static struct drconf_mem { + u32 num_lmbs; + struct of_drconf_cell *lmbs; +} drmem; + +#define for_each_lmb_range(lmb, start, end)\ + for ((lmb) = (start); (lmb) < (end); (lmb)++) +#define for_each_lmb(lmb) for_each_lmb_range((lmb), 0, drmem.num_lmbs) + +static u64 lmb_base_address(u32 lmb) { + return be64_to_cpu(drmem.lmbs[lmb].base_addr); +} + +static u32 lmb_drc_index(u32 lmb) { + return be32_to_cpu(drmem.lmbs[lmb].drc_index); +} + +static void lmb_set_aa_index(u32 lmb, u32 aa_index) { + drmem.lmbs[lmb].aa_index = cpu_to_be32(aa_index); +} + +static bool lmb_updated(u32 lmb) { + return drmem.lmbs[lmb].reserved; +} + +static void mark_lmb_updated(u32 lmb) { + drmem.lmbs[lmb].reserved = 1; +} + +static void rm_lmb_update(u32 lmb) { + drmem.lmbs[lmb].reserved = 0; +} + +static bool lmb_reserved(u32 lmb) { + return be32_to_cpu(drmem.lmbs[lmb].flags) & DRCONF_MEM_RESERVED; +} + +static bool lmb_assigned(u32 lmb) { + return be32_to_cpu(drmem.lmbs[lmb].flags) & DRCONF_MEM_ASSIGNED; +} + +static void mark_lmb_assigned(u32 lmb) { + drmem.lmbs[lmb].flags |= cpu_to_be32(DRCONF_MEM_ASSIGNED); +} + +static void mark_lmb_unassigned(u32 lmb) { + drmem.lmbs[lmb].flags &= cpu_to_be32(~DRCONF_MEM_ASSIGNED); +} + +static int dlpar_acquire_lmb(u32 lmb) { + return dlpar_acquire_drc(be32_to_cpu(drmem.lmbs[lmb].drc_index)); +} + +static int dlpar_release_lmb(u32 lmb) { + return dlpar_release_drc(be32_to_cpu(drmem.lmbs[lmb].drc_index)); +} + +static void __init update_drconf_memory(void) +{ + struct device_node *dn; + struct property *prop; + __be32 *p; 
+ + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dn) + return; + + prop = of_find_property(dn, "ibm,dynamic-memory", NULL); + of_node_put(dn); + if (!prop) + return; + + p = prop->value; + drmem.num_lmbs = be32_to_cpu(*p++); + drmem.lmbs = (struct of_drconf_cell *)p; +} + unsigned long pseries_memory_block_size(void) { struct device_node *np; @@ -99,98 +176,6 @@ static struct property *dlpar_clone_property(struct property *prop, return new_prop; } -static struct property *dlpar_clone_drconf_property(struct device_node *dn) -{ - struct property *prop, *new_prop; - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i; - - prop = of_find_property(dn, "ibm,dynamic-memory", NULL); - if (!prop) - return NULL; - - new_prop = dlpar_clone_property(prop, prop->length); - if (!new_prop) - return NULL; - - /* Convert the property to cpu endian-ness */ - p = new_prop->value; - *p = be32_to_cpu(*p); - - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - - for (i = 0; i < num_lmbs; i++) { - lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); - lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); - lmbs[i].flags = be32_to_cpu(lmbs[i].flags); - } - - return new_prop; -} -
[PATCH 0/3] powerpc/pseries: Manage single copy of ibm,dynamic-memory
The ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory property of the device-tree can be fairly big on systems with a large amount of memory. On a system with 1 TB of memory (256 MB LMBs) the property size is 94k; this equates to roughly a 30MB property size for a 32 TB system. This file size is not necessarily huge, but the need to update this property every time we DLPAR add or remove an LMB could be problematic. Every time the property is updated a new copy of the property is made with the previous copy being added to the old_properties list. Due to the lack of reference counting on properties old versions of a property are never freed. On a large 32TB system we could easily do several thousand memory add/remove operations and thus create several thousand copies of this property. This seems a bit wasteful with respect to system resources. Patch 1/3: This patch changes the pseries hotplug memory code to maintain a static reference to this property instead of creating a new copy for every LMB that we add or remove. In doing this we have to ensure that the property remains in BE format so a set of accessor methods are provided to get/set values from the property in the proper cpu format. Patch 2/3: Remove a no longer needed rtas_hp_event flag. Patch 3/3: Add a bit field to track updated LMBs during DLPAR add/remove operations. This should provide an improvement in kernel resources as we will no longer have un-referenced copies of this property. -Nathan --- Nathan Fontenot (3): powerpc/pseries: maintain single copy of ibm,dynamic-memory property powerpc/pseries: Remove no longer needed rtas_hp_event flag powerpc/pseries: Add bitmap to track updated LMBs arch/powerpc/platforms/pseries/hotplug-memory.c | 526 ++- 1 file changed, 224 insertions(+), 302 deletions(-)
[PATCH] powerpc/32: fix again csum_partial_copy_generic()
commit 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user()") introduced a bug when destination address is odd and len is lower than cacheline size. In that case the resulting csum value doesn't have to be rotated one byte because the cache-aligned copy part is skipped so no alignment is performed. Fixes: 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user()") Cc: sta...@vger.kernel.org Reported-by: Alessio Igor BoganiSigned-off-by: Christophe Leroy Tested-by: Alessio Igor Bogani --- arch/powerpc/lib/checksum_32.S | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 0a57fe6..aa8214f 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - rlwinm r0,r4,3,0x8 - rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */ - cmplwi cr7,r0,0/* is destination address even ? */ addic r12,r6,0 addir6,r4,-4 neg r0,r4 addir4,r3,-4 andi. r0,r0,CACHELINE_MASK/* # bytes to start of cache line */ + crset 4*cr7+eq beq 58f cmplw 0,r5,r0 /* is this more than total to do? */ blt 63f /* if not much to do */ + rlwinm r7,r6,3,0x8 + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r7,0/* is destination address even ? */ andi. r8,r0,3 /* get it word-aligned first */ mtctr r8 beq+61f -- 2.1.0
Re: Suspected regression?
Hi Christophe, On 26 August 2016 at 14:46, Christophe Leroywrote: [...] > Can you try the patch below ? I have identified that in case the packet is > smaller than a cacheline, it doesn't get cache-aligned so the result shall > not be rotated in case of odd dest address. > > This patch goes in addition to the previous fix (1bc8b816cb805) as it fixes > a different case. > > Christophe > > diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S > index 68f6862..3971cfb 100644 > --- a/arch/powerpc/lib/checksum_32.S > +++ b/arch/powerpc/lib/checksum_32.S > @@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic) > stw r7,12(r1) > stw r8,8(r1) > > - rlwinm r0,r4,3,0x8 > - rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte > */ > - cmplwi cr7,r0,0/* is destination address even ? */ > addic r12,r6,0 > addir6,r4,-4 > neg r0,r4 > addir4,r3,-4 > andi. r0,r0,CACHELINE_MASK/* # bytes to start of cache line */ > + crset 4*cr7+eq > beq 58f > > cmplw 0,r5,r0 /* is this more than total to do? */ > blt 63f /* if not much to do */ > + rlwinm r7,r6,3,0x8 > + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte > */ > + cmplwi cr7,r7,0/* is destination address even ? */ > andi. r8,r0,3 /* get it word-aligned first */ > mtctr r8 > beq+61f Yeah! It fixes my problem! Thank you very much! Ciao, Alessio
Re: Suspected regression?
Hi Alessio, Le 26/08/2016 à 04:32, Scott Wood a écrit : On Tue, 2016-08-23 at 13:34 +0200, Christophe Leroy wrote: Le 23/08/2016 à 11:20, Alessio Igor Bogani a écrit : Hi Christophe, Sorry for delay in reply I was on vacation. On 6 August 2016 at 11:29, christophe leroywrote: Alessio, Le 05/08/2016 à 09:51, Christophe Leroy a écrit : Le 19/07/2016 à 23:52, Scott Wood a écrit : On Tue, 2016-07-19 at 12:00 +0200, Alessio Igor Bogani wrote: Hi all, I have got two boards MVME5100 (MPC7410 cpu) and MVME7100 (MPC8641D cpu) for which I use the same cross-compiler (ppc7400). I tested these against kernel HEAD to found that these don't boot anymore (PID 1 crash). Bisecting results in first offending commit: 7aef4136566b0539a1a98391181e188905e33401 Removing it from HEAD make boards boot properly again. A third system based on P2010 isn't affected at all. Is it a regression or I have made something wrong? I booted both my next branch, and Linus's master on MPC8641HPCN and didn't see this -- though possibly your RFS is doing something different. Maybe that's the difference with P2010 as well. Is there any way you can debug the cause of the crash? Or send me a minimal RFS that demonstrates the problem (ideally with debug symbols on the userspace binaries)? I got from Alessio the below information: systemd[1]: Caught , core dump failed (child 137, code=killed, status=7/BUS). systemd[1]: Freezing execution. What can generate SIGBUS ? And shouldn't we also get some KERN_ERR trace, something like "unhandled signal 7 at ." ? As far as I can see, SIGBUS is mainly generated from alignment exception. According to 7410 Reference Manual, alignment exception can happen in the following cases: * An operand of a dcbz instruction is on a page that is write-through or cache-inhibited for a virtual mode access. * An attempt to execute a dcbz instruction occurs when the cache is disabled or locked. Could try with below patch to check if the dcbz insn is causing the SIGBUS ? 
Unfortunately that patch doesn't solve the problem. Is there a chance that cache behavior could settled by board firmware (PPCBug on the MPC7410 board and MotLoad on the MPC8641D one)? In that case what do you suggest me to looking for? If the removal of dcbz doesn't solve the issue, I don't think it is a cache related issue. As far as I understood, your init gets a SIGBUS signal, right ? Then we must identify the reason for that sigbus. My guess would be errors demand-loading a page via NFS. One approach might be to hack up the code so that both versions of csum_partial_copy_generic() are present, and call both each time. If the results differ or the copied bytes are wrong, then spit out a dump of the details. Can you try the patch below ? I have identified that in case the packet is smaller than a cacheline, it doesn't get cache-aligned so the result shall not be rotated in case of odd dest address. This patch goes in addition to the previous fix (1bc8b816cb805) as it fixes a different case. Christophe diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 68f6862..3971cfb 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - rlwinm r0,r4,3,0x8 - rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */ - cmplwi cr7,r0,0/* is destination address even ? */ addic r12,r6,0 addir6,r4,-4 neg r0,r4 addir4,r3,-4 andi. r0,r0,CACHELINE_MASK/* # bytes to start of cache line */ + crset 4*cr7+eq beq 58f cmplw 0,r5,r0 /* is this more than total to do? */ blt 63f /* if not much to do */ + rlwinm r7,r6,3,0x8 + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r7,0/* is destination address even ? */ andi. r8,r0,3 /* get it word-aligned first */ mtctr r8 beq+61f --
Re: [PATCH v6 2/7] perf annotate: Add cross arch annotate support
Hi Kim, I've tested your patch on x86 and powerpc and it looks fine to me. Can you please put your signed-off-by. Please add Act-by: Ravi Bangoriaas well. Regards, -Ravi On Wednesday 24 August 2016 02:06 AM, Kim Phillips wrote: > On Tue, 23 Aug 2016 11:17:16 +0900 > Namhyung Kim wrote: > >> On Tue, Aug 23, 2016 at 8:01 AM, Kim Phillips wrote: >>> On Fri, 19 Aug 2016 18:29:33 +0530 >>> Ravi Bangoria wrote: >>> Changes in v6: - Instead of adding only those instructions defined in #ifdef __arm__, add all instructions from default table to arm table. >>> Thanks, I've gone through the list and removed all not-ARM >>> instructions, and added some missing ARM branch instructions: >> Can we use regex patterns instead? > Yes, that helps prevent mistakes updating instruction lists - how does > this look?: > > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > index b2c6cf3..52316f3 100644 > --- a/tools/perf/util/annotate.c > +++ b/tools/perf/util/annotate.c > @@ -26,6 +26,7 @@ > const char *disassembler_style; > const char *objdump_path; > static regex_tfile_lineno; > +static regex_tarm_call_insn, arm_jump_insn; > > static struct ins *ins__find(const char *name, const char *norm_arch); > static int disasm_line__parse(char *line, char **namep, char **rawp); > @@ -449,98 +450,7 @@ static struct ins instructions_x86[] = { > { .name = "retq", .ops = _ops, }, > }; > > -static struct ins instructions_arm[] = { > - { .name = "add", .ops = _ops, }, > - { .name = "addl", .ops = _ops, }, > - { .name = "addq", .ops = _ops, }, > - { .name = "addw", .ops = _ops, }, > - { .name = "and", .ops = _ops, }, > - { .name = "b", .ops = _ops, }, /* might also be a call */ > - { .name = "bcc", .ops = _ops, }, > - { .name = "bcs", .ops = _ops, }, > - { .name = "beq", .ops = _ops, }, > - { .name = "bge", .ops = _ops, }, > - { .name = "bgt", .ops = _ops, }, > - { .name = "bhi", .ops = _ops, }, > - { .name = "bl",.ops = _ops, }, > - { .name = "bls", .ops = _ops, }, > - { .name = 
"blt", .ops = _ops, }, > - { .name = "blx", .ops = _ops, }, > - { .name = "bne", .ops = _ops, }, > - { .name = "bts", .ops = _ops, }, > - { .name = "call", .ops = _ops, }, > - { .name = "callq", .ops = _ops, }, > - { .name = "cmp", .ops = _ops, }, > - { .name = "cmpb", .ops = _ops, }, > - { .name = "cmpl", .ops = _ops, }, > - { .name = "cmpq", .ops = _ops, }, > - { .name = "cmpw", .ops = _ops, }, > - { .name = "cmpxch", .ops = _ops, }, > - { .name = "dec", .ops = _ops, }, > - { .name = "decl", .ops = _ops, }, > - { .name = "imul", .ops = _ops, }, > - { .name = "inc", .ops = _ops, }, > - { .name = "incl", .ops = _ops, }, > - { .name = "ja",.ops = _ops, }, > - { .name = "jae", .ops = _ops, }, > - { .name = "jb",.ops = _ops, }, > - { .name = "jbe", .ops = _ops, }, > - { .name = "jc",.ops = _ops, }, > - { .name = "jcxz", .ops = _ops, }, > - { .name = "je",.ops = _ops, }, > - { .name = "jecxz", .ops = _ops, }, > - { .name = "jg",.ops = _ops, }, > - { .name = "jge", .ops = _ops, }, > - { .name = "jl",.ops = _ops, }, > - { .name = "jle", .ops = _ops, }, > - { .name = "jmp", .ops = _ops, }, > - { .name = "jmpq", .ops = _ops, }, > - { .name = "jna", .ops = _ops, }, > - { .name = "jnae", .ops = _ops, }, > - { .name = "jnb", .ops = _ops, }, > - { .name = "jnbe", .ops = _ops, }, > - { .name = "jnc", .ops = _ops, }, > - { .name = "jne", .ops = _ops, }, > - { .name = "jng", .ops = _ops, }, > - { .name = "jnge", .ops = _ops, }, > - { .name = "jnl", .ops = _ops, }, > - { .name = "jnle", .ops = _ops, }, > - { .name = "jno", .ops = _ops, }, > - { .name = "jnp", .ops = _ops, }, > - { .name = "jns", .ops = _ops, }, > - { .name = "jnz", .ops = _ops, }, > - { .name = "jo",.ops = _ops, }, > - { .name = "jp",.ops = _ops, }, > - { .name = "jpe", .ops = _ops, }, > - { .name = "jpo", .ops = _ops, }, > - { .name = "jrcxz", .ops = _ops, }, > - { .name = "js",.ops = _ops, }, > - { .name = "jz",.ops = _ops, }, > - { .name = "lea", .ops = _ops, }, > - { .name = "lock", .ops = _ops, }, > - { .name 
= "mov", .ops = _ops, }, > - { .name = "movb", .ops = _ops, }, > - { .name = "movdqa",.ops = _ops, }, > - { .name = "movl", .ops = _ops, }, > - { .name = "movq", .ops = _ops, }, > - { .name = "movslq", .ops = _ops, }, > - { .name = "movzbl", .ops = _ops, }, > - { .name = "movzwl", .ops = _ops, }, > - {
Re: [PATCH] powerpc/fsl_pci: Size upper inbound window based on RAM size
Hi Scott, thanks for the patch! This one works for my setup: T4240, 12GB Ram and Radeon E6760. On 2016-08-26 08:38, Scott Wood wrote: This allows PCI devices that can only address (e.g.) 36 or 40 bit DMA to use direct DMA, at the cost of not being able to DMA to non-RAM addresses (this doesn't affect MSIs as there is a separate dedicated window for that) which we wouldn't have been able to do anyway if the RAM size didn't trigger the creation of the second inbound window. It also fixes an off-by-one error that set dma_direct_ops on PCI devices whose dma mask could address all the space below the DMA offset (previously 40 bits), but not the window that starts at the DMA offset. Signed-off-by: Scott WoodCc: Tillmann Heidsieck --- Tested-by: Tillmann Heidsieck
Re: [PATCH 00/44] usb: don't print on ENOMEM
Hi, On Thu, 25 Aug 2016 19:38:52 +0200 Wolfram Sang wrote: > Here is my next series to save memory by removing unneeded strings. It removes > in the usb subsystem all unspecific error messages after calling malloc-based > functions, i.e. (devm_)k[zcm]alloc. kmalloc prints enough information in that > case. If the message was specific (e.g. "can't save CLEAR_TT_BUFFER state"), I > left it. This series saves ~4.5KB of "out of memory" permutations in .text and > .rodata. For modified lines, (x == NULL) was replaced with (!NULL) as well. s/!NULL/!x/ Lothar Waßmann
Re: [RFC PATCH] powerpc: fsl_pci: fix inbound ATMU entries for systems with >4G RAM
On 08/26/2016 12:55 AM, Scott Wood wrote: > On 08/26/2016 12:26 AM, Tillmann Heidsieck wrote: >> On 2016-08-24 23:39, Scott Wood wrote: >>> BTW, for some reason your patch is not showing up in Patchwork. >> >> Are there some known pitfalls when sending patches to Patchwork? > > It's not the first time I've seen certain people's patches not show up > there, but I don't know what the root cause is. I do see the patch on Patchwork now; I guess it was just slow. -Scott
[PATCH] powerpc/fsl_pci: Size upper inbound window based on RAM size
This allows PCI devices that can only address (e.g.) 36 or 40 bit DMA to use direct DMA, at the cost of not being able to DMA to non-RAM addresses (this doesn't affect MSIs as there is a separate dedicated window for that) which we wouldn't have been able to do anyway if the RAM size didn't trigger the creation of the second inbound window. It also fixes an off-by-one error that set dma_direct_ops on PCI devices whose dma mask could address all the space below the DMA offset (previously 40 bits), but not the window that starts at the DMA offset. Signed-off-by: Scott WoodCc: Tillmann Heidsieck --- arch/powerpc/sysdev/fsl_pci.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 0ef9df4..d3a5974 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -111,8 +111,7 @@ static struct pci_ops fsl_indirect_pcie_ops = .write = indirect_write_config, }; -#define MAX_PHYS_ADDR_BITS 40 -static u64 pci64_dma_offset = 1ull << MAX_PHYS_ADDR_BITS; +static u64 pci64_dma_offset; #ifdef CONFIG_SWIOTLB static void setup_swiotlb_ops(struct pci_controller *hose) @@ -132,12 +131,10 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) return -EIO; /* -* Fixup PCI devices that are able to DMA to above the physical -* address width of the SoC such that we can address any internal -* SoC address from across PCI if needed +* Fix up PCI devices that are able to DMA to the large inbound +* mapping that allows addressing any RAM address from across PCI. 
*/ - if ((dev_is_pci(dev)) && - dma_mask >= DMA_BIT_MASK(MAX_PHYS_ADDR_BITS)) { + if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { set_dma_ops(dev, _direct_ops); set_dma_offset(dev, pci64_dma_offset); } @@ -387,6 +384,7 @@ static void setup_pci_atmu(struct pci_controller *hose) mem_log++; piwar = (piwar & ~PIWAR_SZ_MASK) | (mem_log - 1); + pci64_dma_offset = 1ULL << mem_log; if (setup_inbound) { /* Setup inbound memory window */ -- 2.7.4
Re: linux-next: build warnings after merge of the kbuild tree
On Fri, Aug 26, 2016 at 01:58:03PM +1000, Nicholas Piggin wrote: > On Mon, 22 Aug 2016 20:47:58 +1000 > Nicholas Pigginwrote: > > > On Fri, 19 Aug 2016 20:44:55 +1000 > > Nicholas Piggin wrote: > > > > > On Fri, 19 Aug 2016 10:37:00 +0200 > > > Michal Marek wrote: > > > > > > > On 2016-08-19 07:09, Stephen Rothwell wrote: > > > > [snip] > > > > > > > > > > > > I may be missing something, but genksyms generates the crc's off the > > > > > preprocessed C source code and we don't have any for the asm files > > > > > ... > > > > > > > > Of course you are right. Which means that we are losing type information > > > > for these exports for CONFIG_MODVERSIONS purposes. I guess it's > > > > acceptable, since the asm functions are pretty basic and their > > > > signatures do not change. > > > > > > I don't completely agree. It would be nice to have the functionality > > > still there. > > > > > > What happens if you just run cmd_modversions on the as rule? It relies on > > > !defined(__ASSEMBLY__), but we're feeding the result to genksyms, not as. > > > It would require the header be included in the .S file and be protected > > > for > > > asm builds. > > > > > > This seems like it *could* be made to work, but there's a few problems. > > > > - .h files are not made for C consumption. Matter of manually adding the > > ifdef guards, which isn't terrible. > > > > - .S files do not all include their .h where the C declaration is. Also > > will cause some churn but doable and maybe not completely unreasonable. > > > > - genksyms parser barfs when it hits the assembly of the .S file. Best > > way to fix that seems just send the #include and EXPORT_SYMBOL lines > > from the .S to the preprocessor. That's a bit of a rabbit hole too, with > > some .S files being included, etc. > > > > I'm not sure what to do here. If nobody cares and we lose CRCs for .S > > exports, then okay we can whitelist those relocs easily. 
If we don't want > > to lose the functionality, the above might work but it's a bit intrusive > > and is going to require another cycle of prep patches to go through arch > > code first. > > > > Or suggestions for alternative approach? > > Here is a quick patch that I think should catch missing CRCs in > architecture independent way. If we merge something like this, we > can whitelist the symbols in arch/powerpc so people get steered to > the right place. > > Powerpc seems to be the only one really catching this, and it's > only as a side effect of a test run for CONFIG_RELOCATABLE kernels, > which means version failures probably slipped through other archs. > > I'll clean it up, do some more testing, and submit it unless > anybody dislikes it or has a better way to do it. > > Thanks, > Nick > > > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index 4b8ffd3..1efc454 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -609,6 +609,7 @@ static void handle_modversions(struct module *mod, struct > elf_info *info, > { > unsigned int crc; > enum export export; > + int is_crc = 0; should that not be a bool here ? > > if ((!is_vmlinux(mod->name) || mod->is_dot_o) && > strncmp(symname, "__ksymtab", 9) == 0) > @@ -618,6 +619,7 @@ static void handle_modversions(struct module *mod, struct > elf_info *info, > > /* CRC'd symbol */ > if (strncmp(symname, CRC_PFX, strlen(CRC_PFX)) == 0) { > + is_crc = 1; is_crc = true; > crc = (unsigned int) sym->st_value; > sym_update_crc(symname + strlen(CRC_PFX), mod, crc, > export); thx! hofrat
Re: [PATCH v6 2/7] perf annotate: Add cross arch annotate support
Hi, On Tue, Aug 23, 2016 at 03:36:17PM -0500, Kim Phillips wrote: > On Tue, 23 Aug 2016 11:17:16 +0900 > Namhyung Kim wrote: > > > On Tue, Aug 23, 2016 at 8:01 AM, Kim Phillips wrote: > > > On Fri, 19 Aug 2016 18:29:33 +0530 > > > Ravi Bangoria wrote: > > > > > >> Changes in v6: > > >> - Instead of adding only those instructions defined in #ifdef __arm__, > > >> add all instructions from default table to arm table. > > > Thanks, I've gone through the list and removed all not-ARM > > > instructions, and added some missing ARM branch instructions: > > > > Can we use regex patterns instead? > > Yes, that helps prevent mistakes updating instruction lists - how does > this look?: Much better! Thanks, Namhyung > > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c > index b2c6cf3..52316f3 100644 > --- a/tools/perf/util/annotate.c > +++ b/tools/perf/util/annotate.c > @@ -26,6 +26,7 @@ > const char *disassembler_style; > const char *objdump_path; > static regex_t file_lineno; > +static regex_t arm_call_insn, arm_jump_insn; > > static struct ins *ins__find(const char *name, const char *norm_arch); > static int disasm_line__parse(char *line, char **namep, char **rawp); > @@ -449,98 +450,7 @@ static struct ins instructions_x86[] = { > { .name = "retq", .ops = _ops, }, > }; > > -static struct ins instructions_arm[] = { > - { .name = "add", .ops = _ops, }, > - { .name = "addl", .ops = _ops, }, > - { .name = "addq", .ops = _ops, }, > - { .name = "addw", .ops = _ops, }, > - { .name = "and", .ops = _ops, }, > - { .name = "b", .ops = _ops, }, /* might also be a call */ > - { .name = "bcc", .ops = _ops, }, > - { .name = "bcs", .ops = _ops, }, > - { .name = "beq", .ops = _ops, }, > - { .name = "bge", .ops = _ops, }, > - { .name = "bgt", .ops = _ops, }, > - { .name = "bhi", .ops = _ops, }, > - { .name = "bl", .ops = _ops, }, > - { .name = "bls", .ops = _ops, }, > - { .name = "blt", .ops = _ops, }, > - { .name = "blx", .ops = _ops, }, > - { .name = "bne", .ops = _ops, }, 
> - { .name = "bts", .ops = _ops, }, > - { .name = "call", .ops = _ops, }, > - { .name = "callq", .ops = _ops, }, > - { .name = "cmp", .ops = _ops, }, > - { .name = "cmpb", .ops = _ops, }, > - { .name = "cmpl", .ops = _ops, }, > - { .name = "cmpq", .ops = _ops, }, > - { .name = "cmpw", .ops = _ops, }, > - { .name = "cmpxch", .ops = _ops, }, > - { .name = "dec", .ops = _ops, }, > - { .name = "decl", .ops = _ops, }, > - { .name = "imul", .ops = _ops, }, > - { .name = "inc", .ops = _ops, }, > - { .name = "incl", .ops = _ops, }, > - { .name = "ja",.ops = _ops, }, > - { .name = "jae", .ops = _ops, }, > - { .name = "jb",.ops = _ops, }, > - { .name = "jbe", .ops = _ops, }, > - { .name = "jc",.ops = _ops, }, > - { .name = "jcxz", .ops = _ops, }, > - { .name = "je",.ops = _ops, }, > - { .name = "jecxz", .ops = _ops, }, > - { .name = "jg",.ops = _ops, }, > - { .name = "jge", .ops = _ops, }, > - { .name = "jl",.ops = _ops, }, > - { .name = "jle", .ops = _ops, }, > - { .name = "jmp", .ops = _ops, }, > - { .name = "jmpq", .ops = _ops, }, > - { .name = "jna", .ops = _ops, }, > - { .name = "jnae", .ops = _ops, }, > - { .name = "jnb", .ops = _ops, }, > - { .name = "jnbe", .ops = _ops, }, > - { .name = "jnc", .ops = _ops, }, > - { .name = "jne", .ops = _ops, }, > - { .name = "jng", .ops = _ops, }, > - { .name = "jnge", .ops = _ops, }, > - { .name = "jnl", .ops = _ops, }, > - { .name = "jnle", .ops = _ops, }, > - { .name = "jno", .ops = _ops, }, > - { .name = "jnp", .ops = _ops, }, > - { .name = "jns", .ops = _ops, }, > - { .name = "jnz", .ops = _ops, }, > - { .name = "jo",.ops = _ops, }, > - { .name = "jp",.ops = _ops, }, > - { .name = "jpe", .ops = _ops, }, > - { .name = "jpo", .ops = _ops, }, > - { .name = "jrcxz", .ops = _ops, }, > - { .name = "js",.ops = _ops, }, > - { .name = "jz",.ops = _ops, }, > - { .name = "lea", .ops = _ops, }, > - { .name = "lock", .ops = _ops, }, > - { .name = "mov", .ops = _ops, }, > - { .name = "movb", .ops = _ops, }, > - { .name = "movdqa",.ops = 
_ops, }, > - { .name = "movl", .ops = _ops, }, > - { .name = "movq", .ops = _ops, }, > - { .name = "movslq", .ops = _ops, }, > - { .name = "movzbl", .ops = _ops, }, > - { .name = "movzwl", .ops = _ops, }, > - { .name = "nop", .ops = _ops, }, > - { .name = "nopl", .ops = _ops, }, > - { .name = "nopw", .ops = _ops, }, > - {