This is a note to let you know that I've just added the patch titled "sparc: Use popc if possible for hweight routines."
to the 3.0-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: sparc-use-popc-if-possible-for-hweight-routines.patch and it can be found in the queue-3.0 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <sta...@kernel.org> know about it. >From 4599e05015ea32b05c3052c8b109e8f1144c8711 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <da...@davemloft.net> Date: Fri, 29 Jul 2011 09:42:07 -0700 Subject: sparc: Use popc if possible for hweight routines. From: "David S. Miller" <da...@davemloft.net> [ Upstream commit ef7c4d4675d2a9206f913f26ca1a5cd41bff9d41 ] Just like powerpc, we code patch at boot time. Signed-off-by: David S. Miller <da...@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gre...@suse.de> --- arch/sparc/include/asm/bitops_64.h | 42 ++---------------------------- arch/sparc/kernel/entry.h | 7 +++++ arch/sparc/kernel/setup_64.c | 27 +++++++++++++++++++ arch/sparc/kernel/sparc_ksyms_64.c | 7 +++++ arch/sparc/kernel/vmlinux.lds.S | 6 +++- arch/sparc/lib/Makefile | 2 - arch/sparc/lib/hweight.S | 51 +++++++++++++++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 40 deletions(-) create mode 100644 arch/sparc/lib/hweight.S --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -42,45 +42,11 @@ extern void change_bit(unsigned long nr, * of bits set) of a N-bit word */ -#ifdef ULTRA_HAS_POPULATION_COUNT +extern unsigned long __arch_hweight64(__u64 w); +extern unsigned int __arch_hweight32(unsigned int w); +extern unsigned int __arch_hweight16(unsigned int w); +extern unsigned int __arch_hweight8(unsigned int w); -static inline unsigned int __arch_hweight64(unsigned long w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w)); - return res; -} - -static inline unsigned int __arch_hweight32(unsigned int w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" 
(res) : "r" (w & 0xffffffff)); - return res; -} - -static inline unsigned int __arch_hweight16(unsigned int w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffff)); - return res; -} - -static inline unsigned int __arch_hweight8(unsigned int w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xff)); - return res; -} - -#else - -#include <asm-generic/bitops/arch_hweight.h> - -#endif #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/lock.h> #endif /* __KERNEL__ */ --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -42,6 +42,13 @@ extern void fpsave(unsigned long *fpregs extern void fpload(unsigned long *fpregs, unsigned long *fsr); #else /* CONFIG_SPARC32 */ +struct popc_3insn_patch_entry { + unsigned int addr; + unsigned int insns[3]; +}; +extern struct popc_3insn_patch_entry __popc_3insn_patch, + __popc_3insn_patch_end; + extern void __init per_cpu_patch(void); extern void __init sun4v_patch(void); extern void __init boot_cpu_id_too_large(int cpu); --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -272,6 +272,30 @@ void __init sun4v_patch(void) sun4v_hvapi_init(); } +static void __init popc_patch(void) +{ + struct popc_3insn_patch_entry *p3; + + p3 = &__popc_3insn_patch; + while (p3 < &__popc_3insn_patch_end) { + unsigned long addr = p3->addr; + + *(unsigned int *) (addr + 0) = p3->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = p3->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + *(unsigned int *) (addr + 8) = p3->insns[2]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 8)); /* flush the word just stored at addr + 8 */ + + p3++; + } +} + #ifdef CONFIG_SMP void __init boot_cpu_id_too_large(int cpu) { @@ -424,6 +448,9 @@ static void __init init_sparc64_elf_hwca sparc64_elf_hwcap = cap | mdesc_caps; report_hwcaps(sparc64_elf_hwcap); + + if (sparc64_elf_hwcap & AV_SPARC_POPC) + popc_patch(); } 
void __init setup_arch(char **cmdline_p) --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/init.h> +#include <linux/bitops.h> #include <asm/system.h> #include <asm/cpudata.h> @@ -38,5 +39,11 @@ EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); +/* from hweight.S */ +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); + /* Exporting a symbol from /init/main.c */ EXPORT_SYMBOL(saved_command_line); --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -107,7 +107,11 @@ SECTIONS *(.sun4v_2insn_patch) __sun4v_2insn_patch_end = .; } - + .popc_3insn_patch : { + __popc_3insn_patch = .; + *(.popc_3insn_patch) + __popc_3insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -37,7 +37,7 @@ lib-$(CONFIG_SPARC64) += GENmemcpy.o GEN lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o -lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o +lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o obj-y += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o --- /dev/null +++ b/arch/sparc/lib/hweight.S @@ -0,0 +1,51 @@ +#include <linux/linkage.h> + + .text + .align 32 +ENTRY(__arch_hweight8) + ba,pt %xcc, __sw_hweight8 + nop + nop +ENDPROC(__arch_hweight8) + .section .popc_3insn_patch, "ax" + .word __arch_hweight8 + sllx %o0, 64-8, %g1 + retl + popc %g1, %o0 + .previous + +ENTRY(__arch_hweight16) + ba,pt %xcc, __sw_hweight16 + nop + nop +ENDPROC(__arch_hweight16) + .section .popc_3insn_patch, "ax" + .word __arch_hweight16 + sllx %o0, 64-16, %g1 + retl + popc %g1, %o0 + .previous + +ENTRY(__arch_hweight32) + ba,pt %xcc, __sw_hweight32 + nop + nop 
+ENDPROC(__arch_hweight32) + .section .popc_3insn_patch, "ax" + .word __arch_hweight32 + sllx %o0, 64-32, %g1 + retl + popc %g1, %o0 + .previous + +ENTRY(__arch_hweight64) + ba,pt %xcc, __sw_hweight64 + nop + nop +ENDPROC(__arch_hweight64) + .section .popc_3insn_patch, "ax" + .word __arch_hweight64 + retl + popc %o0, %o0 + nop + .previous Patches currently in stable-queue which might be from da...@davemloft.net are queue-3.0/ipv4-send-gratuitous-arp-for-secondary-ip-addresses-also.patch queue-3.0/ipv4-constrain-ufo-fragment-sizes-to-multiples-of-8-bytes.patch queue-3.0/sparc-detect-and-handle-ultrasparc-t3-cpu-types.patch queue-3.0/bonding-fix-string-comparison-errors.patch queue-3.0/sparc-use-popc-if-possible-for-hweight-routines.patch queue-3.0/drivers-net-niu.c-adjust-array-index.patch queue-3.0/sparc-set-reboot-cmd-using-reboot-data-hypervisor-call-if-available.patch queue-3.0/net-audit-drivers-to-identify-those-needing-iff_tx_skb_sharing-cleared.patch queue-3.0/sparc-size-mondo-queues-more-sanely.patch queue-3.0/sparc-don-t-leave-sparc_pmu_type-null-on-sun4v.patch queue-3.0/ipv4-use-rt_tos-after-some-rt_tos-conversions.patch queue-3.0/net-cap-number-of-elements-for-sendmmsg.patch queue-3.0/sch_sfq-fix-sfq_enqueue.patch queue-3.0/sparc-sanitize-cpu-feature-detection-and-reporting.patch queue-3.0/sparc-use-popc-when-possible-for-ffs-__ffs-ffz.patch queue-3.0/sparc-add-t3-sun4v-cpu-type-and-hypervisor-group-defines.patch queue-3.0/net-fix-security_socket_sendmsg-bypass-problem.patch queue-3.0/sparc-use-hweight64-in-popc-emulation.patch queue-3.0/0002-net-Compute-protocol-sequence-numbers-and-fragment-I.patch queue-3.0/sparc-add-some-missing-hypervisor-api-groups.patch queue-3.0/fix-cdc-phonet-build.patch queue-3.0/sparc-minor-tweaks-to-niagara-page-copy-clear.patch queue-3.0/ipv4-fix-the-reusing-of-routing-cache-entries.patch queue-3.0/ipv6-make-fragment-identifications-less-predictable.patch queue-3.0/sis190-rx-filter-init-is-needed-for-mac-address-change.patch 
queue-3.0/net-adjust-array-index.patch queue-3.0/sparc-access-kernel-tsb-using-physical-addressing-when-possible.patch queue-3.0/net-allow-netif_carrier-to-be-called-safely-from-irq.patch queue-3.0/sparc-fix-build-with-debug_pagealloc-enabled.patch queue-3.0/icmp-fix-regression-in-nexthop-resolution-during-replies.patch queue-3.0/xfrm-fix-key-lengths-for-rfc3686-ctr-aes.patch queue-3.0/0001-crypto-Move-md5_transform-to-lib-md5.c.patch queue-3.0/gre-fix-improper-error-handling.patch queue-3.0/net-add-iff_skb_tx_shared-flag-to-priv_flags.patch queue-3.0/r8169-add-support-for-d-link-530t-rev-c1-kernel-bug-38862.patch queue-3.0/sparc-don-t-do-expensive-hypervisor-pcr-write-unless-necessary.patch queue-3.0/net-sendmmsg-should-only-return-an-error-if-no-messages-were-sent.patch queue-3.0/mpt2sas-fixed-big-indian-issues-on-32-bit-ppc.patch _______________________________________________ stable mailing list stable@linux.kernel.org http://linux.kernel.org/mailman/listinfo/stable