Re: [PATCH v2 09/76] ARC: Checksum/byteorder/swab routines

2013-01-18 Thread Vineet Gupta
On Friday 18 January 2013 07:51 PM, Arnd Bergmann wrote:
> On Friday 18 January 2013, Vineet Gupta wrote:
>> TBD: do_csum still needs to be written in asm
> Do you actually expect a lot of improvement in do_csum?
> I would hope that gcc can actually generate a pretty
> good version of it, unless you have some 64-bit add-with-carry
> instruction or something else that you could make use of.

We do have 32-bit add-with-carry, and it has its own set of micro-architectural
hazards and stalls which deserve careful insn scheduling. So yes, the mileage
would vary, but it certainly deserves a try.

>
>> Signed-off-by: Vineet Gupta 
> Acked-by: Arnd Bergmann 

Thanks,
-Vineet

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 09/76] ARC: Checksum/byteorder/swab routines

2013-01-18 Thread Arnd Bergmann
On Friday 18 January 2013, Vineet Gupta wrote:
> TBD: do_csum still needs to be written in asm

Do you actually expect a lot of improvement in do_csum?
I would hope that gcc can actually generate a pretty
good version of it, unless you have some 64-bit add-with-carry
instruction or something else that you could make use of.

> Signed-off-by: Vineet Gupta 

Acked-by: Arnd Bergmann 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 09/76] ARC: Checksum/byteorder/swab routines

2013-01-18 Thread Vineet Gupta
TBD: do_csum still needs to be written in asm

Signed-off-by: Vineet Gupta 
---
 arch/arc/include/asm/byteorder.h |   18 +++
 arch/arc/include/asm/checksum.h  |  101 ++
 arch/arc/include/asm/swab.h  |   98 
 3 files changed, 217 insertions(+), 0 deletions(-)
 create mode 100644 arch/arc/include/asm/byteorder.h
 create mode 100644 arch/arc/include/asm/checksum.h
 create mode 100644 arch/arc/include/asm/swab.h

diff --git a/arch/arc/include/asm/byteorder.h b/arch/arc/include/asm/byteorder.h
new file mode 100644
index 0000000..9da71d4
--- /dev/null
+++ b/arch/arc/include/asm/byteorder.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARC_BYTEORDER_H
+#define __ASM_ARC_BYTEORDER_H
+
+#ifdef CONFIG_CPU_BIG_ENDIAN	/* select the byteorder header matching the configured endianness */
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#endif /* __ASM_ARC_BYTEORDER_H */
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
new file mode 100644
index 0000000..1095729
--- /dev/null
+++ b/arch/arc/include/asm/checksum.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Joern Rennecke  : Jan 2012
+ *  -Insn Scheduling improvements to csum core routines.
+ *  = csum_fold( ) largely derived from ARM version.
+ *  = ip_fast_csum( ) to have module scheduling
+ *  -gcc 4.4.x broke networking. Alias analysis needed to be primed.
+ *   worked around by adding memory clobber to ip_fast_csum( )
+ *
+ * vineetg: May 2010
+ *  -Rewrote ip_fast_csum( ) and csum_fold( ) with fast inline asm
+ */
+
+#ifndef _ASM_ARC_CHECKSUM_H
+#define _ASM_ARC_CHECKSUM_H
+
+/*
+ * Fold a 32-bit partial checksum down to its final 16-bit form.
+ *
+ *  The two 16-bit halves ("swords") of the sum are added, any carry out of
+ *  the low 16 bits is added back in, and the 16-bit result is returned inverted.
+ */
+static inline __sum16 csum_fold(__wsum s)
+{
+   /* rotate by 16, i.e. exchange the upper and lower halves of the sum */
+   unsigned swapped = (s >> 16) | (s << 16);
+   /* the value to return sits in the upper half of (~s - swapped) */
+   return (~s - swapped) >> 16;
+}
+
+/*
+ * ip_compute_csum() specialised for IP headers, which always checksum on 4
+ * octet boundaries: sums @ihl 32-bit words from @iph; two words are always read.
+ */
+static inline __sum16
+ip_fast_csum(const void *iph, unsigned int ihl)
+{
+   const void *ptr = iph;  /* working copy: the asm advances it past each word */
+   unsigned int tmp, tmp2, sum;
+
+   __asm__(
+   "   ld.ab  %0, [%3, 4]  \n"  /* sum  = first word, ptr += 4 */
+   "   ld.ab  %2, [%3, 4]  \n"  /* tmp2 = second word */
+   "   sub    %1, %4, 2\n"  /* tmp  = ihl - 2, the words not yet loaded */
+   "   lsr.f  lp_count, %1, 1  \n"  /* loop count = tmp / 2, carry = tmp & 1 */
+   "   bcc    0f   \n"  /* even remainder: go straight to the loop */
+   "   add.f  %0, %0, %2   \n"  /* odd remainder: add one word up front */
+   "   ld.ab  %2, [%3, 4]  \n"
+   "0: lp 1f   \n"  /* loop body (up to label 1) adds two words per pass */
+   "   ld.ab  %1, [%3, 4]  \n"
+   "   adc.f  %0, %0, %2   \n"
+   "   ld.ab  %2, [%3, 4]  \n"
+   "   adc.f  %0, %0, %1   \n"
+   "1: adc.f  %0, %0, %2   \n"  /* add the last word still pending in tmp2 */
+   "   add.cs %0,%0,1  \n"  /* wrap the final carry back into the sum */
+   : "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr)
+   : "r"(ihl)
+   : "cc", "lp_count", "memory");  /* asm reads *iph, hence the "memory" clobber */
+
+   return csum_fold(sum);  /* reduce the 32-bit sum to the 16-bit checksum */
+}
+
+/*
+ * Add the TCP/UDP pseudo header (12 bytes: SA [4], DA [4], zeroes [1],
+ * Proto[1], TCP Seg(hdr+data) Len [2]) into @sum; the result is not folded.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+  unsigned short proto, __wsum sum)
+{
+   __asm__ __volatile__(
+   "   add.f %0, %0, %1\n"  /* sum += saddr, recording the carry */
+   "   adc.f %0, %0, %2\n"  /* sum += daddr + carry */
+   "   adc.f %0, %0, %3\n"  /* sum += length operand + carry */
+   "   adc.f %0, %0, %4\n"  /* sum += htons(proto) + carry */
+   "   adc   %0, %0, 0 \n"  /* add the last carry back in */
+   : "+&r"(sum)
+   : "r"(saddr), "r"(daddr),
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ "r"(len),
+#else
+ "r"(len << 8),  /* NOTE(review): presumably moves len into its pseudo-header byte lanes on little-endian - confirm */
+#endif
+ "r"(htons(proto))
+   : "cc");
+
+   return sum;  /* still a 32-bit partial sum; callers fold it themselves */
+}
+
+#define csum_fold csum_fold
+#define ip_fast_csum ip_fast_csum
+#define csum_tcpudp_nofold csum_tcpudp_nofold
+/* NOTE(review): self-defines presumably mark these as arch-provided for the generic header - confirm */
+#include <asm-generic/checksum.h>
+
+#endif /* _ASM_ARC_CHECKSUM_H */
diff --git a/arch/arc/include/asm/swab.h b/arch/arc/include/asm/swab.h
new file mode 100644
index 0000000..095599a
--- /dev/null
+++ b/arch/arc/include/asm/swab.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -Support sin