Linus Torvalds writes:
I've just integrated the Mozilla SHA1 library implementation that Adgar
Toernig sent me into the standard git archive (but I did the integration
differently).
Here is a new PPC SHA1 patch that integrates better with this...
Interestingly, the Mozilla SHA1 code is about twice as fast as the openssl
code on my G5, and judging by the disassembly, it's because it's much
simpler. I think the openssl people have unrolled all the loops totally,
which tends to be a disaster on any half-way modern CPU. But hey, it could
be something as simple as optimization flags too.
Very interesting. On my G4 powerbook (since I am at LCA), for a
fsck-cache on a linux-2.6 tree, it takes 6.6 seconds with the openssl
SHA1, 10.7 seconds with the Mozilla SHA1, and ~5.8 seconds with my
SHA1. I'll test it on a G5 tonight, hopefully.
Paul.
diff -urN git.orig/Makefile git/Makefile
--- git.orig/Makefile 2005-04-22 16:23:44.0 +1000
+++ git/Makefile2005-04-22 16:43:31.0 +1000
@@ -34,9 +34,14 @@
SHA1_HEADER=mozilla-sha1/sha1.h
LIB_OBJS += mozilla-sha1/sha1.o
else
+ifdef PPC_SHA1
+ SHA1_HEADER=ppc/sha1.h
+ LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
+else
SHA1_HEADER=openssl/sha.h
LIBS += -lssl
endif
+endif
CFLAGS += '-DSHA1_HEADER=$(SHA1_HEADER)'
@@ -77,7 +82,7 @@
write-tree.o: $(LIB_H)
clean:
- rm -f *.o mozilla-sha1/*.o $(PROG) $(LIB_FILE)
+ rm -f *.o mozilla-sha1/*.o ppc/*.o $(PROG) $(LIB_FILE)
backup: clean
cd .. ; tar czvf dircache.tar.gz dir-cache
diff -urN git.orig/ppc/sha1.c git/ppc/sha1.c
--- /dev/null 2005-04-04 12:56:19.0 +1000
+++ git/ppc/sha1.c 2005-04-22 16:29:19.0 +1000
@@ -0,0 +1,72 @@
+/*
+ * SHA-1 implementation.
+ *
+ * Copyright (C) 2005 Paul Mackerras [EMAIL PROTECTED]
+ *
+ * This version assumes we are running on a big-endian machine.
+ * It calls an external sha1_core() to process blocks of 64 bytes.
+ */
+#include stdio.h
+#include string.h
+#include sha1.h
+
+extern void sha1_core(uint32_t *hash, const unsigned char *p,
+ unsigned int nblocks);
+
+int SHA1_Init(SHA_CTX *c)
+{
+ c-hash[0] = 0x67452301;
+ c-hash[1] = 0xEFCDAB89;
+ c-hash[2] = 0x98BADCFE;
+ c-hash[3] = 0x10325476;
+ c-hash[4] = 0xC3D2E1F0;
+ c-len = 0;
+ c-cnt = 0;
+ return 0;
+}
+
+int SHA1_Update(SHA_CTX *c, const void *ptr, unsigned long n)
+{
+ unsigned long nb;
+ const unsigned char *p = ptr;
+
+ c-len += n 3;
+ while (n != 0) {
+ if (c-cnt || n 64) {
+ nb = 64 - c-cnt;
+ if (nb n)
+ nb = n;
+ memcpy(c-buf.b[c-cnt], p, nb);
+ if ((c-cnt += nb) == 64) {
+ sha1_core(c-hash, c-buf.b, 1);
+ c-cnt = 0;
+ }
+ } else {
+ nb = n 6;
+ sha1_core(c-hash, p, nb);
+ nb = 6;
+ }
+ n -= nb;
+ p += nb;
+ }
+ return 0;
+}
+
+int SHA1_Final(unsigned char *hash, SHA_CTX *c)
+{
+ unsigned int cnt = c-cnt;
+
+ c-buf.b[cnt++] = 0x80;
+ if (cnt 56) {
+ if (cnt 64)
+ memset(c-buf.b[cnt], 0, 64 - cnt);
+ sha1_core(c-hash, c-buf.b, 1);
+ cnt = 0;
+ }
+ if (cnt 56)
+ memset(c-buf.b[cnt], 0, 56 - cnt);
+ c-buf.l[7] = c-len;
+ sha1_core(c-hash, c-buf.b, 1);
+ memcpy(hash, c-hash, 20);
+ return 0;
+}
diff -urN git.orig/ppc/sha1.h git/ppc/sha1.h
--- /dev/null 2005-04-04 12:56:19.0 +1000
+++ git/ppc/sha1.h 2005-04-22 16:45:28.0 +1000
@@ -0,0 +1,20 @@
+/*
+ * SHA-1 implementation.
+ *
+ * Copyright (C) 2005 Paul Mackerras [EMAIL PROTECTED]
+ */
+#include stdint.h
+
+typedef struct sha_context {
+ uint32_t hash[5];
+ uint32_t cnt;
+ uint64_t len;
+ union {
+ unsigned char b[64];
+ uint64_t l[8];
+ } buf;
+} SHA_CTX;
+
+int SHA1_Init(SHA_CTX *c);
+int SHA1_Update(SHA_CTX *c, const void *p, unsigned long n);
+int SHA1_Final(unsigned char *hash, SHA_CTX *c);
diff -urN git.orig/ppc/sha1ppc.S git/ppc/sha1ppc.S
--- /dev/null 2005-04-04 12:56:19.0 +1000
+++ git/ppc/sha1ppc.S 2005-04-22 16:29:19.0 +1000
@@ -0,0 +1,185 @@
+/*
+ * SHA-1 implementation for PowerPC.
+ *
+ * Copyright (C) 2005 Paul Mackerras.
+ */
+#define FS 80
+
+/*
+ * We roll the registers for T, A, B, C, D, E around on each
+ * iteration; T on iteration t is A on iteration t+1, and so on.
+ * We use registers 7 - 12 for this.
+ */
+#define RT(t) t)+5)%6)+7)
+#define RA(t) t)+4)%6)+7)
+#define RB(t) t)+3)%6)+7)
+#define RC(t) t)+2)%6)+7)
+#define RD(t) t)+1)%6)+7)
+#define RE(t)