Hi,

I've been working a bit with scottp to try to improve the SHA1
performance in Mono. We figured that by unrolling two more of the
loops, we could double performance compared to what's in 1.9. Is
this patch OK to commit? If so, I can write up the necessary
changelogs and commit it myself, or someone else can go ahead and do
so.

Thanks,
Alan.
Index: SHA1CryptoServiceProvider.cs
===================================================================
--- SHA1CryptoServiceProvider.cs	(revision 102083)
+++ SHA1CryptoServiceProvider.cs	(working copy)
@@ -79,7 +79,7 @@
 			}
 
 			for (i=0; i<size-size%BLOCK_SIZE_BYTES; i += BLOCK_SIZE_BYTES) {
-				ProcessBlock (rgb, start+i);
+				ProcessBlock (rgb, (uint)(start+i));
 			}
 
 			if (size%BLOCK_SIZE_BYTES != 0) {
@@ -115,7 +115,7 @@
 			_H[4] = 0xC3D2E1F0;
 		}
 
-		private void ProcessBlock(byte[] inputBuffer, int inputOffset) 
+		private void ProcessBlock(byte[] inputBuffer, uint inputOffset) 
 		{
 			uint a, b, c, d, e;
 			int i;
@@ -125,18 +125,8 @@
 			// abc removal would not work on the fields
 			uint[] _H = this._H;
 			uint[] buff = this.buff;
-			for (i=0; i<16; i++) {
-				buff[i] = ((uint)(inputBuffer[inputOffset+4*i]) << 24)
-					| ((uint)(inputBuffer[inputOffset+4*i+1]) << 16)
-					| ((uint)(inputBuffer[inputOffset+4*i+2]) <<  8)
-					| ((uint)(inputBuffer[inputOffset+4*i+3]));
-			}
-
-			uint zt;
-			for (i=16; i<80; i++) {
-				zt = buff[i-3] ^ buff[i-8] ^ buff[i-14] ^ buff[i-16];
-				buff[i] = ((zt << 1) | (zt >> 31));
-			}
+			InitialiseBuff (buff, inputBuffer, inputOffset);
+			FillBuffer (buff);
 		
 			a = _H[0];
 			b = _H[1];
@@ -403,6 +393,159 @@
 			_H[4] += e;
 		}
 	
+        private unsafe void InitialiseBuff (uint[] buff, byte[] input, uint inputOffset) // Loads the 64 bytes input[inputOffset .. inputOffset+63] into buff[0..15], four bytes per word, first byte in the most significant position.
+        { // NOTE(review): nothing in this body uses pointers, so the 'unsafe' modifier looks unnecessary - confirm before committing.
+            buff[0] = (uint)((input[inputOffset + 0] << 24) | (input[inputOffset + 1] << 16) | (input[inputOffset + 2] << 8) | (input[inputOffset + 3])); // Unlike the loop removed from ProcessBlock, the bytes are shifted first and the combined word is cast to uint once.
+            buff[1] = (uint)((input[inputOffset + 4] << 24) | (input[inputOffset + 5] << 16) | (input[inputOffset + 6] << 8) | (input[inputOffset + 7]));
+            buff[2] = (uint)((input[inputOffset + 8] << 24) | (input[inputOffset + 9] << 16) | (input[inputOffset + 10] << 8) | (input[inputOffset + 11]));
+            buff[3] = (uint)((input[inputOffset + 12] << 24) | (input[inputOffset + 13] << 16) | (input[inputOffset + 14] << 8) | (input[inputOffset + 15]));
+            buff[4] = (uint)((input[inputOffset + 16] << 24) | (input[inputOffset + 17] << 16) | (input[inputOffset + 18] << 8) | (input[inputOffset + 19]));
+            buff[5] = (uint)((input[inputOffset + 20] << 24) | (input[inputOffset + 21] << 16) | (input[inputOffset + 22] << 8) | (input[inputOffset + 23]));
+            buff[6] = (uint)((input[inputOffset + 24] << 24) | (input[inputOffset + 25] << 16) | (input[inputOffset + 26] << 8) | (input[inputOffset + 27]));
+            buff[7] = (uint)((input[inputOffset + 28] << 24) | (input[inputOffset + 29] << 16) | (input[inputOffset + 30] << 8) | (input[inputOffset + 31]));
+            buff[8] = (uint)((input[inputOffset + 32] << 24) | (input[inputOffset + 33] << 16) | (input[inputOffset + 34] << 8) | (input[inputOffset + 35]));
+            buff[9] = (uint)((input[inputOffset + 36] << 24) | (input[inputOffset + 37] << 16) | (input[inputOffset + 38] << 8) | (input[inputOffset + 39]));
+            buff[10] = (uint)((input[inputOffset + 40] << 24) | (input[inputOffset + 41] << 16) | (input[inputOffset + 42] << 8) | (input[inputOffset + 43]));
+            buff[11] = (uint)((input[inputOffset + 44] << 24) | (input[inputOffset + 45] << 16) | (input[inputOffset + 46] << 8) | (input[inputOffset + 47]));
+            buff[12] = (uint)((input[inputOffset + 48] << 24) | (input[inputOffset + 49] << 16) | (input[inputOffset + 50] << 8) | (input[inputOffset + 51]));
+            buff[13] = (uint)((input[inputOffset + 52] << 24) | (input[inputOffset + 53] << 16) | (input[inputOffset + 54] << 8) | (input[inputOffset + 55]));
+            buff[14] = (uint)((input[inputOffset + 56] << 24) | (input[inputOffset + 57] << 16) | (input[inputOffset + 58] << 8) | (input[inputOffset + 59]));
+            buff[15] = (uint)((input[inputOffset + 60] << 24) | (input[inputOffset + 61] << 16) | (input[inputOffset + 62] << 8) | (input[inputOffset + 63])); // Last word: bytes 60..63 past inputOffset.
+        }
+
+        private unsafe void FillBuffer (uint[] buff) // Expands buff[0..15] into buff[16..79]: buff[i] = (buff[i-3] ^ buff[i-8] ^ buff[i-14] ^ buff[i-16]) rotated left by one bit. Fully unrolled form of the i=16..79 loop removed from ProcessBlock.
+        { // NOTE(review): nothing in this body uses pointers, so the 'unsafe' modifier looks unnecessary - confirm before committing.
+            uint val; // Scratch: XOR of the four source words for the entry being computed.
+            val = buff[13] ^ buff[8] ^ buff[2] ^ buff[0];
+            buff[16] = (val << 1) | (val >> 31); // Rotate val left by one bit.
+            val = buff[14] ^ buff[9] ^ buff[3] ^ buff[1];
+            buff[17] = (val << 1) | (val >> 31);
+            val = buff[15] ^ buff[10] ^ buff[4] ^ buff[2];
+            buff[18] = (val << 1) | (val >> 31);
+            val = buff[16] ^ buff[11] ^ buff[5] ^ buff[3]; // Reads buff[16] written above: from here on the statement order must be kept.
+            buff[19] = (val << 1) | (val >> 31);
+            val = buff[17] ^ buff[12] ^ buff[6] ^ buff[4];
+            buff[20] = (val << 1) | (val >> 31);
+            val = buff[18] ^ buff[13] ^ buff[7] ^ buff[5];
+            buff[21] = (val << 1) | (val >> 31);
+            val = buff[19] ^ buff[14] ^ buff[8] ^ buff[6];
+            buff[22] = (val << 1) | (val >> 31);
+            val = buff[20] ^ buff[15] ^ buff[9] ^ buff[7];
+            buff[23] = (val << 1) | (val >> 31);
+            val = buff[21] ^ buff[16] ^ buff[10] ^ buff[8];
+            buff[24] = (val << 1) | (val >> 31);
+            val = buff[22] ^ buff[17] ^ buff[11] ^ buff[9];
+            buff[25] = (val << 1) | (val >> 31);
+            val = buff[23] ^ buff[18] ^ buff[12] ^ buff[10];
+            buff[26] = (val << 1) | (val >> 31);
+            val = buff[24] ^ buff[19] ^ buff[13] ^ buff[11];
+            buff[27] = (val << 1) | (val >> 31);
+            val = buff[25] ^ buff[20] ^ buff[14] ^ buff[12];
+            buff[28] = (val << 1) | (val >> 31);
+            val = buff[26] ^ buff[21] ^ buff[15] ^ buff[13];
+            buff[29] = (val << 1) | (val >> 31);
+            val = buff[27] ^ buff[22] ^ buff[16] ^ buff[14];
+            buff[30] = (val << 1) | (val >> 31);
+            val = buff[28] ^ buff[23] ^ buff[17] ^ buff[15];
+            buff[31] = (val << 1) | (val >> 31);
+            val = buff[29] ^ buff[24] ^ buff[18] ^ buff[16];
+            buff[32] = (val << 1) | (val >> 31);
+            val = buff[30] ^ buff[25] ^ buff[19] ^ buff[17];
+            buff[33] = (val << 1) | (val >> 31);
+            val = buff[31] ^ buff[26] ^ buff[20] ^ buff[18];
+            buff[34] = (val << 1) | (val >> 31);
+            val = buff[32] ^ buff[27] ^ buff[21] ^ buff[19];
+            buff[35] = (val << 1) | (val >> 31);
+            val = buff[33] ^ buff[28] ^ buff[22] ^ buff[20];
+            buff[36] = (val << 1) | (val >> 31);
+            val = buff[34] ^ buff[29] ^ buff[23] ^ buff[21];
+            buff[37] = (val << 1) | (val >> 31);
+            val = buff[35] ^ buff[30] ^ buff[24] ^ buff[22];
+            buff[38] = (val << 1) | (val >> 31);
+            val = buff[36] ^ buff[31] ^ buff[25] ^ buff[23];
+            buff[39] = (val << 1) | (val >> 31);
+            val = buff[37] ^ buff[32] ^ buff[26] ^ buff[24];
+            buff[40] = (val << 1) | (val >> 31);
+            val = buff[38] ^ buff[33] ^ buff[27] ^ buff[25];
+            buff[41] = (val << 1) | (val >> 31);
+            val = buff[39] ^ buff[34] ^ buff[28] ^ buff[26];
+            buff[42] = (val << 1) | (val >> 31);
+            val = buff[40] ^ buff[35] ^ buff[29] ^ buff[27];
+            buff[43] = (val << 1) | (val >> 31);
+            val = buff[41] ^ buff[36] ^ buff[30] ^ buff[28];
+            buff[44] = (val << 1) | (val >> 31);
+            val = buff[42] ^ buff[37] ^ buff[31] ^ buff[29];
+            buff[45] = (val << 1) | (val >> 31);
+            val = buff[43] ^ buff[38] ^ buff[32] ^ buff[30];
+            buff[46] = (val << 1) | (val >> 31);
+            val = buff[44] ^ buff[39] ^ buff[33] ^ buff[31];
+            buff[47] = (val << 1) | (val >> 31);
+            val = buff[45] ^ buff[40] ^ buff[34] ^ buff[32];
+            buff[48] = (val << 1) | (val >> 31);
+            val = buff[46] ^ buff[41] ^ buff[35] ^ buff[33];
+            buff[49] = (val << 1) | (val >> 31);
+            val = buff[47] ^ buff[42] ^ buff[36] ^ buff[34];
+            buff[50] = (val << 1) | (val >> 31);
+            val = buff[48] ^ buff[43] ^ buff[37] ^ buff[35];
+            buff[51] = (val << 1) | (val >> 31);
+            val = buff[49] ^ buff[44] ^ buff[38] ^ buff[36];
+            buff[52] = (val << 1) | (val >> 31);
+            val = buff[50] ^ buff[45] ^ buff[39] ^ buff[37];
+            buff[53] = (val << 1) | (val >> 31);
+            val = buff[51] ^ buff[46] ^ buff[40] ^ buff[38];
+            buff[54] = (val << 1) | (val >> 31);
+            val = buff[52] ^ buff[47] ^ buff[41] ^ buff[39];
+            buff[55] = (val << 1) | (val >> 31);
+            val = buff[53] ^ buff[48] ^ buff[42] ^ buff[40];
+            buff[56] = (val << 1) | (val >> 31);
+            val = buff[54] ^ buff[49] ^ buff[43] ^ buff[41];
+            buff[57] = (val << 1) | (val >> 31);
+            val = buff[55] ^ buff[50] ^ buff[44] ^ buff[42];
+            buff[58] = (val << 1) | (val >> 31);
+            val = buff[56] ^ buff[51] ^ buff[45] ^ buff[43];
+            buff[59] = (val << 1) | (val >> 31);
+            val = buff[57] ^ buff[52] ^ buff[46] ^ buff[44];
+            buff[60] = (val << 1) | (val >> 31);
+            val = buff[58] ^ buff[53] ^ buff[47] ^ buff[45];
+            buff[61] = (val << 1) | (val >> 31);
+            val = buff[59] ^ buff[54] ^ buff[48] ^ buff[46];
+            buff[62] = (val << 1) | (val >> 31);
+            val = buff[60] ^ buff[55] ^ buff[49] ^ buff[47];
+            buff[63] = (val << 1) | (val >> 31);
+            val = buff[61] ^ buff[56] ^ buff[50] ^ buff[48];
+            buff[64] = (val << 1) | (val >> 31);
+            val = buff[62] ^ buff[57] ^ buff[51] ^ buff[49];
+            buff[65] = (val << 1) | (val >> 31);
+            val = buff[63] ^ buff[58] ^ buff[52] ^ buff[50];
+            buff[66] = (val << 1) | (val >> 31);
+            val = buff[64] ^ buff[59] ^ buff[53] ^ buff[51];
+            buff[67] = (val << 1) | (val >> 31);
+            val = buff[65] ^ buff[60] ^ buff[54] ^ buff[52];
+            buff[68] = (val << 1) | (val >> 31);
+            val = buff[66] ^ buff[61] ^ buff[55] ^ buff[53];
+            buff[69] = (val << 1) | (val >> 31);
+            val = buff[67] ^ buff[62] ^ buff[56] ^ buff[54];
+            buff[70] = (val << 1) | (val >> 31);
+            val = buff[68] ^ buff[63] ^ buff[57] ^ buff[55];
+            buff[71] = (val << 1) | (val >> 31);
+            val = buff[69] ^ buff[64] ^ buff[58] ^ buff[56];
+            buff[72] = (val << 1) | (val >> 31);
+            val = buff[70] ^ buff[65] ^ buff[59] ^ buff[57];
+            buff[73] = (val << 1) | (val >> 31);
+            val = buff[71] ^ buff[66] ^ buff[60] ^ buff[58];
+            buff[74] = (val << 1) | (val >> 31);
+            val = buff[72] ^ buff[67] ^ buff[61] ^ buff[59];
+            buff[75] = (val << 1) | (val >> 31);
+            val = buff[73] ^ buff[68] ^ buff[62] ^ buff[60];
+            buff[76] = (val << 1) | (val >> 31);
+            val = buff[74] ^ buff[69] ^ buff[63] ^ buff[61];
+            buff[77] = (val << 1) | (val >> 31);
+            val = buff[75] ^ buff[70] ^ buff[64] ^ buff[62];
+            buff[78] = (val << 1) | (val >> 31);
+            val = buff[76] ^ buff[71] ^ buff[65] ^ buff[63];
+            buff[79] = (val << 1) | (val >> 31); // Last entry written; buff must therefore hold at least 80 words.
+        }
+
 		private void ProcessFinalBlock (byte[] inputBuffer, int inputOffset, int inputCount) 
 		{
 			ulong total = count + (ulong)inputCount;
_______________________________________________
Mono-devel-list mailing list
Mono-devel-list@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-devel-list

Reply via email to