The branch main has been updated by ngie:

URL: https://cgit.FreeBSD.org/src/commit/?id=77864b545b0aaa91bc78b1156c477825007a6233
commit 77864b545b0aaa91bc78b1156c477825007a6233 Author: Enji Cooper <n...@freebsd.org> AuthorDate: 2024-09-08 04:37:04 +0000 Commit: Enji Cooper <n...@freebsd.org> CommitDate: 2024-09-14 19:38:17 +0000 sys/crypto/openssl: update powerpc* ASM This change updates the crypto powerpc* ASM via the prescribed process documented in `crypto/openssl/FREEBSD-upgrade`. This change syncs the ASM with 3.0.15's generated ASM. MFC after: 1 week MFC with: a7148ab39c03abd4d1a84997c70bf96f15dd2a09 MFC with: cc717b574d7faa2e0b2de1a985076286cef74187 Differential Revision: https://reviews.freebsd.org/D46604 --- sys/crypto/openssl/powerpc/aesp8-ppc.S | 143 +++++++++++++++++--------- sys/crypto/openssl/powerpc/poly1305-ppc.S | 64 ++++++------ sys/crypto/openssl/powerpc64/aesp8-ppc.S | 143 +++++++++++++++++--------- sys/crypto/openssl/powerpc64/poly1305-ppc.S | 64 ++++++------ sys/crypto/openssl/powerpc64le/aesp8-ppc.S | 143 +++++++++++++++++--------- sys/crypto/openssl/powerpc64le/poly1305-ppc.S | 64 ++++++------ 6 files changed, 378 insertions(+), 243 deletions(-) diff --git a/sys/crypto/openssl/powerpc/aesp8-ppc.S b/sys/crypto/openssl/powerpc/aesp8-ppc.S index 21067ff2521c..d0fb0f70adbc 100644 --- a/sys/crypto/openssl/powerpc/aesp8-ppc.S +++ b/sys/crypto/openssl/powerpc/aesp8-ppc.S @@ -9,11 +9,12 @@ rcon: .byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 .byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe .Lconsts: mflr 0 bcl 20,31,$+4 mflr 6 - addi 6,6,-0x48 + addi 6,6,-0x58 mtlr 0 blr .long 0 @@ -2347,6 +2348,18 @@ _aesp8_xts_encrypt6x: li 31,0x70 mtspr 256,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -2389,69 +2402,77 @@ _aesp8_xts_encrypt6x: vperm 
31,31,22,7 lvx 25,3,7 + + + + + + + + vperm 0,2,4,5 subi 10,10,31 vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -2477,6 +2498,8 @@ _aesp8_xts_encrypt6x: lvx 25,3,7 bdnz .Loop_xts_enc6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C508 @@ -2486,7 +2509,6 @@ _aesp8_xts_encrypt6x: vaddubm 8,8,8 .long 0x11ADC508 .long 0x11CEC508 - vsldoi 11,11,11,15 .long 0x11EFC508 .long 0x1210C508 @@ -2494,7 +2516,8 @@ _aesp8_xts_encrypt6x: vand 11,11,10 .long 0x10E7CD08 .long 0x118CCD08 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD08 .long 0x11CECD08 vxor 1,18,31 @@ -2505,13 +2528,13 @@ _aesp8_xts_encrypt6x: and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D508 .long 0x118CD508 vand 11,11,10 .long 0x11ADD508 .long 0x11CED508 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD508 .long 0x1210D508 @@ -2525,7 +2548,6 @@ _aesp8_xts_encrypt6x: vaddubm 8,8,8 .long 0x10E7DD08 .long 0x118CDD08 - vsldoi 11,11,11,15 .long 0x11ADDD08 .long 0x11CEDD08 vand 11,11,10 
@@ -2533,7 +2555,8 @@ _aesp8_xts_encrypt6x: .long 0x1210DD08 addi 7,1,32+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E508 .long 0x118CE508 vxor 3,20,31 @@ -2542,7 +2565,6 @@ _aesp8_xts_encrypt6x: .long 0x11ADE508 .long 0x11CEE508 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE508 .long 0x1210E508 lvx 24,0,7 @@ -2550,7 +2572,8 @@ _aesp8_xts_encrypt6x: .long 0x10E7ED08 .long 0x118CED08 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED08 .long 0x11CEED08 vxor 4,21,31 @@ -2560,14 +2583,14 @@ _aesp8_xts_encrypt6x: .long 0x1210ED08 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F508 .long 0x118CF508 vand 11,11,10 .long 0x11ADF508 .long 0x11CEF508 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF508 .long 0x1210F508 vxor 5,22,31 @@ -2577,7 +2600,6 @@ _aesp8_xts_encrypt6x: .long 0x10E70509 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D09 .long 0x7C235699 .long 0x11AD1509 @@ -2590,7 +2612,10 @@ _aesp8_xts_encrypt6x: .long 0x11EF2509 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x11702D09 @@ -2623,6 +2648,8 @@ _aesp8_xts_encrypt6x: mtctr 9 beq .Loop_xts_enc6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_enc6x_zero cmpwi 5,0x20 @@ -2999,6 +3026,18 @@ _aesp8_xts_decrypt6x: li 31,0x70 mtspr 256,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -3046,64 +3085,64 @@ _aesp8_xts_decrypt6x: vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 
31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -3129,6 +3168,8 @@ _aesp8_xts_decrypt6x: lvx 25,3,7 bdnz .Loop_xts_dec6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C548 @@ -3138,7 +3179,6 @@ _aesp8_xts_decrypt6x: vaddubm 8,8,8 .long 0x11ADC548 .long 0x11CEC548 - vsldoi 11,11,11,15 .long 0x11EFC548 .long 0x1210C548 @@ -3146,7 +3186,8 @@ _aesp8_xts_decrypt6x: vand 11,11,10 .long 0x10E7CD48 .long 0x118CCD48 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD48 .long 0x11CECD48 vxor 1,18,31 @@ -3157,13 +3198,13 @@ _aesp8_xts_decrypt6x: and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D548 .long 0x118CD548 vand 11,11,10 .long 0x11ADD548 .long 0x11CED548 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD548 .long 0x1210D548 @@ -3177,7 +3218,6 @@ _aesp8_xts_decrypt6x: vaddubm 8,8,8 .long 0x10E7DD48 .long 0x118CDD48 - vsldoi 11,11,11,15 .long 0x11ADDD48 .long 0x11CEDD48 vand 11,11,10 @@ -3185,7 +3225,8 @@ _aesp8_xts_decrypt6x: .long 0x1210DD48 addi 7,1,32+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E548 .long 0x118CE548 vxor 3,20,31 @@ -3194,7 +3235,6 @@ _aesp8_xts_decrypt6x: .long 0x11ADE548 .long 0x11CEE548 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE548 .long 0x1210E548 lvx 24,0,7 @@ -3202,7 +3242,8 @@ _aesp8_xts_decrypt6x: .long 
0x10E7ED48 .long 0x118CED48 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED48 .long 0x11CEED48 vxor 4,21,31 @@ -3212,14 +3253,14 @@ _aesp8_xts_decrypt6x: .long 0x1210ED48 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F548 .long 0x118CF548 vand 11,11,10 .long 0x11ADF548 .long 0x11CEF548 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF548 .long 0x1210F548 vxor 5,22,31 @@ -3229,7 +3270,6 @@ _aesp8_xts_decrypt6x: .long 0x10E70549 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D49 .long 0x7C235699 .long 0x11AD1549 @@ -3242,7 +3282,10 @@ _aesp8_xts_decrypt6x: .long 0x11EF2549 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x12102D49 .long 0x7CBD5699 @@ -3273,6 +3316,8 @@ _aesp8_xts_decrypt6x: mtctr 9 beq .Loop_xts_dec6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_dec6x_zero cmpwi 5,0x20 diff --git a/sys/crypto/openssl/powerpc/poly1305-ppc.S b/sys/crypto/openssl/powerpc/poly1305-ppc.S index d6fe34693724..41ef89e4f384 100644 --- a/sys/crypto/openssl/powerpc/poly1305-ppc.S +++ b/sys/crypto/openssl/powerpc/poly1305-ppc.S @@ -473,7 +473,7 @@ __poly1305_mul: .align 5 __poly1305_blocks_vsx: - stwu 1,-384(1) + stwu 1,-368(1) mflr 0 li 10,167 li 11,183 @@ -484,12 +484,12 @@ __poly1305_blocks_vsx: addi 11,11,32 stvx 22,10,1 addi 10,10,32 - stvx 23,10,1 - addi 10,10,32 - stvx 24,11,1 + stvx 23,11,1 addi 11,11,32 - stvx 25,10,1 + stvx 24,10,1 addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 stvx 26,10,1 addi 10,10,32 stvx 27,11,1 @@ -500,15 +500,15 @@ __poly1305_blocks_vsx: addi 11,11,32 stvx 30,10,1 stvx 31,11,1 - stw 12,360(1) + stw 12,344(1) li 12,-1 mtspr 256,12 - stw 27,364(1) - stw 28,368(1) - stw 29,372(1) - stw 30,376(1) - stw 31,380(1) - stw 0,388(1) + stw 27,348(1) + stw 28,352(1) + stw 29,356(1) + stw 30,360(1) + stw 31,364(1) + stw 0,372(1) bl .LPICmeup @@ -1209,7 +1209,7 @@ __poly1305_blocks_vsx: .align 4 .Ldone_vsx: - lwz 
0,388(1) + lwz 0,372(1) li 27,4 li 28,8 li 29,12 @@ -1220,39 +1220,39 @@ __poly1305_blocks_vsx: .long 0x7C7D1919 .long 0x7C9E1919 - lwz 12,360(1) + lwz 12,344(1) mtlr 0 li 10,167 li 11,183 mtspr 256,12 lvx 20,10,1 addi 10,10,32 - lvx 21,10,1 - addi 10,10,32 - lvx 22,11,1 + lvx 21,11,1 addi 11,11,32 - lvx 23,10,1 + lvx 22,10,1 addi 10,10,32 - lvx 24,11,1 + lvx 23,11,1 addi 11,11,32 - lvx 25,10,1 + lvx 24,10,1 addi 10,10,32 - lvx 26,11,1 + lvx 25,11,1 addi 11,11,32 - lvx 27,10,1 + lvx 26,10,1 addi 10,10,32 - lvx 28,11,1 + lvx 27,11,1 addi 11,11,32 - lvx 29,10,1 + lvx 28,10,1 addi 10,10,32 - lvx 30,11,1 - lvx 31,10,1 - lwz 27,364(1) - lwz 28,368(1) - lwz 29,372(1) - lwz 30,376(1) - lwz 31,380(1) - addi 1,1,384 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + lwz 27,348(1) + lwz 28,352(1) + lwz 29,356(1) + lwz 30,360(1) + lwz 31,364(1) + addi 1,1,368 blr .long 0 .byte 0,12,0x04,1,0x80,5,4,0 diff --git a/sys/crypto/openssl/powerpc64/aesp8-ppc.S b/sys/crypto/openssl/powerpc64/aesp8-ppc.S index 5fdbf0552b26..f2d9eb3c4664 100644 --- a/sys/crypto/openssl/powerpc64/aesp8-ppc.S +++ b/sys/crypto/openssl/powerpc64/aesp8-ppc.S @@ -10,11 +10,12 @@ rcon: .byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 .byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe .Lconsts: mflr 0 bcl 20,31,$+4 mflr 6 - addi 6,6,-0x48 + addi 6,6,-0x58 mtlr 0 blr .long 0 @@ -2364,6 +2365,18 @@ _aesp8_xts_encrypt6x: li 31,0x70 or 0,0,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -2406,69 +2419,77 @@ _aesp8_xts_encrypt6x: vperm 31,31,22,7 lvx 25,3,7 + + + + + + + + vperm 0,2,4,5 subi 10,10,31 vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 
7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -2494,6 +2515,8 @@ _aesp8_xts_encrypt6x: lvx 25,3,7 bdnz .Loop_xts_enc6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C508 @@ -2503,7 +2526,6 @@ _aesp8_xts_encrypt6x: vaddubm 8,8,8 .long 0x11ADC508 .long 0x11CEC508 - vsldoi 11,11,11,15 .long 0x11EFC508 .long 0x1210C508 @@ -2511,7 +2533,8 @@ _aesp8_xts_encrypt6x: vand 11,11,10 .long 0x10E7CD08 .long 0x118CCD08 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD08 .long 0x11CECD08 vxor 1,18,31 @@ -2522,13 +2545,13 @@ _aesp8_xts_encrypt6x: and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D508 .long 0x118CD508 vand 11,11,10 .long 0x11ADD508 .long 0x11CED508 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD508 .long 0x1210D508 @@ -2542,7 +2565,6 @@ _aesp8_xts_encrypt6x: vaddubm 8,8,8 .long 0x10E7DD08 .long 0x118CDD08 - vsldoi 11,11,11,15 .long 0x11ADDD08 .long 0x11CEDD08 vand 11,11,10 @@ -2550,7 +2572,8 @@ _aesp8_xts_encrypt6x: .long 0x1210DD08 addi 7,1,64+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 
0x10E7E508 .long 0x118CE508 vxor 3,20,31 @@ -2559,7 +2582,6 @@ _aesp8_xts_encrypt6x: .long 0x11ADE508 .long 0x11CEE508 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE508 .long 0x1210E508 lvx 24,0,7 @@ -2567,7 +2589,8 @@ _aesp8_xts_encrypt6x: .long 0x10E7ED08 .long 0x118CED08 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED08 .long 0x11CEED08 vxor 4,21,31 @@ -2577,14 +2600,14 @@ _aesp8_xts_encrypt6x: .long 0x1210ED08 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F508 .long 0x118CF508 vand 11,11,10 .long 0x11ADF508 .long 0x11CEF508 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF508 .long 0x1210F508 vxor 5,22,31 @@ -2594,7 +2617,6 @@ _aesp8_xts_encrypt6x: .long 0x10E70509 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D09 .long 0x7C235699 .long 0x11AD1509 @@ -2607,7 +2629,10 @@ _aesp8_xts_encrypt6x: .long 0x11EF2509 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x11702D09 @@ -2640,6 +2665,8 @@ _aesp8_xts_encrypt6x: mtctr 9 beq .Loop_xts_enc6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_enc6x_zero cmpwi 5,0x20 @@ -3016,6 +3043,18 @@ _aesp8_xts_decrypt6x: li 31,0x70 or 0,0,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -3063,64 +3102,64 @@ _aesp8_xts_decrypt6x: vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 
31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -3146,6 +3185,8 @@ _aesp8_xts_decrypt6x: lvx 25,3,7 bdnz .Loop_xts_dec6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C548 @@ -3155,7 +3196,6 @@ _aesp8_xts_decrypt6x: vaddubm 8,8,8 .long 0x11ADC548 .long 0x11CEC548 - vsldoi 11,11,11,15 *** 826 LINES SKIPPED ***