sha1_mb_mgr_flush_avx2() and sha1_mb_mgr_submit_avx2() both allocate a
lot of stack space which is never used.  Also, many of the registers
being saved aren't being clobbered so there's no need to save them.

Signed-off-by: Josh Poimboeuf <jpoim...@redhat.com>
---
 arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S  | 32 ++----------------------
 arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S | 29 +++------------------
 2 files changed, 6 insertions(+), 55 deletions(-)

diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
index 85c4e1c..672eaeb 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
@@ -86,16 +86,6 @@
 #define extra_blocks    %arg2
 #define p               %arg2
 
-
-# STACK_SPACE needs to be an odd multiple of 8
-_XMM_SAVE_SIZE  = 10*16
-_GPR_SAVE_SIZE  = 8*8
-_ALIGN_SIZE     = 8
-
-_XMM_SAVE       = 0
-_GPR_SAVE       = _XMM_SAVE + _XMM_SAVE_SIZE
-STACK_SPACE     = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE
-
 .macro LABEL prefix n
 \prefix\n\():
 .endm
@@ -113,16 +103,7 @@ offset = \_offset
 # JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
 # arg 1 : rcx : state
 ENTRY(sha1_mb_mgr_flush_avx2)
-       mov     %rsp, %r10
-       sub     $STACK_SPACE, %rsp
-       and     $~31, %rsp
-       mov     %rbx, _GPR_SAVE(%rsp)
-       mov     %r10, _GPR_SAVE+8*1(%rsp) #save rsp
-       mov     %rbp, _GPR_SAVE+8*3(%rsp)
-       mov     %r12, _GPR_SAVE+8*4(%rsp)
-       mov     %r13, _GPR_SAVE+8*5(%rsp)
-       mov     %r14, _GPR_SAVE+8*6(%rsp)
-       mov     %r15, _GPR_SAVE+8*7(%rsp)
+       push    %rbx
 
        # If bit (32+3) is set, then all lanes are empty
        mov     _unused_lanes(state), unused_lanes
@@ -230,16 +211,7 @@ len_is_0:
        mov     tmp2_w, offset(job_rax)
 
 return:
-
-       mov     _GPR_SAVE(%rsp), %rbx
-       mov     _GPR_SAVE+8*1(%rsp), %r10 #saved rsp
-       mov     _GPR_SAVE+8*3(%rsp), %rbp
-       mov     _GPR_SAVE+8*4(%rsp), %r12
-       mov     _GPR_SAVE+8*5(%rsp), %r13
-       mov     _GPR_SAVE+8*6(%rsp), %r14
-       mov     _GPR_SAVE+8*7(%rsp), %r15
-       mov     %r10, %rsp
-
+       pop     %rbx
        ret
 
 return_null:
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
index 2ab9560..a5a14c62 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
@@ -94,25 +94,12 @@ DWORD_tmp   = %r9d
 
 lane_data       = %r10
 
-# STACK_SPACE needs to be an odd multiple of 8
-STACK_SPACE     = 8*8 + 16*10 + 8
-
 # JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
 # arg 1 : rcx : state
 # arg 2 : rdx : job
 ENTRY(sha1_mb_mgr_submit_avx2)
-
-       mov     %rsp, %r10
-       sub     $STACK_SPACE, %rsp
-       and     $~31, %rsp
-
-       mov     %rbx, (%rsp)
-       mov     %r10, 8*2(%rsp) #save old rsp
-       mov     %rbp, 8*3(%rsp)
-       mov     %r12, 8*4(%rsp)
-       mov     %r13, 8*5(%rsp)
-       mov     %r14, 8*6(%rsp)
-       mov     %r15, 8*7(%rsp)
+       push    %rbx
+       push    %rbp
 
        mov     _unused_lanes(state), unused_lanes
        mov     unused_lanes, lane
@@ -203,16 +190,8 @@ len_is_0:
        movl    DWORD_tmp, _result_digest+1*16(job_rax)
 
 return:
-
-       mov     (%rsp), %rbx
-       mov     8*2(%rsp), %r10 #save old rsp
-       mov     8*3(%rsp), %rbp
-       mov     8*4(%rsp), %r12
-       mov     8*5(%rsp), %r13
-       mov     8*6(%rsp), %r14
-       mov     8*7(%rsp), %r15
-       mov     %r10, %rsp
-
+       pop     %rbp
+       pop     %rbx
        ret
 
 return_null:
-- 
2.4.3

Reply via email to