diff --git a/Makefile b/Makefile
index a333b378f1f7..43159b21a83f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
 PATCHLEVEL = 4
-SUBLEVEL = 106
+SUBLEVEL = 107
 EXTRAVERSION =
 NAME = Kleptomaniac Octopus
 
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 97f21cc66657..7f7fdb16bb96 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -71,6 +71,9 @@ void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
 
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+
 
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 0fc9872a1467..aead8a5fbe91 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -168,6 +168,21 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
        write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
 }
 
+void __hyp_text __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Non-VHE: Disable and flush SPE data generation
+        * VHE: The vcpu can run, but it can't hide.
+        */
+       __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+}
+
+void __hyp_text __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
+{
+       __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
 void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpu_context *host_ctxt;
@@ -175,13 +190,6 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
        struct kvm_guest_debug_arch *host_dbg;
        struct kvm_guest_debug_arch *guest_dbg;
 
-       /*
-        * Non-VHE: Disable and flush SPE data generation
-        * VHE: The vcpu can run, but it can't hide.
-        */
-       if (!has_vhe())
-               __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
-
        if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
                return;
 
@@ -201,8 +209,6 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
        struct kvm_guest_debug_arch *host_dbg;
        struct kvm_guest_debug_arch *guest_dbg;
 
-       if (!has_vhe())
-               __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
 
        if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
                return;
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 84964983198e..14607fac7ca3 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -682,6 +682,15 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 
        __sysreg_save_state_nvhe(host_ctxt);
 
+       /*
+        * We must flush and disable the SPE buffer for nVHE, as
+        * the translation regime(EL1&0) is going to be loaded with
+        * that of the guest. And we must do this before we change the
+        * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
+        * before we load guest Stage1.
+        */
+       __debug_save_host_buffers_nvhe(vcpu);
+
        __activate_vm(kern_hyp_va(vcpu->kvm));
        __activate_traps(vcpu);
 
@@ -720,11 +729,13 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
        if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
 
+       __debug_switch_to_host(vcpu);
+
        /*
         * This must come after restoring the host sysregs, since a non-VHE
         * system may enable SPE here and make use of the TTBRs.
         */
-       __debug_switch_to_host(vcpu);
+       __debug_restore_host_buffers_nvhe(vcpu);
 
        if (pmu_switch_needed)
                __pmu_switch_to_host(host_ctxt);
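
In the resulting __kvm_vcpu_run_nvhe(), the SPE buffer save/restore now brackets the whole world switch instead of living inside __debug_switch_to_{guest,host}(). A condensed sketch of the ordering, with helper placement taken from the hunks above and all other code elided (not the complete function):

	__sysreg_save_state_nvhe(host_ctxt);
	/* flush and disable SPE before the EL1&0 translation regime changes */
	__debug_save_host_buffers_nvhe(vcpu);
	__activate_vm(kern_hyp_va(vcpu->kvm));
	__activate_traps(vcpu);
	/* ... run the guest, deactivate traps/VM, restore host sysregs ... */
	__debug_switch_to_host(vcpu);
	/* only after the host sysregs are back may SPE run again (it uses the host TTBRs) */
	__debug_restore_host_buffers_nvhe(vcpu);
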
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 9afeb58c910e..dd954d8db629 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -319,7 +319,7 @@ _initial_blocks_\@:
 
        # Main loop - Encrypt/Decrypt remaining blocks
 
-       cmp     $0, %r13
+       test    %r13, %r13
        je      _zero_cipher_left_\@
        sub     $64, %r13
        je      _four_cipher_left_\@
@@ -438,7 +438,7 @@ _multiple_of_16_bytes_\@:
 
        mov PBlockLen(%arg2), %r12
 
-       cmp $0, %r12
+       test %r12, %r12
        je _partial_done\@
 
        GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
@@ -475,7 +475,7 @@ _T_8_\@:
        add     $8, %r10
        sub     $8, %r11
        psrldq  $8, %xmm0
-       cmp     $0, %r11
+       test    %r11, %r11
        je      _return_T_done_\@
 _T_4_\@:
        movd    %xmm0, %eax
@@ -483,7 +483,7 @@ _T_4_\@:
        add     $4, %r10
        sub     $4, %r11
        psrldq  $4, %xmm0
-       cmp     $0, %r11
+       test    %r11, %r11
        je      _return_T_done_\@
 _T_123_\@:
        movd    %xmm0, %eax
@@ -620,7 +620,7 @@ _get_AAD_blocks\@:
 
        /* read the last <16B of AAD */
 _get_AAD_rest\@:
-       cmp        $0, %r11
+       test       %r11, %r11
        je         _get_AAD_done\@
 
        READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
@@ -641,7 +641,7 @@ _get_AAD_done\@:
 .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
        AAD_HASH operation
        mov     PBlockLen(%arg2), %r13
-       cmp     $0, %r13
+       test    %r13, %r13
        je      _partial_block_done_\@  # Leave Macro if no partial blocks
        # Read in input data without over reading
        cmp     $16, \PLAIN_CYPH_LEN
@@ -693,7 +693,7 @@ _no_extra_mask_1_\@:
        PSHUFB_XMM      %xmm2, %xmm3
        pxor    %xmm3, \AAD_HASH
 
-       cmp     $0, %r10
+       test    %r10, %r10
        jl      _partial_incomplete_1_\@
 
        # GHASH computation for the last <16 Byte block
@@ -728,7 +728,7 @@ _no_extra_mask_2_\@:
        PSHUFB_XMM %xmm2, %xmm9
        pxor    %xmm9, \AAD_HASH
 
-       cmp     $0, %r10
+       test    %r10, %r10
        jl      _partial_incomplete_2_\@
 
        # GHASH computation for the last <16 Byte block
@@ -748,7 +748,7 @@ _encode_done_\@:
        PSHUFB_XMM      %xmm2, %xmm9
 .endif
        # output encrypted Bytes
-       cmp     $0, %r10
+       test    %r10, %r10
        jl      _partial_fill_\@
        mov     %r13, %r12
        mov     $16, %r13
@@ -1946,7 +1946,7 @@ ENTRY(aesni_set_key)
 ENDPROC(aesni_set_key)
 
 /*
- * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
  */
 ENTRY(aesni_enc)
        FRAME_BEGIN
@@ -2137,7 +2137,7 @@ _aesni_enc4:
 ENDPROC(_aesni_enc4)
 
 /*
- * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
  */
 ENTRY(aesni_dec)
        FRAME_BEGIN
@@ -2726,25 +2726,18 @@ ENDPROC(aesni_ctr_enc)
        pxor CTR, IV;
 
 /*
- * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
- *                      bool enc, u8 *iv)
+ * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
+ *                       const u8 *src, unsigned int len, le128 *iv)
  */
-ENTRY(aesni_xts_crypt8)
+ENTRY(aesni_xts_encrypt)
        FRAME_BEGIN
-       cmpb $0, %cl
-       movl $0, %ecx
-       movl $240, %r10d
-       leaq _aesni_enc4, %r11
-       leaq _aesni_dec4, %rax
-       cmovel %r10d, %ecx
-       cmoveq %rax, %r11
 
        movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
        movups (IVP), IV
 
        mov 480(KEYP), KLEN
-       addq %rcx, KEYP
 
+.Lxts_enc_loop4:
        movdqa IV, STATE1
        movdqu 0x00(INP), INC
        pxor INC, STATE1
@@ -2768,71 +2761,103 @@ ENTRY(aesni_xts_crypt8)
        pxor INC, STATE4
        movdqu IV, 0x30(OUTP)
 
-       CALL_NOSPEC %r11
+       call _aesni_enc4
 
        movdqu 0x00(OUTP), INC
        pxor INC, STATE1
        movdqu STATE1, 0x00(OUTP)
 
-       _aesni_gf128mul_x_ble()
-       movdqa IV, STATE1
-       movdqu 0x40(INP), INC
-       pxor INC, STATE1
-       movdqu IV, 0x40(OUTP)
-
        movdqu 0x10(OUTP), INC
        pxor INC, STATE2
        movdqu STATE2, 0x10(OUTP)
 
-       _aesni_gf128mul_x_ble()
-       movdqa IV, STATE2
-       movdqu 0x50(INP), INC
-       pxor INC, STATE2
-       movdqu IV, 0x50(OUTP)
-
        movdqu 0x20(OUTP), INC
        pxor INC, STATE3
        movdqu STATE3, 0x20(OUTP)
 
-       _aesni_gf128mul_x_ble()
-       movdqa IV, STATE3
-       movdqu 0x60(INP), INC
-       pxor INC, STATE3
-       movdqu IV, 0x60(OUTP)
-
        movdqu 0x30(OUTP), INC
        pxor INC, STATE4
        movdqu STATE4, 0x30(OUTP)
 
        _aesni_gf128mul_x_ble()
-       movdqa IV, STATE4
-       movdqu 0x70(INP), INC
-       pxor INC, STATE4
-       movdqu IV, 0x70(OUTP)
 
-       _aesni_gf128mul_x_ble()
+       add $64, INP
+       add $64, OUTP
+       sub $64, LEN
+       ja .Lxts_enc_loop4
+
        movups IV, (IVP)
 
-       CALL_NOSPEC %r11
+       FRAME_END
+       ret
+ENDPROC(aesni_xts_encrypt)
+
+/*
+ * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
+ *                       const u8 *src, unsigned int len, le128 *iv)
+ */
+ENTRY(aesni_xts_decrypt)
+       FRAME_BEGIN
+
+       movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+       movups (IVP), IV
+
+       mov 480(KEYP), KLEN
+       add $240, KEYP
 
-       movdqu 0x40(OUTP), INC
+.Lxts_dec_loop4:
+       movdqa IV, STATE1
+       movdqu 0x00(INP), INC
        pxor INC, STATE1
-       movdqu STATE1, 0x40(OUTP)
+       movdqu IV, 0x00(OUTP)
 
-       movdqu 0x50(OUTP), INC
+       _aesni_gf128mul_x_ble()
+       movdqa IV, STATE2
+       movdqu 0x10(INP), INC
+       pxor INC, STATE2
+       movdqu IV, 0x10(OUTP)
+
+       _aesni_gf128mul_x_ble()
+       movdqa IV, STATE3
+       movdqu 0x20(INP), INC
+       pxor INC, STATE3
+       movdqu IV, 0x20(OUTP)
+
+       _aesni_gf128mul_x_ble()
+       movdqa IV, STATE4
+       movdqu 0x30(INP), INC
+       pxor INC, STATE4
+       movdqu IV, 0x30(OUTP)
+
+       call _aesni_dec4
+
+       movdqu 0x00(OUTP), INC
+       pxor INC, STATE1
+       movdqu STATE1, 0x00(OUTP)
+
+       movdqu 0x10(OUTP), INC
        pxor INC, STATE2
-       movdqu STATE2, 0x50(OUTP)
+       movdqu STATE2, 0x10(OUTP)
 
-       movdqu 0x60(OUTP), INC
+       movdqu 0x20(OUTP), INC
        pxor INC, STATE3
-       movdqu STATE3, 0x60(OUTP)
+       movdqu STATE3, 0x20(OUTP)
 
-       movdqu 0x70(OUTP), INC
+       movdqu 0x30(OUTP), INC
        pxor INC, STATE4
-       movdqu STATE4, 0x70(OUTP)
+       movdqu STATE4, 0x30(OUTP)
+
+       _aesni_gf128mul_x_ble()
+
+       add $64, INP
+       add $64, OUTP
+       sub $64, LEN
+       ja .Lxts_dec_loop4
+
+       movups IV, (IVP)
 
        FRAME_END
        ret
-ENDPROC(aesni_xts_crypt8)
+ENDPROC(aesni_xts_decrypt)
 
 #endif
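
In C terms, each pass of the new .Lxts_enc_loop4/.Lxts_dec_loop4 loop above handles four 16-byte blocks: derive four consecutive tweaks, XOR them into the data, run the 4-way AES primitive, then XOR the tweaks back in, advancing 64 bytes per iteration. A rough, illustrative model only (gf128mul_x_ble() is the kernel's GF(2^128) tweak-doubling helper; xor16() and aes_crypt4() are hypothetical stand-ins for the assembly, not kernel APIs):

	/* Illustrative C model of the 4-block XTS loop; not kernel code. */
	static void xts_crypt_loop4(const void *ctx, u8 *dst, const u8 *src,
				    unsigned int len, le128 *iv)
	{
		while (len) {
			le128 t[4];
			int i;

			for (i = 0; i < 4; i++) {
				t[i] = *iv;				/* tweak for block i */
				xor16(dst + 16 * i, src + 16 * i,
				      (const u8 *)&t[i]);		/* CC = P ^ T */
				gf128mul_x_ble(iv, iv);			/* advance the tweak */
			}
			aes_crypt4(ctx, dst);				/* 4-way AES over the staged blocks */
			for (i = 0; i < 4; i++)
				xor16(dst + 16 * i, dst + 16 * i,
				      (const u8 *)&t[i]);		/* C = E(CC) ^ T */

			src += 64;
			dst += 64;
			len -= 64;
		}
	}
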
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 91c039ab5699..4e4d34956170 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -370,7 +370,7 @@ _initial_num_blocks_is_0\@:
 
 
 _initial_blocks_encrypted\@:
-        cmp     $0, %r13
+        test    %r13, %r13
         je      _zero_cipher_left\@
 
         sub     $128, %r13
@@ -529,7 +529,7 @@ _multiple_of_16_bytes\@:
         vmovdqu HashKey(arg2), %xmm13
 
         mov PBlockLen(arg2), %r12
-        cmp $0, %r12
+        test %r12, %r12
         je _partial_done\@
 
        #GHASH computation for the last <16 Byte block
@@ -574,7 +574,7 @@ _T_8\@:
         add     $8, %r10
         sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
-        cmp     $0, %r11
+        test    %r11, %r11
         je     _return_T_done\@
 _T_4\@:
         vmovd   %xmm9, %eax
@@ -582,7 +582,7 @@ _T_4\@:
         add     $4, %r10
         sub     $4, %r11
         vpsrldq     $4, %xmm9, %xmm9
-        cmp     $0, %r11
+        test    %r11, %r11
         je     _return_T_done\@
 _T_123\@:
         vmovd     %xmm9, %eax
@@ -626,7 +626,7 @@ _get_AAD_blocks\@:
        cmp     $16, %r11
        jge     _get_AAD_blocks\@
        vmovdqu \T8, \T7
-       cmp     $0, %r11
+       test    %r11, %r11
        je      _get_AAD_done\@
 
        vpxor   \T7, \T7, \T7
@@ -645,7 +645,7 @@ _get_AAD_rest8\@:
        vpxor   \T1, \T7, \T7
        jmp     _get_AAD_rest8\@
 _get_AAD_rest4\@:
-       cmp     $0, %r11
+       test    %r11, %r11
        jle      _get_AAD_rest0\@
        mov     (%r10), %eax
        movq    %rax, \T1
@@ -750,7 +750,7 @@ _done_read_partial_block_\@:
 .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
         AAD_HASH ENC_DEC
         mov    PBlockLen(arg2), %r13
-        cmp    $0, %r13
+        test   %r13, %r13
         je     _partial_block_done_\@  # Leave Macro if no partial blocks
         # Read in input data without over reading
         cmp    $16, \PLAIN_CYPH_LEN
@@ -802,7 +802,7 @@ _no_extra_mask_1_\@:
         vpshufb        %xmm2, %xmm3, %xmm3
         vpxor  %xmm3, \AAD_HASH, \AAD_HASH
 
-        cmp    $0, %r10
+        test   %r10, %r10
         jl     _partial_incomplete_1_\@
 
         # GHASH computation for the last <16 Byte block
@@ -837,7 +837,7 @@ _no_extra_mask_2_\@:
         vpshufb %xmm2, %xmm9, %xmm9
         vpxor  %xmm9, \AAD_HASH, \AAD_HASH
 
-        cmp    $0, %r10
+        test   %r10, %r10
         jl     _partial_incomplete_2_\@
 
         # GHASH computation for the last <16 Byte block
@@ -857,7 +857,7 @@ _encode_done_\@:
         vpshufb        %xmm2, %xmm9, %xmm9
 .endif
         # output encrypted Bytes
-        cmp    $0, %r10
+        test   %r10, %r10
         jl     _partial_fill_\@
         mov    %r13, %r12
         mov    $16, %r13
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 88ad272aa2b4..18cfb76daa23 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -83,10 +83,8 @@ struct gcm_context_data {
 
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
                             unsigned int key_len);
-asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
-                         const u8 *in);
-asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
-                         const u8 *in);
+asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
+asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
 asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
                              const u8 *in, unsigned int len);
 asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
@@ -99,6 +97,12 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 #define AVX_GEN2_OPTSIZE 640
 #define AVX_GEN4_OPTSIZE 4096
 
+asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out,
+                                 const u8 *in, unsigned int len, u8 *iv);
+
+asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
+                                 const u8 *in, unsigned int len, u8 *iv);
+
 #ifdef CONFIG_X86_64
 
 static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
@@ -106,9 +110,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
                              const u8 *in, unsigned int len, u8 *iv);
 
-asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
-                                const u8 *in, bool enc, u8 *iv);
-
 /* asmlinkage void aesni_gcm_enc()
  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
  * struct gcm_context_data.  May be uninitialized.
@@ -550,29 +551,24 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 }
 
 
-static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
-{
-       aesni_enc(ctx, out, in);
-}
-
-static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc);
 }
 
-static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
 }
 
-static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
+       aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
 }
 
-static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
+       aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
 }
 
 static const struct common_glue_ctx aesni_enc_xts = {
@@ -580,11 +576,11 @@ static const struct common_glue_ctx aesni_enc_xts = {
        .fpu_blocks_limit = 1,
 
        .funcs = { {
-               .num_blocks = 8,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
+               .num_blocks = 32,
+               .fn_u = { .xts = aesni_xts_enc32 }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
+               .fn_u = { .xts = aesni_xts_enc }
        } }
 };
 
@@ -593,11 +589,11 @@ static const struct common_glue_ctx aesni_dec_xts = {
        .fpu_blocks_limit = 1,
 
        .funcs = { {
-               .num_blocks = 8,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
+               .num_blocks = 32,
+               .fn_u = { .xts = aesni_xts_dec32 }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
+               .fn_u = { .xts = aesni_xts_dec }
        } }
 };
 
@@ -606,8 +602,7 @@ static int xts_encrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&aesni_enc_xts, req,
-                                  XTS_TWEAK_CAST(aesni_xts_tweak),
+       return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc,
                                   aes_ctx(ctx->raw_tweak_ctx),
                                   aes_ctx(ctx->raw_crypt_ctx),
                                   false);
@@ -618,8 +613,7 @@ static int xts_decrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&aesni_dec_xts, req,
-                                  XTS_TWEAK_CAST(aesni_xts_tweak),
+       return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc,
                                   aes_ctx(ctx->raw_tweak_ctx),
                                   aes_ctx(ctx->raw_crypt_ctx),
                                   true);
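
The GLUE_FUNC_CAST/GLUE_CBC_FUNC_CAST/GLUE_CTR_FUNC_CAST/GLUE_XTS_FUNC_CAST and XTS_TWEAK_CAST removals here and in the glue files below all follow from one signature change: the asm and C helpers now take const void *ctx and u8 * buffers, so they match the glue-helper function-pointer types directly and no longer have to be cast into the fn_u slots. A minimal sketch of the pattern (the typedef and context type are illustrative, not the actual glue_helper.h declarations):

	#include <linux/types.h>			/* u8, u32 */

	struct cipher_ctx { u32 rk[60]; };		/* hypothetical context type */

	/* illustrative function-pointer type, analogous to the glue-helper ones */
	typedef void (*ecb_fn_t)(const void *ctx, u8 *dst, const u8 *src);

	/* New-style helper: takes const void * itself and converts internally,
	 * so assigning it below needs no cast macro. */
	static void cipher_enc_blk(const void *ctx, u8 *dst, const u8 *src)
	{
		const struct cipher_ctx *c = ctx;

		/* ... encrypt one 16-byte block from src to dst using c->rk ... */
		(void)c;
	}

	static const ecb_fn_t ecb_slot = cipher_enc_blk;	/* no GLUE_FUNC_CAST needed */
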
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index a4f00128ea55..a8cc2c83fe1b 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -19,20 +19,17 @@
 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
 
 /* 32-way AVX2/AES-NI parallel cipher functions */
-asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
-asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
+asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
 
-asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
-asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
-                                  const u8 *src, le128 *iv);
+asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
+                                  le128 *iv);
 
-asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src, le128 *iv);
+asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
+asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
 
 static const struct common_glue_ctx camellia_enc = {
        .num_funcs = 4,
@@ -40,16 +37,16 @@ static const struct common_glue_ctx camellia_enc = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
+               .fn_u = { .ecb = camellia_ecb_enc_32way }
        }, {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
+               .fn_u = { .ecb = camellia_ecb_enc_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+               .fn_u = { .ecb = camellia_enc_blk_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+               .fn_u = { .ecb = camellia_enc_blk }
        } }
 };
 
@@ -59,16 +56,16 @@ static const struct common_glue_ctx camellia_ctr = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
+               .fn_u = { .ctr = camellia_ctr_32way }
        }, {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
+               .fn_u = { .ctr = camellia_ctr_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+               .fn_u = { .ctr = camellia_crypt_ctr_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+               .fn_u = { .ctr = camellia_crypt_ctr }
        } }
 };
 
@@ -78,13 +75,13 @@ static const struct common_glue_ctx camellia_enc_xts = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
+               .fn_u = { .xts = camellia_xts_enc_32way }
        }, {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
+               .fn_u = { .xts = camellia_xts_enc_16way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
+               .fn_u = { .xts = camellia_xts_enc }
        } }
 };
 
@@ -94,16 +91,16 @@ static const struct common_glue_ctx camellia_dec = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
+               .fn_u = { .ecb = camellia_ecb_dec_32way }
        }, {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
+               .fn_u = { .ecb = camellia_ecb_dec_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+               .fn_u = { .ecb = camellia_dec_blk_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+               .fn_u = { .ecb = camellia_dec_blk }
        } }
 };
 
@@ -113,16 +110,16 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
+               .fn_u = { .cbc = camellia_cbc_dec_32way }
        }, {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
+               .fn_u = { .cbc = camellia_cbc_dec_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+               .fn_u = { .cbc = camellia_decrypt_cbc_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+               .fn_u = { .cbc = camellia_dec_blk }
        } }
 };
 
@@ -132,13 +129,13 @@ static const struct common_glue_ctx camellia_dec_xts = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
+               .fn_u = { .xts = camellia_xts_dec_32way }
        }, {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
+               .fn_u = { .xts = camellia_xts_dec_16way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
+               .fn_u = { .xts = camellia_xts_dec }
        } }
 };
 
@@ -161,8 +158,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -180,8 +176,7 @@ static int xts_encrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&camellia_enc_xts, req,
-                                  XTS_TWEAK_CAST(camellia_enc_blk),
+       return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -190,8 +185,7 @@ static int xts_decrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&camellia_dec_xts, req,
-                                  XTS_TWEAK_CAST(camellia_enc_blk),
+       return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index f28d282779b8..31a82a79f4ac 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -18,41 +18,36 @@
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 
 /* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
 
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
 
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
 
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
-                                  const u8 *src, le128 *iv);
+asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+                                  le128 *iv);
 EXPORT_SYMBOL_GPL(camellia_ctr_16way);
 
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src, le128 *iv);
+asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
 EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
 
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src, le128 *iv);
+asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
 EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
 
-void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(camellia_enc_blk));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk);
 }
 EXPORT_SYMBOL_GPL(camellia_xts_enc);
 
-void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(camellia_dec_blk));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk);
 }
 EXPORT_SYMBOL_GPL(camellia_xts_dec);
 
@@ -62,13 +57,13 @@ static const struct common_glue_ctx camellia_enc = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
+               .fn_u = { .ecb = camellia_ecb_enc_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+               .fn_u = { .ecb = camellia_enc_blk_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+               .fn_u = { .ecb = camellia_enc_blk }
        } }
 };
 
@@ -78,13 +73,13 @@ static const struct common_glue_ctx camellia_ctr = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
+               .fn_u = { .ctr = camellia_ctr_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+               .fn_u = { .ctr = camellia_crypt_ctr_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+               .fn_u = { .ctr = camellia_crypt_ctr }
        } }
 };
 
@@ -94,10 +89,10 @@ static const struct common_glue_ctx camellia_enc_xts = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
+               .fn_u = { .xts = camellia_xts_enc_16way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
+               .fn_u = { .xts = camellia_xts_enc }
        } }
 };
 
@@ -107,13 +102,13 @@ static const struct common_glue_ctx camellia_dec = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
+               .fn_u = { .ecb = camellia_ecb_dec_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+               .fn_u = { .ecb = camellia_dec_blk_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+               .fn_u = { .ecb = camellia_dec_blk }
        } }
 };
 
@@ -123,13 +118,13 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
+               .fn_u = { .cbc = camellia_cbc_dec_16way }
        }, {
                .num_blocks = 2,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+               .fn_u = { .cbc = camellia_decrypt_cbc_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+               .fn_u = { .cbc = camellia_dec_blk }
        } }
 };
 
@@ -139,10 +134,10 @@ static const struct common_glue_ctx camellia_dec_xts = {
 
        .funcs = { {
                .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
+               .fn_u = { .xts = camellia_xts_dec_16way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
+               .fn_u = { .xts = camellia_xts_dec }
        } }
 };
 
@@ -165,8 +160,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -206,8 +200,7 @@ static int xts_encrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&camellia_enc_xts, req,
-                                  XTS_TWEAK_CAST(camellia_enc_blk),
+       return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -216,8 +209,7 @@ static int xts_decrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&camellia_dec_xts, req,
-                                  XTS_TWEAK_CAST(camellia_enc_blk),
+       return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 7c62db56ffe1..5f3ed5af68d7 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -18,19 +18,17 @@
 #include <asm/crypto/glue_helper.h>
 
 /* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-                                  const u8 *src, bool xor);
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+                                  bool xor);
 EXPORT_SYMBOL_GPL(__camellia_enc_blk);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
-                                const u8 *src);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_dec_blk);
 
 /* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-                                       const u8 *src, bool xor);
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+                                       bool xor);
 EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-                                     const u8 *src);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
 EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
 
 static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -1267,8 +1265,10 @@ static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
        return camellia_setkey(&tfm->base, key, key_len);
 }
 
-void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
+void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s)
 {
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
        u128 iv = *src;
 
        camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
@@ -1277,9 +1277,11 @@ void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 }
 EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
 
-void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblk;
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        if (dst != src)
                *dst = *src;
@@ -1291,9 +1293,11 @@ void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
 
-void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblks[2];
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        if (dst != src) {
                dst[0] = src[0];
@@ -1315,10 +1319,10 @@ static const struct common_glue_ctx camellia_enc = {
 
        .funcs = { {
                .num_blocks = 2,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+               .fn_u = { .ecb = camellia_enc_blk_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+               .fn_u = { .ecb = camellia_enc_blk }
        } }
 };
 
@@ -1328,10 +1332,10 @@ static const struct common_glue_ctx camellia_ctr = {
 
        .funcs = { {
                .num_blocks = 2,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+               .fn_u = { .ctr = camellia_crypt_ctr_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+               .fn_u = { .ctr = camellia_crypt_ctr }
        } }
 };
 
@@ -1341,10 +1345,10 @@ static const struct common_glue_ctx camellia_dec = {
 
        .funcs = { {
                .num_blocks = 2,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+               .fn_u = { .ecb = camellia_dec_blk_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+               .fn_u = { .ecb = camellia_dec_blk }
        } }
 };
 
@@ -1354,10 +1358,10 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 
        .funcs = { {
                .num_blocks = 2,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+               .fn_u = { .cbc = camellia_decrypt_cbc_2way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+               .fn_u = { .cbc = camellia_dec_blk }
        } }
 };
 
@@ -1373,8 +1377,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index a8a38fffb4a9..da5297475f9e 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -20,20 +20,17 @@
 
 #define CAST6_PARALLEL_BLOCKS 8
 
-asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
-                                  const u8 *src);
-asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
-                                  const u8 *src);
-
-asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
-                                  const u8 *src);
-asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
+asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
                               le128 *iv);
 
-asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
-                                  const u8 *src, le128 *iv);
-asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
-                                  const u8 *src, le128 *iv);
+asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
+                                  le128 *iv);
+asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
+                                  le128 *iv);
 
 static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
                                 const u8 *key, unsigned int keylen)
@@ -41,21 +38,21 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
        return cast6_setkey(&tfm->base, key, keylen);
 }
 
-static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(__cast6_encrypt));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt);
 }
 
-static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(__cast6_decrypt));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt);
 }
 
-static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblk;
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        le128_to_be128(&ctrblk, iv);
        le128_inc(iv);
@@ -70,10 +67,10 @@ static const struct common_glue_ctx cast6_enc = {
 
        .funcs = { {
                .num_blocks = CAST6_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
+               .fn_u = { .ecb = cast6_ecb_enc_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
+               .fn_u = { .ecb = __cast6_encrypt }
        } }
 };
 
@@ -83,10 +80,10 @@ static const struct common_glue_ctx cast6_ctr = {
 
        .funcs = { {
                .num_blocks = CAST6_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
+               .fn_u = { .ctr = cast6_ctr_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
+               .fn_u = { .ctr = cast6_crypt_ctr }
        } }
 };
 
@@ -96,10 +93,10 @@ static const struct common_glue_ctx cast6_enc_xts = {
 
        .funcs = { {
                .num_blocks = CAST6_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
+               .fn_u = { .xts = cast6_xts_enc_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
+               .fn_u = { .xts = cast6_xts_enc }
        } }
 };
 
@@ -109,10 +106,10 @@ static const struct common_glue_ctx cast6_dec = {
 
        .funcs = { {
                .num_blocks = CAST6_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
+               .fn_u = { .ecb = cast6_ecb_dec_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
+               .fn_u = { .ecb = __cast6_decrypt }
        } }
 };
 
@@ -122,10 +119,10 @@ static const struct common_glue_ctx cast6_dec_cbc = {
 
        .funcs = { {
                .num_blocks = CAST6_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
+               .fn_u = { .cbc = cast6_cbc_dec_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
+               .fn_u = { .cbc = __cast6_decrypt }
        } }
 };
 
@@ -135,10 +132,10 @@ static const struct common_glue_ctx cast6_dec_xts = {
 
        .funcs = { {
                .num_blocks = CAST6_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
+               .fn_u = { .xts = cast6_xts_dec_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
+               .fn_u = { .xts = cast6_xts_dec }
        } }
 };
 
@@ -154,8 +151,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -199,8 +195,7 @@ static int xts_encrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&cast6_enc_xts, req,
-                                  XTS_TWEAK_CAST(__cast6_encrypt),
+       return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -209,8 +204,7 @@ static int xts_decrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&cast6_dec_xts, req,
-                                  XTS_TWEAK_CAST(__cast6_encrypt),
+       return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index d15b99397480..d3d91a0abf88 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -134,7 +134,8 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
                                src -= num_blocks - 1;
                                dst -= num_blocks - 1;
 
-                               gctx->funcs[i].fn_u.cbc(ctx, dst, src);
+                               gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst,
+                                                       (const u8 *)src);
 
                                nbytes -= func_bytes;
                                if (nbytes < bsize)
@@ -188,7 +189,9 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
 
                        /* Process multi-block batch */
                        do {
-                               gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
+                               gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst,
+                                                       (const u8 *)src,
+                                                       &ctrblk);
                                src += num_blocks;
                                dst += num_blocks;
                                nbytes -= func_bytes;
@@ -210,7 +213,8 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
 
                be128_to_le128(&ctrblk, (be128 *)walk.iv);
                memcpy(&tmp, walk.src.virt.addr, nbytes);
-               gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
+               gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp,
+                                                         (const u8 *)&tmp,
                                                          &ctrblk);
                memcpy(walk.dst.virt.addr, &tmp, nbytes);
                le128_to_be128((be128 *)walk.iv, &ctrblk);
@@ -240,7 +244,8 @@ static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 
                if (nbytes >= func_bytes) {
                        do {
-                               gctx->funcs[i].fn_u.xts(ctx, dst, src,
+                               gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst,
+                                                       (const u8 *)src,
                                                        walk->iv);
 
                                src += num_blocks;
@@ -354,8 +359,8 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 }
 EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
 
-void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
-                              common_glue_func_t fn)
+void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src,
+                              le128 *iv, common_glue_func_t fn)
 {
        le128 ivblk = *iv;
 
@@ -363,13 +368,13 @@ void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
        gf128mul_x_ble(iv, &ivblk);
 
        /* CC <- T xor C */
-       u128_xor(dst, src, (u128 *)&ivblk);
+       u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk);
 
        /* PP <- D(Key2,CC) */
-       fn(ctx, (u8 *)dst, (u8 *)dst);
+       fn(ctx, dst, dst);
 
        /* P <- T xor PP */
-       u128_xor(dst, dst, (u128 *)&ivblk);
+       u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk);
 }
 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
 
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 13fd8d3d2da0..f973ace44ad3 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -19,18 +19,16 @@
 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
 
 /* 16-way AVX2 parallel cipher functions */
-asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
-                                     const u8 *src);
-asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
-                                     const u8 *src);
-asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
+asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
 
-asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
+asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
                                  le128 *iv);
-asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
-                                     const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
-                                     const u8 *src, le128 *iv);
+asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+                                     le128 *iv);
+asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+                                     le128 *iv);
 
 static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
                                   const u8 *key, unsigned int keylen)
@@ -44,13 +42,13 @@ static const struct common_glue_ctx serpent_enc = {
 
        .funcs = { {
                .num_blocks = 16,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
+               .fn_u = { .ecb = serpent_ecb_enc_16way }
        }, {
                .num_blocks = 8,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
+               .fn_u = { .ecb = serpent_ecb_enc_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+               .fn_u = { .ecb = __serpent_encrypt }
        } }
 };
 
@@ -60,13 +58,13 @@ static const struct common_glue_ctx serpent_ctr = {
 
        .funcs = { {
                .num_blocks = 16,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
+               .fn_u = { .ctr = serpent_ctr_16way }
        },  {
                .num_blocks = 8,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
+               .fn_u = { .ctr = serpent_ctr_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
+               .fn_u = { .ctr = __serpent_crypt_ctr }
        } }
 };
 
@@ -76,13 +74,13 @@ static const struct common_glue_ctx serpent_enc_xts = {
 
        .funcs = { {
                .num_blocks = 16,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
+               .fn_u = { .xts = serpent_xts_enc_16way }
        }, {
                .num_blocks = 8,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
+               .fn_u = { .xts = serpent_xts_enc_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
+               .fn_u = { .xts = serpent_xts_enc }
        } }
 };
 
@@ -92,13 +90,13 @@ static const struct common_glue_ctx serpent_dec = {
 
        .funcs = { {
                .num_blocks = 16,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
+               .fn_u = { .ecb = serpent_ecb_dec_16way }
        }, {
                .num_blocks = 8,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
+               .fn_u = { .ecb = serpent_ecb_dec_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+               .fn_u = { .ecb = __serpent_decrypt }
        } }
 };
 
@@ -108,13 +106,13 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
        .funcs = { {
                .num_blocks = 16,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
+               .fn_u = { .cbc = serpent_cbc_dec_16way }
        }, {
                .num_blocks = 8,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
+               .fn_u = { .cbc = serpent_cbc_dec_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+               .fn_u = { .cbc = __serpent_decrypt }
        } }
 };
 
@@ -124,13 +122,13 @@ static const struct common_glue_ctx serpent_dec_xts = {
 
        .funcs = { {
                .num_blocks = 16,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
+               .fn_u = { .xts = serpent_xts_dec_16way }
        }, {
                .num_blocks = 8,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
+               .fn_u = { .xts = serpent_xts_dec_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
+               .fn_u = { .xts = serpent_xts_dec }
        } }
 };
 
@@ -146,8 +144,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -166,8 +163,8 @@ static int xts_encrypt(struct skcipher_request *req)
        struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        return glue_xts_req_128bit(&serpent_enc_xts, req,
-                                  XTS_TWEAK_CAST(__serpent_encrypt),
-                                  &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+                                  __serpent_encrypt, &ctx->tweak_ctx,
+                                  &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -176,8 +173,8 @@ static int xts_decrypt(struct skcipher_request *req)
        struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        return glue_xts_req_128bit(&serpent_dec_xts, req,
-                                  XTS_TWEAK_CAST(__serpent_encrypt),
-                                  &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+                                  __serpent_encrypt, &ctx->tweak_ctx,
+                                  &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg serpent_algs[] = {
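
The recurring change in these crypto hunks is mechanical: each cipher entry point now uses the exact prototype expected by the glue-helper function-pointer types (const void *ctx, u8 *dst, const u8 *src, plus le128 *iv for CTR/XTS), so the GLUE_*_FUNC_CAST wrappers can be dropped and the compiler type-checks every table entry, which matters for indirect-call checking (for example under Clang's CFI). A minimal sketch of the idea, illustrative only and not part of the patch:

	/* common_glue_func_t comes from glue_helper.h (see the hunk further
	 * down); __serpent_encrypt now matches it exactly. */
	typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);

	void __serpent_encrypt(const void *ctx, u8 *dst, const u8 *src);

	/* No GLUE_FUNC_CAST(): a mismatched prototype is now a build error. */
	static const common_glue_func_t serpent_ecb_one = __serpent_encrypt;
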
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7d3dca38a5a2..7806d1cbe854 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -20,33 +20,35 @@
 #include <asm/crypto/serpent-avx.h>
 
 /* 8-way parallel cipher functions */
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src);
 EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);
 
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src);
 EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);
 
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src);
 EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
 
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
-                                    const u8 *src, le128 *iv);
+asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
+                                    le128 *iv);
 EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
 
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src, le128 *iv);
 EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
 
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src, le128 *iv);
 EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
 
-void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblk;
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        le128_to_be128(&ctrblk, iv);
        le128_inc(iv);
@@ -56,17 +58,15 @@ void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
 
-void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(__serpent_encrypt));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt);
 }
 EXPORT_SYMBOL_GPL(serpent_xts_enc);
 
-void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(__serpent_decrypt));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt);
 }
 EXPORT_SYMBOL_GPL(serpent_xts_dec);
 
@@ -102,10 +102,10 @@ static const struct common_glue_ctx serpent_enc = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
+               .fn_u = { .ecb = serpent_ecb_enc_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+               .fn_u = { .ecb = __serpent_encrypt }
        } }
 };
 
@@ -115,10 +115,10 @@ static const struct common_glue_ctx serpent_ctr = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
+               .fn_u = { .ctr = serpent_ctr_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
+               .fn_u = { .ctr = __serpent_crypt_ctr }
        } }
 };
 
@@ -128,10 +128,10 @@ static const struct common_glue_ctx serpent_enc_xts = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
+               .fn_u = { .xts = serpent_xts_enc_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
+               .fn_u = { .xts = serpent_xts_enc }
        } }
 };
 
@@ -141,10 +141,10 @@ static const struct common_glue_ctx serpent_dec = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
+               .fn_u = { .ecb = serpent_ecb_dec_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+               .fn_u = { .ecb = __serpent_decrypt }
        } }
 };
 
@@ -154,10 +154,10 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
+               .fn_u = { .cbc = serpent_cbc_dec_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+               .fn_u = { .cbc = __serpent_decrypt }
        } }
 };
 
@@ -167,10 +167,10 @@ static const struct common_glue_ctx serpent_dec_xts = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
+               .fn_u = { .xts = serpent_xts_dec_8way_avx }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
+               .fn_u = { .xts = serpent_xts_dec }
        } }
 };
 
@@ -186,8 +186,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -206,8 +205,8 @@ static int xts_encrypt(struct skcipher_request *req)
        struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        return glue_xts_req_128bit(&serpent_enc_xts, req,
-                                  XTS_TWEAK_CAST(__serpent_encrypt),
-                                  &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+                                  __serpent_encrypt, &ctx->tweak_ctx,
+                                  &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -216,8 +215,8 @@ static int xts_decrypt(struct skcipher_request *req)
        struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        return glue_xts_req_128bit(&serpent_dec_xts, req,
-                                  XTS_TWEAK_CAST(__serpent_encrypt),
-                                  &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+                                  __serpent_encrypt, &ctx->tweak_ctx,
+                                  &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg serpent_algs[] = {
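
All of the one-block CTR helpers touched here (serpent, twofish, camellia) share the same shape once they take u8 pointers: cast back to u128, build the big-endian counter block, bump the little-endian IV, encrypt the counter and XOR it into the data. Roughly, as a sketch using the helpers from <crypto/b128ops.h> and the serpent single-block cipher (not copied verbatim from the patch):

	static void ctr_one_block(const void *ctx, u8 *d, const u8 *s, le128 *iv)
	{
		be128 ctrblk;
		u128 *dst = (u128 *)d;
		const u128 *src = (const u128 *)s;

		le128_to_be128(&ctrblk, iv);	/* counter block fed to the cipher */
		le128_inc(iv);			/* advance the IV for the next block */

		__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		u128_xor(dst, src, (u128 *)&ctrblk);	/* keystream XOR plaintext */
	}
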
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 5fdf1931d069..4fed8d26b91a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -31,9 +31,11 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
        return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
 }
 
-static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s)
 {
        u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
        unsigned int j;
 
        for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
@@ -45,9 +47,11 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
                u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
 }
 
-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblk;
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        le128_to_be128(&ctrblk, iv);
        le128_inc(iv);
@@ -56,10 +60,12 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
        u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
-static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s,
                                   le128 *iv)
 {
        be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
        unsigned int i;
 
        for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
@@ -79,10 +85,10 @@ static const struct common_glue_ctx serpent_enc = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+               .fn_u = { .ecb = serpent_enc_blk_xway }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+               .fn_u = { .ecb = __serpent_encrypt }
        } }
 };
 
@@ -92,10 +98,10 @@ static const struct common_glue_ctx serpent_ctr = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+               .fn_u = { .ctr = serpent_crypt_ctr_xway }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+               .fn_u = { .ctr = serpent_crypt_ctr }
        } }
 };
 
@@ -105,10 +111,10 @@ static const struct common_glue_ctx serpent_dec = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+               .fn_u = { .ecb = serpent_dec_blk_xway }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+               .fn_u = { .ecb = __serpent_decrypt }
        } }
 };
 
@@ -118,10 +124,10 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
        .funcs = { {
                .num_blocks = SERPENT_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+               .fn_u = { .cbc = serpent_decrypt_cbc_xway }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+               .fn_u = { .cbc = __serpent_decrypt }
        } }
 };
 
@@ -137,7 +143,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+       return glue_cbc_encrypt_req_128bit(__serpent_encrypt,
                                           req);
 }
 
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index d561c821788b..3b36e97ec7ab 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -22,20 +22,17 @@
 #define TWOFISH_PARALLEL_BLOCKS 8
 
 /* 8-way parallel cipher functions */
-asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
-                                    const u8 *src);
-asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
-                                    const u8 *src);
+asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
 
-asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
-                                    const u8 *src);
-asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
-                                const u8 *src, le128 *iv);
+asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
+                                le128 *iv);
 
-asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
-                                    const u8 *src, le128 *iv);
-asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
-                                    const u8 *src, le128 *iv);
+asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
+                                    le128 *iv);
+asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
+                                    le128 *iv);
 
 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
                                   const u8 *key, unsigned int keylen)
@@ -43,22 +40,19 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
        return twofish_setkey(&tfm->base, key, keylen);
 }
 
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-                                       const u8 *src)
+static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
 {
        __twofish_enc_blk_3way(ctx, dst, src, false);
 }
 
-static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(twofish_enc_blk));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk);
 }
 
-static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
 {
-       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
-                                 GLUE_FUNC_CAST(twofish_dec_blk));
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk);
 }
 
 struct twofish_xts_ctx {
@@ -93,13 +87,13 @@ static const struct common_glue_ctx twofish_enc = {
 
        .funcs = { {
                .num_blocks = TWOFISH_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
+               .fn_u = { .ecb = twofish_ecb_enc_8way }
        }, {
                .num_blocks = 3,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+               .fn_u = { .ecb = twofish_enc_blk_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+               .fn_u = { .ecb = twofish_enc_blk }
        } }
 };
 
@@ -109,13 +103,13 @@ static const struct common_glue_ctx twofish_ctr = {
 
        .funcs = { {
                .num_blocks = TWOFISH_PARALLEL_BLOCKS,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
+               .fn_u = { .ctr = twofish_ctr_8way }
        }, {
                .num_blocks = 3,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+               .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
+               .fn_u = { .ctr = twofish_enc_blk_ctr }
        } }
 };
 
@@ -125,10 +119,10 @@ static const struct common_glue_ctx twofish_enc_xts = {
 
        .funcs = { {
                .num_blocks = TWOFISH_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
+               .fn_u = { .xts = twofish_xts_enc_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
+               .fn_u = { .xts = twofish_xts_enc }
        } }
 };
 
@@ -138,13 +132,13 @@ static const struct common_glue_ctx twofish_dec = {
 
        .funcs = { {
                .num_blocks = TWOFISH_PARALLEL_BLOCKS,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
+               .fn_u = { .ecb = twofish_ecb_dec_8way }
        }, {
                .num_blocks = 3,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+               .fn_u = { .ecb = twofish_dec_blk_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+               .fn_u = { .ecb = twofish_dec_blk }
        } }
 };
 
@@ -154,13 +148,13 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 
        .funcs = { {
                .num_blocks = TWOFISH_PARALLEL_BLOCKS,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
+               .fn_u = { .cbc = twofish_cbc_dec_8way }
        }, {
                .num_blocks = 3,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+               .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+               .fn_u = { .cbc = twofish_dec_blk }
        } }
 };
 
@@ -170,10 +164,10 @@ static const struct common_glue_ctx twofish_dec_xts = {
 
        .funcs = { {
                .num_blocks = TWOFISH_PARALLEL_BLOCKS,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
+               .fn_u = { .xts = twofish_xts_dec_8way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
+               .fn_u = { .xts = twofish_xts_dec }
        } }
 };
 
@@ -189,8 +183,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
@@ -208,8 +201,7 @@ static int xts_encrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&twofish_enc_xts, req,
-                                  XTS_TWEAK_CAST(twofish_enc_blk),
+       return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
@@ -218,8 +210,7 @@ static int xts_decrypt(struct skcipher_request *req)
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_req_128bit(&twofish_dec_xts, req,
-                                  XTS_TWEAK_CAST(twofish_enc_blk),
+       return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk,
                                   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 1dc9e29f221e..768af6075479 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,21 +25,22 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
        return twofish_setkey(&tfm->base, key, keylen);
 }
 
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-                                       const u8 *src)
+static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
 {
        __twofish_enc_blk_3way(ctx, dst, src, false);
 }
 
-static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
+static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
                                            const u8 *src)
 {
        __twofish_enc_blk_3way(ctx, dst, src, true);
 }
 
-void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
+void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
 {
        u128 ivs[2];
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        ivs[0] = src[0];
        ivs[1] = src[1];
@@ -51,9 +52,11 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
 }
 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
 
-void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblk;
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        if (dst != src)
                *dst = *src;
@@ -66,10 +69,11 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
 
-void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
-                             le128 *iv)
+void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
 {
        be128 ctrblks[3];
+       u128 *dst = (u128 *)d;
+       const u128 *src = (const u128 *)s;
 
        if (dst != src) {
                dst[0] = src[0];
@@ -94,10 +98,10 @@ static const struct common_glue_ctx twofish_enc = {
 
        .funcs = { {
                .num_blocks = 3,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+               .fn_u = { .ecb = twofish_enc_blk_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+               .fn_u = { .ecb = twofish_enc_blk }
        } }
 };
 
@@ -107,10 +111,10 @@ static const struct common_glue_ctx twofish_ctr = {
 
        .funcs = { {
                .num_blocks = 3,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+               .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
+               .fn_u = { .ctr = twofish_enc_blk_ctr }
        } }
 };
 
@@ -120,10 +124,10 @@ static const struct common_glue_ctx twofish_dec = {
 
        .funcs = { {
                .num_blocks = 3,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+               .fn_u = { .ecb = twofish_dec_blk_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+               .fn_u = { .ecb = twofish_dec_blk }
        } }
 };
 
@@ -133,10 +137,10 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 
        .funcs = { {
                .num_blocks = 3,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+               .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
        }, {
                .num_blocks = 1,
-               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+               .fn_u = { .cbc = twofish_dec_blk }
        } }
 };
 
@@ -152,8 +156,7 @@ static int ecb_decrypt(struct skcipher_request *req)
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-       return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
-                                          req);
+       return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
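
One non-mechanical detail in the twofish_glue_3way.c hunks above: the twofish_ctr table previously stored twofish_enc_blk_ctr_3way and twofish_enc_blk_ctr through the .ecb union member, which only worked because GLUE_CTR_FUNC_CAST erased the type; they now land in the correct .ctr member, and with distinct prototypes the wrong member would no longer compile. The relevant union, simplified from glue_helper.h:

	union {
		common_glue_func_t ecb;		/* (const void *ctx, u8 *dst, const u8 *src) */
		common_glue_cbc_func_t cbc;	/* same shape, used for CBC decrypt */
		common_glue_ctr_func_t ctr;	/* (..., le128 *iv) - what CTR code must use */
		common_glue_xts_func_t xts;	/* (..., le128 *iv) - XTS variant */
	} fn_u;
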
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index a5d86fc0593f..f1592619dd65 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -32,65 +32,60 @@ extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
                               unsigned int keylen);
 
 /* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-                                  const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
-                                const u8 *src);
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+                                  bool xor);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-                                       const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-                                     const u8 *src);
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+                                       bool xor);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
 
 /* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
-
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src);
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
-                                  const u8 *src, le128 *iv);
-
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-                                      const u8 *src, le128 *iv);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-                                   const u8 *src)
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+                                  le128 *iv);
+
+asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
+asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
+
+static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
 {
        __camellia_enc_blk(ctx, dst, src, false);
 }
 
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
-                                       const u8 *src)
+static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
 {
        __camellia_enc_blk(ctx, dst, src, true);
 }
 
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
                                         const u8 *src)
 {
        __camellia_enc_blk_2way(ctx, dst, src, false);
 }
 
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
                                             const u8 *src)
 {
        __camellia_enc_blk_2way(ctx, dst, src, true);
 }
 
 /* glue helpers */
-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
+extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
                               le128 *iv);
-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
                                    le128 *iv);
 
-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
+                            le128 *iv);
+extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
+                            le128 *iv);
 
 #endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 8d4a8e1226ee..777c0f63418c 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -11,18 +11,13 @@
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src,
                                       le128 *iv);
-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src,
                                       le128 *iv);
 
-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
-
 struct common_glue_func_entry {
        unsigned int num_blocks; /* number of blocks that @fn will process */
        union {
@@ -116,7 +111,8 @@ extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
                               common_glue_func_t tweak_fn, void *tweak_ctx,
                               void *crypt_ctx, bool decrypt);
 
-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
-                                     le128 *iv, common_glue_func_t fn);
+extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst,
+                                     const u8 *src, le128 *iv,
+                                     common_glue_func_t fn);
 
 #endif /* _CRYPTO_GLUE_HELPER_H */
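
With the typedefs above taking const void * contexts and u8 * buffers, the glue walkers can dispatch through fn_u without intermediate casts. A simplified sketch of how an ECB-style table is consumed (the real glue_helper.c also handles FPU begin/end and the skcipher walk; this only shows the entry selection and the typed indirect call):

	static void ecb_walk_example(const struct common_glue_ctx *gctx,
				     void *ctx, u8 *dst, const u8 *src,
				     unsigned int nblocks)
	{
		unsigned int i = 0;

		while (nblocks) {
			/* pick the widest routine that still fits; these tables
			 * always end with a num_blocks == 1 entry */
			while (gctx->funcs[i].num_blocks > nblocks)
				i++;

			gctx->funcs[i].fn_u.ecb(ctx, dst, src);	/* no cast needed */

			dst += gctx->funcs[i].num_blocks * 16;	/* 16-byte blocks */
			src += gctx->funcs[i].num_blocks * 16;
			nblocks -= gctx->funcs[i].num_blocks;
		}
	}
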
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index db7c9cc32234..251c2c89d7cf 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -15,26 +15,26 @@ struct serpent_xts_ctx {
        struct serpent_ctx crypt_ctx;
 };
 
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src);
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src);
 
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src);
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
-                                    const u8 *src, le128 *iv);
+asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
+                                    le128 *iv);
 
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
                                         const u8 *src, le128 *iv);
 
-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
                                le128 *iv);
 
-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
+extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
 
 extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
                              unsigned int keylen);
diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h
index 1a345e8a7496..860ca248914b 100644
--- a/arch/x86/include/asm/crypto/serpent-sse2.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -9,25 +9,23 @@
 
 #define SERPENT_PARALLEL_BLOCKS 4
 
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
                                       const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
                                     const u8 *src);
 
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-                                       const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
        __serpent_enc_blk_4way(ctx, dst, src, false);
 }
 
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
-                                           const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+                                           u8 *dst, const u8 *src)
 {
        __serpent_enc_blk_4way(ctx, dst, src, true);
 }
 
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-                                       const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
        serpent_dec_blk_4way(ctx, dst, src);
 }
@@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
 
 #define SERPENT_PARALLEL_BLOCKS 8
 
-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
                                       const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
                                     const u8 *src);
 
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-                                  const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
        __serpent_enc_blk_8way(ctx, dst, src, false);
 }
 
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
-                                      const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+                                           u8 *dst, const u8 *src)
 {
        __serpent_enc_blk_8way(ctx, dst, src, true);
 }
 
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-                                  const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
        serpent_dec_blk_8way(ctx, dst, src);
 }
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index f618bf272b90..2c377a8042e1 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -7,22 +7,19 @@
 #include <crypto/b128ops.h>
 
 /* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
-                               const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
-                               const u8 *src);
+asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-                                      const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-                                    const u8 *src);
+asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
+                                      bool xor);
+asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
 
 /* helpers from twofish_x86_64-3way module */
-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
+extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src,
                                le128 *iv);
-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src,
                                     le128 *iv);
 
 #endif /* ASM_X86_TWOFISH_H */
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index a8248f8e2777..85328522c5ca 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -154,7 +154,7 @@ int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 EXPORT_SYMBOL_GPL(cast6_setkey);
 
 /*forward quad round*/
-static inline void Q(u32 *block, u8 *Kr, u32 *Km)
+static inline void Q(u32 *block, const u8 *Kr, const u32 *Km)
 {
        u32 I;
        block[2] ^= F1(block[3], Kr[0], Km[0]);
@@ -164,7 +164,7 @@ static inline void Q(u32 *block, u8 *Kr, u32 *Km)
 }
 
 /*reverse quad round*/
-static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
+static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km)
 {
        u32 I;
        block[3] ^= F1(block[0], Kr[3], Km[3]);
@@ -173,13 +173,14 @@ static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
        block[2] ^= F1(block[3], Kr[0], Km[0]);
 }
 
-void __cast6_encrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
+void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
 {
+       const struct cast6_ctx *c = ctx;
        const __be32 *src = (const __be32 *)inbuf;
        __be32 *dst = (__be32 *)outbuf;
        u32 block[4];
-       u32 *Km;
-       u8 *Kr;
+       const u32 *Km;
+       const u8 *Kr;
 
        block[0] = be32_to_cpu(src[0]);
        block[1] = be32_to_cpu(src[1]);
@@ -211,13 +212,14 @@ static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
        __cast6_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
 }
 
-void __cast6_decrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
+void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
 {
+       const struct cast6_ctx *c = ctx;
        const __be32 *src = (const __be32 *)inbuf;
        __be32 *dst = (__be32 *)outbuf;
        u32 block[4];
-       u32 *Km;
-       u8 *Kr;
+       const u32 *Km;
+       const u8 *Kr;
 
        block[0] = be32_to_cpu(src[0]);
        block[1] = be32_to_cpu(src[1]);
diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index 56fa665a4f01..492c1d0bfe06 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -449,8 +449,9 @@ int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 }
 EXPORT_SYMBOL_GPL(serpent_setkey);
 
-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
+void __serpent_encrypt(const void *c, u8 *dst, const u8 *src)
 {
+       const struct serpent_ctx *ctx = c;
        const u32 *k = ctx->expkey;
        const __le32 *s = (const __le32 *)src;
        __le32  *d = (__le32 *)dst;
@@ -514,8 +515,9 @@ static void serpent_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
        __serpent_encrypt(ctx, dst, src);
 }
 
-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
+void __serpent_decrypt(const void *c, u8 *dst, const u8 *src)
 {
+       const struct serpent_ctx *ctx = c;
        const u32 *k = ctx->expkey;
        const __le32 *s = (const __le32 *)src;
        __le32  *d = (__le32 *)dst;
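
The generic C implementations (cast6, serpent) adopt the new prototype by accepting const void * and recovering the typed context in a local, so the same function stays usable both as a plain cipher and as a glue-table entry. The pattern in isolation, with a made-up example_ctx standing in for the real context struct and a trivial body standing in for the real rounds:

	struct example_ctx {
		u32 expkey[4];
	};

	static void example_encrypt(const void *c, u8 *dst, const u8 *src)
	{
		const struct example_ctx *ctx = c;	/* recover the real type once */
		const u8 *k = (const u8 *)ctx->expkey;
		int i;

		for (i = 0; i < 16; i++)		/* stand-in for the real rounds */
			dst[i] = src[i] ^ k[i];
	}
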
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 59aa5e64acb0..21a562c2b1f5 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -172,21 +172,176 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
        int pipe;
 
        if (IS_BROXTON(dev_priv)) {
+               enum transcoder trans;
+               enum port port;
+
+               /* Clear PIPE, DDI, PHY, HPD before setting new */
                vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~(BXT_DE_PORT_HP_DDIA |
                        BXT_DE_PORT_HP_DDIB |
                        BXT_DE_PORT_HP_DDIC);
 
+               for_each_pipe(dev_priv, pipe) {
+                       vgpu_vreg_t(vgpu, PIPECONF(pipe)) &=
+                               ~(PIPECONF_ENABLE | I965_PIPECONF_ACTIVE);
+                       vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE;
+                       vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE;
+                       vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~MCURSOR_MODE;
+                       vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= MCURSOR_MODE_DISABLE;
+               }
+
+               for (trans = TRANSCODER_A; trans <= TRANSCODER_EDP; trans++) {
+                       vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(trans)) &=
+                               ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+                                 TRANS_DDI_PORT_MASK | TRANS_DDI_FUNC_ENABLE);
+               }
+               vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+                       ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+                         TRANS_DDI_PORT_MASK);
+
+               for (port = PORT_A; port <= PORT_C; port++) {
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(port)) &=
+                               ~BXT_PHY_LANE_ENABLED;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(port)) |=
+                               (BXT_PHY_CMNLANE_POWERDOWN_ACK |
+                                BXT_PHY_LANE_POWERDOWN_ACK);
+
+                       vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(port)) &=
+                               ~(PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+                                 PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+                                 PORT_PLL_ENABLE);
+
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) &=
+                               ~(DDI_INIT_DISPLAY_DETECTED |
+                                 DDI_BUF_CTL_ENABLE);
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE;
+               }
+               vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+                       ~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK);
+               vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+                       ~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK);
+               vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+                       ~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK);
+               /* No hpd_invert set in vgpu vbt, need to clear invert mask */
+               vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK;
+               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK;
+
+               vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1));
+               vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &=
+                       ~PHY_POWER_GOOD;
+               vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) &=
+                       ~PHY_POWER_GOOD;
+               vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) &= ~BIT(30);
+               vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) &= ~BIT(30);
+
+               vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED;
+               vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIC_DETECTED;
+
+               /*
+                * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+                *  tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+                *   TRANSCODER_A can be enabled. PORT_x depends on the input of
+                *   setup_virtual_dp_monitor.
+                */
+               vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
+               vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= I965_PIPECONF_ACTIVE;
+
+               /*
+                * Golden M/N are calculated based on:
+                *   24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+                *   DP link clk 1620 MHz and non-constant_n.
+                * TODO: calculate DP link symbol clk and stream clk m/n.
+                */
+               vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
+               vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+               vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+               vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+               vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
+
+               /* Enable per-DDI/PORT vreg */
                if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+                       vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(1);
+                       vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) |=
+                               PHY_POWER_GOOD;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) |=
+                               BIT(30);
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) |=
+                               BXT_PHY_LANE_ENABLED;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) &=
+                               ~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+                                 BXT_PHY_LANE_POWERDOWN_ACK);
+                       vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_A)) |=
+                               (PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+                                PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+                                PORT_PLL_ENABLE);
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |=
+                               (DDI_BUF_CTL_ENABLE | DDI_INIT_DISPLAY_DETECTED);
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) &=
+                               ~DDI_BUF_IS_IDLE;
+                       vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |=
+                               (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+                                TRANS_DDI_FUNC_ENABLE);
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+                               PORTA_HOTPLUG_ENABLE;
                        vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
                                BXT_DE_PORT_HP_DDIA;
                }
 
                if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+                       vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+                       vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(0);
+                       vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
+                               PHY_POWER_GOOD;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) |=
+                               BIT(30);
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) |=
+                               BXT_PHY_LANE_ENABLED;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) &=
+                               ~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+                                 BXT_PHY_LANE_POWERDOWN_ACK);
+                       vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_B)) |=
+                               (PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+                                PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+                                PORT_PLL_ENABLE);
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |=
+                               DDI_BUF_CTL_ENABLE;
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &=
+                               ~DDI_BUF_IS_IDLE;
+                       vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+                               (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+                                (PORT_B << TRANS_DDI_PORT_SHIFT) |
+                                TRANS_DDI_FUNC_ENABLE);
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+                               PORTB_HOTPLUG_ENABLE;
                        vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
                                BXT_DE_PORT_HP_DDIB;
                }
 
                if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+                       vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
+                       vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(0);
+                       vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
+                               PHY_POWER_GOOD;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) |=
+                               BIT(30);
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) |=
+                               BXT_PHY_LANE_ENABLED;
+                       vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) &=
+                               ~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+                                 BXT_PHY_LANE_POWERDOWN_ACK);
+                       vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_C)) |=
+                               (PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+                                PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+                                PORT_PLL_ENABLE);
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |=
+                               DDI_BUF_CTL_ENABLE;
+                       vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &=
+                               ~DDI_BUF_IS_IDLE;
+                       vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+                               (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+                                (PORT_B << TRANS_DDI_PORT_SHIFT) |
+                                TRANS_DDI_FUNC_ENABLE);
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+                               PORTC_HOTPLUG_ENABLE;
                        vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
                                BXT_DE_PORT_HP_DDIC;
                }
@@ -511,6 +666,63 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
                vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
                                PORTD_HOTPLUG_STATUS_MASK;
                intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG);
+       } else if (IS_BROXTON(dev_priv)) {
+               if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+                       if (connected) {
+                               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+                                       BXT_DE_PORT_HP_DDIA;
+                       } else {
+                               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+                                       ~BXT_DE_PORT_HP_DDIA;
+                       }
+                       vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+                               BXT_DE_PORT_HP_DDIA;
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+                               ~PORTA_HOTPLUG_STATUS_MASK;
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+                               PORTA_HOTPLUG_LONG_DETECT;
+                       intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG);
+               }
+               if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+                       if (connected) {
+                               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+                                       BXT_DE_PORT_HP_DDIB;
+                               vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+                                       SFUSE_STRAP_DDIB_DETECTED;
+                       } else {
+                               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+                                       ~BXT_DE_PORT_HP_DDIB;
+                               vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+                                       ~SFUSE_STRAP_DDIB_DETECTED;
+                       }
+                       vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+                               BXT_DE_PORT_HP_DDIB;
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+                               ~PORTB_HOTPLUG_STATUS_MASK;
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+                               PORTB_HOTPLUG_LONG_DETECT;
+                       intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG);
+               }
+               if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+                       if (connected) {
+                               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+                                       BXT_DE_PORT_HP_DDIC;
+                               vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+                                       SFUSE_STRAP_DDIC_DETECTED;
+                       } else {
+                               vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+                                       ~BXT_DE_PORT_HP_DDIC;
+                               vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+                                       ~SFUSE_STRAP_DDIC_DETECTED;
+                       }
+                       vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+                               BXT_DE_PORT_HP_DDIC;
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+                               ~PORTC_HOTPLUG_STATUS_MASK;
+                       vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+                               PORTC_HOTPLUG_LONG_DETECT;
+                       intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG);
+               }
        }
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 689b07bc91c4..245c20d36f1b 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1632,6 +1632,34 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
        return 0;
 }
 
+/**
+ * FixMe:
+ * If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did:
+ * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.)
+ * Due to the missing flush of bb filled by VM vCPU, host GPU hangs on executing
+ * these MI_BATCH_BUFFER.
+ * Temporarily workaround this by setting SNOOP bit for PAT3 used by PPGTT
+ * PML4 PTE: PAT(0) PCD(1) PWT(1).
+ * The performance is still expected to be low, will need further improvement.
+ */
+static int bxt_ppat_low_write(struct intel_vgpu *vgpu, unsigned int offset,
+                             void *p_data, unsigned int bytes)
+{
+       u64 pat =
+               GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+               GEN8_PPAT(1, 0) |
+               GEN8_PPAT(2, 0) |
+               GEN8_PPAT(3, CHV_PPAT_SNOOP) |
+               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+               GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+       vgpu_vreg(vgpu, offset) = lower_32_bits(pat);
+
+       return 0;
+}
+
 static int mmio_read_from_hw(struct intel_vgpu *vgpu,
                unsigned int offset, void *p_data, unsigned int bytes)
 {
@@ -2778,7 +2806,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 
        MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
 
-       MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS);
+       MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS & ~D_BXT);
        MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS);
 
        MMIO_D(GAMTARBMODE, D_BDW_PLUS);
@@ -3104,7 +3132,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
                 NULL, NULL);
 
        MMIO_D(GAMT_CHKN_BIT_REG, D_KBL | D_CFL);
-       MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS);
+       MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS & ~D_BXT);
 
        return 0;
 }
@@ -3278,9 +3306,17 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
        MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT);
        MMIO_D(GEN6_GFXPAUSE, D_BXT);
        MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(GEN8_L3CNTLREG, D_BXT, F_CMD_ACCESS, NULL, NULL);
+       MMIO_DFH(_MMIO(0x20D8), D_BXT, F_CMD_ACCESS, NULL, NULL);
+       MMIO_F(HSW_CS_GPR(0), 0x40, F_CMD_ACCESS, 0, 0, D_BXT, NULL, NULL);
+       MMIO_F(_MMIO(0x12600), 0x40, F_CMD_ACCESS, 0, 0, D_BXT, NULL, NULL);
+       MMIO_F(BCS_GPR(0), 0x40, F_CMD_ACCESS, 0, 0, D_BXT, NULL, NULL);
+       MMIO_F(_MMIO(0x1a600), 0x40, F_CMD_ACCESS, 0, 0, D_BXT, NULL, NULL);
 
        MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);
 
+       MMIO_DH(GEN8_PRIVATE_PAT_LO, D_BXT, NULL, bxt_ppat_low_write);
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index a55178884d67..e0e7adc545a5 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -271,6 +271,11 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr)
                        vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) |=
                                    BXT_PHY_CMNLANE_POWERDOWN_ACK |
                                    BXT_PHY_LANE_POWERDOWN_ACK;
+                       vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |=
+                               SKL_FUSE_DOWNLOAD_STATUS |
+                               SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
+                               SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
+                               SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
                }
        } else {
 #define GVT_GEN8_MMIO_RESET_OFFSET             (0x44200)
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 32e57635709a..4deb7fec5eb5 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -432,8 +432,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
        if (ret)
                goto out_clean_sched_policy;
 
-       /*TODO: add more platforms support */
-       if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv))
+       if (IS_BROADWELL(gvt->dev_priv) || IS_BROXTON(gvt->dev_priv))
+               ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B);
+       else
                ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
        if (ret)
                goto out_clean_sched_policy;
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 0b1223f360d9..f35757b63ea7 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -514,6 +514,19 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 }
 EXPORT_SYMBOL(b53_imp_vlan_setup);
 
+static void b53_port_set_learning(struct b53_device *dev, int port,
+                                 bool learning)
+{
+       u16 reg;
+
+       b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, &reg);
+       if (learning)
+               reg &= ~BIT(port);
+       else
+               reg |= BIT(port);
+       b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg);
+}
+
 int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
 {
        struct b53_device *dev = ds->priv;
@@ -527,6 +540,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
        cpu_port = ds->ports[port].cpu_dp->index;
 
        b53_br_egress_floods(ds, port, true, true);
+       b53_port_set_learning(dev, port, false);
 
        if (dev->ops->irq_enable)
                ret = dev->ops->irq_enable(dev, port);
@@ -645,6 +659,7 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port)
        b53_brcm_hdr_setup(dev->ds, port);
 
        b53_br_egress_floods(dev->ds, port, true, true);
+       b53_port_set_learning(dev, port, false);
 }
 
 static void b53_enable_mib(struct b53_device *dev)
@@ -1704,6 +1719,8 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br)
        b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan);
        dev->ports[port].vlan_ctl_mask = pvlan;
 
+       b53_port_set_learning(dev, port, true);
+
        return 0;
 }
 EXPORT_SYMBOL(b53_br_join);
@@ -1751,6 +1768,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br)
                vl->untag |= BIT(port) | BIT(cpu_port);
                b53_set_vlan_entry(dev, pvid, vl);
        }
+       b53_port_set_learning(dev, port, false);
 }
 EXPORT_SYMBOL(b53_br_leave);
 
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index c90985c294a2..b2c539a42154 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -115,6 +115,7 @@
 #define B53_UC_FLOOD_MASK              0x32
 #define B53_MC_FLOOD_MASK              0x34
 #define B53_IPMC_FLOOD_MASK            0x36
+#define B53_DIS_LEARNING               0x3c
 
 /*
  * Override Ports 0-7 State on devices with xMII interfaces (8 bit)
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6dd29bad1609..ca425c15953b 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -172,11 +172,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
        reg &= ~P_TXQ_PSM_VDD(port);
        core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
 
-       /* Enable learning */
-       reg = core_readl(priv, CORE_DIS_LEARN);
-       reg &= ~BIT(port);
-       core_writel(priv, reg, CORE_DIS_LEARN);
-
        /* Enable Broadcom tags for that port if requested */
        if (priv->brcm_tag_mask & BIT(port))
                b53_brcm_hdr_setup(ds, port);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 08ca9441270d..a352c1704042 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2048,8 +2048,17 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
        return flags;
 }
 
-int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
-
+/*
+ * Mark one block group RO, can be called several times for the same block
+ * group.
+ *
+ * @cache:             the destination block group
+ * @do_chunk_alloc:    whether need to do chunk pre-allocation, this is to
+ *                     ensure we still have some free space after marking this
+ *                     block group RO.
+ */
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache,
+                            bool do_chunk_alloc)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_trans_handle *trans;
@@ -2079,25 +2088,29 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
                goto again;
        }
 
-       /*
-        * if we are changing raid levels, try to allocate a corresponding
-        * block group with the new raid level.
-        */
-       alloc_flags = update_block_group_flags(fs_info, cache->flags);
-       if (alloc_flags != cache->flags) {
-               ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+       if (do_chunk_alloc) {
                /*
-                * ENOSPC is allowed here, we may have enough space
-                * already allocated at the new raid level to
-                * carry on
+                * If we are changing raid levels, try to allocate a
+                * corresponding block group with the new raid level.
                 */
-               if (ret == -ENOSPC)
-                       ret = 0;
-               if (ret < 0)
-                       goto out;
+               alloc_flags = update_block_group_flags(fs_info, cache->flags);
+               if (alloc_flags != cache->flags) {
+                       ret = btrfs_chunk_alloc(trans, alloc_flags,
+                                               CHUNK_ALLOC_FORCE);
+                       /*
+                        * ENOSPC is allowed here, we may have enough space
+                        * already allocated at the new raid level to carry on
+                        */
+                       if (ret == -ENOSPC)
+                               ret = 0;
+                       if (ret < 0)
+                               goto out;
+               }
        }
 
-       ret = inc_block_group_ro(cache, 0);
+       ret = inc_block_group_ro(cache, !do_chunk_alloc);
+       if (!do_chunk_alloc)
+               goto unlock_out;
        if (!ret)
                goto out;
        alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
@@ -2112,6 +2125,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
                check_system_chunk(trans, alloc_flags);
                mutex_unlock(&fs_info->chunk_mutex);
        }
+unlock_out:
        mutex_unlock(&fs_info->ro_block_group_mutex);
 
        btrfs_end_transaction(trans);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index c391800388dd..0758e6d52acb 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -205,7 +205,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
                           u64 type, u64 chunk_offset, u64 size);
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
-int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache,
+                            bool do_chunk_alloc);
 void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
 int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05b3e27b21d4..68b5d7c4aa49 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4428,7 +4428,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
        rc->extent_root = extent_root;
        rc->block_group = bg;
 
-       ret = btrfs_inc_block_group_ro(rc->block_group);
+       ret = btrfs_inc_block_group_ro(rc->block_group, true);
        if (ret) {
                err = ret;
                goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 93d7cb56e44b..e5db948daa12 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3560,7 +3560,26 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                 * -> btrfs_scrub_pause()
                 */
                scrub_pause_on(fs_info);
-               ret = btrfs_inc_block_group_ro(cache);
+
+               /*
+                * Don't do chunk preallocation for scrub.
+                *
+                * This is especially important for SYSTEM bgs, or we can hit
+                * -EFBIG from btrfs_finish_chunk_alloc() like:
+                * 1. The only SYSTEM bg is marked RO.
+                *    Since SYSTEM bg is small, that's pretty common.
+                * 2. New SYSTEM bg will be allocated
+                *    Due to regular version will allocate new chunk.
+                * 3. New SYSTEM bg is empty and will get cleaned up
+                *    Before cleanup really happens, it's marked RO again.
+                * 4. Empty SYSTEM bg get scrubbed
+                *    We go back to 2.
+                *
+                * This can easily boost the amount of SYSTEM chunks if cleaner
+                * thread can't be triggered fast enough, and use up all space
+                * of btrfs_super_block::sys_chunk_array
+                */
+               ret = btrfs_inc_block_group_ro(cache, false);
                if (!ret && sctx->is_dev_replace) {
                        /*
                         * If we are doing a device replace wait for any tasks
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e3688312e9f1..43bacf0a6bd3 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -791,6 +791,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
 
 static inline void fuse_make_bad(struct inode *inode)
 {
+       remove_inode_hash(inode);
        set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
 }
 
diff --git a/include/crypto/cast6.h b/include/crypto/cast6.h
index c71f6ef47f0f..4c8d0c72f78d 100644
--- a/include/crypto/cast6.h
+++ b/include/crypto/cast6.h
@@ -19,7 +19,7 @@ int __cast6_setkey(struct cast6_ctx *ctx, const u8 *key,
                   unsigned int keylen, u32 *flags);
 int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
 
-void __cast6_encrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
-void __cast6_decrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
+void __cast6_encrypt(const void *ctx, u8 *dst, const u8 *src);
+void __cast6_decrypt(const void *ctx, u8 *dst, const u8 *src);
 
 #endif
diff --git a/include/crypto/serpent.h b/include/crypto/serpent.h
index 7dd780c5d058..75c7eaa20853 100644
--- a/include/crypto/serpent.h
+++ b/include/crypto/serpent.h
@@ -22,7 +22,7 @@ int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
                     unsigned int keylen);
 int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
 
-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src);
-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src);
+void __serpent_encrypt(const void *ctx, u8 *dst, const u8 *src);
+void __serpent_decrypt(const void *ctx, u8 *dst, const u8 *src);
 
 #endif
diff --git a/include/crypto/xts.h b/include/crypto/xts.h
index 75fd96ff976b..15ae7fdc0478 100644
--- a/include/crypto/xts.h
+++ b/include/crypto/xts.h
@@ -8,8 +8,6 @@
 
 #define XTS_BLOCK_SIZE 16
 
-#define XTS_TWEAK_CAST(x) ((void (*)(void *, u8*, const u8*))(x))
-
 static inline int xts_check_key(struct crypto_tfm *tfm,
                                const u8 *key, unsigned int keylen)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e6a43c0fdee8..ab2a4b7dfca5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4268,10 +4268,14 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 {
        bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
                            (opcode == BPF_SUB && !off_is_neg);
-       u32 off;
+       u32 off, max;
 
        switch (ptr_reg->type) {
        case PTR_TO_STACK:
+               /* Offset 0 is out-of-bounds, but acceptable start for the
+                * left direction, see BPF_REG_FP.
+                */
+               max = MAX_BPF_STACK + mask_to_left;
                /* Indirect variable offset stack access is prohibited in
                 * unprivileged mode so it's not handled here.
                 */
@@ -4279,16 +4283,17 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
                if (mask_to_left)
                        *ptr_limit = MAX_BPF_STACK + off;
                else
-                       *ptr_limit = -off;
-               return 0;
+                       *ptr_limit = -off - 1;
+               return *ptr_limit >= max ? -ERANGE : 0;
        case PTR_TO_MAP_VALUE:
+               max = ptr_reg->map_ptr->value_size;
                if (mask_to_left) {
                        *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
                } else {
                        off = ptr_reg->smin_value + ptr_reg->off;
-                       *ptr_limit = ptr_reg->map_ptr->value_size - off;
+                       *ptr_limit = ptr_reg->map_ptr->value_size - off - 1;
                }
-               return 0;
+               return *ptr_limit >= max ? -ERANGE : 0;
        default:
                return -EINVAL;
        }
@@ -4341,6 +4346,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
        u32 alu_state, alu_limit;
        struct bpf_reg_state tmp;
        bool ret;
+       int err;
 
        if (can_skip_alu_sanitation(env, insn))
                return 0;
@@ -4356,10 +4362,13 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
        alu_state |= ptr_is_dst_reg ?
                     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
 
-       if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
-               return 0;
-       if (update_alu_sanitation_state(aux, alu_state, alu_limit))
-               return -EACCES;
+       err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg);
+       if (err < 0)
+               return err;
+
+       err = update_alu_sanitation_state(aux, alu_state, alu_limit);
+       if (err < 0)
+               return err;
 do_sim:
        /* Simulate and find potential out-of-bounds access under
         * speculative execution from truncation as a result of
@@ -4467,7 +4476,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_ADD:
                ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
                if (ret < 0) {
-                       verbose(env, "R%d tried to add from different maps or paths\n", dst);
+                       verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst);
                        return ret;
                }
                /* We can take a fixed offset as long as it doesn't overflow
@@ -4522,7 +4531,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_SUB:
                ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
                if (ret < 0) {
-                       verbose(env, "R%d tried to sub from different maps or paths\n", dst);
+                       verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst);
                        return ret;
                }
                if (dst_reg == off_reg) {
@@ -9077,7 +9086,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                        off_reg = issrc ? insn->src_reg : insn->dst_reg;
                        if (isneg)
                                *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
-                       *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
+                       *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
                        *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
                        *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
                        *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index d6619edd53e5..edc505e07125 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,6 +13,7 @@
 #define MTK_HDR_LEN            4
 #define MTK_HDR_XMIT_UNTAGGED          0
 #define MTK_HDR_XMIT_TAGGED_TPID_8100  1
+#define MTK_HDR_XMIT_TAGGED_TPID_88A8  2
 #define MTK_HDR_RECV_SOURCE_PORT_MASK  GENMASK(2, 0)
 #define MTK_HDR_XMIT_DP_BIT_MASK       GENMASK(5, 0)
 #define MTK_HDR_XMIT_SA_DIS            BIT(6)
@@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
                                    struct net_device *dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
+       u8 xmit_tpid;
        u8 *mtk_tag;
-       bool is_vlan_skb = true;
        unsigned char *dest = eth_hdr(skb)->h_dest;
        bool is_multicast_skb = is_multicast_ether_addr(dest) &&
                                !is_broadcast_ether_addr(dest);
@@ -33,13 +34,20 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
         * the both special and VLAN tag at the same time and then look up VLAN
         * table with VID.
         */
-       if (!skb_vlan_tagged(skb)) {
+       switch (skb->protocol) {
+       case htons(ETH_P_8021Q):
+               xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
+               break;
+       case htons(ETH_P_8021AD):
+               xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
+               break;
+       default:
                if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
                        return NULL;
 
+               xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
                skb_push(skb, MTK_HDR_LEN);
                memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
-               is_vlan_skb = false;
        }
 
        mtk_tag = skb->data + 2 * ETH_ALEN;
@@ -47,8 +55,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
        /* Mark tag attribute on special tag insertion to notify hardware
         * whether that's a combined special tag with 802.1Q header.
         */
-       mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
-                    MTK_HDR_XMIT_UNTAGGED;
+       mtk_tag[0] = xmit_tpid;
        mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
 
        /* Disable SA learning for multicast frames */
@@ -56,7 +63,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
                mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
 
        /* Tag control information is kept for 802.1Q */
-       if (!is_vlan_skb) {
+       if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
                mtk_tag[2] = 0;
                mtk_tag[3] = 0;
        }
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
index 1fd07a4f27ac..c162498a64fc 100644
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
@@ -6,8 +6,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 2",
@@ -20,6 +21,8 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
                BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 1,
 },
@@ -31,8 +34,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 4",
@@ -45,6 +49,8 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
                BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
@@ -55,8 +61,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 6",
@@ -67,8 +74,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 7",
@@ -80,8 +88,9 @@
                            offsetof(struct __sk_buff, mark)),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
        .errstr = "dereference of modified ctx ptr",
+       .result = REJECT,
        .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
@@ -94,8 +103,9 @@
                            offsetof(struct __sk_buff, mark)),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
        .errstr = "dereference of modified ctx ptr",
+       .result = REJECT,
        .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
@@ -106,8 +116,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 10",
@@ -119,6 +130,6 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
        .errstr = "math between ctx pointer and register with unbounded min 
value is not allowed",
+       .result = REJECT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 91bb77c24a2e..0d621c841db1 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -495,7 +495,7 @@
        .result = ACCEPT,
 },
 {
-       "unpriv: adding of fp",
+       "unpriv: adding of fp, reg",
        .insns = {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_MOV64_IMM(BPF_REG_1, 0),
@@ -503,6 +503,19 @@
        BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
+       .result_unpriv = REJECT,
+       .result = ACCEPT,
+},
+{
+       "unpriv: adding of fp, imm",
+       .insns = {
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+       BPF_EXIT_INSN(),
+       },
        .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
        .result_unpriv = REJECT,
        .result = ACCEPT,
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a53d99cebd9f..00b59d5d7a7f 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -169,7 +169,7 @@
        .fixup_map_array_48b = { 1 },
        .result = ACCEPT,
        .result_unpriv = REJECT,
-       .errstr_unpriv = "R2 tried to add from different maps or paths",
+       .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types",
        .retval = 0,
 },
 {
@@ -516,6 +516,27 @@
        .result = ACCEPT,
        .retval = 0xabcdef12,
 },
+{
+       "map access: value_ptr += N, value_ptr -= N known scalar",
+       .insns = {
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+       BPF_MOV32_IMM(BPF_REG_1, 0x12345678),
+       BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 2),
+       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_array_48b = { 3 },
+       .result = ACCEPT,
+       .retval = 0x12345678,
+},
 {
        "map access: unknown scalar += value_ptr, 1",
        .insns = {