Re: [PATCH v3 7/8] execmem: add support for cache of large ROX pages

2024-09-13 Thread Ard Biesheuvel
Hi Mike,

On Mon, 9 Sept 2024 at 08:51, Mike Rapoport  wrote:
>
> From: "Mike Rapoport (Microsoft)" 
>
> Using large pages to map text areas reduces iTLB pressure and improves
> performance.
>
> Extend execmem_alloc() with an ability to use huge pages with ROX
> permissions as a cache for smaller allocations.
>
> To populate the cache, a writable large page is allocated from vmalloc with
> VM_ALLOW_HUGE_VMAP, filled with invalid instructions and then remapped as
> ROX.
>
> Portions of that large page are handed out to execmem_alloc() callers
> without any changes to the permissions.
>
> When the memory is freed with execmem_free() it is invalidated again so
> that it won't contain stale instructions.
>
> The cache is enabled when an architecture sets the EXECMEM_ROX_CACHE flag
> in the definition of an execmem_range.
>
> Signed-off-by: Mike Rapoport (Microsoft) 
> ---
>  include/linux/execmem.h |   2 +
>  mm/execmem.c| 289 +++-
>  2 files changed, 286 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/execmem.h b/include/linux/execmem.h
> index dfdf19f8a5e8..7436aa547818 100644
> --- a/include/linux/execmem.h
> +++ b/include/linux/execmem.h
> @@ -77,12 +77,14 @@ struct execmem_range {
>
>  /**
>   * struct execmem_info - architecture parameters for code allocations
> + * @fill_trapping_insns: set memory to contain instructions that will trap
>   * @ranges: array of parameter sets defining architecture specific
>   * parameters for executable memory allocations. The ranges that are not
>   * explicitly initialized by an architecture use parameters defined for
>   * @EXECMEM_DEFAULT.
>   */
>  struct execmem_info {
> +   void (*fill_trapping_insns)(void *ptr, size_t size, bool writable);
> struct execmem_rangeranges[EXECMEM_TYPE_MAX];
>  };
>
> diff --git a/mm/execmem.c b/mm/execmem.c
> index 0f6691e9ffe6..f547c1f3c93d 100644
> --- a/mm/execmem.c
> +++ b/mm/execmem.c
> @@ -7,28 +7,88 @@
>   */
>
>  #include 
> +#include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>
> +#include 
> +
> +#include "internal.h"
> +
>  static struct execmem_info *execmem_info __ro_after_init;
>  static struct execmem_info default_execmem_info __ro_after_init;
>
> -static void *__execmem_alloc(struct execmem_range *range, size_t size)
> +#ifdef CONFIG_MMU
> +struct execmem_cache {
> +   struct mutex mutex;
> +   struct maple_tree busy_areas;
> +   struct maple_tree free_areas;
> +};
> +
> +static struct execmem_cache execmem_cache = {
> +   .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex),
> +   .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN,
> +execmem_cache.mutex),
> +   .free_areas = MTREE_INIT_EXT(free_areas, MT_FLAGS_LOCK_EXTERN,
> +execmem_cache.mutex),
> +};
> +
> +static void execmem_cache_clean(struct work_struct *work)
> +{
> +   struct maple_tree *free_areas = &execmem_cache.free_areas;
> +   struct mutex *mutex = &execmem_cache.mutex;
> +   MA_STATE(mas, free_areas, 0, ULONG_MAX);
> +   void *area;
> +
> +   mutex_lock(mutex);
> +   mas_for_each(&mas, area, ULONG_MAX) {
> +   size_t size;
> +
> +   if (!xa_is_value(area))
> +   continue;
> +
> +   size = xa_to_value(area);
> +
> +   if (IS_ALIGNED(size, PMD_SIZE) &&
> +   IS_ALIGNED(mas.index, PMD_SIZE)) {
> +   void *ptr = (void *)mas.index;
> +
> +   mas_erase(&mas);
> +   vfree(ptr);
> +   }
> +   }
> +   mutex_unlock(mutex);
> +}
> +
> +static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean);
> +
> +static void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable)
> +{
> +   if (execmem_info->fill_trapping_insns)
> +   execmem_info->fill_trapping_insns(ptr, size, writable);
> +   else
> +   memset(ptr, 0, size);

Does this really have to be a function pointer with a runtime check?

This could just be a __weak definition, with the arch providing an
override if the memset() is not appropriate.
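
Something like the below, perhaps (rough sketch, untested - the default
just keeps the existing memset() behaviour):

void __weak execmem_fill_trapping_insns(void *ptr, size_t size, bool writable)
{
	memset(ptr, 0, size);
}

and an architecture that needs actual trapping opcodes rather than
zeroes would simply provide a non-weak definition of the same function
in its own code.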



Re: [PATCH v2 05/17] vdso: Avoid call to memset() by getrandom

2024-08-28 Thread Ard Biesheuvel
On Wed, 28 Aug 2024 at 14:57, Segher Boessenkool
 wrote:
>
> On Wed, Aug 28, 2024 at 12:24:12PM +, Arnd Bergmann wrote:
> > On Wed, Aug 28, 2024, at 11:18, Jason A. Donenfeld wrote:
> > > On Tue, Aug 27, 2024 at 05:53:30PM -0500, Segher Boessenkool wrote:
> > >> On Tue, Aug 27, 2024 at 11:08:19AM -0700, Eric Biggers wrote:
> > >> >
> > >> > Is there a compiler flag that could be used to disable the generation 
> > >> > of calls
> > >> > to memset?
> > >>
> > >> -fno-tree-loop-distribute-patterns .  But, as always, read up on it, see
> > >> what it actually does (and how it avoids your problem, and mostly: learn
> > >> what the actual problem *was*!)
> > >
> > > This might help with various loops, but it doesn't help with the matter
> > > that this patch fixes, which is struct initialization. I just tried it
> > > with the arm64 patch to no avail.
> >
> > Maybe -ffreestanding can help here? That should cause the vdso to be built
> > with the assumption that there is no libc, so it would neither add nor
> > remove standard library calls. Not sure if that causes other problems,
> > e.g. if the calling conventions are different.
>
> "GCC requires the freestanding
> environment provide 'memcpy', 'memmove', 'memset' and 'memcmp'."
>
> This is precisely to implement things like struct initialisation.  Maybe
> we should have a "-ffreeerstanding" or "-ffreefloating" or think of
> something funnier still environment as well, this problem has been there
> since the -ffreestanding flag has existed, but the problem is as old as
> the night.
>
> -fno-builtin might help a bit more, but just attack the problem at
> its root, like I suggested?
>

In my experience, this is likely to do the opposite: it causes the
compiler to 'forget' the semantics of memcpy() and memset(), so that
explicit trivial calls will no longer be elided and replaced with
plain loads and stores (as the compiler can no longer guarantee the
equivalence).
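
To illustrate (hypothetical example, not taken from the patch): a large
zero-initializer such as

	struct foo f = { };

may still be lowered by the compiler to a memset() call even with
-fno-builtin or -ffreestanding, while a small explicit call like

	memset(&f, 0, sizeof(f));

would normally be expanded into inline stores, but no longer is once
the builtin handling is disabled.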

> (This isn't a new problem, originally it showed up as "GCC replaces
> (part of) my memcpy() implementation by a (recursive) call to memcpy()"
> and, well, that doesn't quite work!)
>

This needs to be fixed for Clang as well, so throwing GCC specific
flags at it will at best be a partial solution.

Omitting the struct assignment is a reasonable way to reduce the
likelihood that a memset() will be emitted, so for this patch

Acked-by: Ard Biesheuvel 

It is not a complete solution, unfortunately, and I guess there may be
other situations (compiler/arch combinations) where this might pop up
again.



Re: [PATCH v4 13/15] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT

2024-04-11 Thread Ard Biesheuvel
(cc Arnd)

On Thu, 11 Apr 2024 at 03:11, Samuel Holland  wrote:
>
> Hi Thiago,
>
> On 2024-04-10 8:02 PM, Thiago Jung Bauermann wrote:
> > Samuel Holland  writes:
> >> On 2024-04-10 5:21 PM, Thiago Jung Bauermann wrote:
> >>>
> >>> Unfortunately this patch causes build failures on arm with allyesconfig
> >>> and allmodconfig. Tested with next-20240410.
> >
> > 
> >
> >> In both cases, the issue is that the toolchain requires runtime support to
> >> convert between `unsigned long long` and `double`, even when hardware FP is
> >> enabled. There was some past discussion about GCC inlining some of these
> >> conversions[1], but that did not get implemented.
> >
> > Thank you for the explanation and the bugzilla reference. I added a
> > comment there mentioning that the problem came up again with this patch
> > series.
> >
> >> The short-term fix would be to drop the `select 
> >> ARCH_HAS_KERNEL_FPU_SUPPORT` for
> >> 32-bit arm until we can provide these runtime library functions.
> >
> > Does this mean that patch 2 in this series:
> >
> > [PATCH v4 02/15] ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
> >
> > will be dropped?
>
> No, because later patches in the series (3, 6) depend on the definition of
> CC_FLAGS_FPU from that patch. I will need to send a fixup patch unless I can
> find a GPL-2 compatible implementation of the runtime library functions.
>

Is there really a point to doing that? Do 32-bit ARM systems even have
enough address space to map the BARs of the AMD GPUs that need
this support?

Given that this was not enabled before, I don't think the upshot of
this series should be that we enable support for something on 32-bit
ARM that may cause headaches down the road without any benefit.

So I'd prefer a fixup patch that opts ARM out of this over adding
support code for 64-bit conversions.


Re: [PATCH 0/2] Deduplicate bin_attribute simple read() callbacks

2024-04-08 Thread Ard Biesheuvel
On Sat, 6 Apr 2024 at 15:52, Lukas Wunner  wrote:
>
> For my upcoming PCI device authentication v2 patches, I have the need
> to expose a simple buffer in virtual memory as a bin_attribute.
>
> It turns out we've duplicated the ->read() callback for such simple
> buffers a fair number of times across the tree.
>
> So instead of reinventing the wheel, I decided to introduce a common
> helper and eliminate all duplications I could find.
>
> I'm open to a bikeshedding discussion on the sysfs_bin_attr_simple_read()
> name. ;)
>
> Lukas Wunner (2):
>   sysfs: Add sysfs_bin_attr_simple_read() helper
>   treewide: Use sysfs_bin_attr_simple_read() helper
>

Acked-by: Ard Biesheuvel 

>  arch/powerpc/platforms/powernv/opal.c  | 10 +---
>  drivers/acpi/bgrt.c|  9 +---
>  drivers/firmware/dmi_scan.c| 12 ++
>  drivers/firmware/efi/rci2-table.c  | 10 +---
>  drivers/gpu/drm/i915/gvt/firmware.c| 26 +
>  .../intel/int340x_thermal/int3400_thermal.c|  9 +---
>  fs/sysfs/file.c| 27 ++
>  include/linux/sysfs.h  | 15 
>  init/initramfs.c   | 10 +---
>  kernel/module/sysfs.c  | 13 +--
>  10 files changed, 56 insertions(+), 85 deletions(-)
>
> --
> 2.43.0
>


Re: [PATCH v5 19/25] arm64/mm: Wire up PTE_CONT for user mappings

2024-02-13 Thread Ard Biesheuvel
On Tue, 13 Feb 2024 at 15:05, David Hildenbrand  wrote:
>
> On 13.02.24 15:02, Ryan Roberts wrote:
> > On 13/02/2024 13:45, David Hildenbrand wrote:
> >> On 13.02.24 14:33, Ard Biesheuvel wrote:
> >>> On Tue, 13 Feb 2024 at 14:21, Ryan Roberts  wrote:
> >>>>
> >>>> On 13/02/2024 13:13, David Hildenbrand wrote:
...
> >>>>> Just a thought, you could have a is_efi_mm() function that abstracts 
> >>>>> all that.
> >>>>>
> >>>>> diff --git a/include/linux/efi.h b/include/linux/efi.h
> >>>>> index c74f47711f0b..152f5fa66a2a 100644
> >>>>> --- a/include/linux/efi.h
> >>>>> +++ b/include/linux/efi.h
> >>>>> @@ -692,6 +692,15 @@ extern struct efi {
> >>>>>
> >>>>>extern struct mm_struct efi_mm;
> >>>>>
> >>>>> +static inline bool is_efi_mm(struct mm_struct *mm)
> >>>>> +{
> >>>>> +#ifdef CONFIG_EFI
> >>>>> +   return mm == &efi_mm;
> >>>>> +#else
> >>>>> +   return false;
> >>>>> +#endif
> >>>>> +}
> >>>>> +
> >>>>>static inline int
> >>>>>efi_guidcmp (efi_guid_t left, efi_guid_t right)
> >>>>>{
> >>>>>
> >>>>>
> >>>>
> >>>> That would definitely work, but in that case, I might as well just check 
> >>>> for it
> >>>> in mm_is_user() (and personally I would change the name to mm_is_efi()):
> >>>>
> >>>>
> >>>> static inline bool mm_is_user(struct mm_struct *mm)
> >>>> {
> >>>>   return mm != &init_mm && !mm_is_efi(mm);
> >>>> }
> >>>>
> >>>> Any objections?
> >>>>
> >>>
> >>> Any reason not to use IS_ENABLED(CONFIG_EFI) in the above? The extern
> >>> declaration is visible to the compiler, and any references should
> >>> disappear before the linker could notice that efi_mm does not exist.
> >>>
> >>
> >> Sure, as long as the linker is happy why not. I'll let Ryan mess with that 
> >> :)
> >
> > I'm not sure if you are suggesting dropping the mm_is_efi() helper and just 
> > use
> > IS_ENABLED(CONFIG_EFI) in mm_is_user() to guard efi_mm, or if you are 
> > suggesting
> > using IS_ENABLED(CONFIG_EFI) in mm_is_efi() instead of the ifdefery?
> >
> > The former was what I did initially; It works great, but I didn't like that 
> > I
> > was introducing a new code dependecy between efi and arm64 (nothing else 
> > outside
> > of efi references efi_mm).
> >
> > So then concluded that it is safe to not worry about efi_mm (thanks for your
> > confirmation). But then David wanted a VM_WARN check, which reintroduces the
> > code dependency. So he suggested the mm_is_efi() helper to hide that... 
> > This is
> > all starting to feel circular...
>
> I think Ard meant that inside mm_is_efi(), we could avoid the #ifdef and
> simply use IS_ENABLED().
>

Yes.

static inline bool mm_is_efi(struct mm_struct *mm)
{
return IS_ENABLED(CONFIG_EFI) && mm == &efi_mm;
}


Re: [PATCH v5 19/25] arm64/mm: Wire up PTE_CONT for user mappings

2024-02-13 Thread Ard Biesheuvel
On Tue, 13 Feb 2024 at 14:21, Ryan Roberts  wrote:
>
> On 13/02/2024 13:13, David Hildenbrand wrote:
> > On 13.02.24 14:06, Ryan Roberts wrote:
> >> On 13/02/2024 12:19, David Hildenbrand wrote:
> >>> On 13.02.24 13:06, Ryan Roberts wrote:
> >>>> On 12/02/2024 20:38, Ryan Roberts wrote:
> >>>>> [...]
> >>>>>
> >>>>>>>>> +static inline bool mm_is_user(struct mm_struct *mm)
> >>>>>>>>> +{
> >>>>>>>>> +/*
> >>>>>>>>> + * Don't attempt to apply the contig bit to kernel mappings, 
> >>>>>>>>> because
> >>>>>>>>> + * dynamically adding/removing the contig bit can cause page 
> >>>>>>>>> faults.
> >>>>>>>>> + * These racing faults are ok for user space, since they get
> >>>>>>>>> serialized
> >>>>>>>>> + * on the PTL. But kernel mappings can't tolerate faults.
> >>>>>>>>> + */
> >>>>>>>>> +return mm != &init_mm;
> >>>>>>>>> +}
> >>>>>>>>
> >>>>>>>> We also have the efi_mm as a non-user mm, though I don't think we
> >>>>>>>> manipulate
> >>>>>>>> that while it is live, and I'm not sure if that needs any special 
> >>>>>>>> handling.
> >>>>>>>
> >>>>>>> Well we never need this function in the hot (order-0 folio) path, so I
> >>>>>>> think I
> >>>>>>> could add a check for efi_mm here with performance implication. It's
> >>>>>>> probably
> >>>>>>> safest to explicitly exclude it? What do you think?
> >>>>>>
> >>>>>> Oops: This should have read "I think I could add a check for efi_mm 
> >>>>>> here
> >>>>>> *without* performance implication"
> >>>>>
> >>>>> It turns out that efi_mm is only defined when CONFIG_EFI is enabled. I 
> >>>>> can do
> >>>>> this:
> >>>>>
> >>>>> return mm != &init_mm && (!IS_ENABLED(CONFIG_EFI) || mm != &efi_mm);
> >>>>>
> >>>>> Is that acceptable? This is my preference, but nothing else outside of 
> >>>>> efi
> >>>>> references this symbol currently.
> >>>>>
> >>>>> Or perhaps I can convince myself that its safe to treat efi_mm like 
> >>>>> userspace.
> >>>>> There are a couple of things that need to be garanteed for it to be 
> >>>>> safe:
> >>>>>
> >>>>> - The PFNs of present ptes either need to have an associated struct
> >>>>> page or
> >>>>>   need to have the PTE_SPECIAL bit set (either pte_mkspecial() or
> >>>>>   pte_mkdevmap())
> >>>>>
> >>>>> - Live mappings must either be static (no changes that could cause
> >>>>> fold/unfold
> >>>>>   while live) or the system must be able to tolerate a temporary 
> >>>>> fault
> >>>>>
> >>>>> Mark suggests efi_mm is not manipulated while live, so that meets the 
> >>>>> latter
> >>>>> requirement, but I'm not sure about the former?
> >>>>
> >>>> I've gone through all the efi code, and conclude that, as Mark suggests, 
> >>>> the
> >>>> mappings are indeed static. And additionally, the ptes are populated 
> >>>> using only
> >>>> the _private_ ptep API, so there is no issue here. As just discussed 
> >>>> with Mark,
> >>>> my preference is to not make any changes to code, and just add a comment
> >>>> describing why efi_mm is safe.
> >>>>
> >>>> Details:
> >>>>
> >>>> * Registered with ptdump
> >>>>   * ptep_get_lockless()
> >>>> * efi_create_mapping -> create_pgd_mapping … -> init_pte:
> >>>>   * __ptep_get()
> >>>>   * __set_pte()
> >>>> * efi_memattr_apply_permissions -> efi_set_mapping_permissions … ->

Re: [PATCH 5/8] drivers: firmware: efi: libstub: enable generic commandline

2023-12-12 Thread Ard Biesheuvel
On Fri, 10 Nov 2023 at 02:39, Daniel Walker  wrote:
>
> This adds code to handle the generic command line changes.
> The efi code appears that it doesn't benefit as much from this design
> as it could.
>
> For example, if you had a prepend command line with "nokaslr" then
> you might be helpful to re-enable it in the boot loader or dts,
> but there appears to be no way to re-enable kaslr or some of the
> other options.
>
> The efi command line handling is incorrect. x86 and arm have an append
> system, whereas the efi code prepends the command line.
>
> For example, you could have a non-upgradable bios which sends
>
> efi=disable_early_pci_dma
>
> This hypothetically could have been set because early pci dma caused
> issues on early versions of the product.
>
> Then later the early pci dma was made to work and the company desired
> to start using it. To override the bios you could set the CONFIG_CMDLINE
> to,
>
> efi=no_disable_early_pci_dma
>
> then parsing would normally start with the bios command line, then move
> to the CONFIG_CMDLINE and you would end up with early pci dma turned on.
>
> however, current efi code keeps early pci dma off because the bios
> arguments always override the built in.
>
> Per my reading this is different from the main body of x86, arm, and
> arm64.
>
> The generic command line provides both append and prepend, so it
> alleviates this issue if it's used. However not all architectures use
> it.
>
> It would be desirable to allow the efi stub's builtin command line to
> be modified after compile, but I don't see a feasible way to do
> that currently.
>
> Cc: xe-linux-exter...@cisco.com
> Signed-off-by: Daniel Walker 

There are quite some 'might be's and 'hypothetical's in this commit log.

Is there an actual use case that you are addressing here? Without
that, this looks like unnecessary churn to me, tbh.

Note that this code executes in the context of platform firmware,
which may be old and buggy so we should be cautious about making
unnecessary changes here.


> ---
>  .../firmware/efi/libstub/efi-stub-helper.c| 29 +++
>  drivers/firmware/efi/libstub/efi-stub.c   |  9 ++
>  drivers/firmware/efi/libstub/efistub.h|  1 +
>  drivers/firmware/efi/libstub/x86-stub.c   | 14 +++--
>  4 files changed, 51 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c 
> b/drivers/firmware/efi/libstub/efi-stub-helper.c
> index bfa30625f5d0..952fa2cdff51 100644
> --- a/drivers/firmware/efi/libstub/efi-stub-helper.c
> +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
> @@ -11,6 +11,7 @@
>
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>
> @@ -29,6 +30,34 @@ bool __pure __efi_soft_reserve_enabled(void)
> return !efi_nosoftreserve;
>  }
>
> +/**
> + * efi_handle_cmdline() - handle adding in built-in parts of the command line
> + * @cmdline:   kernel command line
> + *
> + * Add in the generic parts of the commandline and start the parsing of the
> + * command line.
> + *
> + * Return: status code
> + */
> +efi_status_t efi_handle_builtin_cmdline(char const *cmdline)
> +{
> +   efi_status_t status = EFI_SUCCESS;
> +
> +   if (sizeof(CMDLINE_STATIC_PREPEND) > 1)
> +   status |= efi_parse_options(CMDLINE_STATIC_PREPEND);
> +
> +   if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE))
> +   status |= efi_parse_options(cmdline);
> +
> +   if (sizeof(CMDLINE_STATIC_APPEND) > 1)
> +   status |= efi_parse_options(CMDLINE_STATIC_APPEND);
> +
> +   if (status != EFI_SUCCESS)
> +   efi_err("Failed to parse options\n");
> +
> +   return status;
> +}
> +
>  /**
>   * efi_parse_options() - Parse EFI command line options
>   * @cmdline:   kernel command line
> diff --git a/drivers/firmware/efi/libstub/efi-stub.c 
> b/drivers/firmware/efi/libstub/efi-stub.c
> index f9c1e8a2bd1d..770abe95c0ee 100644
> --- a/drivers/firmware/efi/libstub/efi-stub.c
> +++ b/drivers/firmware/efi/libstub/efi-stub.c
> @@ -127,6 +127,14 @@ efi_status_t efi_handle_cmdline(efi_loaded_image_t 
> *image, char **cmdline_ptr)
> return EFI_OUT_OF_RESOURCES;
> }
>
> +#ifdef CONFIG_GENERIC_CMDLINE
> +   status = efi_handle_builtin_cmdline(cmdline);
> +   if (status != EFI_SUCCESS) {
> +   goto fail_free_cmdline;
> +   }
> +#endif
> +
> +#ifdef CONFIG_CMDLINE
> if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
> IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
> cmdline_size == 0) {
> @@ -144,6 +152,7 @@ efi_status_t efi_handle_cmdline(efi_loaded_image_t 
> *image, char **cmdline_ptr)
> goto fail_free_cmdline;
> }
> }
> +#endif
>
> *cmdline_ptr = cmdline;
> return EFI_SUCCESS;
> diff --git a/drivers/firmware/efi/libstub/efistub.h 
> b/drivers/firmware/efi/libstub/efistub.h
> index 212687c30d79..1ac6631905c5 100644
> --- a/drivers/firmware/efi/l

Re: [RFC PATCH 01/21] crypto: scomp - Revert "add support for deflate rfc1950 (zlib)"

2023-08-03 Thread Ard Biesheuvel
Hello Giovanni,

On Thu, 3 Aug 2023 at 11:51, Giovanni Cabiddu
 wrote:
>
> Hi Ard,
>
> On Tue, Jul 18, 2023 at 01:58:27PM +0100, Ard Biesheuvel wrote:
> > This reverts commit a368f43d6e3a001e684e9191a27df384fbff12f5.
> >
> > "zlib-deflate" was introduced 6 years ago, but it does not have any
> > users. So let's remove the generic implementation and the test vectors,
> > but retain the "zlib-deflate" entry in the testmgr code to avoid
> > introducing warning messages on systems that implement zlib-deflate in
> > hardware.
> >
> > Note that RFC 1950 which forms the basis of this algorithm dates back to
> > 1996, and predates RFC 1951, on which the existing IPcomp is based and
> > which we have supported in the kernel since 2003. So it seems rather
> > unlikely that we will ever grow the need to support zlib-deflate.
> >
> > Signed-off-by: Ard Biesheuvel 
> Support for zlib-deflate was added for [1] but that work was not
> completed.
>

Any clue why zlib_deflate was chosen in this case?

/me also notes that this is another occurrence of the antipattern
where we use an asynchronous API and subsequently sleep on the
completion.
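
(I.e., roughly this pattern - a sketch using the generic
crypto_wait_req() helper, not the actual code in question:)

	DECLARE_CRYPTO_WAIT(wait);
	int err;

	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	/* issue the asynchronous request and then just block on it */
	err = crypto_wait_req(crypto_acomp_compress(req), &wait);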

> Based on [2], either we leave this SW implementation or we remove the HW
> implementations in the QAT [3] and in the Hisilicon Zip [4] drivers.
>

That would work for me as well - dead code is just busywork.

> [1] 
> https://patchwork.kernel.org/project/linux-btrfs/patch/1467083180-111750-1-git-send-email-weigang...@intel.com/
> [2] https://lore.kernel.org/lkml/ziw%2fjtxdg6o1o...@gondor.apana.org.au/
> [3] 
> https://elixir.bootlin.com/linux/latest/source/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c#L457
> [4] 
> https://elixir.bootlin.com/linux/latest/source/drivers/crypto/hisilicon/zip/zip_crypto.c#L754
>
> Regards,
>
> --
> Giovanni


Re: [RFC PATCH 00/21] crypto: consolidate and clean up compression APIs

2023-07-28 Thread Ard Biesheuvel
On Fri, 28 Jul 2023 at 11:59, Herbert Xu  wrote:
>
> On Fri, Jul 28, 2023 at 11:57:42AM +0200, Ard Biesheuvel wrote:
> >
> > So will IPcomp be able to simply assign those pages to the SKB afterwards?
>
> Yes that is the idea.  The network stack is very much in love with
> SG lists :)
>

Fair enough. But my point remains: this requires a lot of boilerplate
on the part of the driver, and it would be better if we could do this
in the acomp generic layer.

Does the IPcomp case always know the decompressed size upfront?


Re: [RFC PATCH 00/21] crypto: consolidate and clean up compression APIs

2023-07-28 Thread Ard Biesheuvel
On Fri, 28 Jul 2023 at 11:56, Herbert Xu  wrote:
>
> On Tue, Jul 18, 2023 at 02:58:26PM +0200, Ard Biesheuvel wrote:
> >
> > Patch #2 removes the support for on-the-fly allocation of destination
> > buffers and scatterlists from the Intel QAT driver. This is never used,
> > and not even implemented by all drivers (the HiSilicon ZIP driver does
> > not support it). The diffstat of this patch makes a good case why the
> > caller should be in charge of allocating the memory, not the driver.
>
> The implementation in qat may not be optimal, but being able to
> allocate memory in the algorithm is a big plus for IPComp at least.
>
> Being able to allocate memory page by page as you decompress
> means that:
>
> 1. We're not affected by memory fragmentation.
> 2. We don't waste memory by always allocating for the worst case.
>

So will IPcomp be able to simply assign those pages to the SKB afterwards?


Re: [RFC PATCH 20/21] crypto: deflate - implement acomp API directly

2023-07-21 Thread Ard Biesheuvel
On Fri, 21 Jul 2023 at 13:12, Simon Horman  wrote:
>
> On Tue, Jul 18, 2023 at 02:58:46PM +0200, Ard Biesheuvel wrote:
>
> ...
>
> > -static int deflate_comp_init(struct deflate_ctx *ctx)
> > +static int deflate_process(struct acomp_req *req, struct z_stream_s *stream,
> > +int (*process)(struct z_stream_s *, int))
> >  {
> > - int ret = 0;
> > - struct z_stream_s *stream = &ctx->comp_stream;
> > + unsigned int slen = req->slen;
> > + unsigned int dlen = req->dlen;
> > + struct scatter_walk src, dst;
> > + unsigned int scur, dcur;
> > + int ret;
> >
> > - stream->workspace = vzalloc(zlib_deflate_workspacesize(
> > - -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL));
> > - if (!stream->workspace) {
> > - ret = -ENOMEM;
> > - goto out;
> > - }
> > + stream->avail_in = stream->avail_out = 0;
> > +
> > + scatterwalk_start(&src, req->src);
> > + scatterwalk_start(&dst, req->dst);
> > +
> > + scur = dcur = 0;
> > +
> > + do {
> > + if (stream->avail_in == 0) {
> > + if (scur) {
> > + slen -= scur;
> > +
> > + scatterwalk_unmap(stream->next_in - scur);
> > + scatterwalk_advance(&src, scur);
> > + scatterwalk_done(&src, 0, slen);
> > + }
> > +
> > + scur = scatterwalk_clamp(&src, slen);
> > + if (scur) {
> > + stream->next_in = scatterwalk_map(&src);
> > + stream->avail_in = scur;
> > + }
> > + }
> > +
> > + if (stream->avail_out == 0) {
> > + if (dcur) {
> > + dlen -= dcur;
> > +
> > + scatterwalk_unmap(stream->next_out - dcur);
> > + scatterwalk_advance(&dst, dcur);
> > + scatterwalk_done(&dst, 1, dlen);
> > + }
> > +
> > + dcur = scatterwalk_clamp(&dst, dlen);
> > + if (!dcur)
> > + break;
>
> Hi Ard,
>
> I'm unsure if this can happen. But if this break occurs in the first
> iteration of this do loop, then ret will be used uninitialised below.
>
> Smatch noticed this.
>

Thanks.

This should not happen - it would mean req->dlen == 0, which is
rejected before this function is even called.

Whether or not it might ever happen in practice is a different matter,
of course, so I should probably initialize 'ret' to something sane.



> > +
> > + stream->next_out = scatterwalk_map(&dst);
> > + stream->avail_out = dcur;
> > + }
> > +
> > + ret = process(stream, (slen == scur) ? Z_FINISH : Z_SYNC_FLUSH);
> > + } while (ret == Z_OK);
> > +
> > + if (scur)
> > + scatterwalk_unmap(stream->next_in - scur);
> > + if (dcur)
> > + scatterwalk_unmap(stream->next_out - dcur);
> > +
> > + if (ret != Z_STREAM_END)
> > + return -EINVAL;
> > +
> > + req->dlen = stream->total_out;
> > + return 0;
> > +}
>
> ...


Re: [RFC PATCH 05/21] ubifs: Pass worst-case buffer size to compression routines

2023-07-19 Thread Ard Biesheuvel
On Wed, 19 Jul 2023 at 16:23, Zhihao Cheng  wrote:
>
> 在 2023/7/19 16:33, Ard Biesheuvel 写道:
> > On Wed, 19 Jul 2023 at 00:38, Eric Biggers  wrote:
> >>
> >> On Tue, Jul 18, 2023 at 02:58:31PM +0200, Ard Biesheuvel wrote:
> >>> Currently, the ubifs code allocates a worst case buffer size to
> >>> recompress a data node, but does not pass the size of that buffer to the
> >>> compression code. This means that the compression code will never use
>
> I think you mean the 'out_len' which describes the length of 'buf' is
> passed into ubifs_decompress, which affects the result of the
> decompressor (e.g. lz4 uses the length to calculate the buffer end pos).
> So, we should pass the real length of 'buf'.
>

Yes, that is what I meant.

But Eric makes a good point, and looking a bit more closely, there is
really no need for the multiplication here: we know the size of the
decompressed data, so we don't need the additional space.

I intend to drop this patch, and replace it with the following:

8<--

Currently, when truncating a data node, a decompression buffer is
allocated that is twice the data node's uncompressed size.
However, the fact that this space is available is not communicated to
the compression routines, as out_len itself is not updated.

The additional space is not needed even in the theoretical worst case
where compression might lead to inadvertent expansion: first of all,
increasing the size of the input buffer does not help mitigate that
issue. And given the truncation of the data node and the fact that the
original data compressed well enough to pass the UBIFS_MIN_COMPRESS_DIFF
test, there is no way on this particular code path that compression
could result in expansion beyond the original decompressed size, and so
no mitigation is necessary to begin with.

So let's just drop WORST_COMPR_FACTOR here.

Signed-off-by: Ard Biesheuvel 

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index dc52ac0f4a345f30..0b55cbfe0c30505e 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1489,7 +1489,7 @@ static int truncate_data_node(const struct
ubifs_info *c, const struct inode *in
int err, dlen, compr_type, out_len, data_size;

out_len = le32_to_cpu(dn->size);
-   buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
+   buf = kmalloc(out_len, GFP_NOFS);
if (!buf)
return -ENOMEM;


Re: [PATCH v2 9/9] efi: move screen_info into efi init code

2023-07-19 Thread Ard Biesheuvel
On Wed, 19 Jul 2023 at 14:41, Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> After the vga console no longer relies on global screen_info, there are
> only two remaining use cases:
>
>  - on the x86 architecture, it is used for multiple boot methods
>(bzImage, EFI, Xen, kexec) to communicate the initial VGA or framebuffer
>settings to a number of device drivers.
>
>  - on other architectures, it is only used as part of the EFI stub,
>and only for the three sysfb framebuffers (simpledrm, simplefb, efifb).
>
> Remove the duplicate data structure definitions by moving it into the
> efi-init.c file that sets it up initially for the EFI case, leaving x86
> as an exception that retains its own definition for non-EFI boots.
>
> The added #ifdefs here are optional, I added them to further limit the
> reach of screen_info to configurations that have at least one of the
> users enabled.
>
> Signed-off-by: Arnd Bergmann 

Reviewed-by: Ard Biesheuvel 

> ---
>  arch/arm/kernel/setup.c   |  4 
>  arch/arm64/kernel/efi.c   |  4 
>  arch/arm64/kernel/image-vars.h|  2 ++
>  arch/ia64/kernel/setup.c  |  4 
>  arch/loongarch/kernel/efi.c   |  3 ++-
>  arch/loongarch/kernel/image-vars.h|  2 ++
>  arch/loongarch/kernel/setup.c |  5 -
>  arch/riscv/kernel/image-vars.h|  2 ++
>  arch/riscv/kernel/setup.c |  5 -
>  drivers/firmware/efi/efi-init.c   | 14 +-
>  drivers/firmware/efi/libstub/efi-stub-entry.c |  8 +++-
>  11 files changed, 28 insertions(+), 25 deletions(-)
>
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index 86c2751f56dcf..135b7eff03f72 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -939,10 +939,6 @@ static struct screen_info vgacon_screen_info = {
>  };
>  #endif
>
> -#if defined(CONFIG_EFI)
> -struct screen_info screen_info;
> -#endif
> -
>  static int __init customize_machine(void)
>  {
> /*
> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
> index 3afbe503b066f..ff2d5169d7f1f 100644
> --- a/arch/arm64/kernel/efi.c
> +++ b/arch/arm64/kernel/efi.c
> @@ -71,10 +71,6 @@ static __init pteval_t 
> create_mapping_protection(efi_memory_desc_t *md)
> return pgprot_val(PAGE_KERNEL_EXEC);
>  }
>
> -/* we will fill this structure from the stub, so don't put it in .bss */
> -struct screen_info screen_info __section(".data");
> -EXPORT_SYMBOL(screen_info);
> -
>  int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
>  {
> pteval_t prot_val = create_mapping_protection(md);
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 35f3c79595137..5e4dc72ab1bda 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -27,7 +27,9 @@ PROVIDE(__efistub__text   = _text);
>  PROVIDE(__efistub__end = _end);
>  PROVIDE(__efistub___inittext_end   = __inittext_end);
>  PROVIDE(__efistub__edata   = _edata);
> +#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB)
>  PROVIDE(__efistub_screen_info  = screen_info);
> +#endif
>  PROVIDE(__efistub__ctype   = _ctype);
>
>  PROVIDE(__pi___memcpy  = __pi_memcpy);
> diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
> index 82feae1323f40..e91a91b5e9142 100644
> --- a/arch/ia64/kernel/setup.c
> +++ b/arch/ia64/kernel/setup.c
> @@ -86,10 +86,6 @@ EXPORT_SYMBOL(local_per_cpu_offset);
>  #endif
>  unsigned long ia64_cycles_per_usec;
>  struct ia64_boot_param *ia64_boot_param;
> -#if defined(CONFIG_EFI)
> -/* No longer used on ia64, but needed for linking */
> -struct screen_info screen_info;
> -#endif
>  #ifdef CONFIG_VGA_CONSOLE
>  unsigned long vga_console_iobase;
>  unsigned long vga_console_membase;
> diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
> index 9fc10cea21e10..df7db34024e61 100644
> --- a/arch/loongarch/kernel/efi.c
> +++ b/arch/loongarch/kernel/efi.c
> @@ -115,7 +115,8 @@ void __init efi_init(void)
>
> set_bit(EFI_CONFIG_TABLES, &efi.flags);
>
> -   init_screen_info();
> +   if (IS_ENABLED(CONFIG_EFI_EARLYCON) || IS_ENABLED(CONFIG_SYSFB))
> +   init_screen_info();
>
> if (boot_memmap == EFI_INVALID_TABLE_ADDR)
> return;
> diff --git a/arch/loongarch/kernel/image-vars.h 
> b/arch/loongarch/kernel/image-vars.h
> index e561989d02de9..5087416b9678d 100644
> --

Re: [RFC PATCH 05/21] ubifs: Pass worst-case buffer size to compression routines

2023-07-19 Thread Ard Biesheuvel
On Wed, 19 Jul 2023 at 00:38, Eric Biggers  wrote:
>
> On Tue, Jul 18, 2023 at 02:58:31PM +0200, Ard Biesheuvel wrote:
> > Currently, the ubifs code allocates a worst case buffer size to
> > recompress a data node, but does not pass the size of that buffer to the
> > compression code. This means that the compression code will never use
> > the additional space, and might fail spuriously due to lack of space.
> >
> > So let's multiply out_len by WORST_COMPR_FACTOR after allocating the
> > buffer. Doing so is guaranteed not to overflow, given that the preceding
> > kmalloc_array() call would have failed otherwise.
> >
> > Signed-off-by: Ard Biesheuvel 
> > ---
> >  fs/ubifs/journal.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
> > index dc52ac0f4a345f30..4e5961878f336033 100644
> > --- a/fs/ubifs/journal.c
> > +++ b/fs/ubifs/journal.c
> > @@ -1493,6 +1493,8 @@ static int truncate_data_node(const struct ubifs_info 
> > *c, const struct inode *in
> >   if (!buf)
> >   return -ENOMEM;
> >
> > + out_len *= WORST_COMPR_FACTOR;
> > +
> >   dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
> >   data_size = dn_size - UBIFS_DATA_NODE_SZ;
> >   compr_type = le16_to_cpu(dn->compr_type);
>
> This looks like another case where data that would be expanded by compression
> should just be stored uncompressed instead.
>
> In fact, it seems that UBIFS does that already.  ubifs_compress() has this:
>
> /*
>  * If the data compressed only slightly, it is better to leave it
>  * uncompressed to improve read speed.
>  */
> if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
> goto no_compr;
>
> So it's unclear why the WORST_COMPR_FACTOR thing is needed at all.
>

It is not. The buffer is used for decompression in the truncation
path, so none of this logic even matters. Even if the subsequent
recompression of the truncated data node could result in expansion
beyond the uncompressed size of the original data (which seems
impossible to me), increasing the size of this buffer would not help
as it is the input buffer for the compression, not the output buffer.


Re: [RFC PATCH 01/21] crypto: scomp - Revert "add support for deflate rfc1950 (zlib)"

2023-07-18 Thread Ard Biesheuvel
On Wed, 19 Jul 2023 at 00:54, Eric Biggers  wrote:
>
> On Tue, Jul 18, 2023 at 03:32:39PM -0700, Eric Biggers wrote:
> > On Tue, Jul 18, 2023 at 02:58:27PM +0200, Ard Biesheuvel wrote:
> > > This reverts commit a368f43d6e3a001e684e9191a27df384fbff12f5.
> > >
> > > "zlib-deflate" was introduced 6 years ago, but it does not have any
> > > users. So let's remove the generic implementation and the test vectors,
> > > but retain the "zlib-deflate" entry in the testmgr code to avoid
> > > introducing warning messages on systems that implement zlib-deflate in
> > > hardware.
> > >
> > > Note that RFC 1950 which forms the basis of this algorithm dates back to
> > > 1996, and predates RFC 1951, on which the existing IPcomp is based and
> > > which we have supported in the kernel since 2003. So it seems rather
> > > unlikely that we will ever grow the need to support zlib-deflate.
> > >
> > > Signed-off-by: Ard Biesheuvel 
> > > ---
> > >  crypto/deflate.c | 61 +---
> > >  crypto/testmgr.c |  8 +--
> > >  crypto/testmgr.h | 75 
> > >  3 files changed, 18 insertions(+), 126 deletions(-)
> >
> > So if this is really unused, it's probably fair to remove it on that basis.
> > However, it's not correct to claim that DEFLATE is obsoleted by zlib (the 
> > data
> > format).  zlib is just DEFLATE plus a checksum, as is gzip.
> >
> > Many users of zlib or gzip use an external checksum and therefore would be
> > better served by DEFLATE, avoiding a redundant builtin checksum.  Typically,
> > people have chosen zlib or gzip simply because their compression library
> > defaulted to it, they didn't understand the difference, and they overlooked 
> > that
> > they're paying the price for a redundant builtin checksum.
> >
> > An example of someone doing it right is EROFS, which is working on adding
> > DEFLATE support (not zlib or gzip!):
> > https://lore.kernel.org/r/20230713001441.30462-1-hsiang...@linux.alibaba.com
> >
> > Of course, they are using the library API instead of the clumsy crypto API.
> >
>
> Ah, I misread this patch, sorry.  It's actually removing support for zlib (the
> data format) from the scomp API, leaving just DEFLATE.  That's fine too; 
> again,
> it ultimately just depends on what is actually being used via the scomp API.
> But similarly you can't really claim that zlib is obsoleted by DEFLATE just
> because of the RFC dates.  As I mentioned, many people do use zlib (the data
> format), often just because it's the default of zlib (the library) and they
> didn't know any better.  For example, btrfs compression supports zlib.
>

I am not suggesting either is obsolete. I am merely pointing out that
zlib-deflate is as old as plain deflate, and so we could have
implemented both at the same time when IPcomp support was added, but
we never bothered.


[RFC PATCH 21/21] crypto: scompress - Drop the use of per-cpu scratch buffers

2023-07-18 Thread Ard Biesheuvel
The scomp to acomp adaptation layer allocates 256k of scratch buffers
per CPU in order to be able to present the input provided by the caller
via scatterlists as linear byte arrays to the underlying synchronous
compression drivers, most of which are thin wrappers around the various
compression algorithm library implementations we have in the kernel.

This sucks. With high core counts and SMT, this easily adds up to
multiple megabytes that are permanently tied up for this purpose, and
given that all acomp users pass either single pages or contiguous
buffers in lowmem, we can optimize for this pattern and just pass the
buffer directly if we can. This removes the need for scratch buffers,
and along with it, the arbitrary 128k upper bound on the input and
output size of the acomp API when the implementation happens to be scomp
based.

So add a scomp_map_sg() helper to try and obtain the virtual addresses
associated with the scatterlists, which is guaranteed to be successful
100% of the time given the existing users, which all fit the prerequisite
pattern. And as a fallback for other cases, use kvmalloc with GFP_KERNEL
to allocate buffers on the fly and free them again right after.

This puts the burden on future callers to either use a contiguous
buffer, or deal with the potentially blocking nature of GFP_KERNEL.
For IPcomp in particular, the only relevant compression algorithm is
'deflate' which is no longer implemented as an scomp, and so this change
will not affect it even if we decide to convert it to take advantage of
the ability to pass discontiguous scatterlists.

Signed-off-by: Ard Biesheuvel 
---
 crypto/scompress.c  | 159 ++--
 include/crypto/internal/scompress.h |   2 -
 2 files changed, 76 insertions(+), 85 deletions(-)

diff --git a/crypto/scompress.c b/crypto/scompress.c
index 3155cdce9116e092..1c050aa864bd604d 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -18,24 +18,11 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include "compress.h"
 
-struct scomp_scratch {
-   spinlock_t  lock;
-   void*src;
-   void*dst;
-};
-
-static DEFINE_PER_CPU(struct scomp_scratch, scomp_scratch) = {
-   .lock = __SPIN_LOCK_UNLOCKED(scomp_scratch.lock),
-};
-
 static const struct crypto_type crypto_scomp_type;
-static int scomp_scratch_users;
-static DEFINE_MUTEX(scomp_lock);
 
 static int __maybe_unused crypto_scomp_report(
struct sk_buff *skb, struct crypto_alg *alg)
@@ -58,56 +45,45 @@ static void crypto_scomp_show(struct seq_file *m, struct 
crypto_alg *alg)
seq_puts(m, "type : scomp\n");
 }
 
-static void crypto_scomp_free_scratches(void)
-{
-   struct scomp_scratch *scratch;
-   int i;
-
-   for_each_possible_cpu(i) {
-   scratch = per_cpu_ptr(&scomp_scratch, i);
-
-   vfree(scratch->src);
-   vfree(scratch->dst);
-   scratch->src = NULL;
-   scratch->dst = NULL;
-   }
-}
-
-static int crypto_scomp_alloc_scratches(void)
-{
-   struct scomp_scratch *scratch;
-   int i;
-
-   for_each_possible_cpu(i) {
-   void *mem;
-
-   scratch = per_cpu_ptr(&scomp_scratch, i);
-
-   mem = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
-   if (!mem)
-   goto error;
-   scratch->src = mem;
-   mem = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
-   if (!mem)
-   goto error;
-   scratch->dst = mem;
-   }
-   return 0;
-error:
-   crypto_scomp_free_scratches();
-   return -ENOMEM;
-}
-
 static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
 {
-   int ret = 0;
+   return 0;
+}
 
-   mutex_lock(&scomp_lock);
-   if (!scomp_scratch_users++)
-   ret = crypto_scomp_alloc_scratches();
-   mutex_unlock(&scomp_lock);
+/**
+ * scomp_map_sg - Return virtual address of memory described by a scatterlist
+ *
+ * @sg:The address of the scatterlist in memory
+ * @len:   The length of the buffer described by the scatterlist
+ *
+ * If the memory region described by scatterlist @sg consists of @len
+ * contiguous bytes in memory and is accessible via the linear mapping or via a
+ * single kmap(), return its virtual address.  Otherwise, return NULL.
+ */
+static void *scomp_map_sg(struct scatterlist *sg, unsigned int len)
+{
+   struct page *page;
+   unsigned int offset;
 
-   return ret;
+   while (sg_is_chain(sg))
+   sg = sg_next(sg);
+
+   if (!sg || sg_nents_for_len(sg, len) != 1)
+   return NULL;
+
+   page   = sg_page(sg) + (sg->offset >> PAGE_SHIFT);
+   offset = offset_in_page(sg->offset);
+
+   if (PageHighMem(page) && (offset + sg->length) > PAGE_SIZE)
+

[RFC PATCH 20/21] crypto: deflate - implement acomp API directly

2023-07-18 Thread Ard Biesheuvel
Drop the scomp implementation of deflate, which can only operate on
contiguous in- and output buffer, and replace it with an implementation
of acomp directly. This implementation walks the scatterlists, removing
the need for the caller to use scratch buffers to present the input and
output in a contiguous manner.

This is intended for use by the IPcomp code, which currently needs to
'linearize' SKBs in order for the compression to be able to consume the
input in a single chunk.

Signed-off-by: Ard Biesheuvel 
---
 crypto/deflate.c | 315 +++-
 include/crypto/scatterwalk.h |   2 +-
 2 files changed, 113 insertions(+), 204 deletions(-)

diff --git a/crypto/deflate.c b/crypto/deflate.c
index 0955040ca9e64146..112683473df2b588 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -6,246 +6,154 @@
  * by IPCOMP (RFC 3173 & RFC 2394).
  *
  * Copyright (c) 2003 James Morris 
- *
- * FIXME: deflate transforms will require up to a total of about 436k of kernel
- * memory on i386 (390k for compression, the rest for decompression), as the
- * current zlib kernel code uses a worst case pre-allocation system by default.
- * This needs to be fixed so that the amount of memory required is properly
- * related to the  winbits and memlevel parameters.
- *
- * The default winbits of 11 should suit most packets, and it may be something
- * to configure on a per-tfm basis in the future.
- *
- * Currently, compression history is not maintained between tfm calls, as
- * it is not needed for IPCOMP and keeps the code simpler.  It can be
- * implemented if someone wants it.
+ * Copyright (c) 2023 Google, LLC. 
  */
 #include 
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
 #include 
-#include 
+#include 
+#include 
+#include 
 
 #define DEFLATE_DEF_LEVEL  Z_DEFAULT_COMPRESSION
 #define DEFLATE_DEF_WINBITS11
 #define DEFLATE_DEF_MEMLEVEL   MAX_MEM_LEVEL
 
-struct deflate_ctx {
-   struct z_stream_s comp_stream;
-   struct z_stream_s decomp_stream;
+struct deflate_req_ctx {
+   struct z_stream_s stream;
+   u8 workspace[];
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx)
+static int deflate_process(struct acomp_req *req, struct z_stream_s *stream,
+  int (*process)(struct z_stream_s *, int))
 {
-   int ret = 0;
-   struct z_stream_s *stream = &ctx->comp_stream;
+   unsigned int slen = req->slen;
+   unsigned int dlen = req->dlen;
+   struct scatter_walk src, dst;
+   unsigned int scur, dcur;
+   int ret;
 
-   stream->workspace = vzalloc(zlib_deflate_workspacesize(
-   -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL));
-   if (!stream->workspace) {
-   ret = -ENOMEM;
-   goto out;
-   }
+   stream->avail_in = stream->avail_out = 0;
+
+   scatterwalk_start(&src, req->src);
+   scatterwalk_start(&dst, req->dst);
+
+   scur = dcur = 0;
+
+   do {
+   if (stream->avail_in == 0) {
+   if (scur) {
+   slen -= scur;
+
+   scatterwalk_unmap(stream->next_in - scur);
+   scatterwalk_advance(&src, scur);
+   scatterwalk_done(&src, 0, slen);
+   }
+
+   scur = scatterwalk_clamp(&src, slen);
+   if (scur) {
+   stream->next_in = scatterwalk_map(&src);
+   stream->avail_in = scur;
+   }
+   }
+
+   if (stream->avail_out == 0) {
+   if (dcur) {
+   dlen -= dcur;
+
+   scatterwalk_unmap(stream->next_out - dcur);
+   scatterwalk_advance(&dst, dcur);
+   scatterwalk_done(&dst, 1, dlen);
+   }
+
+   dcur = scatterwalk_clamp(&dst, dlen);
+   if (!dcur)
+   break;
+
+   stream->next_out = scatterwalk_map(&dst);
+   stream->avail_out = dcur;
+   }
+
+   ret = process(stream, (slen == scur) ? Z_FINISH : Z_SYNC_FLUSH);
+   } while (ret == Z_OK);
+
+   if (scur)
+   scatterwalk_unmap(stream->next_in - scur);
+   if (dcur)
+   scatterwalk_unmap(stream->next_out - dcur);
+
+   if (ret != Z_STREAM_END)
+   return -EINVAL;
+
+   req->dlen = stream->total_out;
+   return 0;
+}
+
+static int deflate_compress(struct acomp_req *req)
+{
+   struct deflate_req_ctx *ctx = acomp_request_ctx(req);
+   struct z_stream_s *stream = &ctx->stream;
+   int ret;

[RFC PATCH 19/21] crypto: remove obsolete 'comp' compression API

2023-07-18 Thread Ard Biesheuvel
The 'comp' compression API has been superseded by the acomp API, which
is a bit more cumbersome to use, but ultimately more flexible when it
comes to hardware implementations.

Now that all the users and implementations have been removed, let's
remove the core plumbing of the 'comp' API as well.

Signed-off-by: Ard Biesheuvel 
---
 Documentation/crypto/architecture.rst |   2 -
 crypto/Makefile   |   2 +-
 crypto/api.c  |   4 -
 crypto/compress.c |  32 -
 crypto/crypto_user_base.c |  16 ---
 crypto/crypto_user_stat.c |   4 -
 crypto/proc.c |   3 -
 crypto/testmgr.c  | 144 ++--
 include/linux/crypto.h|  49 +--
 9 files changed, 12 insertions(+), 244 deletions(-)

diff --git a/Documentation/crypto/architecture.rst 
b/Documentation/crypto/architecture.rst
index 646c3380a7edc4c6..ec7436aade15c2e6 100644
--- a/Documentation/crypto/architecture.rst
+++ b/Documentation/crypto/architecture.rst
@@ -196,8 +196,6 @@ the aforementioned cipher types:
 
 -  CRYPTO_ALG_TYPE_CIPHER Single block cipher
 
--  CRYPTO_ALG_TYPE_COMPRESS Compression
-
 -  CRYPTO_ALG_TYPE_AEAD Authenticated Encryption with Associated Data
(MAC)
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 953a7e105e58c837..5775440c62e09eac 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -4,7 +4,7 @@
 #
 
 obj-$(CONFIG_CRYPTO) += crypto.o
-crypto-y := api.o cipher.o compress.o
+crypto-y := api.o cipher.o
 
 obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
 obj-$(CONFIG_CRYPTO_FIPS) += fips.o
diff --git a/crypto/api.c b/crypto/api.c
index b9cc0c906efe0706..23d691a70bc3fb00 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -369,10 +369,6 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, 
u32 type, u32 mask)
case CRYPTO_ALG_TYPE_CIPHER:
len += crypto_cipher_ctxsize(alg);
break;
-
-   case CRYPTO_ALG_TYPE_COMPRESS:
-   len += crypto_compress_ctxsize(alg);
-   break;
}
 
return len;
diff --git a/crypto/compress.c b/crypto/compress.c
deleted file mode 100644
index 9048fe390c463069..
--- a/crypto/compress.c
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Cryptographic API.
- *
- * Compression operations.
- *
- * Copyright (c) 2002 James Morris 
- */
-#include 
-#include "internal.h"
-
-int crypto_comp_compress(struct crypto_comp *comp,
-const u8 *src, unsigned int slen,
-u8 *dst, unsigned int *dlen)
-{
-   struct crypto_tfm *tfm = crypto_comp_tfm(comp);
-
-   return tfm->__crt_alg->cra_compress.coa_compress(tfm, src, slen, dst,
-dlen);
-}
-EXPORT_SYMBOL_GPL(crypto_comp_compress);
-
-int crypto_comp_decompress(struct crypto_comp *comp,
-  const u8 *src, unsigned int slen,
-  u8 *dst, unsigned int *dlen)
-{
-   struct crypto_tfm *tfm = crypto_comp_tfm(comp);
-
-   return tfm->__crt_alg->cra_compress.coa_decompress(tfm, src, slen, dst,
-  dlen);
-}
-EXPORT_SYMBOL_GPL(crypto_comp_decompress);
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index 3fa20f12989f7ef2..c27484b0042e6bd8 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -85,17 +85,6 @@ static int crypto_report_cipher(struct sk_buff *skb, struct 
crypto_alg *alg)
   sizeof(rcipher), &rcipher);
 }
 
-static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
-{
-   struct crypto_report_comp rcomp;
-
-   memset(&rcomp, 0, sizeof(rcomp));
-
-   strscpy(rcomp.type, "compression", sizeof(rcomp.type));
-
-   return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(rcomp), &rcomp);
-}
-
 static int crypto_report_one(struct crypto_alg *alg,
 struct crypto_user_alg *ualg, struct sk_buff *skb)
 {
@@ -136,11 +125,6 @@ static int crypto_report_one(struct crypto_alg *alg,
if (crypto_report_cipher(skb, alg))
goto nla_put_failure;
 
-   break;
-   case CRYPTO_ALG_TYPE_COMPRESS:
-   if (crypto_report_comp(skb, alg))
-   goto nla_put_failure;
-
break;
}
 
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index d4f3d39b51376973..d3133eda2f528d17 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -86,10 +86,6 @@ static int crypto_reportstat_one(struct crypto_alg *alg,
if (crypto_report_cipher(skb, alg))
goto nla_put_failure;
break;
-   case CRYPTO_ALG_TYPE_COMPRESS:
-

[RFC PATCH 18/21] crypto: compress_null - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/crypto_null.c | 31 
 crypto/testmgr.c |  3 --
 2 files changed, 5 insertions(+), 29 deletions(-)

diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 5b84b0f7cc178fcd..75e73b1d6df01cc6 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -24,16 +24,6 @@ static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
 static struct crypto_sync_skcipher *crypto_default_null_skcipher;
 static int crypto_default_null_skcipher_refcnt;
 
-static int null_compress(struct crypto_tfm *tfm, const u8 *src,
-unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   if (slen > *dlen)
-   return -EINVAL;
-   memcpy(dst, src, slen);
-   *dlen = slen;
-   return 0;
-}
-
 static int null_init(struct shash_desc *desc)
 {
return 0;
@@ -121,7 +111,7 @@ static struct skcipher_alg skcipher_null = {
.decrypt=   null_skcipher_crypt,
 };
 
-static struct crypto_alg null_algs[] = { {
+static struct crypto_alg cipher_null = {
.cra_name   =   "cipher_null",
.cra_driver_name=   "cipher_null-generic",
.cra_flags  =   CRYPTO_ALG_TYPE_CIPHER,
@@ -134,19 +124,8 @@ static struct crypto_alg null_algs[] = { {
.cia_setkey =   null_setkey,
.cia_encrypt=   null_crypt,
.cia_decrypt=   null_crypt } }
-}, {
-   .cra_name   =   "compress_null",
-   .cra_driver_name=   "compress_null-generic",
-   .cra_flags  =   CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_blocksize  =   NULL_BLOCK_SIZE,
-   .cra_ctxsize=   0,
-   .cra_module =   THIS_MODULE,
-   .cra_u  =   { .compress = {
-   .coa_compress   =   null_compress,
-   .coa_decompress =   null_compress } }
-} };
+};
 
-MODULE_ALIAS_CRYPTO("compress_null");
 MODULE_ALIAS_CRYPTO("digest_null");
 MODULE_ALIAS_CRYPTO("cipher_null");
 
@@ -189,7 +168,7 @@ static int __init crypto_null_mod_init(void)
 {
int ret = 0;
 
-   ret = crypto_register_algs(null_algs, ARRAY_SIZE(null_algs));
+   ret = crypto_register_alg(&cipher_null);
if (ret < 0)
goto out;
 
@@ -206,14 +185,14 @@ static int __init crypto_null_mod_init(void)
 out_unregister_shash:
crypto_unregister_shash(&digest_null);
 out_unregister_algs:
-   crypto_unregister_algs(null_algs, ARRAY_SIZE(null_algs));
+   crypto_unregister_alg(&cipher_null);
 out:
return ret;
 }
 
 static void __exit crypto_null_mod_fini(void)
 {
-   crypto_unregister_algs(null_algs, ARRAY_SIZE(null_algs));
+   crypto_unregister_alg(&cipher_null);
crypto_unregister_shash(&digest_null);
crypto_unregister_skcipher(&skcipher_null);
 }
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 4971351f55dbabb9..e4b6d67233763193 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4633,9 +4633,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(sm4_cmac128_tv_template)
}
-   }, {
-   .alg = "compress_null",
-   .test = alg_test_null,
}, {
.alg = "crc32",
.test = alg_test_hash,
-- 
2.39.2



[RFC PATCH 17/21] crypto: cavium/zip - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 drivers/crypto/cavium/zip/zip_crypto.c | 40 
 drivers/crypto/cavium/zip/zip_crypto.h | 10 
 drivers/crypto/cavium/zip/zip_main.c   | 50 +---
 3 files changed, 1 insertion(+), 99 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
b/drivers/crypto/cavium/zip/zip_crypto.c
index 1046a746d36f551c..5edad3b1d1dc8398 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -195,46 +195,6 @@ static int zip_decompress(const u8 *src, unsigned int slen,
return ret;
 }
 
-/* Legacy Compress framework start */
-int zip_alloc_comp_ctx_deflate(struct crypto_tfm *tfm)
-{
-   struct zip_kernel_ctx *zip_ctx = crypto_tfm_ctx(tfm);
-
-   return zip_ctx_init(zip_ctx, 0);
-}
-
-int zip_alloc_comp_ctx_lzs(struct crypto_tfm *tfm)
-{
-   struct zip_kernel_ctx *zip_ctx = crypto_tfm_ctx(tfm);
-
-   return zip_ctx_init(zip_ctx, 1);
-}
-
-void zip_free_comp_ctx(struct crypto_tfm *tfm)
-{
-   struct zip_kernel_ctx *zip_ctx = crypto_tfm_ctx(tfm);
-
-   zip_ctx_exit(zip_ctx);
-}
-
-int  zip_comp_compress(struct crypto_tfm *tfm,
-  const u8 *src, unsigned int slen,
-  u8 *dst, unsigned int *dlen)
-{
-   struct zip_kernel_ctx *zip_ctx = crypto_tfm_ctx(tfm);
-
-   return zip_compress(src, slen, dst, dlen, zip_ctx);
-}
-
-int  zip_comp_decompress(struct crypto_tfm *tfm,
-const u8 *src, unsigned int slen,
-u8 *dst, unsigned int *dlen)
-{
-   struct zip_kernel_ctx *zip_ctx = crypto_tfm_ctx(tfm);
-
-   return zip_decompress(src, slen, dst, dlen, zip_ctx);
-} /* Legacy compress framework end */
-
 /* SCOMP framework start */
 void *zip_alloc_scomp_ctx_deflate(struct crypto_scomp *tfm)
 {
diff --git a/drivers/crypto/cavium/zip/zip_crypto.h 
b/drivers/crypto/cavium/zip/zip_crypto.h
index b59ddfcacd34447e..a1ae3825fb65c3b6 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.h
+++ b/drivers/crypto/cavium/zip/zip_crypto.h
@@ -57,16 +57,6 @@ struct zip_kernel_ctx {
struct zip_operation zip_decomp;
 };
 
-int  zip_alloc_comp_ctx_deflate(struct crypto_tfm *tfm);
-int  zip_alloc_comp_ctx_lzs(struct crypto_tfm *tfm);
-void zip_free_comp_ctx(struct crypto_tfm *tfm);
-int  zip_comp_compress(struct crypto_tfm *tfm,
-  const u8 *src, unsigned int slen,
-  u8 *dst, unsigned int *dlen);
-int  zip_comp_decompress(struct crypto_tfm *tfm,
-const u8 *src, unsigned int slen,
-u8 *dst, unsigned int *dlen);
-
 void *zip_alloc_scomp_ctx_deflate(struct crypto_scomp *tfm);
 void *zip_alloc_scomp_ctx_lzs(struct crypto_scomp *tfm);
 void  zip_free_scomp_ctx(struct crypto_scomp *tfm, void *zip_ctx);
diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index dc5b7bf7e1fd9867..abd58de4343ddd8e 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -371,36 +371,6 @@ static struct pci_driver zip_driver = {
 
 /* Kernel Crypto Subsystem Interface */
 
-static struct crypto_alg zip_comp_deflate = {
-   .cra_name   = "deflate",
-   .cra_driver_name= "deflate-cavium",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct zip_kernel_ctx),
-   .cra_priority   = 300,
-   .cra_module = THIS_MODULE,
-   .cra_init   = zip_alloc_comp_ctx_deflate,
-   .cra_exit   = zip_free_comp_ctx,
-   .cra_u  = { .compress = {
-   .coa_compress   = zip_comp_compress,
-   .coa_decompress = zip_comp_decompress
-} }
-};
-
-static struct crypto_alg zip_comp_lzs = {
-   .cra_name   = "lzs",
-   .cra_driver_name= "lzs-cavium",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct zip_kernel_ctx),
-   .cra_priority   = 300,
-   .cra_module = THIS_MODULE,
-   .cra_init   = zip_alloc_comp_ctx_lzs,
-   .cra_exit   = zip_free_comp_ctx,
-   .cra_u  = { .compress = {
-   .coa_compress   = zip_comp_compress,
-   .coa_decompress = zip_comp_decompress
-} }
-};
-
 static struct scomp_alg zip_scomp_deflate = {
.alloc_ctx  = zip_alloc_scomp_ctx_deflate,
.free_ctx   = zip_free_scomp_ctx,
@@ -431,22 +401,10 @@ static int zip_register_compression_device(void)
 {
int ret;
 
-   ret = crypto_register_alg(&zip_comp_deflate);
-   if (ret < 0) {
-   zip_err("Deflate algorithm registration f

[RFC PATCH 16/21] crypto: zstd - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/zstd.c | 56 +---
 1 file changed, 1 insertion(+), 55 deletions(-)

diff --git a/crypto/zstd.c b/crypto/zstd.c
index 154a969c83a82277..c6e6f135c5812c9c 100644
--- a/crypto/zstd.c
+++ b/crypto/zstd.c
@@ -121,13 +121,6 @@ static void *zstd_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int zstd_init(struct crypto_tfm *tfm)
-{
-   struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __zstd_init(ctx);
-}
-
 static void __zstd_exit(void *ctx)
 {
zstd_comp_exit(ctx);
@@ -140,13 +133,6 @@ static void zstd_free_ctx(struct crypto_scomp *tfm, void 
*ctx)
kfree_sensitive(ctx);
 }
 
-static void zstd_exit(struct crypto_tfm *tfm)
-{
-   struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   __zstd_exit(ctx);
-}
-
 static int __zstd_compress(const u8 *src, unsigned int slen,
   u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -161,14 +147,6 @@ static int __zstd_compress(const u8 *src, unsigned int 
slen,
return 0;
 }
 
-static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
-unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __zstd_compress(src, slen, dst, dlen, ctx);
-}
-
 static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src,
  unsigned int slen, u8 *dst, unsigned int *dlen,
  void *ctx)
@@ -189,14 +167,6 @@ static int __zstd_decompress(const u8 *src, unsigned int 
slen,
return 0;
 }
 
-static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
-  unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __zstd_decompress(src, slen, dst, dlen, ctx);
-}
-
 static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen,
void *ctx)
@@ -204,19 +174,6 @@ static int zstd_sdecompress(struct crypto_scomp *tfm, 
const u8 *src,
return __zstd_decompress(src, slen, dst, dlen, ctx);
 }
 
-static struct crypto_alg alg = {
-   .cra_name   = "zstd",
-   .cra_driver_name= "zstd-generic",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct zstd_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = zstd_init,
-   .cra_exit   = zstd_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = zstd_compress,
-   .coa_decompress = zstd_decompress } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = zstd_alloc_ctx,
.free_ctx   = zstd_free_ctx,
@@ -231,22 +188,11 @@ static struct scomp_alg scomp = {
 
 static int __init zstd_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret)
-   crypto_unregister_alg(&alg);
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 
 static void __exit zstd_mod_fini(void)
 {
-   crypto_unregister_alg(&alg);
crypto_unregister_scomp(&scomp);
 }
 
-- 
2.39.2



[RFC PATCH 15/21] crypto: lzo - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/lzo.c | 60 +---
 1 file changed, 1 insertion(+), 59 deletions(-)

diff --git a/crypto/lzo.c b/crypto/lzo.c
index ebda132dd22bf543..52558f9d41f3dcea 100644
--- a/crypto/lzo.c
+++ b/crypto/lzo.c
@@ -26,29 +26,11 @@ static void *lzo_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int lzo_init(struct crypto_tfm *tfm)
-{
-   struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   ctx->lzo_comp_mem = lzo_alloc_ctx(NULL);
-   if (IS_ERR(ctx->lzo_comp_mem))
-   return -ENOMEM;
-
-   return 0;
-}
-
 static void lzo_free_ctx(struct crypto_scomp *tfm, void *ctx)
 {
kvfree(ctx);
 }
 
-static void lzo_exit(struct crypto_tfm *tfm)
-{
-   struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   lzo_free_ctx(NULL, ctx->lzo_comp_mem);
-}
-
 static int __lzo_compress(const u8 *src, unsigned int slen,
  u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -64,14 +46,6 @@ static int __lzo_compress(const u8 *src, unsigned int slen,
return 0;
 }
 
-static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
-   unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __lzo_compress(src, slen, dst, dlen, ctx->lzo_comp_mem);
-}
-
 static int lzo_scompress(struct crypto_scomp *tfm, const u8 *src,
 unsigned int slen, u8 *dst, unsigned int *dlen,
 void *ctx)
@@ -94,12 +68,6 @@ static int __lzo_decompress(const u8 *src, unsigned int slen,
return 0;
 }
 
-static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   return __lzo_decompress(src, slen, dst, dlen);
-}
-
 static int lzo_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -107,19 +75,6 @@ static int lzo_sdecompress(struct crypto_scomp *tfm, const 
u8 *src,
return __lzo_decompress(src, slen, dst, dlen);
 }
 
-static struct crypto_alg alg = {
-   .cra_name   = "lzo",
-   .cra_driver_name= "lzo-generic",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct lzo_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = lzo_init,
-   .cra_exit   = lzo_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = lzo_compress,
-   .coa_decompress = lzo_decompress } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = lzo_alloc_ctx,
.free_ctx   = lzo_free_ctx,
@@ -134,24 +89,11 @@ static struct scomp_alg scomp = {
 
 static int __init lzo_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret) {
-   crypto_unregister_alg(&alg);
-   return ret;
-   }
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 
 static void __exit lzo_mod_fini(void)
 {
-   crypto_unregister_alg(&alg);
crypto_unregister_scomp(&scomp);
 }
 
-- 
2.39.2



[RFC PATCH 14/21] crypto: lzo-rle - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/lzo-rle.c | 60 +---
 1 file changed, 1 insertion(+), 59 deletions(-)

diff --git a/crypto/lzo-rle.c b/crypto/lzo-rle.c
index 0631d975bfac1129..658d6aa46fe21e19 100644
--- a/crypto/lzo-rle.c
+++ b/crypto/lzo-rle.c
@@ -26,29 +26,11 @@ static void *lzorle_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int lzorle_init(struct crypto_tfm *tfm)
-{
-   struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   ctx->lzorle_comp_mem = lzorle_alloc_ctx(NULL);
-   if (IS_ERR(ctx->lzorle_comp_mem))
-   return -ENOMEM;
-
-   return 0;
-}
-
 static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx)
 {
kvfree(ctx);
 }
 
-static void lzorle_exit(struct crypto_tfm *tfm)
-{
-   struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   lzorle_free_ctx(NULL, ctx->lzorle_comp_mem);
-}
-
 static int __lzorle_compress(const u8 *src, unsigned int slen,
  u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -64,14 +46,6 @@ static int __lzorle_compress(const u8 *src, unsigned int 
slen,
return 0;
 }
 
-static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src,
-   unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __lzorle_compress(src, slen, dst, dlen, ctx->lzorle_comp_mem);
-}
-
 static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src,
 unsigned int slen, u8 *dst, unsigned int *dlen,
 void *ctx)
@@ -94,12 +68,6 @@ static int __lzorle_decompress(const u8 *src, unsigned int 
slen,
return 0;
 }
 
-static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   return __lzorle_decompress(src, slen, dst, dlen);
-}
-
 static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -107,19 +75,6 @@ static int lzorle_sdecompress(struct crypto_scomp *tfm, 
const u8 *src,
return __lzorle_decompress(src, slen, dst, dlen);
 }
 
-static struct crypto_alg alg = {
-   .cra_name   = "lzo-rle",
-   .cra_driver_name= "lzo-rle-generic",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct lzorle_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = lzorle_init,
-   .cra_exit   = lzorle_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = lzorle_compress,
-   .coa_decompress = lzorle_decompress } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = lzorle_alloc_ctx,
.free_ctx   = lzorle_free_ctx,
@@ -134,24 +89,11 @@ static struct scomp_alg scomp = {
 
 static int __init lzorle_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret) {
-   crypto_unregister_alg(&alg);
-   return ret;
-   }
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 
 static void __exit lzorle_mod_fini(void)
 {
-   crypto_unregister_alg(&alg);
crypto_unregister_scomp(&scomp);
 }
 
-- 
2.39.2



[RFC PATCH 13/21] crypto: lz4hc - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/lz4hc.c | 63 +---
 1 file changed, 1 insertion(+), 62 deletions(-)

diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index d7cc94aa2fcf42fa..5d6b13319f5e7683 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -26,29 +26,11 @@ static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int lz4hc_init(struct crypto_tfm *tfm)
-{
-   struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL);
-   if (IS_ERR(ctx->lz4hc_comp_mem))
-   return -ENOMEM;
-
-   return 0;
-}
-
 static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx)
 {
vfree(ctx);
 }
 
-static void lz4hc_exit(struct crypto_tfm *tfm)
-{
-   struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem);
-}
-
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
   u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -69,16 +51,6 @@ static int lz4hc_scompress(struct crypto_scomp *tfm, const 
u8 *src,
return __lz4hc_compress_crypto(src, slen, dst, dlen, ctx);
 }
 
-static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-unsigned int slen, u8 *dst,
-unsigned int *dlen)
-{
-   struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __lz4hc_compress_crypto(src, slen, dst, dlen,
-   ctx->lz4hc_comp_mem);
-}
-
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -98,26 +70,6 @@ static int lz4hc_sdecompress(struct crypto_scomp *tfm, const 
u8 *src,
return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
 }
 
-static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-  unsigned int slen, u8 *dst,
-  unsigned int *dlen)
-{
-   return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
-}
-
-static struct crypto_alg alg_lz4hc = {
-   .cra_name   = "lz4hc",
-   .cra_driver_name= "lz4hc-generic",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct lz4hc_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = lz4hc_init,
-   .cra_exit   = lz4hc_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = lz4hc_compress_crypto,
-   .coa_decompress = lz4hc_decompress_crypto } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = lz4hc_alloc_ctx,
.free_ctx   = lz4hc_free_ctx,
@@ -132,24 +84,11 @@ static struct scomp_alg scomp = {
 
 static int __init lz4hc_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg_lz4hc);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret) {
-   crypto_unregister_alg(&alg_lz4hc);
-   return ret;
-   }
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 
 static void __exit lz4hc_mod_fini(void)
 {
-   crypto_unregister_alg(&alg_lz4hc);
crypto_unregister_scomp(&scomp);
 }
 
-- 
2.39.2



[RFC PATCH 12/21] crypto: lz4 - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/lz4.c | 61 +---
 1 file changed, 1 insertion(+), 60 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 0606f8862e7872ad..c46b6cbd91ce10c0 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -27,29 +27,11 @@ static void *lz4_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int lz4_init(struct crypto_tfm *tfm)
-{
-   struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   ctx->lz4_comp_mem = lz4_alloc_ctx(NULL);
-   if (IS_ERR(ctx->lz4_comp_mem))
-   return -ENOMEM;
-
-   return 0;
-}
-
 static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx)
 {
vfree(ctx);
 }
 
-static void lz4_exit(struct crypto_tfm *tfm)
-{
-   struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   lz4_free_ctx(NULL, ctx->lz4_comp_mem);
-}
-
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -70,14 +52,6 @@ static int lz4_scompress(struct crypto_scomp *tfm, const u8 
*src,
return __lz4_compress_crypto(src, slen, dst, dlen, ctx);
 }
 
-static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-  unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __lz4_compress_crypto(src, slen, dst, dlen, ctx->lz4_comp_mem);
-}
-
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
   u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -97,26 +71,6 @@ static int lz4_sdecompress(struct crypto_scomp *tfm, const 
u8 *src,
return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
 }
 
-static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-unsigned int slen, u8 *dst,
-unsigned int *dlen)
-{
-   return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
-}
-
-static struct crypto_alg alg_lz4 = {
-   .cra_name   = "lz4",
-   .cra_driver_name= "lz4-generic",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct lz4_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = lz4_init,
-   .cra_exit   = lz4_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = lz4_compress_crypto,
-   .coa_decompress = lz4_decompress_crypto } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = lz4_alloc_ctx,
.free_ctx   = lz4_free_ctx,
@@ -131,24 +85,11 @@ static struct scomp_alg scomp = {
 
 static int __init lz4_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg_lz4);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret) {
-   crypto_unregister_alg(&alg_lz4);
-   return ret;
-   }
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 
 static void __exit lz4_mod_fini(void)
 {
-   crypto_unregister_alg(&alg_lz4);
crypto_unregister_scomp(&scomp);
 }
 
-- 
2.39.2



[RFC PATCH 11/21] crypto: deflate - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
No users of the obsolete 'comp' crypto compression API remain, so let's
drop the software deflate version of it.

Signed-off-by: Ard Biesheuvel 
---
 crypto/deflate.c | 58 +---
 1 file changed, 1 insertion(+), 57 deletions(-)

diff --git a/crypto/deflate.c b/crypto/deflate.c
index f4f127078fe2a5aa..0955040ca9e64146 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -130,13 +130,6 @@ static void *deflate_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int deflate_init(struct crypto_tfm *tfm)
-{
-   struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return __deflate_init(ctx);
-}
-
 static void __deflate_exit(void *ctx)
 {
deflate_comp_exit(ctx);
@@ -149,13 +142,6 @@ static void deflate_free_ctx(struct crypto_scomp *tfm, 
void *ctx)
kfree_sensitive(ctx);
 }
 
-static void deflate_exit(struct crypto_tfm *tfm)
-{
-   struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   __deflate_exit(ctx);
-}
-
 static int __deflate_compress(const u8 *src, unsigned int slen,
  u8 *dst, unsigned int *dlen, void *ctx)
 {
@@ -185,14 +171,6 @@ static int __deflate_compress(const u8 *src, unsigned int 
slen,
return ret;
 }
 
-static int deflate_compress(struct crypto_tfm *tfm, const u8 *src,
-   unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
-
-   return __deflate_compress(src, slen, dst, dlen, dctx);
-}
-
 static int deflate_scompress(struct crypto_scomp *tfm, const u8 *src,
 unsigned int slen, u8 *dst, unsigned int *dlen,
 void *ctx)
@@ -241,14 +219,6 @@ static int __deflate_decompress(const u8 *src, unsigned 
int slen,
return ret;
 }
 
-static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-   struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
-
-   return __deflate_decompress(src, slen, dst, dlen, dctx);
-}
-
 static int deflate_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -256,19 +226,6 @@ static int deflate_sdecompress(struct crypto_scomp *tfm, 
const u8 *src,
return __deflate_decompress(src, slen, dst, dlen, ctx);
 }
 
-static struct crypto_alg alg = {
-   .cra_name   = "deflate",
-   .cra_driver_name= "deflate-generic",
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct deflate_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = deflate_init,
-   .cra_exit   = deflate_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = deflate_compress,
-   .coa_decompress = deflate_decompress } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = deflate_alloc_ctx,
.free_ctx   = deflate_free_ctx,
@@ -283,24 +240,11 @@ static struct scomp_alg scomp = {
 
 static int __init deflate_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret) {
-   crypto_unregister_alg(&alg);
-   return ret;
-   }
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 
 static void __exit deflate_mod_fini(void)
 {
-   crypto_unregister_alg(&alg);
crypto_unregister_scomp(&scomp);
 }
 
-- 
2.39.2



[RFC PATCH 10/21] crypto: 842 - drop obsolete 'comp' implementation

2023-07-18 Thread Ard Biesheuvel
The 'comp' API is obsolete and will be removed, so remove this comp
implementation.

Signed-off-by: Ard Biesheuvel 
---
 crypto/842.c | 63 +---
 1 file changed, 1 insertion(+), 62 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index e59e54d769609ba6..5001d88cf727f74e 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -39,38 +39,11 @@ static void *crypto842_alloc_ctx(struct crypto_scomp *tfm)
return ctx;
 }
 
-static int crypto842_init(struct crypto_tfm *tfm)
-{
-   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   ctx->wmem = crypto842_alloc_ctx(NULL);
-   if (IS_ERR(ctx->wmem))
-   return -ENOMEM;
-
-   return 0;
-}
-
 static void crypto842_free_ctx(struct crypto_scomp *tfm, void *ctx)
 {
kfree(ctx);
 }
 
-static void crypto842_exit(struct crypto_tfm *tfm)
-{
-   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   crypto842_free_ctx(NULL, ctx->wmem);
-}
-
-static int crypto842_compress(struct crypto_tfm *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen)
-{
-   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
-
-   return sw842_compress(src, slen, dst, dlen, ctx->wmem);
-}
-
 static int crypto842_scompress(struct crypto_scomp *tfm,
   const u8 *src, unsigned int slen,
   u8 *dst, unsigned int *dlen, void *ctx)
@@ -78,13 +51,6 @@ static int crypto842_scompress(struct crypto_scomp *tfm,
return sw842_compress(src, slen, dst, dlen, ctx);
 }
 
-static int crypto842_decompress(struct crypto_tfm *tfm,
-   const u8 *src, unsigned int slen,
-   u8 *dst, unsigned int *dlen)
-{
-   return sw842_decompress(src, slen, dst, dlen);
-}
-
 static int crypto842_sdecompress(struct crypto_scomp *tfm,
 const u8 *src, unsigned int slen,
 u8 *dst, unsigned int *dlen, void *ctx)
@@ -92,20 +58,6 @@ static int crypto842_sdecompress(struct crypto_scomp *tfm,
return sw842_decompress(src, slen, dst, dlen);
 }
 
-static struct crypto_alg alg = {
-   .cra_name   = "842",
-   .cra_driver_name= "842-generic",
-   .cra_priority   = 100,
-   .cra_flags  = CRYPTO_ALG_TYPE_COMPRESS,
-   .cra_ctxsize= sizeof(struct crypto842_ctx),
-   .cra_module = THIS_MODULE,
-   .cra_init   = crypto842_init,
-   .cra_exit   = crypto842_exit,
-   .cra_u  = { .compress = {
-   .coa_compress   = crypto842_compress,
-   .coa_decompress = crypto842_decompress } }
-};
-
 static struct scomp_alg scomp = {
.alloc_ctx  = crypto842_alloc_ctx,
.free_ctx   = crypto842_free_ctx,
@@ -121,25 +73,12 @@ static struct scomp_alg scomp = {
 
 static int __init crypto842_mod_init(void)
 {
-   int ret;
-
-   ret = crypto_register_alg(&alg);
-   if (ret)
-   return ret;
-
-   ret = crypto_register_scomp(&scomp);
-   if (ret) {
-   crypto_unregister_alg(&alg);
-   return ret;
-   }
-
-   return ret;
+   return crypto_register_scomp(&scomp);
 }
 subsys_initcall(crypto842_mod_init);
 
 static void __exit crypto842_mod_exit(void)
 {
-   crypto_unregister_alg(&alg);
crypto_unregister_scomp(&scomp);
 }
 module_exit(crypto842_mod_exit);
-- 
2.39.2



[RFC PATCH 09/21] crypto: nx - Migrate to scomp API

2023-07-18 Thread Ard Biesheuvel
The only remaining user of 842 compression has been migrated to the
acomp compression API, and so the NX hardware driver has to follow suit,
given that no users of the obsolete 'comp' API remain, and it is going
to be removed.

So migrate the NX driver code to scomp. The resulting scomp algorithms
will be wrapped and exposed as acomp implementations via the crypto
subsystem's acomp-to-scomp adaptation layer.
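
For illustration, a minimal sketch of the scomp driver-side interface the
NX code is being moved to; all example_* names are hypothetical and not
part of this patch, and a real driver would perform the actual
(de)compression in the callbacks:

  #include <crypto/internal/scompress.h>
  #include <linux/err.h>
  #include <linux/module.h>
  #include <linux/slab.h>

  static void *example_alloc_ctx(struct crypto_scomp *tfm)
  {
          /* per-context working memory, sized arbitrarily here */
          void *ctx = kzalloc(64, GFP_KERNEL);

          return ctx ?: ERR_PTR(-ENOMEM);
  }

  static void example_free_ctx(struct crypto_scomp *tfm, void *ctx)
  {
          kfree(ctx);
  }

  static int example_compress(struct crypto_scomp *tfm, const u8 *src,
                              unsigned int slen, u8 *dst,
                              unsigned int *dlen, void *ctx)
  {
          /* a real driver compresses src into dst and updates *dlen */
          return -EOPNOTSUPP;
  }

  static struct scomp_alg example_scomp = {
          .alloc_ctx      = example_alloc_ctx,
          .free_ctx       = example_free_ctx,
          .compress       = example_compress,
          .decompress     = example_compress,
          .base           = {
                  .cra_name        = "example",
                  .cra_driver_name = "example-scomp",
                  .cra_module      = THIS_MODULE,
          },
  };

Such an alg is registered with crypto_register_scomp() and, through the
adaptation layer, is visible to users as an acomp transform.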

Signed-off-by: Ard Biesheuvel 
---
 drivers/crypto/nx/nx-842.c| 34 
 drivers/crypto/nx/nx-842.h| 14 
 drivers/crypto/nx/nx-common-powernv.c | 30 -
 drivers/crypto/nx/nx-common-pseries.c | 32 +-
 4 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 2ab90ec10e61ebe8..331b9cdf85e27044 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -101,9 +101,14 @@ static int update_param(struct nx842_crypto_param *p,
return 0;
 }
 
-int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
+void *nx842_crypto_alloc_ctx(struct crypto_scomp *tfm,
+struct nx842_driver *driver)
 {
-   struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct nx842_crypto_ctx *ctx;
+
+   ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+   if (!ctx)
+   return ERR_PTR(-ENOMEM);
 
spin_lock_init(&ctx->lock);
ctx->driver = driver;
@@ -114,22 +119,23 @@ int nx842_crypto_init(struct crypto_tfm *tfm, struct 
nx842_driver *driver)
kfree(ctx->wmem);
free_page((unsigned long)ctx->sbounce);
free_page((unsigned long)ctx->dbounce);
-   return -ENOMEM;
+   kfree(ctx);
+   return ERR_PTR(-ENOMEM);
}
 
-   return 0;
+   return ctx;
 }
-EXPORT_SYMBOL_GPL(nx842_crypto_init);
+EXPORT_SYMBOL_GPL(nx842_crypto_alloc_ctx);
 
-void nx842_crypto_exit(struct crypto_tfm *tfm)
+void nx842_crypto_free_ctx(struct crypto_scomp *tfm, void *p)
 {
-   struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct nx842_crypto_ctx *ctx = p;
 
kfree(ctx->wmem);
free_page((unsigned long)ctx->sbounce);
free_page((unsigned long)ctx->dbounce);
 }
-EXPORT_SYMBOL_GPL(nx842_crypto_exit);
+EXPORT_SYMBOL_GPL(nx842_crypto_free_ctx);
 
 static void check_constraints(struct nx842_constraints *c)
 {
@@ -246,11 +252,11 @@ static int compress(struct nx842_crypto_ctx *ctx,
return update_param(p, slen, dskip + dlen);
 }
 
-int nx842_crypto_compress(struct crypto_tfm *tfm,
+int nx842_crypto_compress(struct crypto_scomp *tfm,
  const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen)
+ u8 *dst, unsigned int *dlen, void *pctx)
 {
-   struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct nx842_crypto_ctx *ctx = pctx;
struct nx842_crypto_header *hdr = &ctx->header;
struct nx842_crypto_param p;
struct nx842_constraints c = *ctx->driver->constraints;
@@ -429,11 +435,11 @@ static int decompress(struct nx842_crypto_ctx *ctx,
return update_param(p, slen + padding, dlen);
 }
 
-int nx842_crypto_decompress(struct crypto_tfm *tfm,
+int nx842_crypto_decompress(struct crypto_scomp *tfm,
const u8 *src, unsigned int slen,
-   u8 *dst, unsigned int *dlen)
+   u8 *dst, unsigned int *dlen, void *pctx)
 {
-   struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct nx842_crypto_ctx *ctx = pctx;
struct nx842_crypto_header *hdr;
struct nx842_crypto_param p;
struct nx842_constraints c = *ctx->driver->constraints;
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index 7590bfb24d79bf42..de9dc8df62ed9dcb 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Restrictions on Data Descriptor List (DDL) and Entry (DDE) buffers
  *
@@ -177,13 +178,14 @@ struct nx842_crypto_ctx {
struct nx842_driver *driver;
 };
 
-int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver);
-void nx842_crypto_exit(struct crypto_tfm *tfm);
-int nx842_crypto_compress(struct crypto_tfm *tfm,
+void *nx842_crypto_alloc_ctx(struct crypto_scomp *tfm,
+struct nx842_driver *driver);
+void nx842_crypto_free_ctx(struct crypto_scomp *tfm, void *ctx);
+int nx842_crypto_compress(struct crypto_scomp *tfm,
  const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen);
-int nx842_crypto_decompress(struct crypto_tfm *tfm,
+ u8 *dst, unsigned int *dlen, void *ctx);
+int nx842_crypto_decompress(struct crypto_scomp *tfm

[RFC PATCH 08/21] zram: Migrate to acomp compression API

2023-07-18 Thread Ard Biesheuvel
Switch from the deprecated 'comp' to the more recent 'acomp' API.

This involves using scatterlists and request objects to describe the in-
and output buffers, all of which happen to be contiguous in memory, and
reside either entirely in lowmem, or inside a single highmem page. This
makes the conversion quite straightforward, and easy to back by either
a software or a hardware implementation.
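
To illustrate the calling convention this relies on, here is a minimal
sketch of compressing one page through a synchronous acomp transform
(hypothetical helper, error paths trimmed; "lzo" is just an example
algorithm name):

  #include <crypto/acompress.h>
  #include <linux/scatterlist.h>

  static int example_compress_page(struct page *src, void *dst_buf,
                                   unsigned int dst_size,
                                   unsigned int *out_len)
  {
          struct scatterlist sg_src, sg_dst;
          struct crypto_acomp *tfm;
          struct acomp_req *req;
          int ret;

          /* request a synchronous implementation only */
          tfm = crypto_alloc_acomp("lzo", 0, CRYPTO_ALG_ASYNC);
          if (IS_ERR(tfm))
                  return PTR_ERR(tfm);

          req = acomp_request_alloc(tfm);
          if (!req) {
                  crypto_free_acomp(tfm);
                  return -ENOMEM;
          }

          sg_init_table(&sg_src, 1);
          sg_set_page(&sg_src, src, PAGE_SIZE, 0); /* may be highmem */
          sg_init_one(&sg_dst, dst_buf, dst_size); /* contiguous lowmem */

          acomp_request_set_params(req, &sg_src, &sg_dst, PAGE_SIZE,
                                   dst_size);

          ret = crypto_acomp_compress(req);
          if (!ret)
                  *out_len = req->dlen;

          acomp_request_free(req);
          crypto_free_acomp(tfm);
          return ret;
  }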

Signed-off-by: Ard Biesheuvel 
---
 drivers/block/zram/zcomp.c| 67 +++-
 drivers/block/zram/zcomp.h|  7 +-
 drivers/block/zram/zram_drv.c | 12 +---
 3 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 55af4efd79835666..12bdd288a153c455 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -11,6 +11,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "zcomp.h"
 
@@ -35,26 +38,32 @@ static const char * const backends[] = {
 
 static void zcomp_strm_free(struct zcomp_strm *zstrm)
 {
+   if (zstrm->req)
+   acomp_request_free(zstrm->req);
if (!IS_ERR_OR_NULL(zstrm->tfm))
-   crypto_free_comp(zstrm->tfm);
+   crypto_free_acomp(zstrm->tfm);
free_pages((unsigned long)zstrm->buffer, 1);
+   zstrm->req = NULL;
zstrm->tfm = NULL;
zstrm->buffer = NULL;
 }
 
 /*
- * Initialize zcomp_strm structure with ->tfm initialized by backend, and
- * ->buffer. Return a negative value on error.
+ * Initialize zcomp_strm structure with ->tfm and ->req initialized by
+ * backend, and ->buffer. Return a negative value on error.
  */
 static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp)
 {
-   zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0);
+   zstrm->tfm = crypto_alloc_acomp(comp->name, 0, CRYPTO_ALG_ASYNC);
+   if (!IS_ERR_OR_NULL(zstrm->tfm))
+   zstrm->req = acomp_request_alloc(zstrm->tfm);
+
/*
 * allocate 2 pages. 1 for compressed data, plus 1 extra for the
 * case when compressed size is larger than the original one
 */
zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
-   if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) {
+   if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->req || !zstrm->buffer) {
zcomp_strm_free(zstrm);
return -ENOMEM;
}
@@ -70,7 +79,7 @@ bool zcomp_available_algorithm(const char *comp)
 * This also means that we permit zcomp initialisation
 * with any compressing algorithm known to crypto api.
 */
-   return crypto_has_comp(comp, 0, 0) == 1;
+   return crypto_has_acomp(comp, 0, CRYPTO_ALG_ASYNC);
 }
 
 /* show available compressors */
@@ -95,7 +104,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
 * Out-of-tree module known to crypto api or a missing
 * entry in `backends'.
 */
-   if (!known_algorithm && crypto_has_comp(comp, 0, 0) == 1)
+   if (!known_algorithm && crypto_has_acomp(comp, 0, CRYPTO_ALG_ASYNC))
sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
"[%s] ", comp);
 
@@ -115,8 +124,14 @@ void zcomp_stream_put(struct zcomp *comp)
 }
 
 int zcomp_compress(struct zcomp_strm *zstrm,
-   const void *src, unsigned int *dst_len)
+  struct page *src, unsigned int *dst_len)
 {
+   struct scatterlist sg_src, sg_dst;
+   int ret;
+
+   sg_init_table(&sg_src, 1);
+   sg_set_page(&sg_src, src, PAGE_SIZE, 0);
+
/*
 * Our dst memory (zstrm->buffer) is always `2 * PAGE_SIZE' sized
 * because sometimes we can endup having a bigger compressed data
@@ -131,21 +146,39 @@ int zcomp_compress(struct zcomp_strm *zstrm,
 * the dst buffer, zram_drv will take care of the fact that
 * compressed buffer is too big.
 */
-   *dst_len = PAGE_SIZE * 2;
+   sg_init_one(&sg_dst, zstrm->buffer, PAGE_SIZE * 2);
 
-   return crypto_comp_compress(zstrm->tfm,
-   src, PAGE_SIZE,
-   zstrm->buffer, dst_len);
+   acomp_request_set_params(zstrm->req, &sg_src, &sg_dst, PAGE_SIZE,
+PAGE_SIZE * 2);
+
+   ret = crypto_acomp_compress(zstrm->req);
+   if (ret)
+   return ret;
+
+   *dst_len = zstrm->req->dlen;
+   return 0;
 }
 
 int zcomp_decompress(struct zcomp_strm *zstrm,
-   const void *src, unsigned int src_len, void *dst)
+const void *src, unsigned int src_len, struct page *dst)
 {
-   unsigned int dst_len = PAGE_SIZE;
+   struct scatterlist sg_src, sg_dst;
 
-   return crypto_comp_decompress(zstrm->tfm,
-  

[RFC PATCH 07/21] ubifs: Migrate to acomp compression API

2023-07-18 Thread Ard Biesheuvel
UBIFS is one of the remaining users of the obsolete 'comp' compression
API exposed by the crypto subsystem. Given that it operates strictly on
contiguous buffers that are either entirely in lowmem or covered by a
single page, the conversion to the acomp API is quite straightforward.

Only synchronous acomp implementations are considered at the moment;
whether a future conversion to also permit asynchronous ones will be
worth the effort remains to be seen.
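
If asynchronous implementations were supported later, each request would
need to be driven to completion explicitly. A minimal sketch of that
pattern, using the generic crypto_wait helpers (hypothetical helper, not
part of this patch):

  #include <crypto/acompress.h>
  #include <linux/crypto.h>

  static int example_compress_and_wait(struct acomp_req *req)
  {
          DECLARE_CRYPTO_WAIT(wait);

          /* have the completion callback wake up the waiter */
          acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                     crypto_req_done, &wait);

          /* blocks until the request completes, async or not */
          return crypto_wait_req(crypto_acomp_compress(req), &wait);
  }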

Signed-off-by: Ard Biesheuvel 
---
 fs/ubifs/compress.c | 61 ++--
 fs/ubifs/file.c | 46 ---
 fs/ubifs/journal.c  | 19 --
 fs/ubifs/ubifs.h| 15 +++--
 4 files changed, 90 insertions(+), 51 deletions(-)

diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 75461777c466b1c9..570919b218a0a8cc 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -82,15 +82,15 @@ struct ubifs_compressor 
*ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
 
 /**
  * ubifs_compress - compress data.
- * @in_buf: data to compress
+ * @in_sg: data to compress
  * @in_len: length of the data to compress
  * @out_buf: output buffer where compressed data should be stored
  * @out_len: output buffer length is returned here
  * @compr_type: type of compression to use on enter, actually used compression
  *  type on exit
  *
- * This function compresses input buffer @in_buf of length @in_len and stores
- * the result in the output buffer @out_buf and the resulting length in
+ * This function compresses input scatterlist @in_sg of length @in_len and
+ * stores the result in the output buffer @out_buf and the resulting length in
  * @out_len. If the input buffer does not compress, it is just copied to the
  * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if
  * compression error occurred.
@@ -98,11 +98,12 @@ struct ubifs_compressor 
*ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
  * Note, if the input buffer was not compressed, it is copied to the output
  * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
  */
-void ubifs_compress(const struct ubifs_info *c, const void *in_buf,
+void ubifs_compress(const struct ubifs_info *c, struct scatterlist *in_sg,
int in_len, void *out_buf, int *out_len, int *compr_type)
 {
int err;
struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
+   struct scatterlist out_sg;
 
if (*compr_type == UBIFS_COMPR_NONE)
goto no_compr;
@@ -111,10 +112,13 @@ void ubifs_compress(const struct ubifs_info *c, const 
void *in_buf,
if (in_len < UBIFS_MIN_COMPR_LEN)
goto no_compr;
 
+   sg_init_one(&out_sg, out_buf, *out_len);
+
if (compr->comp_mutex)
mutex_lock(compr->comp_mutex);
-   err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
-  (unsigned int *)out_len);
+   acomp_request_set_params(compr->req, in_sg, &out_sg, in_len, *out_len);
+   err = crypto_acomp_compress(compr->req);
+   *out_len = compr->req->dlen;
if (compr->comp_mutex)
mutex_unlock(compr->comp_mutex);
if (unlikely(err)) {
@@ -133,7 +137,7 @@ void ubifs_compress(const struct ubifs_info *c, const void 
*in_buf,
return;
 
 no_compr:
-   memcpy(out_buf, in_buf, in_len);
+   sg_copy_to_buffer(in_sg, 1, out_buf, in_len);
*out_len = in_len;
*compr_type = UBIFS_COMPR_NONE;
 }
@@ -142,19 +146,20 @@ void ubifs_compress(const struct ubifs_info *c, const 
void *in_buf,
  * ubifs_decompress - decompress data.
  * @in_buf: data to decompress
  * @in_len: length of the data to decompress
- * @out_buf: output buffer where decompressed data should
+ * @out_sg: output buffer where decompressed data should be stored
  * @out_len: output length is returned here
  * @compr_type: type of compression
  *
- * This function decompresses data from buffer @in_buf into buffer @out_buf.
+ * This function decompresses data from buffer @in_buf into scatterlist 
@out_sg.
  * The length of the uncompressed data is returned in @out_len. This functions
  * returns %0 on success or a negative error code on failure.
  */
-int ubifs_decompress(const struct ubifs_info *c, const void *in_buf,
-int in_len, void *out_buf, int *out_len, int compr_type)
+int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, int 
in_len,
+struct scatterlist *out_sg, int *out_len, int compr_type)
 {
int err;
struct ubifs_compressor *compr;
+   struct scatterlist in_sg;
 
if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
ubifs_err(c, "invalid compression type %d", compr_type);
@@ -169,15 +174,18 @@ int ubifs_decompress(const struct ubifs_info *c, const 
void *in_buf,
}
 
if (compr_type == UBIFS_COMPR_NONE) {
-   memc

[RFC PATCH 06/21] ubifs: Avoid allocating buffer space unnecessarily

2023-07-18 Thread Ard Biesheuvel
The recompression scratch buffer is only used when the data node is
compressed, and there is no need to allocate it otherwise. So move the
allocation into the branch of the if() that actually makes use of it.

Signed-off-by: Ard Biesheuvel 
---
 fs/ubifs/journal.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 4e5961878f336033..5ce618f82aed201b 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1485,16 +1485,9 @@ static int truncate_data_node(const struct ubifs_info 
*c, const struct inode *in
  unsigned int block, struct ubifs_data_node *dn,
  int *new_len, int dn_size)
 {
-   void *buf;
+   void *buf = NULL;
int err, dlen, compr_type, out_len, data_size;
 
-   out_len = le32_to_cpu(dn->size);
-   buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
-   if (!buf)
-   return -ENOMEM;
-
-   out_len *= WORST_COMPR_FACTOR;
-
dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
data_size = dn_size - UBIFS_DATA_NODE_SZ;
compr_type = le16_to_cpu(dn->compr_type);
@@ -1508,6 +1501,13 @@ static int truncate_data_node(const struct ubifs_info 
*c, const struct inode *in
if (compr_type == UBIFS_COMPR_NONE) {
out_len = *new_len;
} else {
+   out_len = le32_to_cpu(dn->size);
+   buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
+   if (!buf)
+   return -ENOMEM;
+
+   out_len *= WORST_COMPR_FACTOR;
+
err = ubifs_decompress(c, &dn->data, dlen, buf, &out_len, 
compr_type);
if (err)
goto out;
-- 
2.39.2



[RFC PATCH 05/21] ubifs: Pass worst-case buffer size to compression routines

2023-07-18 Thread Ard Biesheuvel
Currently, the ubifs code allocates a worst case buffer size to
recompress a data node, but does not pass the size of that buffer to the
compression code. This means that the compression code will never use
the additional space, and might fail spuriously due to lack of space.

So let's multiply out_len by WORST_COMPR_FACTOR after allocating the
buffer. Doing so is guaranteed not to overflow, given that the preceding
kmalloc_array() call would have failed otherwise.

Signed-off-by: Ard Biesheuvel 
---
 fs/ubifs/journal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index dc52ac0f4a345f30..4e5961878f336033 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1493,6 +1493,8 @@ static int truncate_data_node(const struct ubifs_info *c, 
const struct inode *in
if (!buf)
return -ENOMEM;
 
+   out_len *= WORST_COMPR_FACTOR;
+
dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
data_size = dn_size - UBIFS_DATA_NODE_SZ;
compr_type = le16_to_cpu(dn->compr_type);
-- 
2.39.2



[RFC PATCH 04/21] net: ipcomp: Migrate to acomp API from deprecated comp API

2023-07-18 Thread Ard Biesheuvel
Migrate the IPcomp network compression code to the acomp API, in order
to drop the dependency on the obsolete 'comp' API which is going away.

For the time being, this is a rather mechanical conversion replacing
each comp TFM object with an acomp TFM/request object pair - this is
necessary because, at this point, there is still a 1:1 relation between
acomp transforms and requests in the acomp-to-scomp adaptation layer, and
this deviates from the model used by AEADs and skciphers where the TFM
is fully reentrant, and operations using the same encryption keys can be
issued in parallel using individual request objects but the same TFM.

Also, this minimal conversion does not yet take advantage of the fact
that the acomp API takes scatterlists as input and output descriptors,
which in principle removes the need to linearize the SKBs. However,
given that compression code generally requires in- and output buffers to
be non-overlapping, scratch buffers will always be needed, and so
whether this conversion is worthwhile is TBD.
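
For reference, the 1:1 pairing described above boils down to something
like the following (hypothetical helpers, not the actual xfrm_ipcomp.c
code); each per-CPU crypto_comp TFM of the old code becomes one such
TFM/request pair:

  #include <crypto/acompress.h>

  static struct acomp_req *example_alloc_pair(const char *alg_name)
  {
          struct crypto_acomp *tfm;
          struct acomp_req *req;

          tfm = crypto_alloc_acomp(alg_name, 0, 0);
          if (IS_ERR(tfm))
                  return NULL;

          req = acomp_request_alloc(tfm);
          if (!req)
                  crypto_free_acomp(tfm);

          /* the request records which TFM it belongs to */
          return req;
  }

  static void example_free_pair(struct acomp_req *req)
  {
          struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);

          acomp_request_free(req);
          crypto_free_acomp(tfm);
  }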

Signed-off-by: Ard Biesheuvel 
---
 include/crypto/acompress.h |   5 +
 include/net/ipcomp.h   |   4 +-
 net/xfrm/xfrm_algo.c   |   7 +-
 net/xfrm/xfrm_ipcomp.c | 107 +---
 4 files changed, 79 insertions(+), 44 deletions(-)

diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index ccb6f3279bc8b32e..3f54e3d8815a9d0d 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -318,4 +318,9 @@ static inline int crypto_acomp_decompress(struct acomp_req 
*req)
return crypto_comp_errstat(alg, tfm->decompress(req));
 }
 
+static inline const char *crypto_acomp_name(struct crypto_acomp *acomp)
+{
+   return crypto_tfm_alg_name(crypto_acomp_tfm(acomp));
+}
+
 #endif
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index 8660a2a6d1fc76a7..bf27ac7e3ca952e2 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -7,12 +7,12 @@
 
 #define IPCOMP_SCRATCH_SIZE 65400
 
-struct crypto_comp;
+struct acomp_req;
 struct ip_comp_hdr;
 
 struct ipcomp_data {
u16 threshold;
-   struct crypto_comp * __percpu *tfms;
+   struct acomp_req * __percpu *reqs;
 };
 
 struct ip_comp_hdr;
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 094734fbec967505..ca411bcebc53ad4f 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2002 James Morris 
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -674,7 +675,7 @@ static const struct xfrm_algo_list xfrm_ealg_list = {
 static const struct xfrm_algo_list xfrm_calg_list = {
.algs = calg_list,
.entries = ARRAY_SIZE(calg_list),
-   .type = CRYPTO_ALG_TYPE_COMPRESS,
+   .type = CRYPTO_ALG_TYPE_ACOMPRESS,
.mask = CRYPTO_ALG_TYPE_MASK,
 };
 
@@ -833,8 +834,8 @@ void xfrm_probe_algs(void)
}
 
for (i = 0; i < calg_entries(); i++) {
-   status = crypto_has_comp(calg_list[i].name, 0,
-CRYPTO_ALG_ASYNC);
+   status = crypto_has_acomp(calg_list[i].name, 0,
+ CRYPTO_ALG_ASYNC);
if (calg_list[i].available != status)
calg_list[i].available = status;
}
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 9c0fa0e1786a2d42..e29ef55e0f01d144 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -20,20 +20,21 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
-struct ipcomp_tfms {
+struct ipcomp_reqs {
struct list_head list;
-   struct crypto_comp * __percpu *tfms;
+   struct acomp_req * __percpu *reqs;
int users;
 };
 
 static DEFINE_MUTEX(ipcomp_resource_mutex);
 static void * __percpu *ipcomp_scratches;
 static int ipcomp_scratch_users;
-static LIST_HEAD(ipcomp_tfms_list);
+static LIST_HEAD(ipcomp_reqs_list);
 
 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 {
@@ -42,13 +43,19 @@ static int ipcomp_decompress(struct xfrm_state *x, struct 
sk_buff *skb)
int dlen = IPCOMP_SCRATCH_SIZE;
const u8 *start = skb->data;
u8 *scratch = *this_cpu_ptr(ipcomp_scratches);
-   struct crypto_comp *tfm = *this_cpu_ptr(ipcd->tfms);
-   int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-   int len;
+   struct acomp_req *req = *this_cpu_ptr(ipcd->reqs);
+   struct scatterlist sg_in, sg_out;
+   int err, len;
 
+   sg_init_one(&sg_in, start, plen);
+   sg_init_one(&sg_out, scratch, dlen);
+   acomp_request_set_params(req, &sg_in, &sg_out, plen, dlen);
+
+   err = crypto_acomp_decompress(req);
if (err)
return err;
 
+   dlen = req->dlen;
if (dlen < (plen + sizeof(struct ip_comp_hdr)))
return -EINVAL;
 
@@ -125,17 +132,24 @@ static int ipcomp_compress(struct xfrm_sta

[RFC PATCH 03/21] crypto: acompress - Drop destination scatterlist allocation feature

2023-07-18 Thread Ard Biesheuvel
The acomp crypto code will allocate a destination scatterlist and its
backing pages on the fly if no destination is passed. This feature is
not used, and given that the caller should own this memory, it is far
better if the caller allocates it. This is especially true for
decompression, where the output size is essentially unbounded, and so
the caller already needs to provide the size for this feature to work
reliably.
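
The resulting usage model is simply that the caller sizes and owns the
destination, along these lines (hypothetical caller, error handling
trimmed):

  #include <crypto/acompress.h>
  #include <linux/scatterlist.h>
  #include <linux/slab.h>

  static int example_decompress(struct acomp_req *req,
                                struct scatterlist *src, unsigned int slen,
                                unsigned int max_out)
  {
          struct scatterlist dst;
          void *buf;
          int ret;

          /* the caller knows (or bounds) the output size */
          buf = kmalloc(max_out, GFP_KERNEL);
          if (!buf)
                  return -ENOMEM;

          sg_init_one(&dst, buf, max_out);
          acomp_request_set_params(req, src, &dst, slen, max_out);

          ret = crypto_acomp_decompress(req);
          /* on success, req->dlen holds the actual output size */

          kfree(buf);
          return ret;
  }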

Signed-off-by: Ard Biesheuvel 
---
 crypto/acompress.c |  6 
 crypto/scompress.c | 14 +-
 crypto/testmgr.c   | 29 
 include/crypto/acompress.h | 16 ++-
 4 files changed, 4 insertions(+), 61 deletions(-)

diff --git a/crypto/acompress.c b/crypto/acompress.c
index 1c682810a484dcdf..431876b0ee2096fd 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -71,7 +71,6 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
 
acomp->compress = alg->compress;
acomp->decompress = alg->decompress;
-   acomp->dst_free = alg->dst_free;
acomp->reqsize = alg->reqsize;
 
if (alg->exit)
@@ -173,11 +172,6 @@ void acomp_request_free(struct acomp_req *req)
if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
crypto_acomp_scomp_free_ctx(req);
 
-   if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) {
-   acomp->dst_free(req->dst);
-   req->dst = NULL;
-   }
-
__acomp_request_free(req);
 }
 EXPORT_SYMBOL_GPL(acomp_request_free);
diff --git a/crypto/scompress.c b/crypto/scompress.c
index 442a82c9de7def1f..3155cdce9116e092 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -122,12 +122,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, 
int dir)
if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE)
return -EINVAL;
 
-   if (req->dst && !req->dlen)
+   if (!req->dst || !req->dlen || req->dlen > SCOMP_SCRATCH_SIZE)
return -EINVAL;
 
-   if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE)
-   req->dlen = SCOMP_SCRATCH_SIZE;
-
scratch = raw_cpu_ptr(&scomp_scratch);
spin_lock(&scratch->lock);
 
@@ -139,17 +136,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, 
int dir)
ret = crypto_scomp_decompress(scomp, scratch->src, req->slen,
  scratch->dst, &req->dlen, *ctx);
if (!ret) {
-   if (!req->dst) {
-   req->dst = sgl_alloc(req->dlen, GFP_ATOMIC, NULL);
-   if (!req->dst) {
-   ret = -ENOMEM;
-   goto out;
-   }
-   }
scatterwalk_map_and_copy(scratch->dst, req->dst, 0, req->dlen,
 1);
}
-out:
spin_unlock(&scratch->lock);
return ret;
 }
@@ -197,7 +186,6 @@ int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
 
crt->compress = scomp_acomp_compress;
crt->decompress = scomp_acomp_decompress;
-   crt->dst_free = sgl_free;
crt->reqsize = sizeof(void *);
 
return 0;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b41a8e8c1d1a1987..4971351f55dbabb9 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3497,21 +3497,6 @@ static int test_acomp(struct crypto_acomp *tfm,
goto out;
}
 
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
-   crypto_init_wait(&wait);
-   sg_init_one(&src, input_vec, ilen);
-   acomp_request_set_params(req, &src, NULL, ilen, 0);
-
-   ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
-   if (ret) {
-   pr_err("alg: acomp: compression failed on NULL dst 
buffer test %d for %s: ret=%d\n",
-  i + 1, algo, -ret);
-   kfree(input_vec);
-   acomp_request_free(req);
-   goto out;
-   }
-#endif
-
kfree(input_vec);
acomp_request_free(req);
}
@@ -3573,20 +3558,6 @@ static int test_acomp(struct crypto_acomp *tfm,
goto out;
}
 
-#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
-   crypto_init_wait(&wait);
-   acomp_request_set_params(req, &src, NULL, ilen, 0);
-
-   ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
-   if (ret) {
-   pr_err("alg: acomp: decompression failed on NULL dst 
buffer test %d for %s: ret=%d\n",
-  i + 1, algo, -ret);
-   kfree(input_vec);
-

[RFC PATCH 02/21] crypto: qat - Drop support for allocating destination buffers

2023-07-18 Thread Ard Biesheuvel
Remove the logic that allocates the destination scatterlist and backing
pages on the fly when no destination is provided: this is a rather
dubious proposition, given that the caller is in a far better position
to estimate the size of such a buffer, or how it should be allocated.

This feature has no current users, so let's remove it while we still
can.
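
A caller that does want a freshly allocated destination scatterlist can
still build one itself with the existing sgl_alloc() helper, e.g.
(hypothetical snippet, not QAT code):

  #include <linux/scatterlist.h>

  static struct scatterlist *example_alloc_dst(unsigned int dlen)
  {
          /* the caller estimates/bounds dlen; the driver no longer guesses */
          return sgl_alloc(dlen, GFP_KERNEL, NULL);
  }

  /* freed again with sgl_free() once the request has completed */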

Signed-off-by: Ard Biesheuvel 
---
 drivers/crypto/intel/qat/qat_common/qat_bl.c| 159 
 drivers/crypto/intel/qat/qat_common/qat_bl.h|   6 -
 drivers/crypto/intel/qat/qat_common/qat_comp_algs.c |  86 +--
 drivers/crypto/intel/qat/qat_common/qat_comp_req.h  |  10 --
 4 files changed, 1 insertion(+), 260 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.c 
b/drivers/crypto/intel/qat/qat_common/qat_bl.c
index 76baed0a76c0ee93..94f6a5fe0f3dea75 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_bl.c
@@ -249,162 +249,3 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
extra_dst_buff, sz_extra_dst_buff,
sskip, dskip, flags);
 }
-
-static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev,
-struct qat_alg_buf_list *bl)
-{
-   struct device *dev = &GET_DEV(accel_dev);
-   int n = bl->num_bufs;
-   int i;
-
-   for (i = 0; i < n; i++)
-   if (!dma_mapping_error(dev, bl->buffers[i].addr))
-   dma_unmap_single(dev, bl->buffers[i].addr,
-bl->buffers[i].len, DMA_FROM_DEVICE);
-}
-
-static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev,
- struct scatterlist *sgl,
- struct qat_alg_buf_list **bl)
-{
-   struct device *dev = &GET_DEV(accel_dev);
-   struct qat_alg_buf_list *bufl;
-   int node = dev_to_node(dev);
-   struct scatterlist *sg;
-   int n, i, sg_nctr;
-   size_t sz;
-
-   n = sg_nents(sgl);
-   sz = struct_size(bufl, buffers, n);
-   bufl = kzalloc_node(sz, GFP_KERNEL, node);
-   if (unlikely(!bufl))
-   return -ENOMEM;
-
-   for (i = 0; i < n; i++)
-   bufl->buffers[i].addr = DMA_MAPPING_ERROR;
-
-   sg_nctr = 0;
-   for_each_sg(sgl, sg, n, i) {
-   int y = sg_nctr;
-
-   if (!sg->length)
-   continue;
-
-   bufl->buffers[y].addr = dma_map_single(dev, sg_virt(sg),
-  sg->length,
-  DMA_FROM_DEVICE);
-   bufl->buffers[y].len = sg->length;
-   if (unlikely(dma_mapping_error(dev, bufl->buffers[y].addr)))
-   goto err_map;
-   sg_nctr++;
-   }
-   bufl->num_bufs = sg_nctr;
-   bufl->num_mapped_bufs = sg_nctr;
-
-   *bl = bufl;
-
-   return 0;
-
-err_map:
-   for (i = 0; i < n; i++)
-   if (!dma_mapping_error(dev, bufl->buffers[i].addr))
-   dma_unmap_single(dev, bufl->buffers[i].addr,
-bufl->buffers[i].len,
-DMA_FROM_DEVICE);
-   kfree(bufl);
-   *bl = NULL;
-
-   return -ENOMEM;
-}
-
-static void qat_bl_sgl_free_unmap(struct adf_accel_dev *accel_dev,
- struct scatterlist *sgl,
- struct qat_alg_buf_list *bl,
- bool free_bl)
-{
-   if (bl) {
-   qat_bl_sgl_unmap(accel_dev, bl);
-
-   if (free_bl)
-   kfree(bl);
-   }
-   if (sgl)
-   sgl_free(sgl);
-}
-
-static int qat_bl_sgl_alloc_map(struct adf_accel_dev *accel_dev,
-   struct scatterlist **sgl,
-   struct qat_alg_buf_list **bl,
-   unsigned int dlen,
-   gfp_t gfp)
-{
-   struct scatterlist *dst;
-   int ret;
-
-   dst = sgl_alloc(dlen, gfp, NULL);
-   if (!dst) {
-   dev_err(&GET_DEV(accel_dev), "sg_alloc failed\n");
-   return -ENOMEM;
-   }
-
-   ret = qat_bl_sgl_map(accel_dev, dst, bl);
-   if (ret)
-   goto err;
-
-   *sgl = dst;
-
-   return 0;
-
-err:
-   sgl_free(dst);
-   *sgl = NULL;
-   return ret;
-}
-
-int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev,
-  struct scatterlist **sg,
-  unsigned int dlen,
-  struct qat_request_buffs *qat_bufs,
-  gfp_t gfp)
-{
-   struct device *dev = &GET_DEV(accel_dev);
-   dma_addr_t new_blp = DMA_MA

[RFC PATCH 01/21] crypto: scomp - Revert "add support for deflate rfc1950 (zlib)"

2023-07-18 Thread Ard Biesheuvel
This reverts commit a368f43d6e3a001e684e9191a27df384fbff12f5.

"zlib-deflate" was introduced 6 years ago, but it does not have any
users. So let's remove the generic implementation and the test vectors,
but retain the "zlib-deflate" entry in the testmgr code to avoid
introducing warning messages on systems that implement zlib-deflate in
hardware.

Note that RFC 1950, which forms the basis of this algorithm, dates back
to 1996 and predates RFC 1951, on which the existing IPcomp is based and
which we have supported in the kernel since 2003. So it seems rather
unlikely that we will ever grow the need to support zlib-deflate.
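
For context, the difference comes down to the windowBits parameter passed
to the zlib library: negative values select a raw DEFLATE (RFC 1951)
stream, whereas zlib_deflateInit() produces the RFC 1950 zlib wrapper
that is being dropped here. A sketch of the raw variant that the kernel
keeps (hypothetical helper built on the in-kernel zlib API):

  #include <linux/zlib.h>
  #include <linux/vmalloc.h>
  #include <linux/errno.h>

  static int example_raw_deflate_init(struct z_stream_s *stream)
  {
          int ret;

          stream->workspace = vzalloc(zlib_deflate_workspacesize(
                                          -MAX_WBITS, MAX_MEM_LEVEL));
          if (!stream->workspace)
                  return -ENOMEM;

          /* negative windowBits: no zlib header, no adler32 trailer */
          ret = zlib_deflateInit2(stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                                  -MAX_WBITS, MAX_MEM_LEVEL,
                                  Z_DEFAULT_STRATEGY);
          if (ret != Z_OK) {
                  vfree(stream->workspace);
                  return -EINVAL;
          }
          return 0;
  }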

Signed-off-by: Ard Biesheuvel 
---
 crypto/deflate.c | 61 +---
 crypto/testmgr.c |  8 +--
 crypto/testmgr.h | 75 
 3 files changed, 18 insertions(+), 126 deletions(-)

diff --git a/crypto/deflate.c b/crypto/deflate.c
index b2a46f6dc961e71d..f4f127078fe2a5aa 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -39,24 +39,20 @@ struct deflate_ctx {
struct z_stream_s decomp_stream;
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx, int format)
+static int deflate_comp_init(struct deflate_ctx *ctx)
 {
int ret = 0;
struct z_stream_s *stream = &ctx->comp_stream;
 
stream->workspace = vzalloc(zlib_deflate_workspacesize(
-   MAX_WBITS, MAX_MEM_LEVEL));
+   -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL));
if (!stream->workspace) {
ret = -ENOMEM;
goto out;
}
-   if (format)
-   ret = zlib_deflateInit(stream, 3);
-   else
-   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
-   -DEFLATE_DEF_WINBITS,
-   DEFLATE_DEF_MEMLEVEL,
-   Z_DEFAULT_STRATEGY);
+   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+   -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
+   Z_DEFAULT_STRATEGY);
if (ret != Z_OK) {
ret = -EINVAL;
goto out_free;
@@ -68,7 +64,7 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int 
format)
goto out;
 }
 
-static int deflate_decomp_init(struct deflate_ctx *ctx, int format)
+static int deflate_decomp_init(struct deflate_ctx *ctx)
 {
int ret = 0;
struct z_stream_s *stream = &ctx->decomp_stream;
@@ -78,10 +74,7 @@ static int deflate_decomp_init(struct deflate_ctx *ctx, int 
format)
ret = -ENOMEM;
goto out;
}
-   if (format)
-   ret = zlib_inflateInit(stream);
-   else
-   ret = zlib_inflateInit2(stream, -DEFLATE_DEF_WINBITS);
+   ret = zlib_inflateInit2(stream, -DEFLATE_DEF_WINBITS);
if (ret != Z_OK) {
ret = -EINVAL;
goto out_free;
@@ -105,21 +98,21 @@ static void deflate_decomp_exit(struct deflate_ctx *ctx)
vfree(ctx->decomp_stream.workspace);
 }
 
-static int __deflate_init(void *ctx, int format)
+static int __deflate_init(void *ctx)
 {
int ret;
 
-   ret = deflate_comp_init(ctx, format);
+   ret = deflate_comp_init(ctx);
if (ret)
goto out;
-   ret = deflate_decomp_init(ctx, format);
+   ret = deflate_decomp_init(ctx);
if (ret)
deflate_comp_exit(ctx);
 out:
return ret;
 }
 
-static void *gen_deflate_alloc_ctx(struct crypto_scomp *tfm, int format)
+static void *deflate_alloc_ctx(struct crypto_scomp *tfm)
 {
struct deflate_ctx *ctx;
int ret;
@@ -128,7 +121,7 @@ static void *gen_deflate_alloc_ctx(struct crypto_scomp 
*tfm, int format)
if (!ctx)
return ERR_PTR(-ENOMEM);
 
-   ret = __deflate_init(ctx, format);
+   ret = __deflate_init(ctx);
if (ret) {
kfree(ctx);
return ERR_PTR(ret);
@@ -137,21 +130,11 @@ static void *gen_deflate_alloc_ctx(struct crypto_scomp 
*tfm, int format)
return ctx;
 }
 
-static void *deflate_alloc_ctx(struct crypto_scomp *tfm)
-{
-   return gen_deflate_alloc_ctx(tfm, 0);
-}
-
-static void *zlib_deflate_alloc_ctx(struct crypto_scomp *tfm)
-{
-   return gen_deflate_alloc_ctx(tfm, 1);
-}
-
 static int deflate_init(struct crypto_tfm *tfm)
 {
struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
 
-   return __deflate_init(ctx, 0);
+   return __deflate_init(ctx);
 }
 
 static void __deflate_exit(void *ctx)
@@ -286,7 +269,7 @@ static struct crypto_alg alg = {
.coa_decompress = deflate_decompress } }
 };
 
-static struct scomp_alg scomp[] = { {
+static struct scomp_alg scomp = {
.alloc_ctx  = deflate_alloc_ctx,
.free_ctx   = deflate_free_ctx,
.compress   = deflate_scompre

[RFC PATCH 00/21] crypto: consolidate and clean up compression APIs

2023-07-18 Thread Ard Biesheuvel
This series is presented as an RFC, because I haven't quite convinced
myself that the acomp API really needs both scatterlists and request
objects to encapsulate the in- and output buffers, and perhaps there are
more drastic simplifications that we might consider.

However, the current situation with comp, scomp and acomp APIs is
definitely something that needs cleaning up, and so I implemented this
series under the working assumption that we will keep the current acomp
semantics wrt scatterlists and request objects.

Patch #1 drops zlib-deflate support in software, along with the test
cases we have for it. This has no users and should have never been
added.

Patch #2 removes the support for on-the-fly allocation of destination
buffers and scatterlists from the Intel QAT driver. This is never used,
and not even implemented by all drivers (the HiSilicon ZIP driver does
not support it). The diffstat of this patch makes a good case why the
caller should be in charge of allocating the memory, not the driver.

Patch #3 removes this on-the-fly allocation from the core acomp API.

Patch #4 does a minimal conversion of IPcomp to the acomp API.

Patch #5 and #6 are independent UBIFS fixes for things I ran into while
working on patch #7.

Patch #7 converts UBIFS to the acomp API.

Patch #8 converts the zram block driver to the acomp API.

Patches #9 to #19 remove the existing 'comp' API implementations as well
as the core plumbing, now that all clients of the API have been
converted. (Note that pstore stopped using the 'comp' API as well, but
these changes are already queued elsewhere)

Patch #20 converts the generic deflate compression driver to the acomp
API, so that it can natively operate on discontiguous buffers, rather
than requiring scratch buffers. This is the only IPcomp compression
algorithm we actually implement in software in the kernel, and this
conversion could help IPcomp if we decide to convert it further, and
remove the code that 'linearizes' SKBs in order to present them to the
compression API as a contiguous range.
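
For illustration, one way the input side of such a conversion could look --
this is just a sketch with invented names, not necessarily how patch #20
actually does it, and with output buffer handling elided:

#include <linux/scatterlist.h>
#include <linux/zlib.h>

/* walk the input scatterlist one mapped chunk at a time */
static int deflate_compress_sg(struct z_stream_s *zs,
                               struct scatterlist *src, unsigned int slen)
{
        struct sg_mapping_iter miter;
        int ret = Z_OK;

        sg_miter_start(&miter, src, sg_nents(src), SG_MITER_FROM_SG);
        while (slen && ret == Z_OK && sg_miter_next(&miter)) {
                zs->next_in = miter.addr;
                zs->avail_in = min_t(size_t, miter.length, slen);
                slen -= zs->avail_in;
                ret = zlib_deflate(zs, slen ? Z_NO_FLUSH : Z_FINISH);
        }
        sg_miter_stop(&miter);

        return ret;
}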

Patch #21 converts the acomp-to-scomp adaptation layer so it no longer
requires per-CPU scratch buffers. This takes advantage of the fact that
all existing users of the acomp API pass contiguous memory regions, and
so scratch buffers are only needed in exceptional cases, and can be
allocated and deallocated on the fly. This removes the need for
preallocated per-CPU scratch buffers that can easily add up to tens of
megabytes on modern systems with high core counts and SMT.
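
Roughly, the check that makes that possible could look like the sketch below
(the helper name is invented here): the caller's buffer can be used in place
whenever the scatterlist covers a single lowmem segment, so a bounce buffer
is only needed in the odd cases:

#include <linux/highmem.h>
#include <linux/scatterlist.h>

static bool scomp_sg_is_linear(struct scatterlist *sg, unsigned int len)
{
        /* a single entry, not in highmem, fully contained in one page */
        return sg_is_last(sg) && !PageHighMem(sg_page(sg)) &&
               sg->offset + len <= PAGE_SIZE;
}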

These changes have been build tested and only lightly runtime tested. In
particular, I haven't performed any thorough testing on the acomp
conversions of IPcomp, UBIFS and ZRAM. Any hints on which respective
methods and test cases to use here are highly appreciated.

Cc: Herbert Xu 
Cc: Eric Biggers 
Cc: Kees Cook 
Cc: Haren Myneni 
Cc: Nick Terrell 
Cc: Minchan Kim 
Cc: Sergey Senozhatsky 
Cc: Jens Axboe 
Cc: Giovanni Cabiddu 
Cc: Richard Weinberger 
Cc: David Ahern 
Cc: Eric Dumazet 
Cc: Jakub Kicinski 
Cc: Paolo Abeni 
Cc: Steffen Klassert 
Cc: linux-cry...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Cc: linux-bl...@vger.kernel.org
Cc: qat-li...@intel.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-...@lists.infradead.org
Cc: net...@vger.kernel.org

Ard Biesheuvel (21):
  crypto: scomp - Revert "add support for deflate rfc1950 (zlib)"
  crypto: qat - Drop support for allocating destination buffers
  crypto: acompress - Drop destination scatterlist allocation feature
  net: ipcomp: Migrate to acomp API from deprecated comp API
  ubifs: Pass worst-case buffer size to compression routines
  ubifs: Avoid allocating buffer space unnecessarily
  ubifs: Migrate to acomp compression API
  zram: Migrate to acomp compression API
  crypto: nx - Migrate to scomp API
  crypto: 842 - drop obsolete 'comp' implementation
  crypto: deflate - drop obsolete 'comp' implementation
  crypto: lz4 - drop obsolete 'comp' implementation
  crypto: lz4hc - drop obsolete 'comp' implementation
  crypto: lzo-rle - drop obsolete 'comp' implementation
  crypto: lzo - drop obsolete 'comp' implementation
  crypto: zstd - drop obsolete 'comp' implementation
  crypto: cavium/zip - drop obsolete 'comp' implementation
  crypto: compress_null - drop obsolete 'comp' implementation
  crypto: remove obsolete 'comp' compression API
  crypto: deflate - implement acomp API directly
  crypto: scompress - Drop the use of per-cpu scratch buffers

 Documentation/crypto/architecture.rst   |   2 -
 crypto/842.c|  63 +---
 crypto/Makefile |   2 +-
 crypto/acompress.c  |   6 -
 crypto/api.c|   4 -
 crypto/compress.c

Re: [PATCH 18/21] ARM: drop SMP support for ARM11MPCore

2023-03-30 Thread Ard Biesheuvel
On Mon, 27 Mar 2023 at 14:18, Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> The cache management operations for noncoherent DMA on ARMv6 work
> in two different ways:
>
>  * When CONFIG_DMA_CACHE_RWFO is set, speculative prefetches on in-flight
>DMA buffers lead to data corruption when the prefetched data is written
>back on top of data from the device.
>
>  * When CONFIG_DMA_CACHE_RWFO is disabled, a cache flush on one CPU
>is not seen by the other core(s), leading to inconsistent contents
>across the system.
>
> As a consequence, neither configuration is actually safe to use in a
> general-purpose kernel that is used on both MPCore systems and ARM1176
> with prefetching enabled.
>
> We could add further workarounds to make the behavior more dynamic based
> on the system, but realistically, there are close to zero remaining
> users on any ARM11MPCore anyway, and nobody seems too interested in it,
> compared to the more popular ARM1176 used in BCM2835 and AST2500.
>
> The Oxnas platform has some minimal support in OpenWRT, but most of the
> drivers and dts files never made it into the mainline kernel, while the
> Arm Versatile/Realview platform mainly serves as a reference system but
> is not necessary to be kept working once all other ARM11MPCore are gone.
>
> Take the easy way out here and drop support for multiprocessing on
> ARMv6, along with the CONFIG_DMA_CACHE_RWFO option and the cache
> management implementation for it. This also helps with other ARMv6
> issues, but for the moment leaves the ability to build a kernel that
> can run on both ARMv7 SMP and single-processor ARMv6, which we probably
> want to stop supporting as well, but not as part of this series.
>
> Cc: Neil Armstrong 
> Cc: Daniel Golle 
> Cc: Linus Walleij 
> Cc: linux-ox...@groups.io
> Signed-off-by: Arnd Bergmann 

Acked-by: Ard Biesheuvel 


Re: ia64 removal (was: Re: lockref scalability on x86-64 vs cpu_relax)

2023-01-16 Thread Ard Biesheuvel
On Mon, 16 Jan 2023 at 10:33, John Paul Adrian Glaubitz
 wrote:
>
> Hi Ard!
>
> On 1/14/23 00:25, Ard Biesheuvel wrote:
> > Thanks for reporting back. I (mis)read the debian ports page [3],
> > which mentions Debian 7 as the highest Debian version that supports
> > IA64, and so I assumed that support had been dropped from Debian.
>
> This page talks about officially supported ports. Debian Ports is an
> unofficial spin maintained by a number of Debian Developers and external
> developers that are volunteering to maintain these ports.
>
> > However, if only a handful of people want to keep this port alive for
> > reasons of nostalgia, it is obviously obsolete, and we should ask
> > ourselves whether it is reasonable to expect Linux contributors to
> > keep spending time on this.
>
> You could say this about a lot of hardware, can't you?
>

Uhm, yes. Linux contributor effort is a scarce resource, and spending
it on architectures that nobody actually uses, such as alpha or ia64,
means it is not spent on things that are useful to more people.

I really do sympathize with the enthusiast/hobbyist PoV - I am also an
engineer that likes to tinker. So 'use' can be defined liberally here,
and cover running the latest Linux on ancient hardware just for
entertainment.

However, the question is not how you or I choose to spend (or waste)
their time. The question is whether it is reasonable *as a community*
to insist that everyone who contributes a cross-architecture change
also has to ensure that obsolete architectures such as ia64 or alpha
are not left behind.

The original thread is an interesting example here - removing a
cpu_relax() in cmpxchg() that was only there because of IA64's clunky
SMT implementation. Perhaps this means that IA64 performance is going
to regress substantially for some workloads? Should anyone care?
Should we test such changes first? And how should we do that if there
is no maintainer and nobody has access to the hardware?

The other example is EFI, which I maintain. Should I require from
contributors that they build and boot test EFI changes on ia64 if I
myself don't even have access to the hardware? It is good to know that
things don't seem to be broken today, but if it is going to fall over,
it may take a while before anybody notices. What happens then?

> > Does the Debian ia64 port have any users? Or is the system that builds
> > the packages the only one that consumes them?
>
> There are the popcon statistics. However, that is opt-in and the numbers are
> not really trustworthy. We are getting feedback from time to time from people
> using it.
>
> Is there any problem with the ia64 port at the moment that would justify 
> removal?
>

I would argue that we should mark it obsolete at the very least, so
that it is crystal clear that regressing IA64 (either knowingly or
unknowingly) by a generic or cross-architecture change is not a
showstopper, even at build time. Then, if someone has the skill set
and the time on their hands, as well as access to actual hardware,
they can keep it alive if they want to.


Re: ia64 removal (was: Re: lockref scalability on x86-64 vs cpu_relax)

2023-01-13 Thread Ard Biesheuvel
On Fri, 13 Jan 2023 at 22:06, John Paul Adrian Glaubitz
 wrote:
>
> Hello Ard!
>
> > Can I take that as an ack on [0]? The EFI subsystem has evolved
> > substantially over the years, and there is really no way to do any
> > IA64 testing beyond build testing, so from that perspective, dropping
> > it entirely would be welcomed.
>
> ia64 is regularly tested in Debian and Gentoo [1][2].
>
> Debian's ia64 porterbox yttrium runs a recent kernel without issues:
>
> root@yttrium:~# uname -a
> Linux yttrium 5.19.0-2-mckinley #1 SMP Debian 5.19.11-1 (2022-09-24) ia64 
> GNU/Linux
> root@yttrium:~#
>
> root@yttrium:~# journalctl -b|head -n10
> Nov 14 14:46:10 yttrium kernel: Linux version 5.19.0-2-mckinley 
> (debian-ker...@lists.debian.org) (gcc-11 (Debian 11.3.0-6) 11.3.0, GNU ld 
> (GNU Binutils for Debian) 2.39) #1 SMP Debian 5.19.11-1 (2022-09-24)
> Nov 14 14:46:10 yttrium kernel: efi: EFI v2.10 by HP
> Nov 14 14:46:10 yttrium kernel: efi: SALsystab=0xdfdd63a18 ESI=0xdfdd63f18 
> ACPI 2.0=0x3d3c4014 HCDP=0xd8798 SMBIOS=0x3d368000
> Nov 14 14:46:10 yttrium kernel: PCDP: v3 at 0xd8798
> Nov 14 14:46:10 yttrium kernel: earlycon: uart8250 at I/O port 0x4000 
> (options '115200n8')
> Nov 14 14:46:10 yttrium kernel: printk: bootconsole [uart8250] enabled
> Nov 14 14:46:10 yttrium kernel: ACPI: Early table checksum verification 
> disabled
> Nov 14 14:46:10 yttrium kernel: ACPI: RSDP 0x3D3C4014 24 (v02 HP  
>   )
> Nov 14 14:46:10 yttrium kernel: ACPI: XSDT 0x3D3C4580 000124 (v01 HP  
>RX2800-2 0001  0113)
> Nov 14 14:46:10 yttrium kernel: ACPI: FACP 0x3D3BE000 F4 (v03 HP  
>RX2800-2 0001 HP   0001)
> root@yttrium:~#
>
> Same applies to the buildds:
>
> root@lifshitz:~# uname -a
> Linux lifshitz 6.0.0-4-mckinley #1 SMP Debian 6.0.8-1 (2022-11-11) ia64 
> GNU/Linux
> root@lifshitz:~#
>
> root@lenz:~# uname -a
> Linux lenz 6.0.0-4-mckinley #1 SMP Debian 6.0.8-1 (2022-11-11) ia64 GNU/Linux
> root@lenz:~#
>
> EFI works fine as well using the latest version of GRUB2.
>
> Thanks,
> Adrian
>
> > [1] https://cdimage.debian.org/cdimage/ports/snapshots/
> > [2] https://mirror.yandex.ru/gentoo-distfiles//releases/ia64/autobuilds/

Thanks for reporting back. I (mis)read the debian ports page [3],
which mentions Debian 7 as the highest Debian version that supports
IA64, and so I assumed that support had been dropped from Debian.

However, if only a handful of people want to keep this port alive for
reasons of nostalgia, it is obviously obsolete, and we should ask
ourselves whether it is reasonable to expect Linux contributors to
keep spending time on this.

Does the Debian ia64 port have any users? Or is the system that builds
the packages the only one that consumes them?


[3] https://www.debian.org/ports/ia64/


ia64 removal (was: Re: lockref scalability on x86-64 vs cpu_relax)

2023-01-12 Thread Ard Biesheuvel
On Fri, 13 Jan 2023 at 01:31, Luck, Tony  wrote:
>
> > Yeah, if it was ia64-only, it's a non-issue these days. It's dead and
> > in pure maintenance mode from a kernel perspective (if even that).
>
> There's not much "simultaneous" in the SMT on ia64. One thread in a
> spin loop will hog the core until the h/w switches to the other thread some
> number of cycles (hundreds, thousands? I really can remember). So I
> was pretty generous with dropping cpu_relax() into any kind of spin loop.
>
> Is it time yet for:
>
> $ git rm -r arch/ia64
>

Hi Tony,

Can I take that as an ack on [0]? The EFI subsystem has evolved
substantially over the years, and there is really no way to do any
IA64 testing beyond build testing, so from that perspective, dropping
it entirely would be welcomed.

Thanks,
Ard.



[0] 
https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/commit/?h=remove-ia64


Re: [RFC] Objtool toolchain proposal: -fannotate-{jump-table,noreturn}

2022-09-20 Thread Ard Biesheuvel
On Thu, 15 Sept 2022 at 10:47, Peter Zijlstra  wrote:
>
> On Thu, Sep 15, 2022 at 10:56:58AM +0800, Chen Zhongjin wrote:
>
> > We have found some anonymous information on x86 in .rodata.
>
> Well yes, but that's still a bunch of heuristics on our side.
>
> > I'm not sure if those are *all* of Josh wanted on x86, however for arm64 we
> > did not found that in the same section so it is a problem on arm64 now.
>
> Nick found Bolt managed the ARM64 jumptables:
>
>   
> https://github.com/llvm/llvm-project/blob/main/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp#L484
>
> But that does look like a less than ideal solution too.
>
> > Does the compiler will emit these for all arches? At lease I tried and
> > didn't find anything meaningful (maybe I omitted it).
>
> That's the question; can we get the compiler to help us here in a well
> defined manner.

Do BTI landing pads help at all here? I.e., I assume that objtool just
treats any indirect call as a dangling edge in the control flow graph,
and the problem is identifying the valid targets. In the BTI case,
those will all start with a 'BTI J' instruction.
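
Roughly what I have in mind on the objtool side, as an untested sketch (the
HINT encodings below are from my reading of the Arm ARM, so please
double-check them; 'BTI JC' would have to be accepted as well):

#include <stdbool.h>
#include <stdint.h>

#define AARCH64_BTI_J   0xd503249fU     /* HINT #36 */
#define AARCH64_BTI_JC  0xd50324dfU     /* HINT #38 */

/* only code that starts with a landing pad is a valid jump table target */
static bool aarch64_is_indirect_branch_target(uint32_t insn)
{
        return insn == AARCH64_BTI_J || insn == AARCH64_BTI_JC;
}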


Re: [RFC] Objtool toolchain proposal: -fannotate-{jump-table,noreturn}

2022-09-11 Thread Ard Biesheuvel
On Sun, 11 Sept 2022 at 16:26, Peter Zijlstra  wrote:
>
> On Fri, Sep 09, 2022 at 11:07:04AM -0700, Josh Poimboeuf wrote:
> > Alternatives
> > 
> >
> > Another idea which has been floated in the past is for objtool to read
> > DWARF (or .eh_frame) to help it figure out the control flow.  That
> > hasn't been tried yet, but would be considerably more difficult and
> > fragile IMO.
>
> I thought Ard played around with that a bit on ARM64. And yes, given that
> most toolchains consider DWARF itself best-effort, I'm not holding my
> breath there.
>

I have patches out that use unwind data to locate pointer auth
sign/authenticate instructions in the code, in order to patch them to
shadow call stack pushes and pops at runtime if pointer authentication
is not supported by the hardware. This has little to do with objtool
or reliable stack traces.

I still think DWARF could help to make objtool's job a bit easier, but
I don't think it will be of any use with jump tables or noreturn
functions in particular.


Re: [PATCH v2 0/7] Implement inline static calls on PPC32 - v2

2022-07-08 Thread Ard Biesheuvel
Hello Christophe,

On Fri, 8 Jul 2022 at 19:32, Christophe Leroy
 wrote:
>
> This series applies on top of the series v3 "objtool: Enable and
> implement --mcount option on powerpc" [1] rebased on powerpc-next branch
>
> A few modifications are done to core parts to enable powerpc
> implementation:
> - R_X86_64_PC32 is abstracted to R_REL32 so that it can then be
> redefined as R_PPC_REL32.
> - A call to static_call_init() is added to start_kernel() to avoid
> every architecture to have to call it
> - Trampoline address is provided to arch_static_call_transform() even
> when setting a site to fallback on a call to the trampoline when the
> target is too far.
>
> [1] 
> https://lore.kernel.org/lkml/70b6d08d-aced-7f4e-b958-a3c7ae1a9...@csgroup.eu/T/#rb3a073c54aba563a135fba891e0c34c46e47beef
>
> Christophe Leroy (7):
>   powerpc: Add missing asm/asm.h for objtool
>   objtool/powerpc: Activate objtool on PPC32
>   objtool: Add architecture specific R_REL32 macro
>   objtool/powerpc: Add necessary support for inline static calls
>   init: Call static_call_init() from start_kernel()
>   static_call_inline: Provide trampoline address when updating sites
>   powerpc/static_call: Implement inline static calls
>

Could you quantify the performance gains of moving from out-of-line,
patched tail-call branch instructions to full-fledged inline static
calls? On x86, the retpoline problem makes this glaringly obvious, but
on other architectures, the complexity of supporting this model may
outweigh the performance advantages.


Re: [PATCH] kprobes: Enable tracing for mololithic kernel images

2022-06-10 Thread Ard Biesheuvel
On Thu, 9 Jun 2022 at 15:14, Jarkko Sakkinen  wrote:
>
> On Wed, Jun 08, 2022 at 09:12:34AM -0700, Song Liu wrote:
> > On Wed, Jun 8, 2022 at 7:21 AM Masami Hiramatsu  wrote:
> > >
> > > Hi Jarkko,
> > >
> > > On Wed, 8 Jun 2022 08:25:38 +0300
> > > Jarkko Sakkinen  wrote:
> > >
> > > > On Wed, Jun 08, 2022 at 10:35:42AM +0800, Guo Ren wrote:
> > > > > .
> > > > >
> > > > > On Wed, Jun 8, 2022 at 8:02 AM Jarkko Sakkinen  
> > > > > wrote:
> > > > > >
> > > > > > Tracing with kprobes while running a monolithic kernel is currently
> > > > > > impossible because CONFIG_KPROBES is dependent of CONFIG_MODULES.  
> > > > > > This
> > > > > > dependency is a result of kprobes code using the module allocator 
> > > > > > for the
> > > > > > trampoline code.
> > > > > >
> > > > > > Detaching kprobes from modules helps to squeeze down the user space,
> > > > > > e.g. when developing new core kernel features, while still having 
> > > > > > all
> > > > > > the nice tracing capabilities.
> > > > > >
> > > > > > For kernel/ and arch/*, move module_alloc() and module_memfree() to
> > > > > > module_alloc.c, and compile as part of vmlinux when either 
> > > > > > CONFIG_MODULES
> > > > > > or CONFIG_KPROBES is enabled.  In addition, flag kernel module 
> > > > > > specific
> > > > > > code with CONFIG_MODULES.
> > > > > >
> > > > > > As the result, kprobes can be used with a monolithic kernel.
> > > > > It's strange when MODULES is n, but vmlinux still obtains 
> > > > > module_alloc.
> > > > >
> > > > > Maybe we need a kprobe_alloc, right?
> > > >
> > > > Perhaps not the best name but at least it documents the fact that
> > > > they use the same allocator.
> > > >
> > > > Few years ago I carved up something "half-way there" for kprobes,
> > > > and I used the name text_alloc() [*].
> > > >
> > > > [*] 
> > > > https://lore.kernel.org/all/20200724050553.1724168-1-jarkko.sakki...@linux.intel.com/
> > >
> > > Yeah, I remember that. Thank you for updating your patch!
> > > I think the idea (split module_alloc() from CONFIG_MODULE) is good to me.
> > > If module support maintainers think this name is not good, you may be
> > > able to rename it as text_alloc() and make the module_alloc() as a
> > > wrapper of it.
> >
> > IIUC, most users of module_alloc() use it to allocate memory for text, 
> > except
> > that module code uses it for both text and data. Therefore, I guess calling 
> > it
> > text_alloc() is not 100% accurate until we change the module code (to use
> > a different API to allocate memory for data).
>
> After reading the feedback, I'd stay on using module_alloc() because
> it has arch-specific quirks baked in. Easier to deal with them in one
> place.
>

In that case, please ensure that you enable this only on architectures
where it is needed. arm64 implements alloc_insn_page() without relying
on module_alloc() so I would not expect to see any changes there.


Re: [PATCH] kprobes: Enable tracing for mololithic kernel images

2022-06-08 Thread Ard Biesheuvel
Hello Jarkko,

On Wed, 8 Jun 2022 at 02:02, Jarkko Sakkinen  wrote:
>
> Tracing with kprobes while running a monolithic kernel is currently
> impossible because CONFIG_KPROBES is dependent of CONFIG_MODULES.  This
> dependency is a result of kprobes code using the module allocator for the
> trampoline code.
>
> Detaching kprobes from modules helps to squeeze down the user space,
> e.g. when developing new core kernel features, while still having all
> the nice tracing capabilities.
>
> For kernel/ and arch/*, move module_alloc() and module_memfree() to
> module_alloc.c, and compile as part of vmlinux when either CONFIG_MODULES
> or CONFIG_KPROBES is enabled.  In addition, flag kernel module specific
> code with CONFIG_MODULES.
>
> As the result, kprobes can be used with a monolithic kernel.

I think I may have mentioned this the previous time as well, but I
don't think this is the right approach.

Kprobes uses alloc_insn_page() to allocate executable memory, but the
requirements for this memory are radically different compared to
loadable modules, which need to be within an arch-specific distance of
the core kernel, need KASAN backing etc etc.

This is why arm64, for instance, does not implement alloc_insn_page()
in terms of module_alloc() [and likely does not belong in this patch
for that reason]

Is there any reason kprobes cannot simply use vmalloc()?
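
For reference, a rough and untested sketch of what I mean -- this is more or
less what the arm64 implementation of alloc_insn_page() looks like today,
from memory:

#include <linux/vmalloc.h>

void *alloc_insn_page(void)
{
        return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
                                    GFP_KERNEL, PAGE_KERNEL_ROX,
                                    VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
                                    __builtin_return_address(0));
}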


>
> Signed-off-by: Jarkko Sakkinen 
> ---
> Tested with the help of BuildRoot and QEMU:
> - arm (function tracer)
> - arm64 (function tracer)
> - mips (function tracer)
> - powerpc (function tracer)
> - riscv (function tracer)
> - s390 (function tracer)
> - sparc (function tracer)
> - x86 (function tracer)
> - sh (function tracer, for the "pure" kernel/modules_alloc.c path)
> ---
>  arch/Kconfig   |  1 -
>  arch/arm/kernel/Makefile   |  5 +++
>  arch/arm/kernel/module.c   | 32 
>  arch/arm/kernel/module_alloc.c | 42 
>  arch/arm64/kernel/Makefile |  5 +++
>  arch/arm64/kernel/module.c | 47 ---
>  arch/arm64/kernel/module_alloc.c   | 57 
>  arch/mips/kernel/Makefile  |  5 +++
>  arch/mips/kernel/module.c  |  9 -
>  arch/mips/kernel/module_alloc.c| 18 +
>  arch/parisc/kernel/Makefile|  5 +++
>  arch/parisc/kernel/module.c| 11 --
>  arch/parisc/kernel/module_alloc.c  | 23 +++
>  arch/powerpc/kernel/Makefile   |  5 +++
>  arch/powerpc/kernel/module.c   | 37 --
>  arch/powerpc/kernel/module_alloc.c | 47 +++
>  arch/riscv/kernel/Makefile |  5 +++
>  arch/riscv/kernel/module.c | 10 -
>  arch/riscv/kernel/module_alloc.c   | 19 ++
>  arch/s390/kernel/Makefile  |  5 +++
>  arch/s390/kernel/module.c  | 17 -
>  arch/s390/kernel/module_alloc.c| 33 
>  arch/sparc/kernel/Makefile |  5 +++
>  arch/sparc/kernel/module.c | 30 ---
>  arch/sparc/kernel/module_alloc.c   | 39 +++
>  arch/x86/kernel/Makefile   |  5 +++
>  arch/x86/kernel/module.c   | 50 
>  arch/x86/kernel/module_alloc.c | 61 ++
>  kernel/Makefile|  5 +++
>  kernel/kprobes.c   | 10 +
>  kernel/module/main.c   | 17 -
>  kernel/module_alloc.c  | 26 +
>  kernel/trace/trace_kprobe.c| 10 -
>  33 files changed, 434 insertions(+), 262 deletions(-)
>  create mode 100644 arch/arm/kernel/module_alloc.c
>  create mode 100644 arch/arm64/kernel/module_alloc.c
>  create mode 100644 arch/mips/kernel/module_alloc.c
>  create mode 100644 arch/parisc/kernel/module_alloc.c
>  create mode 100644 arch/powerpc/kernel/module_alloc.c
>  create mode 100644 arch/riscv/kernel/module_alloc.c
>  create mode 100644 arch/s390/kernel/module_alloc.c
>  create mode 100644 arch/sparc/kernel/module_alloc.c
>  create mode 100644 arch/x86/kernel/mo

Re: [PATCH 08/14] arm64: simplify access_ok()

2022-02-15 Thread Ard Biesheuvel
On Tue, 15 Feb 2022 at 10:13, Arnd Bergmann  wrote:
>
> On Tue, Feb 15, 2022 at 9:17 AM Ard Biesheuvel  wrote:
> > On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann  wrote:
> > > From: Arnd Bergmann 
> > >
> >
> > With set_fs() out of the picture, wouldn't it be sufficient to check
> > that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1)
> > That would also remove the need to strip the tag from the address.
> >
> > Something like
> >
> > asm goto("tbnz  %0, #55, %2 \n"
> >  "tbnz  %1, #55, %2 \n"
> >  :: "r"(addr), "r"(addr + size - 1) :: notok);
> > return 1;
> > notok:
> > return 0;
> >
> > with an additional sanity check on the size which the compiler could
> > eliminate for compile-time constant values.
>
> That should work, but I don't see it as a clear enough advantage to
> have a custom implementation. For the constant-size case, it probably
> isn't better than a compiler-scheduled comparison against a
> constant limit, but it does hurt maintainability when the next person
> wants to change the behavior of access_ok() globally.
>

arm64 also has this leading up to the range check, and I think we'd no
longer need it:

if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) &&
(current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR)))
addr = untagged_addr(addr);

> If we want to get into micro-optimizing uaccess, I think a better target
> would be a CONFIG_CC_HAS_ASM_GOTO_OUTPUT version
> of __get_user()/__put_user as we have on x86 and powerpc.
>
>  Arnd


Re: [PATCH 08/14] arm64: simplify access_ok()

2022-02-15 Thread Ard Biesheuvel
On Mon, 14 Feb 2022 at 17:37, Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> arm64 has an inline asm implementation of access_ok() that is derived from
> the 32-bit arm version and optimized for the case that both the limit and
> the size are variable. With set_fs() gone, the limit is always constant,
> and the size usually is as well, so just using the default implementation
> reduces the check into a comparison against a constant that can be
> scheduled by the compiler.
>
> On a defconfig build, this saves over 28KB of .text.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/uaccess.h | 28 +---
>  1 file changed, 5 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm64/include/asm/uaccess.h 
> b/arch/arm64/include/asm/uaccess.h
> index 357f7bd9c981..e8dce0cc5eaa 100644
> --- a/arch/arm64/include/asm/uaccess.h
> +++ b/arch/arm64/include/asm/uaccess.h
> @@ -26,6 +26,8 @@
>  #include 
>  #include 
>
> +static inline int __access_ok(const void __user *ptr, unsigned long size);
> +
>  /*
>   * Test whether a block of memory is a valid user space address.
>   * Returns 1 if the range is valid, 0 otherwise.
> @@ -33,10 +35,8 @@
>   * This is equivalent to the following test:
>   * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX
>   */
> -static inline unsigned long __access_ok(const void __user *addr, unsigned 
> long size)
> +static inline int access_ok(const void __user *addr, unsigned long size)
>  {
> -   unsigned long ret, limit = TASK_SIZE_MAX - 1;
> -
> /*
>  * Asynchronous I/O running in a kernel thread does not have the
>  * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag
> @@ -46,27 +46,9 @@ static inline unsigned long __access_ok(const void __user 
> *addr, unsigned long s
> (current->flags & PF_KTHREAD || 
> test_thread_flag(TIF_TAGGED_ADDR)))
> addr = untagged_addr(addr);
>
> -   __chk_user_ptr(addr);
> -   asm volatile(
> -   // A + B <= C + 1 for all A,B,C, in four easy steps:
> -   // 1: X = A + B; X' = X % 2^64
> -   "   adds%0, %3, %2\n"
> -   // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
> -   "   csel%1, xzr, %1, hi\n"
> -   // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
> -   //to compensate for the carry flag being set in step 4. For
> -   //X > 2^64, X' merely has to remain nonzero, which it does.
> -   "   csinv   %0, %0, xzr, cc\n"
> -   // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1
> -   //comes from the carry in being clear. Otherwise, we are
> -   //testing X' - C == 0, subject to the previous adjustments.
> -   "   sbcsxzr, %0, %1\n"
> -   "   cset%0, ls\n"
> -   : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc");
> -
> -   return ret;
> +   return likely(__access_ok(addr, size));
>  }
> -#define __access_ok __access_ok
> +#define access_ok access_ok
>
>  #include 
>
> --
> 2.29.2
>

With set_fs() out of the picture, wouldn't it be sufficient to check
that bit #55 is clear? (the bit that selects between TTBR0 and TTBR1)
That would also remove the need to strip the tag from the address.

Something like

asm goto("tbnz  %0, #55, %2 \n"
 "tbnz  %1, #55, %2 \n"
 :: "r"(addr), "r"(addr + size - 1) :: notok);
return 1;
notok:
return 0;

with an additional sanity check on the size which the compiler could
eliminate for compile-time constant values.


Re: [PATCH v7 0/5] Allow guest access to EFI confidential computing secret area

2022-02-01 Thread Ard Biesheuvel
On Wed, 2 Feb 2022 at 08:10, Matthew Garrett  wrote:
>
> On Wed, Feb 02, 2022 at 08:05:23AM +0100, Greg KH wrote:
>
> > I see different platform patches trying to stick these blobs in
> > different locations and ways to access (securityfs, sysfs, char device
> > node), which seems crazy to me.  Why can't we at least pick one way to
> > access these to start with, and then have the filesystem layout be
> > platform-specific as needed, which will give the correct hints to
> > userspace as to what it needs to do here?
>
> Which other examples are you thinking of? I think this conversation may
> have accidentally become conflated with a different prior one and now
> we're talking at cross purposes.

This came up a while ago during review of one of the earlier revisions
of this patch set.

https://lore.kernel.org/linux-efi/yrzuiivizmfgj...@google.com/

which describes another two variations on the theme, for pKVM guests
as well as Android bare metal.


Re: [powerpc] ftrace warning kernel/trace/ftrace.c:2068 with code-patching selftests

2022-01-27 Thread Ard Biesheuvel
On Thu, 27 Jan 2022 at 15:55, Mark Rutland  wrote:
>
> On Thu, Jan 27, 2022 at 02:59:31PM +0100, Ard Biesheuvel wrote:
> > On Thu, 27 Jan 2022 at 14:24, Mark Rutland  wrote:
> > >
> > > On Thu, Jan 27, 2022 at 02:07:03PM +0100, Ard Biesheuvel wrote:
> > > > I suppose that on arm64, we can work around this by passing
> > > > --apply-dynamic-relocs to the linker, so that all R_AARCH64_RELATIVE
> > > > targets are prepopulated with the link time value of the respective
> > > > addresses. It does cause some bloat, which is why we disable that
> > > > today, but we could make that dependent on ftrace being enabled.
> > >
> > > We'd also need to teach the build-time sort to update the relocations, 
> > > unless
> > > you mean to also change the boot-time reloc code to RMW with the offset?
> >
> > Why would that be necessary? Every RELA entry has the same effect on
> > its target address, as it just adds a fixed offset.
>
> Currently in relocate_kernel() we generate the absolute address from the
> relocation alone, with the core of the relocation logic being as follows, with
> x9 being the pointer to a RELA entry, and x23 being the offset relative to the
> default load address:
>
> ldp x12, x13, [x9], #24
> ldr x14, [x9, #-8]
>
> add x14, x14, x23   // relocate
> str x14, [x12, x23]
>
> ... and (as per another reply), a sample RELA entry currently contains:
>
> 0x890b1ab0  // default load VA of pointer to update
> 0x0403  // R_AARCH64_RELATIVE
> 0x890b6000  // default load VA of addr to write
>
> So either:
>
> * That code stays as-is, and we must update the relocs to correspond to their
>   new sorted locations, or we'll blat the sorted values with the original
>   relocs as we do today.
>
> * The code needs to change to RMW: read the existing value, add the offset
>   (ignoring the content of the RELA entry's addend field), and write it back.
>   This is what I meant when I said "change the boot-time reloc code to RMW 
> with
>   the offset".
>
> Does that make sense, or have I misunderstood?
>

No you're right. We'd have to use different sequences here depending
on whether the relocation target is populated or not, which currently
we don't care about.


Re: [powerpc] ftrace warning kernel/trace/ftrace.c:2068 with code-patching selftests

2022-01-27 Thread Ard Biesheuvel
On Thu, 27 Jan 2022 at 14:24, Mark Rutland  wrote:
>
> On Thu, Jan 27, 2022 at 02:07:03PM +0100, Ard Biesheuvel wrote:
> > On Thu, 27 Jan 2022 at 13:59, Mark Rutland  wrote:
> > >
> > > On Thu, Jan 27, 2022 at 01:22:17PM +0100, Ard Biesheuvel wrote:
> > > > On Thu, 27 Jan 2022 at 13:20, Mark Rutland  wrote:
> > > > > On Thu, Jan 27, 2022 at 01:03:34PM +0100, Ard Biesheuvel wrote:
> > > > >
> > > > > > These architectures use place-relative extables for the same reason:
> > > > > > place relative references are resolved at build time rather than at
> > > > > > runtime during relocation, making a build time sort feasible.
> > > > > >
> > > > > > arch/alpha/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/arm64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/ia64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/parisc/include/asm/uaccess.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/powerpc/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/riscv/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/s390/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > > arch/x86/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > > >
> > > > > > Note that the swap routine becomes something like the below, given
> > > > > > that the relative references need to be fixed up after the entry
> > > > > > changes place in the sorted list.
> > > > > >
> > > > > > static void swap_ex(void *a, void *b, int size)
> > > > > > {
> > > > > > struct exception_table_entry *x = a, *y = b, tmp;
> > > > > > int delta = b - a;
> > > > > >
> > > > > > tmp = *x;
> > > > > > x->insn = y->insn + delta;
> > > > > > y->insn = tmp.insn - delta;
> > > > > > ...
> > > > > > }
> > > > > >
> > > > > > As a bonus, the resulting footprint of the table in the image is
> > > > > > reduced by 8x, given that every 8 byte pointer has an accompanying 
> > > > > > 24
> > > > > > byte RELA record, so we go from 32 bytes to 4 bytes for every call 
> > > > > > to
> > > > > > __gnu_mcount_nc.
> > > > >
> > > > > Absolutely -- it'd be great if we could do that for the callsite 
> > > > > locations; the
> > > > > difficulty is that the entries are generated by the compiler itself, 
> > > > > so we'd
> > > > > either need some build/link time processing to convert each absolute 
> > > > > 64-bit
> > > > > value to a relative 32-bit offset, or new compiler options to 
> > > > > generate those as
> > > > > relative offsets from the outset.
> > > >
> > > > Don't we use scripts/recordmcount.pl for that?
> > >
> > > Not quite -- we could adjust it to do so, but today it doesn't consider
> > > existing mcount_loc entries, and just generates new ones where the 
> > > compiler has
> > > generated calls to mcount, which it finds by scanning the instructions in 
> > > the
> > > binary. Today it is not used:
> > >
> > > * On arm64 when we default to using `-fpatchable-function-entry=N`.  That 
> > > makes
> > >   the compiler insert 2 NOPs in the function prologue, and log the 
> > > location of
> > >   that NOP sled to a section called `__patchable_function_entries`.
> > >
> > >   We need the compiler to do that since we can't reliably identify 2 NOPs 
> > > in a
> > >   function prologue as being intended to be a patch site, as e.g. there 
> > > could
> > >   be notrace functions where the compiler had to insert NOPs for 
> > > alignment of a
> > >   subsequent branch or similar.
> > >
> > > * On architectures with `-nop-mcount`. On these, it's necessary to use
> > >   `-mrecord-mcount` to have the compiler log the patch-site, for the same
> > >   reason as with `-fpatchable-function-entry`.
> > >
> > > * On architectures with `-mrecord-mcount` generally, since t

Re: [powerpc] ftrace warning kernel/trace/ftrace.c:2068 with code-patching selftests

2022-01-27 Thread Ard Biesheuvel
On Thu, 27 Jan 2022 at 13:59, Mark Rutland  wrote:
>
> On Thu, Jan 27, 2022 at 01:22:17PM +0100, Ard Biesheuvel wrote:
> > On Thu, 27 Jan 2022 at 13:20, Mark Rutland  wrote:
> > > On Thu, Jan 27, 2022 at 01:03:34PM +0100, Ard Biesheuvel wrote:
> > >
> > > > These architectures use place-relative extables for the same reason:
> > > > place relative references are resolved at build time rather than at
> > > > runtime during relocation, making a build time sort feasible.
> > > >
> > > > arch/alpha/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/arm64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/ia64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/parisc/include/asm/uaccess.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/powerpc/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/riscv/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/s390/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > > arch/x86/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > > >
> > > > Note that the swap routine becomes something like the below, given
> > > > that the relative references need to be fixed up after the entry
> > > > changes place in the sorted list.
> > > >
> > > > static void swap_ex(void *a, void *b, int size)
> > > > {
> > > > struct exception_table_entry *x = a, *y = b, tmp;
> > > > int delta = b - a;
> > > >
> > > > tmp = *x;
> > > > x->insn = y->insn + delta;
> > > > y->insn = tmp.insn - delta;
> > > > ...
> > > > }
> > > >
> > > > As a bonus, the resulting footprint of the table in the image is
> > > > reduced by 8x, given that every 8 byte pointer has an accompanying 24
> > > > byte RELA record, so we go from 32 bytes to 4 bytes for every call to
> > > > __gnu_mcount_nc.
> > >
> > > Absolutely -- it'd be great if we could do that for the callsite 
> > > locations; the
> > > difficulty is that the entries are generated by the compiler itself, so 
> > > we'd
> > > either need some build/link time processing to convert each absolute 
> > > 64-bit
> > > value to a relative 32-bit offset, or new compiler options to generate 
> > > those as
> > > relative offsets from the outset.
> >
> > Don't we use scripts/recordmcount.pl for that?
>
> Not quite -- we could adjust it to do so, but today it doesn't consider
> existing mcount_loc entries, and just generates new ones where the compiler 
> has
> generated calls to mcount, which it finds by scanning the instructions in the
> binary. Today it is not used:
>
> * On arm64 when we default to using `-fpatchable-function-entry=N`.  That 
> makes
>   the compiler insert 2 NOPs in the function prologue, and log the location of
>   that NOP sled to a section called `__patchable_function_entries`.
>
>   We need the compiler to do that since we can't reliably identify 2 NOPs in a
>   function prologue as being intended to be a patch site, as e.g. there could
>   be notrace functions where the compiler had to insert NOPs for alignment of 
> a
>   subsequent branch or similar.
>
> * On architectures with `-nop-mcount`. On these, it's necessary to use
>   `-mrecord-mcount` to have the compiler log the patch-site, for the same
>   reason as with `-fpatchable-function-entry`.
>
> * On architectures with `-mrecord-mcount` generally, since this avoids the
>   overhead of scanning each object.
>
> * On x86 when objtool is used.
>

Right.

I suppose that on arm64, we can work around this by passing
--apply-dynamic-relocs to the linker, so that all R_AARCH64_RELATIVE
targets are prepopulated with the link time value of the respective
addresses. It does cause some bloat, which is why we disable that
today, but we could make that dependent on ftrace being enabled.

I do wonder how much overhead we accumulate, though, by having all
these relocations, but I suppose that is the situation today in any
case.


Re: [powerpc] ftrace warning kernel/trace/ftrace.c:2068 with code-patching selftests

2022-01-27 Thread Ard Biesheuvel
On Thu, 27 Jan 2022 at 13:20, Mark Rutland  wrote:
>
> On Thu, Jan 27, 2022 at 01:03:34PM +0100, Ard Biesheuvel wrote:
> > On Thu, 27 Jan 2022 at 12:47, Mark Rutland  wrote:
> > >
> > > [adding LKML so this is easier for others to find]
> > >
> > > If anyone wants to follow the thread from the start, it's at:
> > >
> > >   
> > > https://lore.kernel.org/linuxppc-dev/944d10da-8200-4ba9-8d0a-3bed9aa99...@linux.ibm.com/
> > >
> > > Ard, I was under the impression that the 32-bit arm kernel was (virtually)
> > > relocatable, but I couldn't spot where, and suspect I'm mistaken. Do you 
> > > know
> > > whether it currently does any boot-time dynamic relocation?
> >
> > No, it does not.
>
> Thanks for comfirming!
>
> So 32-bit arm should be able to opt into the build-time sort as-is.
>
> > > Steve asked for a bit more detail on IRC, so the below is an attempt to 
> > > explain
> > > what's actually going on here.
> > >
> > > The short answer is that relocatable kernels (e.g. those with KASLR 
> > > support)
> > > need to handle the kernel being loaded at (somewhat) arbitrary virtual
> > > addresses. Even where code can be position-independent, any pointers in 
> > > the
> > > kernel image (e.g. the contents of the mcount_loc table) need to be 
> > > updated to
> > > account for the specific VA the kernel was loaded at -- arch code does 
> > > this
> > > early at boot time by applying dynamic (ELF) relocations.
> >
> > These architectures use place-relative extables for the same reason:
> > place relative references are resolved at build time rather than at
> > runtime during relocation, making a build time sort feasible.
> >
> > arch/alpha/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/arm64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/ia64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/parisc/include/asm/uaccess.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/powerpc/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/riscv/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/s390/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> > arch/x86/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
> >
> > Note that the swap routine becomes something like the below, given
> > that the relative references need to be fixed up after the entry
> > changes place in the sorted list.
> >
> > static void swap_ex(void *a, void *b, int size)
> > {
> > struct exception_table_entry *x = a, *y = b, tmp;
> > int delta = b - a;
> >
> > tmp = *x;
> > x->insn = y->insn + delta;
> > y->insn = tmp.insn - delta;
> > ...
> > }
> >
> > As a bonus, the resulting footprint of the table in the image is
> > reduced by 8x, given that every 8 byte pointer has an accompanying 24
> > byte RELA record, so we go from 32 bytes to 4 bytes for every call to
> > __gnu_mcount_nc.
>
> Absolutely -- it'd be great if we could do that for the callsite locations; 
> the
> difficulty is that the entries are generated by the compiler itself, so we'd
> either need some build/link time processing to convert each absolute 64-bit
> value to a relative 32-bit offset, or new compiler options to generate those 
> as
> relative offsets from the outset.
>

Don't we use scripts/recordmcount.pl for that?


Re: [powerpc] ftrace warning kernel/trace/ftrace.c:2068 with code-patching selftests

2022-01-27 Thread Ard Biesheuvel
On Thu, 27 Jan 2022 at 12:47, Mark Rutland  wrote:
>
> [adding LKML so this is easier for others to find]
>
> If anyone wants to follow the thread from the start, it's at:
>
>   
> https://lore.kernel.org/linuxppc-dev/944d10da-8200-4ba9-8d0a-3bed9aa99...@linux.ibm.com/
>
> Ard, I was under the impression that the 32-bit arm kernel was (virtually)
> relocatable, but I couldn't spot where, and suspect I'm mistaken. Do you know
> whether it currently does any boot-time dynamic relocation?
>

No, it does not.

..
> > Steve pointed me at this thread over IRC -- I'm not subscribed to this list 
> > so
> > grabbed a copy of the thread thus far via b4.
> >
> > On Tue, Jan 25, 2022 at 11:20:27AM +0800, Yinan Liu wrote:
> > > > Yeah, I think it's time to opt in, instead of opting out.
> >
> > I agree this must be opt-in rather than opt-out.
> >
> > However, I think most architectures were broken (in at least some
> > configurations) by commit:
> >
> >   72b3942a173c387b ("scripts: ftrace - move the sort-processing in 
> > ftrace_init")
> >
> > ... and so I don't think this fix is correct as-is, and we might want to 
> > revert
> > that or at least mark is as BROKEN for now.
>
> Steve asked for a bit more detail on IRC, so the below is an attempt to 
> explain
> what's actually going on here.
>
> The short answer is that relocatable kernels (e.g. those with KASLR support)
> need to handle the kernel being loaded at (somewhat) arbitrary virtual
> addresses. Even where code can be position-independent, any pointers in the
> kernel image (e.g. the contents of the mcount_loc table) need to be updated to
> account for the specific VA the kernel was loaded at -- arch code does this
> early at boot time by applying dynamic (ELF) relocations.
>

These architectures use place-relative extables for the same reason:
place relative references are resolved at build time rather than at
runtime during relocation, making a build time sort feasible.

arch/alpha/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/arm64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/ia64/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/parisc/include/asm/uaccess.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/powerpc/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/riscv/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/s390/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE
arch/x86/include/asm/extable.h:#define ARCH_HAS_RELATIVE_EXTABLE

Note that the swap routine becomes something like the below, given
that the relative references need to be fixed up after the entry
changes place in the sorted list.

static void swap_ex(void *a, void *b, int size)
{
struct exception_table_entry *x = a, *y = b, tmp;
int delta = b - a;

tmp = *x;
x->insn = y->insn + delta;
y->insn = tmp.insn - delta;
...
}

As a bonus, the resulting footprint of the table in the image is
reduced by 8x, given that every 8 byte pointer has an accompanying 24
byte RELA record, so we go from 32 bytes to 4 bytes for every call to
__gnu_mcount_nc.
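
For reference, a place-relative entry looks roughly like the arm64 extable
one below; the fields are offsets from their own address, which is why
swap_ex() above has to adjust them by delta:

struct exception_table_entry {
        int insn, fixup;        /* 32-bit place-relative offsets */
};

static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
{
        return (unsigned long)&x->insn + x->insn;
}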



> Walking through how we get there, considering arm64 specifically:
>
> 1) When an object is created with traceable functions:
>
>The compiler records the addresses of the callsites into a section. Those
>are absolute virtual addresses, but the final virtual addresses are not yet
>known, so the compiler generates ELF relocations to tell the linker how to
>fill these in later.
>
>On arm64, since the compiler doesn't know the final value yet, it fills the
>actual values with zero for now. Other architectures might do differently.
>
>For example, per `objdump -r init/main.o`:
>
>| RELOCATION RECORDS FOR [__patchable_function_entries]:
>| OFFSET   TYPE  VALUE
>|  R_AARCH64_ABS64   .text+0x0028
>| 0008 R_AARCH64_ABS64   .text+0x0088
>| 0010 R_AARCH64_ABS64   .text+0x00e8
>
> 2) When vmlinux is linked:
>
>The linker script accumulates the callsite pointers from all the object
>files into the mcount_loc table. Since the kernel is relocatable, the
>runtime absolute addresses are still not yet known, but the offset relative
>to the kernel base is known, and so the linker consumes the absolute
>relocations created by the compiler and generates new relocations relative
>to the kernel's default load address so that these can be adjusted at boot
>time.
>
>On arm64, those are RELA and/or RELR relocations, which our vmlinux.lds.S
>accumulates those into a location in the initdata section that the kernel
>can find at boot time:
>
>  
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/kernel/vmlinux.lds.S?h=v5.17-rc1#n252
>
>For example, per `objdump -s vmlinux -j .rela.dyn`:
>
>| vmlinux: file format elf64-littleaarch64
>|
>

Re: [PATCH v2 00/13] Unify asm/unaligned.h around struct helper

2021-12-16 Thread Ard Biesheuvel
Hi Arnd,

(replying to an old thread as this came up in the discussion regarding
misaligned loads and stored in siphash() when compiled for ARM
[f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d])

On Fri, 14 May 2021 at 12:02, Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> The get_unaligned()/put_unaligned() helpers are traditionally architecture
> specific, with the two main variants being the "access-ok.h" version
> that assumes unaligned pointer accesses always work on a particular
> architecture, and the "le-struct.h" version that casts the data to a
> byte aligned type before dereferencing, for architectures that cannot
> always do unaligned accesses in hardware.
>
> Based on the discussion linked below, it appears that the access-ok
> version is not reliable on any architecture, but the struct version
> probably has no downsides. This series changes the code to use the
> same implementation on all architectures, addressing the few exceptions
> separately.
>
> I've included this version in the asm-generic tree for 5.14 already,
> addressing the few issues that were pointed out in the RFC. If there
> are any remaining problems, I hope those can be addressed as follow-up
> patches.
>

I think this series is a huge improvement, but it does not solve the
UB problem completely. As we found, there are open issues in the GCC
bugzilla regarding assumptions in the compiler that aligned quantities
either overlap entirely or not at all. (e.g.,
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363)

CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is used in many places to
conditionally emit code that violates C alignment rules. E.g., there
is this example in Documentation/core-api/unaligned-memory-access.rst:

bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
{
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
 ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
  return fold == 0;
#else
...

(which now deviates from its actual implementation, but the point is
the same) where CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is used in the
wrong way (IMHO).

The pattern seems to be

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  // ignore alignment rules, just cast to a more aligned pointer type
#else
  // use unaligned accessors, which could be either cheap or expensive,
  // depending on the architecture
#endif

whereas the following pattern makes more sense, I think, and does not
violate any C rules in the common case:

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  // use unaligned accessors, which are cheap or even entirely free
#else
  // avoid unaligned accessors, as they are expensive; instead, reorganize
  // the data so we don't need them (similar to setting NET_IP_ALIGN to 2)
#endif
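
Taking the ether_addr_equal() example above, the first leg of that pattern
would then simply use the accessors -- a rough, untested sketch, which on
architectures with cheap unaligned loads should compile to the same code as
the cast version:

#include <asm/unaligned.h>

bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
{
        u32 fold = (get_unaligned((const u32 *)addr1) ^
                    get_unaligned((const u32 *)addr2)) |
                   (get_unaligned((const u16 *)(addr1 + 4)) ^
                    get_unaligned((const u16 *)(addr2 + 4)));

        return fold == 0;
}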

The only remaining problem here is reinterpreting a char* pointer to a
u32*, e.g., for accessing the IP address in an Ethernet frame when
NET_IP_ALIGN == 2, which could suffer from the same UB problem again,
as I understand it.

In the 32-bit ARM case (v6+) [which is admittedly an outlier] this
makes a substantial difference, as ARMv6 does have efficient unaligned
accessors (load/store word or halfword may be used on misaligned
addresses) but requires that load/store double-word and load/store
multiple are only used on 32-bit aligned addresses. GCC does the right
thing with the unaligned accessors, but blindly casting away
misalignment may result in alignment traps if the compiler happened to
emit load-double or load-multiple instructions for the memory access
in question.
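
A contrived example of the difference (untested, and the exact codegen
obviously depends on the compiler version):

#include <asm/unaligned.h>
#include <linux/types.h>

/* may be compiled to plain ldr/ldrh accesses, which tolerate misalignment */
u64 load_u64_any(const void *p)
{
        return get_unaligned((const u64 *)p);
}

/* the cast promises 8-byte alignment, so GCC is free to emit ldrd/ldm here,
 * which trap if p is in fact misaligned */
u64 load_u64_cast(const void *p)
{
        return *(const u64 *)p;
}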

Jason already verified that in the siphash() case, the aligned and
unaligned versions of the code actually compile to the same machine
code on x86, as the unaligned accessors just disappear. I suspect this
to be the case for many instances where
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is being used, mostly in the
networking stack.

So I intend to dig a bit deeper into this, and perhaps propose some
changes where the interpretation of
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is documented more clearly, and
tweaked according to my suggestion above (while ensuring that codegen
does not suffer, of course).

Thoughts, concerns, objections?


--
Ard.




> Link: 
> https://lore.kernel.org/lkml/75d07691-1e4f-741f-9852-38c0b4f52...@synopsys.com/
> Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363
> Link: https://lore.kernel.org/lkml/20210507220813.365382-14-a...@kernel.org/
> Link: git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git 
> unaligned-rework-v2
>
>
> Arnd Bergmann (13):
>   asm-generic: use asm-generic/unaligned.h for most architectures
>   openrisc: always use unaligned-struct header
>   sh: remove unaligned access for sh4a
>   m68k: select CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>   powerpc: use linux/unaligned/le_struct.h on LE power7
>   asm-generic: unaligned: remove byteshift helpers
>   asm-generic: unaligned always use struct helpers
>   partitions: msdos: fix one

Re: [RFC PATCH 3/8] s390: add CPU field to struct thread_info

2021-09-28 Thread Ard Biesheuvel
On Tue, 14 Sept 2021 at 14:11, Ard Biesheuvel  wrote:
>
> The CPU field will be moved back into thread_info even when
> THREAD_INFO_IN_TASK is enabled, so add it back to s390's definition of
> struct thread_info.
>
> Signed-off-by: Ard Biesheuvel 
> ---
>  arch/s390/include/asm/thread_info.h | 1 +
>  1 file changed, 1 insertion(+)
>

Heiko, Christian, Vasily,

Do you have any objections to this change? If you don't, could you
please ack it so it can be taken through another tree (or if that is
problematic for you, could you please propose another way of merging
these changes?)

Thanks,
Ard.

> diff --git a/arch/s390/include/asm/thread_info.h 
> b/arch/s390/include/asm/thread_info.h
> index e6674796aa6f..b2ffcb4fe000 100644
> --- a/arch/s390/include/asm/thread_info.h
> +++ b/arch/s390/include/asm/thread_info.h
> @@ -37,6 +37,7 @@
>  struct thread_info {
> unsigned long   flags;  /* low level flags */
> unsigned long   syscall_work;   /* SYSCALL_WORK_ flags */
> +   unsigned intcpu;/* current CPU */
>  };
>
>  /*
> --
> 2.30.2
>


Re: [RFC PATCH 4/8] powerpc: add CPU field to struct thread_info

2021-09-28 Thread Ard Biesheuvel
On Tue, 28 Sept 2021 at 02:16, Michael Ellerman  wrote:
>
> Michael Ellerman  writes:
> > Ard Biesheuvel  writes:
> >> On Tue, 14 Sept 2021 at 14:11, Ard Biesheuvel  wrote:
> >>>
> >>> The CPU field will be moved back into thread_info even when
> >>> THREAD_INFO_IN_TASK is enabled, so add it back to powerpc's definition
> >>> of struct thread_info.
> >>>
> >>> Signed-off-by: Ard Biesheuvel 
> >>
> >> Michael,
> >>
> >> Do you have any objections or issues with this patch or the subsequent
> >> ones cleaning up the task CPU kludge for ppc32? Christophe indicated
> >> that he was happy with it.
> >
> > No objections, it looks good to me, thanks for cleaning up that horror :)
> >
> > It didn't apply cleanly to master so I haven't tested it at all, if you can 
> > point me at a
> > git tree with the dependencies I'd be happy to run some tests over it.
>
> Actually I realised I can just drop the last patch.
>
> So that looks fine, passes my standard quick build & boot on qemu tests,
> and builds with/without stack protector enabled.
>

Thanks.

Do you have any opinion on how this series should be merged? Kees Cook
is willing to take them via his cross-arch tree, or you could carry
them if you prefer. Taking it via multiple trees at the same time is
going to be tricky, or take two cycles, which I'd prefer to avoid.

-- 
Ard.


Re: [RFC PATCH 4/8] powerpc: add CPU field to struct thread_info

2021-09-27 Thread Ard Biesheuvel
On Tue, 14 Sept 2021 at 14:11, Ard Biesheuvel  wrote:
>
> The CPU field will be moved back into thread_info even when
> THREAD_INFO_IN_TASK is enabled, so add it back to powerpc's definition
> of struct thread_info.
>
> Signed-off-by: Ard Biesheuvel 

Michael,

Do you have any objections or issues with this patch or the subsequent
ones cleaning up the task CPU kludge for ppc32? Christophe indicated
that he was happy with it.

Thanks,
Ard.


> ---
>  arch/powerpc/include/asm/thread_info.h | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/thread_info.h 
> b/arch/powerpc/include/asm/thread_info.h
> index b4ec6c7dd72e..5725029aaa29 100644
> --- a/arch/powerpc/include/asm/thread_info.h
> +++ b/arch/powerpc/include/asm/thread_info.h
> @@ -47,6 +47,9 @@
>  struct thread_info {
> int preempt_count;  /* 0 => preemptable,
><0 => BUG */
> +#ifdef CONFIG_SMP
> +   unsigned intcpu;
> +#endif
> unsigned long   local_flags;/* private flags for thread */
>  #ifdef CONFIG_LIVEPATCH
> unsigned long *livepatch_sp;
> --
> 2.30.2
>


Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver

2021-09-22 Thread Ard Biesheuvel
On Wed, 22 Sept 2021 at 12:43, Emmanuel Gil Peyrot
 wrote:
>
> On Wed, Sep 22, 2021 at 12:10:41PM +0200, Ard Biesheuvel wrote:
> > On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
> >  wrote:
> > >
> > > This engine implements AES in CBC mode, using 128-bit keys only.  It is
> > > present on both the Wii and the Wii U, and is apparently identical in
> > > both consoles.
> > >
> > > The hardware is capable of firing an interrupt when the operation is
> > > done, but this driver currently uses a busy loop, I’m not too sure
> > > whether it would be preferable to switch, nor how to achieve that.
> > >
> > > It also supports a mode where no operation is done, and thus could be
> > > used as a DMA copy engine, but I don’t know how to expose that to the
> > > kernel or whether it would even be useful.
> > >
> > > In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> > > aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> > > speedup.
> > >
> > > This driver was written based on reversed documentation, see:
> > > https://wiibrew.org/wiki/Hardware/AES
> > >
> > > Signed-off-by: Emmanuel Gil Peyrot 
> > > Tested-by: Emmanuel Gil Peyrot   # on Wii U
> >
> > This is redundant - everybody should test the code they submit.
>
> Indeed, except for the comment, as I haven’t been able to test on the
> Wii just yet and that’s kind of a call for doing exactly that. :)
>
> >
> > ...
> > > +   /* TODO: figure out how to use interrupts here, this will probably
> > > +* lower throughput but let the CPU do other things while the AES
> > > +* engine is doing its work. */
> >
> > So is it worthwhile like this? How much faster is it to use this
> > accelerator rather than the CPU?
>
> As I mentioned above, on my hardware it reaches 80.7 MiB/s using this
> busy loop instead of 30.9 MiB/s using aes-generic, measured using
> `cryptsetup benchmark --cipher=aes --key-size=128`.  I expect the
> difference would be even more pronounced on the Wii, with its CPU being
> clocked lower.
>

Ah apologies for not spotting that. This is a nice speedup.

> I will give a try at using the interrupt, but I fully expect a lower
> throughput alongside a lower CPU usage (for large requests).
>

You should consider latency as well. Is it really necessary to disable
interrupts as well? A scheduling blackout of ~1ms (for the worst case
of 64k of input @ 80 MB/s) may be tolerable but keeping interrupts
disabled for that long is probably not a great idea. (Just make sure
you use spin_lock_bh() to prevent deadlocks that could occur if your
code is called from softirq context)
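
As a sketch of what I mean (same global 'lock' and register helpers as in
your patch, body elided - illustrative only, not a drop-in replacement):

static void nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv,
			       int dir, bool firstchunk)
{
	/*
	 * spin_lock_bh() keeps softirq users of the same lock from
	 * deadlocking against us, but leaves hardware interrupts enabled
	 * while we poll for completion.
	 */
	spin_lock_bh(&lock);

	/* ... program the IV, start the operation, poll AES_CTRL ... */

	spin_unlock_bh(&lock);
}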

But using the interrupt is obviously preferred. What's wrong with it?

Btw the crypto API does not permit AES-128 only - you will need to add
a fallback for other key sizes as well.
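
The usual pattern looks roughly like this (illustrative sketch only - the
context struct and the fallback allocation in the init path are not in your
patch, the names are invented, and the encrypt/decrypt path would also need
to defer to the fallback whenever it is in use):

struct nintendo_aes_ctx {
	struct crypto_skcipher *fallback;	/* software cbc(aes) */
	bool need_fallback;
};

static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				    unsigned int len)
{
	struct nintendo_aes_ctx *ctx = crypto_skcipher_ctx(tfm);

	ctx->need_fallback = (len != AES_KEYSIZE_128);
	if (ctx->need_fallback)
		return crypto_skcipher_setkey(ctx->fallback, key, len);

	writefield(AES_KEY, key);
	return 0;
}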


> >
> > > +   do {
> > > +   status = ioread32be(base + AES_CTRL);
> > > +   cpu_relax();
> > > +   } while ((status & AES_CTRL_EXEC) && --counter);
> > > +
> > > +   /* Do we ever get called with dst ≠ src?  If so we have to 
> > > invalidate
> > > +* dst in addition to the earlier flush of src. */
> > > +   if (unlikely(dst != src)) {
> > > +   for (i = 0; i < len; i += 32)
> > > +   __asm__("dcbi 0, %0" : : "r" (dst + i));
> > > +   __asm__("sync" : : : "memory");
> > > +   }
> > > +
> > > +   return counter ? 0 : 1;
> > > +}
> > > +
> > > +static void
> > > +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> > > +  bool firstchunk)
> > > +{
> > > +   u32 flags = 0;
> > > +   unsigned long iflags;
> > > +   int ret;
> > > +
> > > +   flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> > > +
> > > +   if (dir == AES_DIR_DECRYPT)
> > > +   flags |= AES_CTRL_DEC;
> > > +
> > > +   if (!firstchunk)
> > > +   flags |= AES_CTRL_IV;
> > > +
> > > +   /* Start the critical section */
> > > +   spin_lock_irqsave(&lock, iflags);
> > > +
> > > +   if (firstchunk)
> > > +   writefield(AES_IV, iv);
> > > +
> > > +   ret = do_crypt(src, dst, len, flags);
> > > +   BUG_

Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver

2021-09-22 Thread Ard Biesheuvel
On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
 wrote:
>
> This engine implements AES in CBC mode, using 128-bit keys only.  It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Signed-off-by: Emmanuel Gil Peyrot 
> Tested-by: Emmanuel Gil Peyrot   # on Wii U

This is redundant - everybody should test the code they submit.

...
> +   /* TODO: figure out how to use interrupts here, this will probably
> +* lower throughput but let the CPU do other things while the AES
> +* engine is doing its work. */

So is it worthwhile like this? How much faster is it to use this
accelerator rather than the CPU?

> +   do {
> +   status = ioread32be(base + AES_CTRL);
> +   cpu_relax();
> +   } while ((status & AES_CTRL_EXEC) && --counter);
> +
> +   /* Do we ever get called with dst ≠ src?  If so we have to invalidate
> +* dst in addition to the earlier flush of src. */
> +   if (unlikely(dst != src)) {
> +   for (i = 0; i < len; i += 32)
> +   __asm__("dcbi 0, %0" : : "r" (dst + i));
> +   __asm__("sync" : : : "memory");
> +   }
> +
> +   return counter ? 0 : 1;
> +}
> +
> +static void
> +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> +  bool firstchunk)
> +{
> +   u32 flags = 0;
> +   unsigned long iflags;
> +   int ret;
> +
> +   flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> +
> +   if (dir == AES_DIR_DECRYPT)
> +   flags |= AES_CTRL_DEC;
> +
> +   if (!firstchunk)
> +   flags |= AES_CTRL_IV;
> +
> +   /* Start the critical section */
> +   spin_lock_irqsave(&lock, iflags);
> +
> +   if (firstchunk)
> +   writefield(AES_IV, iv);
> +
> +   ret = do_crypt(src, dst, len, flags);
> +   BUG_ON(ret);
> +
> +   spin_unlock_irqrestore(&lock, iflags);
> +}
> +
> +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 
> *key,
> +   unsigned int len)
> +{
> +   /* The hardware only supports AES-128 */
> +   if (len != AES_KEYSIZE_128)
> +   return -EINVAL;
> +
> +   writefield(AES_KEY, key);
> +   return 0;
> +}
> +
> +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> +{
> +   struct skcipher_walk walk;
> +   unsigned int nbytes;
> +   int err;
> +   char ivbuf[AES_BLOCK_SIZE];
> +   unsigned int ivsize;
> +
> +   bool firstchunk = true;
> +
> +   /* Reset the engine */
> +   iowrite32be(0, base + AES_CTRL);
> +
> +   err = skcipher_walk_virt(&walk, req, false);
> +   ivsize = min(sizeof(ivbuf), walk.ivsize);
> +
> +   while ((nbytes = walk.nbytes) != 0) {
> +   unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
> +   unsigned int ret = nbytes % AES_BLOCK_SIZE;
> +
> +   if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> +   /* If this is the last chunk and we're decrypting, 
> take
> +* note of the IV (which is the last ciphertext block)
> +*/
> +   memcpy(ivbuf, walk.src.virt.addr + walk.total - 
> ivsize,
> +  ivsize);
> +   }
> +
> +   nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
> +  chunkbytes, walk.iv, dir, firstchunk);
> +
> +   if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
> +   /* If this is the last chunk and we're encrypting, 
> take
> +* note of the IV (which is the last ciphertext block)
> +*/
> +   memcpy(walk.iv,
> +  walk.dst.virt.addr + walk.total - ivsize,
> +  ivsize);
> +   } else if (walk.total == chunkbytes && dir == 
> AES_DIR_DECRYPT) {
> +   memcpy(walk.iv, ivbuf, ivsize);
> +   }
> +
> +   err = skcipher_walk_done(&walk, ret);
> +   firstchunk = false;
> +   }
> +
> +   return err;
> +}
> +
> +static int nintendo_cbc

Re: [RFC PATCH 0/8] Move task_struct::cpu back into thread_info

2021-09-21 Thread Ard Biesheuvel
On Tue, 14 Sept 2021 at 15:55, Mark Rutland  wrote:
>
> On Tue, Sep 14, 2021 at 02:10:28PM +0200, Ard Biesheuvel wrote:
> > Commit c65eacbe290b ("sched/core: Allow putting thread_info into
> > task_struct") mentions that, along with moving thread_info into
> > task_struct, the cpu field is moved out of the former into the latter,
> > but does not explain why.
>
> From what I recall of talking to Andy around that time, when converting
> arm64 over, the theory was that over time we'd move more and more out of
> thread_info and into task_struct or thread_struct, until task_struct
> supplanted thread_info entirely, and that all became generic.
>
> I think the key gain there was making things more *generic*, and there
> are other ways we could do that in future without moving more into
> task_struct (e.g. with a generic thread_info and arch_thread_info inside
> that).
>
> With that in mind, and given the diffstat, I think this is worthwhile.
>
> FWIW, for the series:
>
> Acked-by: Mark Rutland 
>

Thanks.

Any comments on this from the various arch maintainers? Especially
power, as Christophe seems happy with this but there are 3 different
patches affecting power that need a maintainer ack.


Re: [RFC PATCH 1/8] arm64: add CPU field to struct thread_info

2021-09-21 Thread Ard Biesheuvel
On Thu, 16 Sept 2021 at 16:41, Catalin Marinas  wrote:
>
> On Tue, Sep 14, 2021 at 02:10:29PM +0200, Ard Biesheuvel wrote:
> > The CPU field will be moved back into thread_info even when
> > THREAD_INFO_IN_TASK is enabled, so add it back to arm64's definition of
> > struct thread_info.
> >
> > Signed-off-by: Ard Biesheuvel 
>
> Acked-by: Catalin Marinas 

Thanks. I take it this applies to patch #5 as well?


Re: [RFC PATCH 5/8] sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y

2021-09-14 Thread Ard Biesheuvel
On Tue, 14 Sept 2021 at 17:59, Linus Torvalds
 wrote:
>
> On Tue, Sep 14, 2021 at 8:53 AM Ard Biesheuvel  wrote:
> >
> > task_cpu() takes a 'const struct task_struct *', whereas
> > task_thread_info() takes a 'struct task_struct *'.
>
> Oh, annoying, but that's easily fixed. Just make that
>
>static inline struct thread_info *task_thread_info(struct
> task_struct *task) ..
>
> be a simple
>
>   #define task_thread_info(tsk) (&(tsk)->thread_info)
>
> instead. That actually then matches the !THREAD_INFO_IN_TASK case anyway.
>
> Make the commit comment be about how that fixes the type problem.
>
> Because while in many cases inline functions are superior to macros,
> it clearly isn't the case in this case.
>

Works for me.


Re: [RFC PATCH 5/8] sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y

2021-09-14 Thread Ard Biesheuvel
On Tue, 14 Sept 2021 at 17:49, Linus Torvalds
 wrote:
>
> On Tue, Sep 14, 2021 at 5:11 AM Ard Biesheuvel  wrote:
> >
> >  static inline unsigned int task_cpu(const struct task_struct *p)
> >  {
> >  #ifdef CONFIG_THREAD_INFO_IN_TASK
> > -   return READ_ONCE(p->cpu);
> > +   return READ_ONCE(p->thread_info.cpu);
> >  #else
> > return READ_ONCE(task_thread_info(p)->cpu);
> >  #endif
>
> Those two lines look different, but aren't.
>
> Please just remove the CONFIG_THREAD_INFO_IN_TASK conditional, and use
>
>   return READ_ONCE(task_thread_info(p)->cpu);
>
> unconditionally, which now does the right thing regardless.
>

Unfortunately not.

task_cpu() takes a 'const struct task_struct *', whereas
task_thread_info() takes a 'struct task_struct *'.

Since task_thread_info()-> is widely used as an lvalue, I would
need to update task_cpu()'s prototype and fix up all the callers, some
of which take the const flavor themselves. Or introduce
'const_task_thread_info()' which takes the const flavor, and cannot be
used to instantiate lvalues.

Suggestions welcome, but this is the cleanest I could come up with.
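
For reference, the 'const_task_thread_info()' variant would look something
like this (sketch only, not part of the series, and assuming
THREAD_INFO_IN_TASK=y so thread_info is embedded in task_struct):

/* Const-preserving accessor: usable for reads such as task_cpu(), but it
 * cannot be used to form an lvalue for writing. */
static inline const struct thread_info *
const_task_thread_info(const struct task_struct *task)
{
	return &task->thread_info;
}

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return READ_ONCE(const_task_thread_info(p)->cpu);
}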


Re: [RFC PATCH 1/8] arm64: add CPU field to struct thread_info

2021-09-14 Thread Ard Biesheuvel
On Tue, 14 Sept 2021 at 17:41, Linus Torvalds
 wrote:
>
> On Tue, Sep 14, 2021 at 5:10 AM Ard Biesheuvel  wrote:
> >
> > The CPU field will be moved back into thread_info even when
> > THREAD_INFO_IN_TASK is enabled, so add it back to arm64's definition of
> > struct thread_info.
>
> The series looks sane to me, but it strikes me that it's inconsistent
> - here for arm64, you make it unconditional, but for the other
> architectures you end up putting it inside a #ifdef CONFIG_SMP.
>
> Was there some reason for this odd behavior?
>

Yes. CONFIG_SMP is a 'def_bool y' on arm64.


[RFC PATCH 8/8] ARM: rely on core code to keep thread_info::cpu updated

2021-09-14 Thread Ard Biesheuvel
Now that the core code switched back to using thread_info::cpu to keep
a task's CPU number, we no longer need to keep it in sync explicitly. So
just drop the code that does this.

Signed-off-by: Ard Biesheuvel 
---
This patch applies onto [0], which we hope to get merged for v5.16

[0] 
https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=arm32-ti-in-task-v5

 arch/arm/include/asm/switch_to.h | 14 --
 arch/arm/kernel/smp.c|  3 ---
 2 files changed, 17 deletions(-)

diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index db2be1f6550d..61e4a3c4ca6e 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -23,23 +23,9 @@
  */
 extern struct task_struct *__switch_to(struct task_struct *, struct 
thread_info *, struct thread_info *);
 
-static inline void set_ti_cpu(struct task_struct *p)
-{
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-   /*
-* The core code no longer maintains the thread_info::cpu field once
-* CONFIG_THREAD_INFO_IN_TASK is in effect, but we rely on it for
-* raw_smp_processor_id(), which cannot access struct task_struct*
-* directly for reasons of circular #inclusion hell.
-*/
-   task_thread_info(p)->cpu = p->cpu;
-#endif
-}
-
 #define switch_to(prev,next,last)  \
 do {   \
__complete_pending_tlbi();  \
-   set_ti_cpu(next);   \
if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO)) \
__this_cpu_write(__entry_task, next);   \
last = __switch_to(prev,task_thread_info(prev), 
task_thread_info(next));\
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cde5b6d8bac5..97ee6b1567e9 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -154,9 +154,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
 #endif
secondary_data.task = idle;
-   if (IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK))
-   task_thread_info(idle)->cpu = cpu;
-
sync_cache_w(&secondary_data);
 
/*
-- 
2.30.2



[RFC PATCH 7/8] riscv: rely on core code to keep thread_info::cpu updated

2021-09-14 Thread Ard Biesheuvel
Now that the core code switched back to using thread_info::cpu to keep
a task's CPU number, we no longer need to keep it in sync explicitly. So
just drop the code that does this.

Signed-off-by: Ard Biesheuvel 
---
 arch/riscv/kernel/asm-offsets.c | 1 -
 arch/riscv/kernel/entry.S   | 5 -
 arch/riscv/kernel/head.S| 1 -
 3 files changed, 7 deletions(-)

diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 90f8ce64fa6f..478d9f02dab5 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -33,7 +33,6 @@ void asm_offsets(void)
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
-   OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
 
OFFSET(TASK_THREAD_F0,  task_struct, thread.fstate.f[0]);
OFFSET(TASK_THREAD_F1,  task_struct, thread.fstate.f[1]);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 98f502654edd..459eb1714353 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -544,11 +544,6 @@ ENTRY(__switch_to)
REG_L s9,  TASK_THREAD_S9_RA(a4)
REG_L s10, TASK_THREAD_S10_RA(a4)
REG_L s11, TASK_THREAD_S11_RA(a4)
-   /* Swap the CPU entry around. */
-   lw a3, TASK_TI_CPU(a0)
-   lw a4, TASK_TI_CPU(a1)
-   sw a3, TASK_TI_CPU(a1)
-   sw a4, TASK_TI_CPU(a0)
/* The offset of thread_info in task_struct is zero. */
move tp, a1
ret
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index fce5184b22c3..d5ec30ef6f5d 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -317,7 +317,6 @@ clear_bss_done:
call setup_trap_vector
/* Restore C environment */
la tp, init_task
-   sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union + THREAD_SIZE
 
 #ifdef CONFIG_KASAN
-- 
2.30.2



[RFC PATCH 6/8] powerpc: smp: remove hack to obtain offset of task_struct::cpu

2021-09-14 Thread Ard Biesheuvel
Instead of relying on awful hacks to obtain the offset of the cpu field
in struct task_struct, move it back into struct thread_info, which does
not create the same level of circular dependency hell when trying to
include the header file that defines it.

Signed-off-by: Ard Biesheuvel 
---
 arch/powerpc/Makefile | 11 ---
 arch/powerpc/include/asm/smp.h| 17 +
 arch/powerpc/kernel/asm-offsets.c |  2 --
 3 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index aa6808e70647..54cad1faa5d0 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -446,17 +446,6 @@ else
 endif
 endif
 
-ifdef CONFIG_SMP
-ifdef CONFIG_PPC32
-prepare: task_cpu_prepare
-
-PHONY += task_cpu_prepare
-task_cpu_prepare: prepare0
-   $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == 
"TASK_CPU") print $$3;}' include/generated/asm-offsets.h))
-
-endif # CONFIG_PPC32
-endif # CONFIG_SMP
-
 PHONY += checkbin
 # Check toolchain versions:
 # - gcc-4.6 is the minimum kernel-wide version so nothing required.
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 7ef1cd8168a0..007332a4a732 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -87,22 +87,7 @@ int is_cpu_dead(unsigned int cpu);
 /* 32-bit */
 extern int smp_hw_index[];
 
-/*
- * This is particularly ugly: it appears we can't actually get the definition
- * of task_struct here, but we need access to the CPU this task is running on.
- * Instead of using task_struct we're using _TASK_CPU which is extracted from
- * asm-offsets.h by kbuild to get the current processor ID.
- *
- * This also needs to be safeguarded when building asm-offsets.s because at
- * that time _TASK_CPU is not defined yet. It could have been guarded by
- * _TASK_CPU itself, but we want the build to fail if _TASK_CPU is missing
- * when building something else than asm-offsets.s
- */
-#ifdef GENERATING_ASM_OFFSETS
-#define raw_smp_processor_id() (0)
-#else
-#define raw_smp_processor_id() (*(unsigned int *)((void *)current + 
_TASK_CPU))
-#endif
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 #define hard_smp_processor_id()(smp_hw_index[smp_processor_id()])
 
 static inline int get_hard_smp_processor_id(int cpu)
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index e37e4546034e..cc05522f50bf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -9,8 +9,6 @@
  * #defines from the assembly-language output.
  */
 
-#define GENERATING_ASM_OFFSETS /* asm/smp.h */
-
 #include 
 #include 
 #include 
-- 
2.30.2



[RFC PATCH 5/8] sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y

2021-09-14 Thread Ard Biesheuvel
THREAD_INFO_IN_TASK moved the CPU field out of thread_info, but this
causes some issues on architectures that define raw_smp_processor_id()
in terms of this field, due to the fact that #include'ing linux/sched.h
to get at struct task_struct is problematic in terms of circular
dependencies.

Given that thread_info and task_struct are the same data structure
anyway when THREAD_INFO_IN_TASK=y, let's move it back so that having
access to the type definition of struct thread_info is sufficient to
reference the CPU number of the current task.

Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/kernel/asm-offsets.c   | 1 -
 arch/arm64/kernel/head.S  | 2 +-
 arch/powerpc/kernel/asm-offsets.c | 2 +-
 arch/powerpc/kernel/smp.c | 2 +-
 include/linux/sched.h | 6 +-
 kernel/sched/sched.h  | 4 
 6 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index cee9f3e9f906..0bfc048221af 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -27,7 +27,6 @@
 int main(void)
 {
   DEFINE(TSK_ACTIVE_MM,offsetof(struct task_struct, 
active_mm));
-  DEFINE(TSK_CPU,  offsetof(struct task_struct, cpu));
   BLANK();
   DEFINE(TSK_TI_CPU,   offsetof(struct task_struct, thread_info.cpu));
   DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, 
thread_info.flags));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 17962452e31d..6a98f1a38c29 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -412,7 +412,7 @@ SYM_FUNC_END(__create_page_tables)
scs_load \tsk
 
adr_l   \tmp1, __per_cpu_offset
-   ldr w\tmp2, [\tsk, #TSK_CPU]
+   ldr w\tmp2, [\tsk, #TSK_TI_CPU]
ldr \tmp1, [\tmp1, \tmp2, lsl #3]
set_this_cpu_offset \tmp1
.endm
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index e563d3222d69..e37e4546034e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -93,7 +93,7 @@ int main(void)
 #endif /* CONFIG_PPC64 */
OFFSET(TASK_STACK, task_struct, stack);
 #ifdef CONFIG_SMP
-   OFFSET(TASK_CPU, task_struct, cpu);
+   OFFSET(TASK_CPU, task_struct, thread_info.cpu);
 #endif
 
 #ifdef CONFIG_LIVEPATCH
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9cc7d3dbf439..512d875b45e0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1223,7 +1223,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct 
task_struct *idle)
paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
 THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
-   idle->cpu = cpu;
+   task_thread_info(idle)->cpu = cpu;
secondary_current = current_set[cpu] = idle;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e12b524426b0..37aa521078e7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -750,10 +750,6 @@ struct task_struct {
 #ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node   wake_entry;
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-   /* Current CPU: */
-   unsigned intcpu;
-#endif
unsigned intwakee_flips;
unsigned long   wakee_flip_decay_ts;
struct task_struct  *last_wakee;
@@ -2114,7 +2110,7 @@ static __always_inline bool need_resched(void)
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-   return READ_ONCE(p->cpu);
+   return READ_ONCE(p->thread_info.cpu);
 #else
return READ_ONCE(task_thread_info(p)->cpu);
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3d3e5793e117..79fcbad11450 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1926,11 +1926,7 @@ static inline void __set_task_cpu(struct task_struct *p, 
unsigned int cpu)
 * per-task data have been completed by this moment.
 */
smp_wmb();
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-   WRITE_ONCE(p->cpu, cpu);
-#else
WRITE_ONCE(task_thread_info(p)->cpu, cpu);
-#endif
p->wake_cpu = cpu;
 #endif
 }
-- 
2.30.2



[RFC PATCH 4/8] powerpc: add CPU field to struct thread_info

2021-09-14 Thread Ard Biesheuvel
The CPU field will be moved back into thread_info even when
THREAD_INFO_IN_TASK is enabled, so add it back to powerpc's definition
of struct thread_info.

Signed-off-by: Ard Biesheuvel 
---
 arch/powerpc/include/asm/thread_info.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index b4ec6c7dd72e..5725029aaa29 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -47,6 +47,9 @@
 struct thread_info {
int preempt_count;  /* 0 => preemptable,
   <0 => BUG */
+#ifdef CONFIG_SMP
+   unsigned intcpu;
+#endif
unsigned long   local_flags;/* private flags for thread */
 #ifdef CONFIG_LIVEPATCH
unsigned long *livepatch_sp;
-- 
2.30.2



[RFC PATCH 3/8] s390: add CPU field to struct thread_info

2021-09-14 Thread Ard Biesheuvel
The CPU field will be moved back into thread_info even when
THREAD_INFO_IN_TASK is enabled, so add it back to s390's definition of
struct thread_info.

Signed-off-by: Ard Biesheuvel 
---
 arch/s390/include/asm/thread_info.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/include/asm/thread_info.h 
b/arch/s390/include/asm/thread_info.h
index e6674796aa6f..b2ffcb4fe000 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -37,6 +37,7 @@
 struct thread_info {
unsigned long   flags;  /* low level flags */
unsigned long   syscall_work;   /* SYSCALL_WORK_ flags */
+   unsigned intcpu;/* current CPU */
 };
 
 /*
-- 
2.30.2



[RFC PATCH 2/8] x86: add CPU field to struct thread_info

2021-09-14 Thread Ard Biesheuvel
The CPU field will be moved back into thread_info even when
THREAD_INFO_IN_TASK is enabled, so add it back to x86's definition of
struct thread_info.

Signed-off-by: Ard Biesheuvel 
---
 arch/x86/include/asm/thread_info.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index cf132663c219..ebec69c35e95 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -57,6 +57,9 @@ struct thread_info {
unsigned long   flags;  /* low level flags */
unsigned long   syscall_work;   /* SYSCALL_WORK_ flags */
u32 status; /* thread synchronous flags */
+#ifdef CONFIG_SMP
+   u32 cpu;/* current CPU */
+#endif
 };
 
 #define INIT_THREAD_INFO(tsk)  \
-- 
2.30.2



[RFC PATCH 1/8] arm64: add CPU field to struct thread_info

2021-09-14 Thread Ard Biesheuvel
The CPU field will be moved back into thread_info even when
THREAD_INFO_IN_TASK is enabled, so add it back to arm64's definition of
struct thread_info.

Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/include/asm/thread_info.h | 1 +
 arch/arm64/kernel/asm-offsets.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/thread_info.h 
b/arch/arm64/include/asm/thread_info.h
index 6623c99f0984..c02bc8c183c3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,6 +42,7 @@ struct thread_info {
void*scs_base;
void*scs_sp;
 #endif
+   u32 cpu;
 };
 
 #define thread_saved_pc(tsk)   \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 551427ae8cc5..cee9f3e9f906 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -29,6 +29,7 @@ int main(void)
   DEFINE(TSK_ACTIVE_MM,offsetof(struct task_struct, 
active_mm));
   DEFINE(TSK_CPU,  offsetof(struct task_struct, cpu));
   BLANK();
+  DEFINE(TSK_TI_CPU,   offsetof(struct task_struct, thread_info.cpu));
   DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, 
thread_info.flags));
   DEFINE(TSK_TI_PREEMPT,   offsetof(struct task_struct, 
thread_info.preempt_count));
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-- 
2.30.2



[RFC PATCH 0/8] Move task_struct::cpu back into thread_info

2021-09-14 Thread Ard Biesheuvel
Commit c65eacbe290b ("sched/core: Allow putting thread_info into
task_struct") mentions that, along with moving thread_info into
task_struct, the cpu field is moved out of the former into the latter,
but does not explain why.

While collaborating with Keith on adding THREAD_INFO_IN_TASK support to
ARM, we noticed that keeping CPU in task_struct is problematic for
architectures that define raw_smp_processor_id() in terms of this field,
as it requires linux/sched.h to be included, which causes a lot of pain
in terms of circular dependencies (or 'header soup', as the original
commit refers to it).

For examples of how existing architectures work around this, please
refer to patches #6 or #7. In the former case, it uses an awful
asm-offsets hack to index thread_info/current without using its type
definition. The latter approach simply keeps a copy of the task_struct
CPU field in thread_info, and keeps it in sync at context switch time.

Patch #8 reverts this latter approach for ARM, but this code is still
under review so it does not currently apply to mainline.

We also discussed introducing yet another Kconfig symbol to indicate
that the arch has THREAD_INFO_IN_TASK enabled but still prefers to keep
its CPU field in thread_info, but simply keeping it in thread_info in
all cases seems to be the cleanest approach here.

Cc: Keith Packard 
Cc: Russell King 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Christophe Leroy 
Cc: Paul Mackerras 
Cc: Paul Walmsley 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Heiko Carstens 
Cc: Vasily Gorbik 
Cc: Christian Borntraeger 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: Peter Zijlstra 
Cc: Kees Cook 
Cc: Andy Lutomirski 
Cc: Linus Torvalds 
Cc: Arnd Bergmann 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-s...@vger.kernel.org

Ard Biesheuvel (8):
  arm64: add CPU field to struct thread_info
  x86: add CPU field to struct thread_info
  s390: add CPU field to struct thread_info
  powerpc: add CPU field to struct thread_info
  sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y
  powerpc: smp: remove hack to obtain offset of task_struct::cpu
  riscv: rely on core code to keep thread_info::cpu updated
  ARM: rely on core code to keep thread_info::cpu updated

 arch/arm/include/asm/switch_to.h   | 14 --
 arch/arm/kernel/smp.c  |  3 ---
 arch/arm64/include/asm/thread_info.h   |  1 +
 arch/arm64/kernel/asm-offsets.c|  2 +-
 arch/arm64/kernel/head.S   |  2 +-
 arch/powerpc/Makefile  | 11 ---
 arch/powerpc/include/asm/smp.h | 17 +
 arch/powerpc/include/asm/thread_info.h |  3 +++
 arch/powerpc/kernel/asm-offsets.c  |  4 +---
 arch/powerpc/kernel/smp.c  |  2 +-
 arch/riscv/kernel/asm-offsets.c|  1 -
 arch/riscv/kernel/entry.S  |  5 -
 arch/riscv/kernel/head.S   |  1 -
 arch/s390/include/asm/thread_info.h|  1 +
 arch/x86/include/asm/thread_info.h |  3 +++
 include/linux/sched.h  |  6 +-
 kernel/sched/sched.h   |  4 
 17 files changed, 14 insertions(+), 66 deletions(-)

-- 
2.30.2



Re: [PATCH] powerpc/32: Add support for out-of-line static calls

2021-08-31 Thread Ard Biesheuvel
On Tue, 31 Aug 2021 at 10:53, Peter Zijlstra  wrote:
>
> On Tue, Aug 31, 2021 at 08:05:21AM +, Christophe Leroy wrote:
>
> > +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
> > + asm(".pushsection .text, \"ax\" \n" \
> > + ".align 4   \n" \
> > + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
> > + STATIC_CALL_TRAMP_STR(name) ":  \n" \
> > + "   blr \n" \
> > + "   nop \n" \
> > + "   nop \n" \
> > + "   nop \n" \
> > + ".type " STATIC_CALL_TRAMP_STR(name) ", @function   \n" \
> > + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " 
> > STATIC_CALL_TRAMP_STR(name) " \n" \
> > + ".popsection\n")
>
> > +static int patch_trampoline_32(u32 *addr, unsigned long target)
> > +{
> > + int err;
> > +
> > + err = patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(_R12, 
> > PPC_HA(target;
> > + err |= patch_instruction(addr++, ppc_inst(PPC_RAW_ADDI(_R12, _R12, 
> > PPC_LO(target;
> > + err |= patch_instruction(addr++, ppc_inst(PPC_RAW_MTCTR(_R12)));
> > + err |= patch_instruction(addr, ppc_inst(PPC_RAW_BCTR()));
> > +
> > + return err;
> > +}
>
> There can be concurrent execution and modification; the above doesn't
> look safe in that regard. What happens if you've say, done the first
> two, but not the latter two and execution happens (on a different
> CPU or through IRQ context, etc..)?
>
> > +void arch_static_call_transform(void *site, void *tramp, void *func, bool 
> > tail)
> > +{
> > + int err;
> > + unsigned long target = (long)func;
> > +
> > + if (!tramp)
> > + return;
> > +
> > + mutex_lock(&text_mutex);
> > +
> > + if (!func)
> > + err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
> > + else if (is_offset_in_branch_range((long)target - (long)tramp))
> > + err = patch_branch(tramp, target, 0);
>
> These two are single instruction modifications and I'm assuming the
> hardware is sane enough that execution sees either the old or the new
> instruction. So this should work.
>
> > + else if (IS_ENABLED(CONFIG_PPC32))
> > + err = patch_trampoline_32(tramp, target);
> > + else
> > + BUILD_BUG();
> > +
> > + mutex_unlock(&text_mutex);
> > +
> > + if (err)
> > + panic("%s: patching failed %pS at %pS\n", __func__, func, 
> > tramp);
> > +}
> > +EXPORT_SYMBOL_GPL(arch_static_call_transform);
>
> One possible solution that we explored on ARM64, was having the
> trampoline be in 2 slots:
>
>
> b 1f
>
> 1:  blr
> nop
> nop
> nop
>
> 2:  blr
> nop
> nop
> nop
>
> Where initially the first slot is active (per "b 1f"), then you write
> the second slot, and as a final act, re-write the initial branch to
> point to slot 2.
>
> Then you execute synchronize_rcu_tasks() under your text mutex
> (careful!) to ensure all users of your slot1 are gone and the next
> modification repeats the whole thing, except for using slot1 etc..
>
> Eventually I think Ard came up with the latest ARM64 proposal which puts
> a literal in a RO section (could be the text section I suppose) and
> loads and branches to that.
>

Yes. The main reason is simply that anything else is premature
optimization: we have a clear use case (CFI) where out-of-line static
calls are faster than compiler generated indirect calls, even if the
static call sequence is based on a literal load and an indirect
branch, but CFI is not upstream [yet].

Once other use cases emerge, we will revisit this.



> Anyway, the thing is, you can really only modify a single instruction at
> the time and need to ensure concurrent execution is correct.


Re: [PATCH 0/2] Fix arm64 boot regression in 5.14

2021-07-20 Thread Ard Biesheuvel
On Tue, 20 Jul 2021 at 14:35, Will Deacon  wrote:
>
> Hi folks,
>
> Jonathan reports [1] that commit c742199a014d ("mm/pgtable: add stubs
> for {pmd/pub}_{set/clear}_huge") breaks the boot on arm64 when huge
> mappings are used to map the kernel linear map but the VA size is
> configured such that PUDs are folded. This is because the non-functional
> pud_set_huge() stub is used to create the linear map, which results in
> 1GB holes and a fatal data abort when the kernel attemps to access them.
>
> Digging further into the issue, it also transpired that huge-vmap is
> silently disabled in these configurations as well [2], despite working
> correctly in 5.13. The latter issue causes the pgtable selftests to
> scream due to a failing consistency check [3].
>
> Rather than leave mainline in a terminally broken state for arm64 while
> we figure this out, revert the offending commit to get things working
> again. Unfortunately, reverting the change in isolation causes a build
> breakage for 32-bit PowerPC 8xx machines which recently started relying
> on the problematic stubs to support pte-level huge-vmap entries [4].
> Since Christophe is away at the moment, this series first reverts the
> PowerPC 8xx change in order to avoid breaking the build.
>
> I would really like this to land for -rc3 and I can take these via the
> arm64 fixes queue if the PowerPC folks are alright with them.
>
> Cheers,
>
> Will
>
> [1] https://lore.kernel.org/r/20210717160118.9855-1-jonat...@marek.ca
> [2] https://lore.kernel.org/r/20210719104918.GA6440@willie-the-truck
> [3] 
> https://lore.kernel.org/r/camuhmdxshordox-xxaeufdw3wx2peggfsqhvshvznkcgk-y...@mail.gmail.com/
> [4] 
> https://lore.kernel.org/r/8b972f1c03fb6bd59953035f0a3e4d26659de4f8.1620795204.git.christophe.le...@csgroup.eu/
>
> Cc: Ard Biesheuvel 
> Cc: Michael Ellerman 
> Cc: Thomas Gleixner 
> Cc: Benjamin Herrenschmidt 
> Cc: Christophe Leroy 
> Cc: Paul Mackerras 
> Cc: Jonathan Marek 
> Cc: Catalin Marinas 
> Cc: Andrew Morton 
> Cc: Nicholas Piggin 
> Cc: Mike Rapoport 
> Cc: Mark Rutland 
> Cc: Geert Uytterhoeven 
> Cc: Marc Zyngier 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-arm-ker...@lists.infradead.org
>
> --->8
>
> Jonathan Marek (1):
>   Revert "mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge"
>
> Will Deacon (1):
>   Revert "powerpc/8xx: add support for huge pages on VMAP and VMALLOC"
>

Reviewed-by: Ard Biesheuvel 


>  arch/arm64/mm/mmu.c  | 20 -
>  arch/powerpc/Kconfig |  2 +-
>  arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 43 
>  arch/x86/mm/pgtable.c| 34 +++-
>  include/linux/pgtable.h  | 26 +---
>  5 files changed, 25 insertions(+), 100 deletions(-)
>
> --
> 2.32.0.402.g57bb445576-goog
>


Re: [PATCH 6/8] drivers: firmware: efi: libstub: enable generic commandline

2021-03-31 Thread Ard Biesheuvel
(+ Arvind)

On Tue, 30 Mar 2021 at 19:57, Daniel Walker  wrote:
>
> This adds code to handle the generic command line changes.
> The efi code appears that it doesn't benefit as much from this design
> as it could.
>
> For example, if you had a prepend command line with "nokaslr" then
> you might be helpful to re-enable it in the boot loader or dts,
> but there appears to be no way to re-enable kaslr or some of the
> other options.
>
> Cc: xe-linux-exter...@cisco.com
> Signed-off-by: Daniel Walker 
> ---
>  .../firmware/efi/libstub/efi-stub-helper.c| 35 +++
>  drivers/firmware/efi/libstub/efi-stub.c   |  7 
>  drivers/firmware/efi/libstub/efistub.h|  1 +
>  drivers/firmware/efi/libstub/x86-stub.c   | 13 +--
>  4 files changed, 54 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c 
> b/drivers/firmware/efi/libstub/efi-stub-helper.c
> index aa8da0a49829..c155837cedc9 100644
> --- a/drivers/firmware/efi/libstub/efi-stub-helper.c
> +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  #include  /* For CONSOLE_LOGLEVEL_* */
> +#include 
>  #include 
>  #include 
>
> @@ -172,6 +173,40 @@ int efi_printk(const char *fmt, ...)
> return printed;
>  }
>
> +/**
> + * efi_handle_cmdline() - handle adding in building parts of the command line
> + * @cmdline:   kernel command line
> + *
> + * Add in the generic parts of the commandline and start the parsing of the
> + * command line.
> + *
> + * Return: status code
> + */
> +efi_status_t efi_handle_cmdline(char const *cmdline)
> +{
> +   efi_status_t status;
> +
> +   status = efi_parse_options(CMDLINE_PREPEND);
> +   if (status != EFI_SUCCESS) {
> +   efi_err("Failed to parse options\n");
> +   return status;
> +   }

Even though I am not a fan of the 'success handling' pattern,
duplicating the exact same error handling three times is not great
either. Could we reuse more of the code here?
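
E.g. something like this (untested sketch, helper name invented, reusing
efi_parse_options()/efi_err() from this patch) would fold the three calls
into one loop:

static efi_status_t efi_parse_option_list(const char *const options[],
					  unsigned int count)
{
	efi_status_t status;
	unsigned int i;

	for (i = 0; i < count; i++) {
		status = efi_parse_options(options[i]);
		if (status != EFI_SUCCESS) {
			efi_err("Failed to parse options\n");
			return status;
		}
	}
	return EFI_SUCCESS;
}

efi_status_t efi_handle_cmdline(char const *cmdline)
{
	const char *const opts[] = {
		CMDLINE_PREPEND,
		IS_ENABLED(CONFIG_CMDLINE_OVERRIDE) ? "" : cmdline,
		CMDLINE_APPEND,
	};

	return efi_parse_option_list(opts, ARRAY_SIZE(opts));
}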

> +
> +   status = efi_parse_options(IS_ENABLED(CONFIG_CMDLINE_OVERRIDE) ? "" : 
> cmdline);

What is the point of calling efi_parse_options() with an empty string?



> +   if (status != EFI_SUCCESS) {
> +   efi_err("Failed to parse options\n");
> +   return status;
> +   }
> +
> +   status = efi_parse_options(CMDLINE_APPEND);
> +   if (status != EFI_SUCCESS) {
> +   efi_err("Failed to parse options\n");
> +   return status;
> +   }
> +
> +   return EFI_SUCCESS;
> +}
> +
>  /**
>   * efi_parse_options() - Parse EFI command line options
>   * @cmdline:   kernel command line
> diff --git a/drivers/firmware/efi/libstub/efi-stub.c 
> b/drivers/firmware/efi/libstub/efi-stub.c
> index 26e69788f27a..760480248adf 100644
> --- a/drivers/firmware/efi/libstub/efi-stub.c
> +++ b/drivers/firmware/efi/libstub/efi-stub.c
> @@ -172,6 +172,12 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
> goto fail;
> }
>
> +#ifdef CONFIG_GENERIC_CMDLINE
> +   status = efi_handle_cmdline(cmdline_ptr);
> +   if (status != EFI_SUCCESS) {
> +   goto fail_free_cmdline;
> +   }
> +#else
> if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
> IS_ENABLED(CONFIG_CMDLINE_FORCE) ||

Does this mean CONFIG_GENERIC_CMDLINE does not replace CMDLINE_EXTEND
/ CMDLINE_FORCE etc, but introduces yet another variant on top of
those?

That does not seem like an improvement to me. I think it is great that
you are cleaning this up, but only if it means we can get rid of the
old implementation.

> cmdline_size == 0) {
> @@ -189,6 +195,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
> goto fail_free_cmdline;
> }
> }
> +#endif
>
> efi_info("Booting Linux Kernel...\n");
>
> diff --git a/drivers/firmware/efi/libstub/efistub.h 
> b/drivers/firmware/efi/libstub/efistub.h
> index cde0a2ef507d..07c7f9fdfffc 100644
> --- a/drivers/firmware/efi/libstub/efistub.h
> +++ b/drivers/firmware/efi/libstub/efistub.h
> @@ -800,6 +800,7 @@ efi_status_t efi_relocate_kernel(unsigned long 
> *image_addr,
>  unsigned long alignment,
>  unsigned long min_addr);
>
> +efi_status_t efi_handle_cmdline(char const *cmdline);
>  efi_status_t efi_parse_options(char const *cmdline);
>
>  void efi_parse_option_graphics(char *option);
> diff --git a/drivers/firmware/efi/libstub/x86-stub.c 
> b/drivers/firmware/efi/libstub/x86-stub.c
> index f14c4ff5839f..30ad8fb7122d 100644
> --- a/drivers/firmware/efi/libstub/x86-stub.c
> +++ b/drivers/firmware/efi/libstub/x86-stub.c
> @@ -673,6 +673,8 @@ unsigned long efi_main(efi_handle_t handle,
> unsigned long bzimage_addr = (unsigned long)startup_32;
> unsigned long buffer_start, buffer_end;
> struct setup_header *hdr = &boot_params->hdr;
> + 

Re: [PATCH v2 7/7] CMDLINE: x86: convert to generic builtin command line

2021-03-08 Thread Ard Biesheuvel
On Tue, 9 Mar 2021 at 01:03, Daniel Walker  wrote:
>
> This updates the x86 code to use the CONFIG_GENERIC_CMDLINE
> option.
>
> Cc: xe-linux-exter...@cisco.com
> Signed-off-by: Ruslan Ruslichenko 
> Signed-off-by: Ruslan Bilovol 
> Signed-off-by: Daniel Walker 
> ---
>  arch/x86/Kconfig| 44 +
>  arch/x86/kernel/setup.c | 18 ++
>  drivers/firmware/efi/libstub/x86-stub.c |  2 +-
>  3 files changed, 4 insertions(+), 60 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 21f851179ff0..3950f9bf9855 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -115,6 +115,7 @@ config X86
> select EDAC_SUPPORT
> select GENERIC_CLOCKEVENTS_BROADCASTif X86_64 || (X86_32 && 
> X86_LOCAL_APIC)
> select GENERIC_CLOCKEVENTS_MIN_ADJUST
> +   select GENERIC_CMDLINE
> select GENERIC_CMOS_UPDATE
> select GENERIC_CPU_AUTOPROBE
> select GENERIC_CPU_VULNERABILITIES
> @@ -2368,49 +2369,6 @@ choice
>
>  endchoice
>
> -config CMDLINE_BOOL
> -   bool "Built-in kernel command line"
> -   help
> - Allow for specifying boot arguments to the kernel at
> - build time.  On some systems (e.g. embedded ones), it is
> - necessary or convenient to provide some or all of the
> - kernel boot arguments with the kernel itself (that is,
> - to not rely on the boot loader to provide them.)
> -
> - To compile command line arguments into the kernel,
> - set this option to 'Y', then fill in the
> - boot arguments in CONFIG_CMDLINE.
> -
> - Systems with fully functional boot loaders (i.e. non-embedded)
> - should leave this option set to 'N'.
> -
> -config CMDLINE
> -   string "Built-in kernel command string"
> -   depends on CMDLINE_BOOL
> -   default ""
> -   help
> - Enter arguments here that should be compiled into the kernel
> - image and used at boot time.  If the boot loader provides a
> - command line at boot time, it is appended to this string to
> - form the full kernel command line, when the system boots.
> -
> - However, you can use the CONFIG_CMDLINE_OVERRIDE option to
> - change this behavior.
> -
> - In most cases, the command line (whether built-in or provided
> - by the boot loader) should specify the device for the root
> - file system.
> -
> -config CMDLINE_OVERRIDE
> -   bool "Built-in command line overrides boot loader arguments"
> -   depends on CMDLINE_BOOL && CMDLINE != ""
> -   help
> - Set this option to 'Y' to have the kernel ignore the boot loader
> - command line, and use ONLY the built-in command line.
> -
> - This is used to work around broken boot loaders.  This should
> - be set to 'N' under normal conditions.
> -
>  config MODIFY_LDT_SYSCALL
> bool "Enable the LDT (local descriptor table)" if EXPERT
> default y
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 740f3bdb3f61..e748c3e5c1ae 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -48,6 +48,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  /*
>   * max_low_pfn_mapped: highest directly mapped pfn < 4 GB
> @@ -162,9 +163,6 @@ unsigned long saved_video_mode;
>  #define RAMDISK_LOAD_FLAG  0x4000
>
>  static char __initdata command_line[COMMAND_LINE_SIZE];
> -#ifdef CONFIG_CMDLINE_BOOL
> -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
> -#endif
>
>  #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
>  struct edd edd;
> @@ -884,19 +882,7 @@ void __init setup_arch(char **cmdline_p)
> bss_resource.start = __pa_symbol(__bss_start);
> bss_resource.end = __pa_symbol(__bss_stop)-1;
>
> -#ifdef CONFIG_CMDLINE_BOOL
> -#ifdef CONFIG_CMDLINE_OVERRIDE
> -   strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
> -#else
> -   if (builtin_cmdline[0]) {
> -   /* append boot loader cmdline to builtin */
> -   strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
> -   strlcat(builtin_cmdline, boot_command_line, 
> COMMAND_LINE_SIZE);
> -   strlcpy(boot_command_line, builtin_cmdline, 
> COMMAND_LINE_SIZE);
> -   }
> -#endif
> -#endif
> -
> +   cmdline_add_builtin(boot_command_line, NULL, COMMAND_LINE_SIZE);
> strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
> *cmdline_p = command_line;
>
> diff --git a/drivers/firmware/efi/libstub/x86-stub.c 
> b/drivers/firmware/efi/libstub/x86-stub.c
> index f14c4ff5839f..9538c9d4a0bc 100644
> --- a/drivers/firmware/efi/libstub/x86-stub.c
> +++ b/drivers/firmware/efi/libstub/x86-stub.c
> @@ -736,7 +736,7 @@ unsigned long efi_main(efi_handle_t handle,
> }
>
>  #ifdef CONFIG_CMDLINE_BOOL

Does this CMDLINE_BOOL check need to be dropped as well?


Re: [PATCH 1/2] crypto: talitos - Work around SEC6 ERRATA (AES-CTR mode data size error)

2021-01-21 Thread Ard Biesheuvel
On Thu, 21 Jan 2021 at 10:54, Christophe Leroy
 wrote:
>
>
>
> Le 21/01/2021 à 08:31, Ard Biesheuvel a écrit :
> > On Thu, 21 Jan 2021 at 06:35, Christophe Leroy
> >  wrote:
> >>
> >>
> >>
> >> Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
> >>> On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
> >>>  wrote:
> >>>>
> >>>> Talitos Security Engine AESU considers any input
> >>>> data size that is not a multiple of 16 bytes to be an error.
> >>>> This is not a problem in general, except for Counter mode
> >>>> that is a stream cipher and can have an input of any size.
> >>>>
> >>>> Test Manager for ctr(aes) fails on 4th test vector which has
> >>>> a length of 499 while all previous vectors which have a 16 bytes
> >>>> multiple length succeed.
> >>>>
> >>>> As suggested by Freescale, round up the input data length to the
> >>>> nearest 16 bytes.
> >>>>
> >>>> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> >>>> Signed-off-by: Christophe Leroy 
> >>>
> >>> Doesn't this cause the hardware to write outside the given buffer?
> >>
> >>
> >> Only the input length is modified. Not the output length.
> >>
> >> The ERRATA says:
> >>
> >> The input data length (in the descriptor) can be rounded up to the nearest 
> >> 16B. Set the
> >> data-in length (in the descriptor) to include X bytes of data beyond the 
> >> payload. Set the
> >> data-out length to only output the relevant payload (don't need to output 
> >> the padding).
> >> SEC reads from memory are not destructive, so the extra bytes included in 
> >> the AES-CTR
> >> operation can be whatever bytes are contiguously trailing the payload.
> >
> > So what happens if the input is not 16 byte aligned, and rounding it
> > up causes it to extend across a page boundary into a page that is not
> > mapped by the IOMMU/SMMU?
> >
>
> What is the IOMMU/SMMU ?
>
> The mpc8xx, mpc82xx and mpc83xx which embed the Talitos Security Engine don't 
> have such thing, the
> security engine uses DMA and has direct access to the memory bus for reading 
> and writing.
>

OK, good. So the only case where this could break is when the DMA
access spills over into a page that does not exist, and I suppose this
could only happen if the transfer involves a buffer located at the
very top of DRAM, right?


Re: [PATCH 1/2] crypto: talitos - Work around SEC6 ERRATA (AES-CTR mode data size error)

2021-01-20 Thread Ard Biesheuvel
On Thu, 21 Jan 2021 at 06:35, Christophe Leroy
 wrote:
>
>
>
> Le 20/01/2021 à 23:23, Ard Biesheuvel a écrit :
> > On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
> >  wrote:
> >>
> >> Talitos Security Engine AESU considers any input
> >> data size that is not a multiple of 16 bytes to be an error.
> >> This is not a problem in general, except for Counter mode
> >> that is a stream cipher and can have an input of any size.
> >>
> >> Test Manager for ctr(aes) fails on 4th test vector which has
> >> a length of 499 while all previous vectors which have a 16 bytes
> >> multiple length succeed.
> >>
> >> As suggested by Freescale, round up the input data length to the
> >> nearest 16 bytes.
> >>
> >> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> >> Signed-off-by: Christophe Leroy 
> >
> > Doesn't this cause the hardware to write outside the given buffer?
>
>
> Only the input length is modified. Not the output length.
>
> The ERRATA says:
>
> The input data length (in the descriptor) can be rounded up to the nearest 
> 16B. Set the
> data-in length (in the descriptor) to include X bytes of data beyond the 
> payload. Set the
> data-out length to only output the relevant payload (don't need to output the 
> padding).
> SEC reads from memory are not destructive, so the extra bytes included in the 
> AES-CTR
> operation can be whatever bytes are contiguously trailing the payload.

So what happens if the input is not 16 byte aligned, and rounding it
up causes it to extend across a page boundary into a page that is not
mapped by the IOMMU/SMMU?


Re: [PATCH 1/2] crypto: talitos - Work around SEC6 ERRATA (AES-CTR mode data size error)

2021-01-20 Thread Ard Biesheuvel
On Wed, 20 Jan 2021 at 19:59, Christophe Leroy
 wrote:
>
> Talitos Security Engine AESU considers any input
> data size that is not a multiple of 16 bytes to be an error.
> This is not a problem in general, except for Counter mode
> that is a stream cipher and can have an input of any size.
>
> Test Manager for ctr(aes) fails on 4th test vector which has
> a length of 499 while all previous vectors which have a 16 bytes
> multiple length succeed.
>
> As suggested by Freescale, round up the input data length to the
> nearest 16 bytes.
>
> Fixes: 5e75ae1b3cef ("crypto: talitos - add new crypto modes")
> Signed-off-by: Christophe Leroy 

Doesn't this cause the hardware to write outside the given buffer?

> ---
>  drivers/crypto/talitos.c | 28 
>  drivers/crypto/talitos.h |  1 +
>  2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index 4fd85f31630a..b656983c1ef4 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
> @@ -1093,11 +1093,12 @@ static void ipsec_esp_decrypt_hwauth_done(struct 
> device *dev,
>   */
>  static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
>  unsigned int offset, int datalen, int elen,
> -struct talitos_ptr *link_tbl_ptr)
> +struct talitos_ptr *link_tbl_ptr, int align)
>  {
> int n_sg = elen ? sg_count + 1 : sg_count;
> int count = 0;
> int cryptlen = datalen + elen;
> +   int padding = ALIGN(cryptlen, align) - cryptlen;
>
> while (cryptlen && sg && n_sg--) {
> unsigned int len = sg_dma_len(sg);
> @@ -1121,7 +1122,7 @@ static int sg_to_link_tbl_offset(struct scatterlist 
> *sg, int sg_count,
> offset += datalen;
> }
> to_talitos_ptr(link_tbl_ptr + count,
> -  sg_dma_address(sg) + offset, len, 0);
> +  sg_dma_address(sg) + offset, sg_next(sg) ? len 
> : len + padding, 0);
> to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0);
> count++;
> cryptlen -= len;
> @@ -1144,10 +1145,11 @@ static int talitos_sg_map_ext(struct device *dev, 
> struct scatterlist *src,
>   unsigned int len, struct talitos_edesc *edesc,
>   struct talitos_ptr *ptr, int sg_count,
>   unsigned int offset, int tbl_off, int elen,
> - bool force)
> + bool force, int align)
>  {
> struct talitos_private *priv = dev_get_drvdata(dev);
> bool is_sec1 = has_ftr_sec1(priv);
> +   int aligned_len = ALIGN(len, align);
>
> if (!src) {
> to_talitos_ptr(ptr, 0, 0, is_sec1);
> @@ -1155,22 +1157,22 @@ static int talitos_sg_map_ext(struct device *dev, 
> struct scatterlist *src,
> }
> to_talitos_ptr_ext_set(ptr, elen, is_sec1);
> if (sg_count == 1 && !force) {
> -   to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, 
> is_sec1);
> +   to_talitos_ptr(ptr, sg_dma_address(src) + offset, 
> aligned_len, is_sec1);
> return sg_count;
> }
> if (is_sec1) {
> -   to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, 
> is_sec1);
> +   to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, 
> aligned_len, is_sec1);
> return sg_count;
> }
> sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, elen,
> -&edesc->link_tbl[tbl_off]);
> +&edesc->link_tbl[tbl_off], align);
> if (sg_count == 1 && !force) {
> /* Only one segment now, so no link tbl needed*/
> copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1);
> return sg_count;
> }
> to_talitos_ptr(ptr, edesc->dma_link_tbl +
> -   tbl_off * sizeof(struct talitos_ptr), len, 
> is_sec1);
> +   tbl_off * sizeof(struct talitos_ptr), 
> aligned_len, is_sec1);
> to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1);
>
> return sg_count;
> @@ -1182,7 +1184,7 @@ static int talitos_sg_map(struct device *dev, struct 
> scatterlist *src,
>   unsigned int offset, int tbl_off)
>  {
> return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset,
> - tbl_off, 0, false);
> + tbl_off, 0, false, 1);
>  }
>
>  /*
> @@ -1251,7 +1253,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, 
> struct aead_request *areq,
>
> ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, 
> &desc->ptr[4],
>  

Re: [PATCH] powerpc: avoid broken GCC __attribute__((optimize))

2020-10-28 Thread Ard Biesheuvel
On Wed, 28 Oct 2020 at 09:04, Ard Biesheuvel  wrote:
>
> Commit 7053f80d9696 ("powerpc/64: Prevent stack protection in early boot")
> introduced a couple of uses of __attribute__((optimize)) with function
> scope, to disable the stack protector in some early boot code.
>
> Unfortunately, and this is documented in the GCC man pages [0], overriding
> function attributes for optimization is broken, and is only supported for
> debug scenarios, not for production: the problem appears to be that
> setting GCC -f flags using this method will cause it to forget about some
> or all other optimization settings that have been applied.
>
> So the only safe way to disable the stack protector is to disable it for
> the entire source file.
>
> [0] https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
>
> Cc: Michael Ellerman 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Nick Desaulniers 
> Cc: Arvind Sankar 
> Cc: Randy Dunlap 
> Cc: Josh Poimboeuf 
> Cc: Thomas Gleixner 
> Cc: Alexei Starovoitov 
> Cc: Daniel Borkmann 
> Cc: Peter Zijlstra (Intel) 
> Cc: Geert Uytterhoeven 
> Cc: Kees Cook 
> Fixes: 7053f80d9696 ("powerpc/64: Prevent stack protection in early boot")
> Signed-off-by: Ard Biesheuvel 
> ---
> Related discussion here:
> https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=gfhpzoj_hc...@mail.gmail.com/
>
> TL;DR using __attribute__((optimize("-fno-gcse"))) in the BPF interpreter
> causes the compiler to forget about -fno-asynchronous-unwind-tables passed
> on the command line, resulting in unexpected .eh_frame sections in vmlinux.
>
>  arch/powerpc/kernel/Makefile   | 3 +++
>  arch/powerpc/kernel/paca.c | 2 +-
>  arch/powerpc/kernel/setup.h| 6 --
>  arch/powerpc/kernel/setup_64.c | 2 +-
>  4 files changed, 5 insertions(+), 8 deletions(-)
>

FYI, I was notified by one of the robots that I missed one occurrence
of __nostackprotector in arch/powerpc/kernel/paca.c.

Let me know if I need to resend.


> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index bf0bf1b900d2..fe2ef598e2ea 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -173,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n
>  KCOV_INSTRUMENT_setup_64.o := n
>  KCOV_INSTRUMENT_paca.o := n
>
> +CFLAGS_setup_64.o  += -fno-stack-protector
> +CFLAGS_paca.o  += -fno-stack-protector
> +
>  extra-$(CONFIG_PPC_FPU)+= fpu.o
>  extra-$(CONFIG_ALTIVEC)+= vector.o
>  extra-$(CONFIG_PPC64)  += entry_64.o
> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
> index 0ad15768d762..fe70834d7283 100644
> --- a/arch/powerpc/kernel/paca.c
> +++ b/arch/powerpc/kernel/paca.c
> @@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, 
> unsigned long limit)
>  struct paca_struct **paca_ptrs __read_mostly;
>  EXPORT_SYMBOL(paca_ptrs);
>
> -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, 
> int cpu)
> +void __init initialise_paca(struct paca_struct *new_paca, int cpu)
>  {
>  #ifdef CONFIG_PPC_PSERIES
> new_paca->lppaca_ptr = NULL;
> diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
> index 2ec835574cc9..2dd0d9cb5a20 100644
> --- a/arch/powerpc/kernel/setup.h
> +++ b/arch/powerpc/kernel/setup.h
> @@ -8,12 +8,6 @@
>  #ifndef __ARCH_POWERPC_KERNEL_SETUP_H
>  #define __ARCH_POWERPC_KERNEL_SETUP_H
>
> -#ifdef CONFIG_CC_IS_CLANG
> -#define __nostackprotector
> -#else
> -#define __nostackprotector 
> __attribute__((__optimize__("no-stack-protector")))
> -#endif
> -
>  void initialize_cache_info(void);
>  void irqstack_early_init(void);
>
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index bb9cab3641d7..da447a62ea1e 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -283,7 +283,7 @@ void __init record_spr_defaults(void)
>   * device-tree is not accessible via normal means at this point.
>   */
>
> -void __init __nostackprotector early_setup(unsigned long dt_ptr)
> +void __init early_setup(unsigned long dt_ptr)
>  {
> static __initdata struct paca_struct boot_paca;
>
> --
> 2.17.1
>


[PATCH] powerpc: avoid broken GCC __attribute__((optimize))

2020-10-28 Thread Ard Biesheuvel
Commit 7053f80d9696 ("powerpc/64: Prevent stack protection in early boot")
introduced a couple of uses of __attribute__((optimize)) with function
scope, to disable the stack protector in some early boot code.

Unfortunately, and this is documented in the GCC man pages [0], overriding
function attributes for optimization is broken, and is only supported for
debug scenarios, not for production: the problem appears to be that
setting GCC -f flags using this method will cause it to forget about some
or all other optimization settings that have been applied.

So the only safe way to disable the stack protector is to disable it for
the entire source file.

[0] https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nick Desaulniers 
Cc: Arvind Sankar 
Cc: Randy Dunlap 
Cc: Josh Poimboeuf 
Cc: Thomas Gleixner 
Cc: Alexei Starovoitov 
Cc: Daniel Borkmann 
Cc: Peter Zijlstra (Intel) 
Cc: Geert Uytterhoeven 
Cc: Kees Cook 
Fixes: 7053f80d9696 ("powerpc/64: Prevent stack protection in early boot")
Signed-off-by: Ard Biesheuvel 
---
Related discussion here:
https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=gfhpzoj_hc...@mail.gmail.com/

TL;DR using __attribute__((optimize("-fno-gcse"))) in the BPF interpreter
causes the compiler to forget about -fno-asynchronous-unwind-tables passed
on the command line, resulting in unexpected .eh_frame sections in vmlinux.

 arch/powerpc/kernel/Makefile   | 3 +++
 arch/powerpc/kernel/paca.c | 2 +-
 arch/powerpc/kernel/setup.h| 6 --
 arch/powerpc/kernel/setup_64.c | 2 +-
 4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index bf0bf1b900d2..fe2ef598e2ea 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -173,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n
 KCOV_INSTRUMENT_setup_64.o := n
 KCOV_INSTRUMENT_paca.o := n
 
+CFLAGS_setup_64.o  += -fno-stack-protector
+CFLAGS_paca.o  += -fno-stack-protector
+
 extra-$(CONFIG_PPC_FPU)+= fpu.o
 extra-$(CONFIG_ALTIVEC)+= vector.o
 extra-$(CONFIG_PPC64)  += entry_64.o
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0ad15768d762..fe70834d7283 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, 
unsigned long limit)
 struct paca_struct **paca_ptrs __read_mostly;
 EXPORT_SYMBOL(paca_ptrs);
 
-void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, 
int cpu)
+void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 {
 #ifdef CONFIG_PPC_PSERIES
new_paca->lppaca_ptr = NULL;
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 2ec835574cc9..2dd0d9cb5a20 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -8,12 +8,6 @@
 #ifndef __ARCH_POWERPC_KERNEL_SETUP_H
 #define __ARCH_POWERPC_KERNEL_SETUP_H
 
-#ifdef CONFIG_CC_IS_CLANG
-#define __nostackprotector
-#else
-#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
-#endif
-
 void initialize_cache_info(void);
 void irqstack_early_init(void);
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index bb9cab3641d7..da447a62ea1e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -283,7 +283,7 @@ void __init record_spr_defaults(void)
  * device-tree is not accessible via normal means at this point.
  */
 
-void __init __nostackprotector early_setup(unsigned long dt_ptr)
+void __init early_setup(unsigned long dt_ptr)
 {
static __initdata struct paca_struct boot_paca;
 
-- 
2.17.1



Re: [PATCH] tpm: of: avoid __va() translation for event log address

2020-09-27 Thread Ard Biesheuvel
On Mon, 28 Sep 2020 at 07:56, Christophe Leroy
 wrote:
>
>
>
> Le 28/09/2020 à 01:44, Jarkko Sakkinen a écrit :
> > On Fri, Sep 25, 2020 at 09:00:18AM -0300, Jason Gunthorpe wrote:
> >> On Fri, Sep 25, 2020 at 01:29:20PM +0300, Jarkko Sakkinen wrote:
> >>> On Fri, Sep 25, 2020 at 09:00:56AM +0200, Ard Biesheuvel wrote:
> >>>> On Fri, 25 Sep 2020 at 07:56, Jarkko Sakkinen
> >>>>  wrote:
> >>>>>
> >>>>> On Tue, Sep 22, 2020 at 11:41:28AM +0200, Ard Biesheuvel wrote:
> >>>>>> The TPM event log is provided to the OS by the firmware, by loading
> >>>>>> it into an area in memory and passing the physical address via a node
> >>>>>> in the device tree.
> >>>>>>
> >>>>>> Currently, we use __va() to access the memory via the kernel's linear
> >>>>>> map: however, it is not guaranteed that the linear map covers this
> >>>>>> particular address, as we may be running under HIGHMEM on a 32-bit
> >>>>>> architecture, or running firmware that uses a memory type for the
> >>>>>> event log that is omitted from the linear map (such as EfiReserved).
> >>>>>
> >>>>> Makes perfect sense to the level that I wonder if this should have a
> >>>>> fixes tag and/or needs to be backported to the stable kernels?
> >>>>>
> >>>>
> >>>> AIUI, the code was written specifically for ppc64, which is a
> >>>> non-highmem, non-EFI architecture. However, when we start reusing this
> >>>> driver for ARM, this issue could pop up.
> >>>>
> >>>> The code itself has been refactored a couple of times, so I think it
> >>>> will require different versions of the patch for different generations
> >>>> of stable kernels.
> >>>>
> >>>> So perhaps just add Cc: , and wait and see how
> >>>> far back it applies cleanly?
> >>>
> >>> Yeah, I think I'll cc it with some note before the diffstat.
> >>>
> >>> I'm thinking to cap it to only 5.x kernels (at least first) unless it is
> >>> dead easy to backport below that.
> >>
> >> I have this vague recollection of pointing at this before and being
> >> told that it had to be __va for some PPC reason?
> >>
> >> Do check with the PPC people first, I see none on the CC list.
> >>
> >> Jason
> >
> > Thanks, added arch/powerpc maintainers.
> >
>
> As far as I can see, memremap() won't work on PPC32 at least:
>
> IIUC, memremap() calls arch_memremap_wb()
> arch_memremap_wb() calls ioremap_cache()
> In case of failure, then ioremap_wt() and ioremap_wc() are tried.
>
> All ioremap calls end up in __ioremap_caller() which will return NULL in case 
> you try to ioremap RAM.
>
> So the statement "So instead, use memremap(), which will reuse the linear 
> mapping if
> it is valid, or create another mapping otherwise." seems to be wrong, at 
> least for PPC32.
>
> Even for PPC64 which doesn't seem to have the RAM check, I can't see that it 
> will "reuse the linear
> mapping".
>

It is there, please look again. Before any of the above happens,
memremap() will call try_ram_remap() for regions that are covered by a
IORESOURCE_SYSTEM_RAM, and map it using __va() if its PFN is valid and
it is not highmem.

So as far as I can tell, this change has no effect on PPC at all
unless its RAM is not described as IORESOURCE_SYSTEM_RAM.
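
As a minimal sketch of what that means for the driver (assumed shape; the
helper names below are illustrative and not taken from the actual patch),
the event log would be mapped with memremap() instead of being dereferenced
through __va():

#include <linux/io.h>

/* Map the firmware-provided TPM event log at physical address 'pa'. */
static void *tpm_map_event_log(phys_addr_t pa, size_t size)
{
	/*
	 * memremap() reuses the linear map when the region is System RAM
	 * with a valid, non-highmem PFN, and creates a new mapping
	 * otherwise.
	 */
	return memremap(pa, size, MEMREMAP_WB);
}

static void tpm_unmap_event_log(void *log)
{
	memunmap(log);
}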


Re: [PATCH v1] soc: fsl: rcpm: Add ACPI support

2020-09-15 Thread Ard Biesheuvel

On 9/16/20 3:32 AM, Ran Wang wrote:

Hi Ard,

On Tuesday, September 15, 2020 7:10 PM, Ard Biesheuvel wrote:

Subject: Re: [PATCH v1] soc: fsl: rcpm: Add ACPI support

On 9/15/20 1:06 PM, kuldip dwivedi wrote:

Add ACPI support in fsl RCPM driver. This is required to support ACPI
S3 state. S3 is the ACPI sleep state that is known as "sleep" or
"suspend to RAM".
It essentially turns off most power of the system but keeps memory
powered.

Signed-off-by: tanveer 
Signed-off-by: kuldip dwivedi 


Why does the OS need to program this device? Can't this be done by
firmware?


This device is used to tell the HW which IP blocks (such as USB, SDHC, SATA,
etc.) should not be clock gated when the system enters a low power state (so
that the IP can work as a wakeup source). The user does this configuration in
the device tree.


The point of ACPI is *not* to describe a DT topology using a table 
format that is not suited for it. The point of ACPI is to describe a 
machine that is more abstracted from the hardware than is typically 
possible with DT, where the abstractions are implemented by AML code 
that is provided by the firmware, but executed in the context of the OS.


So the idea is *not* finding the shortest possible path to get your 
existing DT driver code running on a system that boots via ACPI. 
Instead, you should carefully think about the abstract ACPI machine that 
you will expose to the OS, and hide everything else in firmware.


In this particular case, it seems like your USB, SDHC and SATA device 
objects may need power state dependent AML methods that program this 
block directly.





So implement
this RCPM driver to do it in kernel rather than firmware.

Regards,
Ran


---

Notes:
  1. Add ACPI match table
  2. NXP team members are added for confirming HID changes
  3. There is only one node in ACPI so no need to check for
 current device explicitly
  4. These changes are tested on LX2160A and LS1046A platforms

   drivers/soc/fsl/rcpm.c | 22 +++---
   1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/soc/fsl/rcpm.c b/drivers/soc/fsl/rcpm.c index
a093dbe6d2cb..e75a436fb159 100644
--- a/drivers/soc/fsl/rcpm.c
+++ b/drivers/soc/fsl/rcpm.c
@@ -2,10 +2,12 @@
   //
   // rcpm.c - Freescale QorIQ RCPM driver
   //
-// Copyright 2019 NXP
+// Copyright 2019-2020 NXP
+// Copyright 2020 Puresoftware Ltd.
   //
   // Author: Ran Wang 

+#include <linux/acpi.h>
   #include 
   #include 
   #include 
@@ -57,8 +59,13 @@ static int rcpm_pm_prepare(struct device *dev)
rcpm->wakeup_cells + 1);

/*  Wakeup source should refer to current rcpm device */
-   if (ret || (np->phandle != value[0]))
-   continue;
+   if (is_acpi_node(dev->fwnode)) {
+   if (ret)
+   continue;
+   } else {
+   if (ret || (np->phandle != value[0]))
+   continue;
+   }

/* Property "#fsl,rcpm-wakeup-cells" of rcpm node defines the
 * number of IPPDEXPCR register cells, and "fsl,rcpm-wakeup"
@@ -139,10 +146,19 @@ static const struct of_device_id rcpm_of_match[]

= {

   };
   MODULE_DEVICE_TABLE(of, rcpm_of_match);

+#ifdef CONFIG_ACPI
+static const struct acpi_device_id rcpm_acpi_match[] = {
+   { "NXP0015", },
+   { }
+};
+MODULE_DEVICE_TABLE(acpi, rcpm_acpi_match); #endif
+
   static struct platform_driver rcpm_driver = {
.driver = {
.name = "rcpm",
.of_match_table = rcpm_of_match,
+   .acpi_match_table = ACPI_PTR(rcpm_acpi_match),
.pm = &rcpm_pm_ops,
},
.probe = rcpm_probe,







Re: [PATCH v1] soc: fsl: rcpm: Add ACPI support

2020-09-15 Thread Ard Biesheuvel

On 9/15/20 1:06 PM, kuldip dwivedi wrote:

Add ACPI support in fsl RCPM driver. This is required
to support ACPI S3 state. S3 is the ACPI sleep state
that is known as "sleep" or "suspend to RAM".
It essentially turns off most power of the system but
keeps memory powered.

Signed-off-by: tanveer 
Signed-off-by: kuldip dwivedi 


Why does the OS need to program this device? Can't this be done by firmware?


---

Notes:
 1. Add ACPI match table
 2. NXP team members are added for confirming HID changes
 3. There is only one node in ACPI so no need to check for
current device explicitly
 4. These changes are tested on LX2160A and LS1046A platforms

  drivers/soc/fsl/rcpm.c | 22 +++---
  1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/soc/fsl/rcpm.c b/drivers/soc/fsl/rcpm.c
index a093dbe6d2cb..e75a436fb159 100644
--- a/drivers/soc/fsl/rcpm.c
+++ b/drivers/soc/fsl/rcpm.c
@@ -2,10 +2,12 @@
  //
  // rcpm.c - Freescale QorIQ RCPM driver
  //
-// Copyright 2019 NXP
+// Copyright 2019-2020 NXP
+// Copyright 2020 Puresoftware Ltd.
  //
  // Author: Ran Wang 
  
+#include <linux/acpi.h>

  #include 
  #include 
  #include 
@@ -57,8 +59,13 @@ static int rcpm_pm_prepare(struct device *dev)
rcpm->wakeup_cells + 1);
  
  		/*  Wakeup source should refer to current rcpm device */

-   if (ret || (np->phandle != value[0]))
-   continue;
+   if (is_acpi_node(dev->fwnode)) {
+   if (ret)
+   continue;
+   } else {
+   if (ret || (np->phandle != value[0]))
+   continue;
+   }
  
  		/* Property "#fsl,rcpm-wakeup-cells" of rcpm node defines the

 * number of IPPDEXPCR register cells, and "fsl,rcpm-wakeup"
@@ -139,10 +146,19 @@ static const struct of_device_id rcpm_of_match[] = {
  };
  MODULE_DEVICE_TABLE(of, rcpm_of_match);
  
+#ifdef CONFIG_ACPI

+static const struct acpi_device_id rcpm_acpi_match[] = {
+   { "NXP0015", },
+   { }
+};
+MODULE_DEVICE_TABLE(acpi, rcpm_acpi_match);
+#endif
+
  static struct platform_driver rcpm_driver = {
.driver = {
.name = "rcpm",
.of_match_table = rcpm_of_match,
+   .acpi_match_table = ACPI_PTR(rcpm_acpi_match),
.pm = &rcpm_pm_ops,
},
.probe = rcpm_probe,





Re: [PATCH v2 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-23 Thread Ard Biesheuvel
On Thu, 23 Jul 2020 at 04:52, Jarkko Sakkinen
 wrote:
>
> On Thu, Jul 16, 2020 at 06:49:09PM +0200, Christophe Leroy wrote:
> > Jarkko Sakkinen  a écrit :
> >
> > > Rename module_alloc() to text_alloc() and module_memfree() to
> > > text_memfree(), and move them to kernel/text.c, which is unconditionally
> > > compiled to the kernel proper. This allows kprobes, ftrace and bpf to
> > > allocate space for executable code without requiring to compile the 
> > > modules
> > > support (CONFIG_MODULES=y) in.
> >
> > You are not changing enough in powerpc to have this work.
> > On powerpc 32 bits (6xx), when STRICT_KERNEL_RWX is selected, the vmalloc
> > space is set to NX (no exec) at segment level (ie by 256Mbytes zone) unless
> > CONFIG_MODULES is selected.
> >
> > Christophe
>
> This has been deduced down to:
>
> https://lore.kernel.org/lkml/20200717030422.679972-1-jarkko.sakki...@linux.intel.com/
>
> I.e. not intruding PPC anymore :-)
>

Ok, so after the elaborate discussion we had between Jessica, Russell,
Peter, Will, Mark, you and myself, where we pointed out that
a) a single text_alloc() abstraction for bpf, kprobes and ftrace does
not fit other architectures very well, and
b) that module_alloc() is not suitable as a default to base text_alloc() on,

you went ahead and implemented that anyway, but only cc'ing Peter,
akpm, Masami and the mm list this time?

Sorry, but that is not how it works. Once people get pulled into a
discussion, you cannot dismiss them or their feedback like that and go
off and do your own thing anyway. Generic features like this are
tricky to get right, and it will likely take many iterations and input
from many different people.


Re: [PATCH v2 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-14 Thread Ard Biesheuvel
On Tue, 14 Jul 2020 at 16:33, Mark Rutland  wrote:
>
> On Tue, Jul 14, 2020 at 03:01:09PM +0200, Peter Zijlstra wrote:
> > On Tue, Jul 14, 2020 at 03:19:24PM +0300, Ard Biesheuvel wrote:
> > > So perhaps the answer is to have text_alloc() not with a 'where'
> > > argument but with a 'why' argument. Or more simply, just have separate
> > > alloc/free APIs for each case, with generic versions that can be
> > > overridden by the architecture.
> >
> > Well, there only seem to be 2 cases here, either the pointer needs to
> > fit in some immediate displacement, or not.
>
> On some arches you have a few choices for immediates depending on
> compiler options, e.g. on arm64:
>
> * +/- 128M with B
> * +/-4G with ADRP+ADD+BR
> * +/- 48/64 bits with a series of MOVK* + BR
>
> ... and you might build core kernel one way and modules another, and
> either could depend on configuration.
>
> > On x86 we seem have the advantage of a fairly large immediate
> > displacement as compared to many other architectures (due to our
> > variable sized instructions). And thus have been fairly liberal with our
> > usage of it (also our indirect jmps/calls suck, double so with
> > RETCH-POLINE).
> >
> > Still, the indirect jump, as mentioned by Russel should work for
> > arbitrarily placed code for us too.
> >
> >
> > So I'm thinking that something like:
> >
> > enum ptr_type {
> >   immediate_displacement,
> >   absolute,
> > };
> >
> > void *text_alloc(unsigned long size, enum ptr_type type)
> > {
> >   unsigned long vstart = VMALLOC_START;
> >   unsigned long vend   = VMALLOC_END;
> >
> >   if (type == immediate_displacement) {
> >   vstart = MODULES_VADDR;
> >   vend   = MODULES_END;
> >   }
> >
> >   return __vmalloc_node_range(size, TEXT_ALIGN, vstart, vend,
> >   GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
> >   NUMA_NO_NODE, _RET_IP_);
> > }
> >
> > void text_free(void *ptr)
> > {
> >   vfree(ptr);
> > }
>
> I think it'd be easier to read with separate functions, e.g.
>
>   text_alloc_imm_offset(unsigned long size);
>   text_alloc_absolute(unsigned long size);
>

On arm64, we have a 128M window close to the core kernel for modules,
and a separate 128M window for bpf programs, which are kept in
relative branching range of each other, but could be far away from
kernel+modules, and so having 'close' and 'far' as the only
distinction is insufficient.

> > Should work for all cases. Yes, we might then want something like a per
> > arch:
> >
> >   {BPF,FTRACE,KPROBE}_TEXT_TYPE
>
> ... at that point why not:
>
>   text_alloc_ftrace();
>   text_alloc_module();
>   text_alloc_bpf();
>   text_alloc_kprobe();
>
> ... etc which an arch can alias however it wants? e.g. x86 can have
> those all go to a common text_alloc_generic(), and that could even be a
> generic option for arches that don't care to distinguish these cases.
>

That is basically what I meant with separate alloc/free APIs, which I
think is the sanest approach here.

> Then if there are new places that want to allocate text we have to
> consider their requirements when adding them, too.
>
> Thanks,
> Mark.
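
A rough sketch of that per-use-case shape (the names follow Mark's
suggestion above; the generic fallback is an assumption, not code from any
posted patch):

#include <linux/vmalloc.h>
#include <linux/mm.h>

static void *text_alloc_generic(unsigned long size)
{
	/* default: anywhere in vmalloc space, mapped executable */
	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
				    NUMA_NO_NODE, __builtin_return_address(0));
}

/* arches that don't care can alias every user to the generic version */
void * __weak text_alloc_bpf(unsigned long size)
{
	return text_alloc_generic(size);
}

void * __weak text_alloc_kprobe(unsigned long size)
{
	return text_alloc_generic(size);
}

void text_free(void *ptr)
{
	vfree(ptr);
}

An arch like arm64 could then override only text_alloc_bpf() to use its
dedicated bpf window, while x86 could point everything at module space.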


Re: [PATCH v2 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-14 Thread Ard Biesheuvel
On Tue, 14 Jul 2020 at 14:31, Peter Zijlstra  wrote:
>
> On Tue, Jul 14, 2020 at 11:28:27AM +0100, Will Deacon wrote:
>
> > As Ard says, module_alloc() _is_ special, in the sense that the virtual
> > memory it allocates wants to be close to the kernel text, whereas the
> > concept of allocating executable memory is broader and doesn't have these
> > restrictions. So, while I'm in favour of having a text_alloc() interface
> > that can be used by callers which only require an executable mapping, I'd
> > much prefer for the module_alloc() code to remain for, err, modules.
>
> So on x86 all those things (kprobes, bpf, ftrace) require that same
> closeness.
>
> An interface like the late vmalloc_exec() will simply not work for us.
>

Fair enough. So for x86, implementing text_alloc() as an alias of
module_alloc() makes sense. But that is not the case in general.

> We recently talked about arm64-kprobes and how you're not doing any of
> the optimizations and fully rely on the exception return. And I see
> you're one of the few archs that has bpf_jit_alloc_exec() (also,
> shouldn't you be using PAGE_KERNEL_EXEC there?). But the BPF core seems
> to use module_alloc() as a default means of allocating text.
>

Indeed. Which means it uses up module space which may be scarce,
especially on 32-bit ARM, and gets backed by kasan shadow pages, which
only makes sense for modules (if CONFIG_KASAN=y)

>
> So what should this look like? Have a text_alloc() with an argument that
> indicates where? But then I suppose we also need a means to manage PLT
> entries. Otherwise I don't exactly see how you're going to call BPF
> code, or how that BPF stuff is going to call back into its helpers.
>

If x86 chooses to back its implementation of text_alloc() by
module_alloc(), that is absolutely fine. But arm64 has no use for
text_alloc() at all today (bpf and kprobes don't use module_alloc(),
and ftrace does not implement dynamic trampoline allocation), and in
the general case, bpf, kprobes, ftrace and the module loader all have
different requirements that deviate subtly between architectures.

So perhaps the answer is to have text_alloc() not with a 'where'
argument but with a 'why' argument. Or more simply, just have separate
alloc/free APIs for each case, with generic versions that can be
overridden by the architecture.


Re: [PATCH 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-14 Thread Ard Biesheuvel
On Tue, 14 Jul 2020 at 12:53, Jarkko Sakkinen
 wrote:
>
> On Mon, Jul 13, 2020 at 10:49:48PM +0300, Ard Biesheuvel wrote:
> > This patch suggests that there are other reasons why conflating
> > allocation of module space and allocating  text pages for other uses
> > is a bad idea, but switching all users to text_alloc() is a step in
> > the wrong direction. It would be better to stop using module_alloc()
> > in core code except in the module loader, and have a generic
> > text_alloc() that can be overridden by the arch if necessary. Note
> > that x86  and s390 are the only architectures that use module_alloc()
> > in ftrace code.
>
> This series essentially does this: introduces text_alloc() and
> text_memfree(), which have generic implementations in kernel/text.c.
> Those can be overriddent by arch specific implementations.
>
> What you think should be done differently than in my patch set?
>

On arm64, module_alloc is only used by the module loader, and so
pulling it out and renaming it will cause unused code to be
incorporated into the kernel when building without module support,
which is the use case you claim to be addressing.

Module_alloc has semantics that are intimately tied to the module
loader, but over the years, it ended up being (ab)used by other
subsystems, which don't require those semantics but just need n pages
of vmalloc space with executable permissions.

So the correct approach is to make text_alloc() implement just that,
generically, and switch bpf etc to use it. Then, only on architectures
that need it, override it with an implementation that has the required
additional semantics.

Refactoring 10+ architectures like this without any regard for how
text_alloc() deviates from module_alloc() just creates a lot of churn
that others will have to clean up after you.


Re: [PATCH 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-13 Thread Ard Biesheuvel
On Tue, 14 Jul 2020 at 05:04, Steven Rostedt  wrote:
>
> On Mon, 13 Jul 2020 22:49:48 +0300
> Ard Biesheuvel  wrote:
>
> > On arm64, we no longer use module_alloc for bpf or kprobes, to avoid
> > wasting va space on code that does not need to be loaded close to the
> > kernel. Also, module_alloc() allocates kasan shadow, which is
> > unnecessary for kprobes or bpf programs, which don't have kasan
> > instrumentation.
> >
> > This patch suggests that there are other reasons why conflating
> > allocation of module space and allocating  text pages for other uses
> > is a bad idea, but switching all users to text_alloc() is a step in
> > the wrong direction. It would be better to stop using module_alloc()
> > in core code except in the module loader, and have a generic
> > text_alloc() that can be overridden by the arch if necessary. Note
> > that x86  and s390 are the only architectures that use module_alloc()
> > in ftrace code.
> >
> > Please have a look at alloc_insn_page() or bpf_jit_alloc_exec() in the
> > arm64 tree to see what I mean.
>
> Hmm, so you have another method for allocating memory for trampolines?
> (I haven't looked at those functions you pointed out, out of sheer
> laziness ;-)
>
> It would be nice to implement the trampoline optimization in arm, which
> x86 has (see arch_ftrace_update_trampoline() and
> arch_ftrace_trampoline_func()).
>
> It helps when you have two different callbacks for different functions
> (like having live patching enabled and function tracing enabled, or
> kprobes using ftrace). Each callback will get its own allocated
> trampoline to jump to instead of jumping to a trampoline that calls
> a looping function that tests to see which callback wants to be called
> by the traced function.
>

So in what sense are ftrace trampolines like kernel modules, apart
from the fact that they are executable pages that live in the vmalloc
space?


Re: [PATCH 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-13 Thread Ard Biesheuvel
On Mon, 13 Jul 2020 at 21:21, Jarkko Sakkinen
 wrote:
>
> Rename module_alloc() to text_alloc() and module_memfree() to
> text_memfree(), and move them to kernel/text.c, which is unconditionally
> compiled to the kernel proper. This allows kprobes, ftrace and bpf to
> allocate space for executable code without requiring to compile the modules
> support (CONFIG_MODULES=y) in.
>

On arm64, we no longer use module_alloc for bpf or kprobes, to avoid
wasting va space on code that does not need to be loaded close to the
kernel. Also, module_alloc() allocates kasan shadow, which is
unnecessary for kprobes or bpf programs, which don't have kasan
instrumentation.

This patch suggests that there are other reasons why conflating
allocation of module space and allocating text pages for other uses
is a bad idea, but switching all users to text_alloc() is a step in
the wrong direction. It would be better to stop using module_alloc()
in core code except in the module loader, and have a generic
text_alloc() that can be overridden by the arch if necessary. Note
that x86 and s390 are the only architectures that use module_alloc()
in ftrace code.

Please have a look at alloc_insn_page() or bpf_jit_alloc_exec() in the
arm64 tree to see what I mean.
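
(For readers without the arm64 tree at hand, the shape of those helpers is
roughly the following. This is a simplified sketch, not the actual arm64
code; the region constants stand for an arch-chosen VA window. The point is
a dedicated window instead of module space, with no kasan shadow allocated
for it.)

void *bpf_jit_alloc_exec(unsigned long size)
{
	return __vmalloc_node_range(size, PAGE_SIZE,
				    BPF_JIT_REGION_START, BPF_JIT_REGION_END,
				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
				    NUMA_NO_NODE, __builtin_return_address(0));
}

void bpf_jit_free_exec(void *addr)
{
	vfree(addr);
}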



> Cc: Andi Kleen 
> Suggested-by: Peter Zijlstra 
> Signed-off-by: Jarkko Sakkinen 
> ---
>  arch/arm/kernel/Makefile |  3 +-
>  arch/arm/kernel/module.c | 21 ---
>  arch/arm/kernel/text.c   | 33 ++
>  arch/arm64/kernel/Makefile   |  2 +-
>  arch/arm64/kernel/module.c   | 42 --
>  arch/arm64/kernel/text.c | 54 
>  arch/mips/kernel/Makefile|  2 +-
>  arch/mips/kernel/module.c|  9 -
>  arch/mips/kernel/text.c  | 19 ++
>  arch/mips/net/bpf_jit.c  |  4 +--
>  arch/nds32/kernel/Makefile   |  2 +-
>  arch/nds32/kernel/module.c   |  7 
>  arch/nds32/kernel/text.c | 12 +++
>  arch/nios2/kernel/Makefile   |  1 +
>  arch/nios2/kernel/module.c   | 19 --
>  arch/nios2/kernel/text.c | 34 ++
>  arch/parisc/kernel/Makefile  |  2 +-
>  arch/parisc/kernel/module.c  | 11 --
>  arch/parisc/kernel/text.c| 22 
>  arch/powerpc/net/bpf_jit_comp.c  |  4 +--
>  arch/riscv/kernel/Makefile   |  1 +
>  arch/riscv/kernel/module.c   | 12 ---
>  arch/riscv/kernel/text.c | 20 +++
>  arch/s390/kernel/Makefile|  2 +-
>  arch/s390/kernel/ftrace.c|  2 +-
>  arch/s390/kernel/module.c| 16 -
>  arch/s390/kernel/text.c  | 23 
>  arch/sparc/kernel/Makefile   |  1 +
>  arch/sparc/kernel/module.c   | 30 
>  arch/sparc/kernel/text.c | 39 +
>  arch/sparc/net/bpf_jit_comp_32.c |  6 ++--
>  arch/unicore32/kernel/Makefile   |  1 +
>  arch/unicore32/kernel/module.c   |  7 
>  arch/unicore32/kernel/text.c | 18 ++
>  arch/x86/kernel/Makefile |  1 +
>  arch/x86/kernel/ftrace.c |  4 +--
>  arch/x86/kernel/kprobes/core.c   |  4 +--
>  arch/x86/kernel/module.c | 49 --
>  arch/x86/kernel/text.c   | 60 
>  include/linux/moduleloader.h |  4 +--
>  kernel/Makefile  |  2 +-
>  kernel/bpf/core.c|  4 +--
>  kernel/kprobes.c |  4 +--
>  kernel/module.c  | 37 ++--
>  kernel/text.c| 25 +
>  45 files changed, 400 insertions(+), 275 deletions(-)
>  create mode 100644 arch/arm/kernel/text.c
>  create mode 100644 arch/arm64/kernel/text.c
>  create mode 100644 arch/mips/kernel/text.c
>  create mode 100644 arch/nds32/kernel/text.c
>  create mode 100644 arch/nios2/kernel/text.c
>  create mode 100644 arch/parisc/kernel/text.c
>  create mode 100644 arch/riscv/kernel/text.c
>  create mode 100644 arch/s390/kernel/text.c
>  create mode 100644 arch/sparc/kernel/text.c
>  create mode 100644 arch/unicore32/kernel/text.c
>  create mode 100644 arch/x86/kernel/text.c
>  create mode 100644 kernel/text.c
>
> diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
> index 89e5d864e923..69bfacfd60ef 100644
> --- a/arch/arm/kernel/Makefile
> +++ b/arch/arm/kernel/Makefile
> @@ -19,7 +19,8 @@ CFLAGS_REMOVE_return_address.o = -pg
>  obj-y  := elf.o entry-common.o irq.o opcodes.o \
>process.o ptrace.o reboot.o \
>setup.o signal.o sigreturn_codes.o \
> -  stacktrace.o sys_arm.o time.o traps.o
> +  stacktrace.o sys_arm.o time.o traps.o \
> +  text.o
>
>  ifneq ($(CONFIG_ARM_UNWIND),y)
>  obj-$(CONFIG_FRAME_POINTER)+= return_address.o
> diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
> index e15444b25ca0..13e3442a6b9f 100644
> --- a/arch/arm/kernel/modu

Re: [PATCH 0/7] sha1 library cleanup

2020-05-03 Thread Ard Biesheuvel
On Sat, 2 May 2020 at 20:28, Eric Biggers  wrote:
>
> <linux/cryptohash.h> sounds very generic and important, like it's the
> header to include if you're doing cryptographic hashing in the kernel.
> But actually it only includes the library implementation of the SHA-1
> compression function (not even the full SHA-1).  This should basically
> never be used anymore; SHA-1 is no longer considered secure, and there
> are much better ways to do cryptographic hashing in the kernel.
>
> Also the function is named just "sha_transform()", which makes it
> unclear which version of SHA is meant.
>
> Therefore, this series cleans things up by moving these SHA-1
> declarations into  where they better belong, and changing
> the names to say SHA-1 rather than just SHA.
>
> As future work, we should split sha.h into sha1.h and sha2.h and try to
> remove the remaining uses of SHA-1.  For example, the remaining use in
> drivers/char/random.c is probably one that can be gotten rid of.
>
> This patch series applies to cryptodev/master.
>
> Eric Biggers (7):
>   mptcp: use SHA256_BLOCK_SIZE, not SHA_MESSAGE_BYTES
>   crypto: powerpc/sha1 - remove unused temporary workspace
>   crypto: powerpc/sha1 - prefix the "sha1_" functions
>   crypto: s390/sha1 - prefix the "sha1_" functions
>   crypto: lib/sha1 - rename "sha" to "sha1"
>   crypto: lib/sha1 - remove unnecessary includes of linux/cryptohash.h
>   crypto: lib/sha1 - fold linux/cryptohash.h into crypto/sha.h
>

For the series,

Acked-by: Ard Biesheuvel 

>  Documentation/security/siphash.rst  |  2 +-
>  arch/arm/crypto/sha1_glue.c |  1 -
>  arch/arm/crypto/sha1_neon_glue.c|  1 -
>  arch/arm/crypto/sha256_glue.c   |  1 -
>  arch/arm/crypto/sha256_neon_glue.c  |  1 -
>  arch/arm/kernel/armksyms.c  |  1 -
>  arch/arm64/crypto/sha256-glue.c |  1 -
>  arch/arm64/crypto/sha512-glue.c |  1 -
>  arch/microblaze/kernel/microblaze_ksyms.c   |  1 -
>  arch/mips/cavium-octeon/crypto/octeon-md5.c |  1 -
>  arch/powerpc/crypto/md5-glue.c  |  1 -
>  arch/powerpc/crypto/sha1-spe-glue.c |  1 -
>  arch/powerpc/crypto/sha1.c  | 33 ++---
>  arch/powerpc/crypto/sha256-spe-glue.c   |  1 -
>  arch/s390/crypto/sha1_s390.c| 12 
>  arch/sparc/crypto/md5_glue.c|  1 -
>  arch/sparc/crypto/sha1_glue.c   |  1 -
>  arch/sparc/crypto/sha256_glue.c |  1 -
>  arch/sparc/crypto/sha512_glue.c |  1 -
>  arch/unicore32/kernel/ksyms.c   |  1 -
>  arch/x86/crypto/sha1_ssse3_glue.c   |  1 -
>  arch/x86/crypto/sha256_ssse3_glue.c |  1 -
>  arch/x86/crypto/sha512_ssse3_glue.c |  1 -
>  crypto/sha1_generic.c   |  5 ++--
>  drivers/char/random.c   |  8 ++---
>  drivers/crypto/atmel-sha.c  |  1 -
>  drivers/crypto/chelsio/chcr_algo.c  |  1 -
>  drivers/crypto/chelsio/chcr_ipsec.c |  1 -
>  drivers/crypto/omap-sham.c  |  1 -
>  fs/f2fs/hash.c  |  1 -
>  include/crypto/sha.h| 10 +++
>  include/linux/cryptohash.h  | 14 -
>  include/linux/filter.h  |  4 +--
>  include/net/tcp.h   |  1 -
>  kernel/bpf/core.c   | 18 +--
>  lib/crypto/chacha.c |  1 -
>  lib/sha1.c  | 24 ---
>  net/core/secure_seq.c   |  1 -
>  net/ipv6/addrconf.c | 10 +++
>  net/ipv6/seg6_hmac.c|  1 -
>  net/mptcp/crypto.c  |  4 +--
>  41 files changed, 69 insertions(+), 104 deletions(-)
>  delete mode 100644 include/linux/cryptohash.h
>
>
> base-commit: 12b3cf9093542d9f752a4968815ece836159013f
> --
> 2.26.2
>


Re: [PATCH v2] ima: add a new CONFIG for loading arch-specific policies

2020-03-03 Thread Ard Biesheuvel
On Wed, 4 Mar 2020 at 03:34, Nayna Jain  wrote:
>
> Every time a new architecture defines the IMA architecture specific
> functions - arch_ima_get_secureboot() and arch_ima_get_policy(), the IMA
> include file needs to be updated. To avoid this "noise", this patch
> defines a new IMA Kconfig IMA_SECURE_AND_OR_TRUSTED_BOOT option, allowing
> the different architectures to select it.
>
> Suggested-by: Linus Torvalds 
> Signed-off-by: Nayna Jain 
> Cc: Ard Biesheuvel 
> Cc: Philipp Rudo 
> Cc: Michael Ellerman 

Acked-by: Ard Biesheuvel 

for the x86 bits, but I'm not an x86 maintainer. Also, you may need to
split this if you want to permit arch maintainers to pick up their
parts individually.


> ---
> v2:
> * Fixed the issue identified by Mimi. Thanks Mimi, Ard, Heiko and Michael for
> discussing the fix.
>
>  arch/powerpc/Kconfig   | 1 +
>  arch/s390/Kconfig  | 1 +
>  arch/x86/Kconfig   | 1 +
>  include/linux/ima.h| 3 +--
>  security/integrity/ima/Kconfig | 9 +
>  5 files changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 497b7d0b2d7e..a5cfde432983 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -979,6 +979,7 @@ config PPC_SECURE_BOOT
> bool
> depends on PPC_POWERNV
> depends on IMA_ARCH_POLICY
> +   select IMA_SECURE_AND_OR_TRUSTED_BOOT
> help
>   Systems with firmware secure boot enabled need to define security
>   policies to extend secure boot to the OS. This config allows a user
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index 8abe77536d9d..4a502fbcb800 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -195,6 +195,7 @@ config S390
> select ARCH_HAS_FORCE_DMA_UNENCRYPTED
> select SWIOTLB
> select GENERIC_ALLOCATOR
> +   select IMA_SECURE_AND_OR_TRUSTED_BOOT if IMA_ARCH_POLICY
>
>
>  config SCHED_OMIT_FRAME_POINTER
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index beea77046f9b..7f5bfaf0cbd2 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -230,6 +230,7 @@ config X86
> select VIRT_TO_BUS
> select X86_FEATURE_NAMESif PROC_FS
> select PROC_PID_ARCH_STATUS if PROC_FS
> +   select IMA_SECURE_AND_OR_TRUSTED_BOOT   if EFI && IMA_ARCH_POLICY
>
>  config INSTRUCTION_DECODER
> def_bool y
> diff --git a/include/linux/ima.h b/include/linux/ima.h
> index 1659217e9b60..aefe758f4466 100644
> --- a/include/linux/ima.h
> +++ b/include/linux/ima.h
> @@ -30,8 +30,7 @@ extern void ima_kexec_cmdline(const void *buf, int size);
>  extern void ima_add_kexec_buffer(struct kimage *image);
>  #endif
>
> -#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) \
> -   || defined(CONFIG_PPC_SECURE_BOOT)
> +#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
>  extern bool arch_ima_get_secureboot(void);
>  extern const char * const *arch_get_ima_policy(void);
>  #else
> diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
> index 3f3ee4e2eb0d..d17972aa413a 100644
> --- a/security/integrity/ima/Kconfig
> +++ b/security/integrity/ima/Kconfig
> @@ -327,3 +327,12 @@ config IMA_QUEUE_EARLY_BOOT_KEYS
> depends on IMA_MEASURE_ASYMMETRIC_KEYS
> depends on SYSTEM_TRUSTED_KEYRING
> default y
> +
> +config IMA_SECURE_AND_OR_TRUSTED_BOOT
> +   bool
> +   depends on IMA
> +   depends on IMA_ARCH_POLICY

Doesn't the latter already depend on the former?

> +   default n
> +   help
> +  This option is selected by architectures to enable secure and/or
> +  trusted boot based on IMA runtime policies.
> --
> 2.13.6
>


Re: [PATCH] ima: add a new CONFIG for loading arch-specific policies

2020-03-02 Thread Ard Biesheuvel
On Mon, 2 Mar 2020 at 15:48, Mimi Zohar  wrote:
>
> On Wed, 2020-02-26 at 14:10 -0500, Nayna Jain wrote:
> > Every time a new architecture defines the IMA architecture specific
> > functions - arch_ima_get_secureboot() and arch_ima_get_policy(), the IMA
> > include file needs to be updated. To avoid this "noise", this patch
> > defines a new IMA Kconfig IMA_SECURE_AND_OR_TRUSTED_BOOT option, allowing
> > the different architectures to select it.
> >
> > Suggested-by: Linus Torvalds 
> > Signed-off-by: Nayna Jain 
> > Cc: Ard Biesheuvel 
> > Cc: Martin Schwidefsky 
> > Cc: Philipp Rudo 
> > Cc: Michael Ellerman 
> > ---
> >  arch/powerpc/Kconfig   | 2 +-
> >  arch/s390/Kconfig  | 1 +
> >  arch/x86/Kconfig   | 1 +
> >  include/linux/ima.h| 3 +--
> >  security/integrity/ima/Kconfig | 9 +
> >  5 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 497b7d0b2d7e..b8ce1b995633 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -246,6 +246,7 @@ config PPC
> >   select SYSCTL_EXCEPTION_TRACE
> >   select THREAD_INFO_IN_TASK
> >   select VIRT_TO_BUS  if !PPC64
> > + select IMA_SECURE_AND_OR_TRUSTED_BOOT   if PPC_SECURE_BOOT
> >   #
> >   # Please keep this list sorted alphabetically.
> >   #
> > @@ -978,7 +979,6 @@ config PPC_SECURE_BOOT
> >   prompt "Enable secure boot support"
> >   bool
> >   depends on PPC_POWERNV
> > - depends on IMA_ARCH_POLICY
> >   help
> > Systems with firmware secure boot enabled need to define security
> > policies to extend secure boot to the OS. This config allows a user
> > diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> > index 8abe77536d9d..90ff3633ade6 100644
> > --- a/arch/s390/Kconfig
> > +++ b/arch/s390/Kconfig
> > @@ -195,6 +195,7 @@ config S390
> >   select ARCH_HAS_FORCE_DMA_UNENCRYPTED
> >   select SWIOTLB
> >   select GENERIC_ALLOCATOR
> > + select IMA_SECURE_AND_OR_TRUSTED_BOOT
> >
> >
> >  config SCHED_OMIT_FRAME_POINTER
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index beea77046f9b..cafa66313fe2 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -230,6 +230,7 @@ config X86
> >   select VIRT_TO_BUS
> >   select X86_FEATURE_NAMESif PROC_FS
> >   select PROC_PID_ARCH_STATUS if PROC_FS
> > + select IMA_SECURE_AND_OR_TRUSTED_BOOT   if EFI
>
> Not everyone is interested in enabling IMA or requiring IMA runtime
> policies.  With this patch, enabling IMA_ARCH_POLICY is therefore
> still left up to the person building the kernel.  As a result, I'm
> seeing the following warning, which is kind of cool.
>
> WARNING: unmet direct dependencies detected for
> IMA_SECURE_AND_OR_TRUSTED_BOOT
>   Depends on [n]: INTEGRITY [=y] && IMA [=y] && IMA_ARCH_POLICY [=n]
>   Selected by [y]:
>   - X86 [=y] && EFI [=y]
>
> Ard, Michael, Martin, just making sure this type of warning is
> acceptable before upstreaming this patch.  I would appreciate your
> tags.
>

Ehm, no, warnings like these are not really acceptable. It means there
is an inconsistency in the way the Kconfig dependencies are defined.

Does this help:

  select IMA_SECURE_AND_OR_TRUSTED_BOOT   if EFI && IMA_ARCH_POLICY

?


>
> >
> >  config INSTRUCTION_DECODER
> >   def_bool y
> > diff --git a/include/linux/ima.h b/include/linux/ima.h
> > index 1659217e9b60..aefe758f4466 100644
> > --- a/include/linux/ima.h
> > +++ b/include/linux/ima.h
> > @@ -30,8 +30,7 @@ extern void ima_kexec_cmdline(const void *buf, int size);
> >  extern void ima_add_kexec_buffer(struct kimage *image);
> >  #endif
> >
> > -#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) \
> > - || defined(CONFIG_PPC_SECURE_BOOT)
> > +#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
> >  extern bool arch_ima_get_secureboot(void);
> >  extern const char * const *arch_get_ima_policy(void);
> >  #else
> > diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
> > index 3f3ee4e2eb0d..d17972aa413a 100644
> > --- a/security/integrity/ima/Kconfig
> > +++ b/security/integrity/ima/Kconfig
> > @@ -327,3 +327,12 @@ config IMA_QUEUE_EARLY_BOOT_KEYS
> >   depends on IMA_MEASURE_ASYMMETRIC_KEYS
> >   depends on SYSTEM_TRUSTED_KEYRING
> >   default y
> > +
> > +config IMA_SECURE_AND_OR_TRUSTED_BOOT
> > + bool
> > + depends on IMA
> > + depends on IMA_ARCH_POLICY
> > + default n
> > + help
> > +This option is selected by architectures to enable secure and/or
> > +trusted boot based on IMA runtime policies.
>
>
>
>


[PATCH] powerpc/archrandom: fix arch_get_random_seed_int()

2019-12-04 Thread Ard Biesheuvel
Commit 01c9348c7620ec65

  powerpc: Use hardware RNG for arch_get_random_seed_* not arch_get_random_*

updated arch_get_random_[int|long]() to be NOPs, and moved the hardware
RNG backing to arch_get_random_seed_[int|long]() instead. However, it
failed to take into account that arch_get_random_int() was implemented
in terms of arch_get_random_long(), and so we ended up with a version
of the former that is essentially a NOP as well.

Fix this by calling arch_get_random_seed_long() from
arch_get_random_seed_int() instead.

Fixes: 01c9348c7620ec65 ("powerpc: Use hardware RNG for arch_get_random_seed_* 
not arch_get_random_*")
Signed-off-by: Ard Biesheuvel 
---
 arch/powerpc/include/asm/archrandom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/archrandom.h 
b/arch/powerpc/include/asm/archrandom.h
index 9c63b596e6ce..a09595f00cab 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -28,7 +28,7 @@ static inline int arch_get_random_seed_int(unsigned int *v)
unsigned long val;
int rc;
 
-   rc = arch_get_random_long(&val);
+   rc = arch_get_random_seed_long(&val);
if (rc)
*v = val;
 
-- 
2.17.1



Re: [PATCH 0/6] Improvements for random.h/archrandom.h

2019-10-29 Thread Ard Biesheuvel
On Mon, 28 Oct 2019 at 22:06, Richard Henderson
 wrote:
>
> During patch review for an addition of archrandom.h for arm64,
> it was suggeted that the arch_random_get_* functions should be
> marked __must_check.  Which does sound like a good idea, since
> the by-reference integer output may be uninitialized when the
> boolean result is false.
>
> In addition, I noticed a few other minor inconsistencies between
> the different architectures: x86 defines some functional macros
> outside CONFIG_ARCH_RANDOM, and powerpc isn't using bool.
>
>
> r~
>
>
> Richard Henderson (6):
>   random: Mark CONFIG_ARCH_RANDOM functions __must_check
>   x86: Move arch_has_random* inside CONFIG_ARCH_RANDOM
>   x86: Mark archrandom.h functions __must_check
>   powerpc: Use bool in archrandom.h
>   powerpc: Mark archrandom.h functions __must_check
>   s390x: Mark archrandom.h functions __must_check
>

Modulo the nit in reply to 1/6:

Reviewed-by: Ard Biesheuvel 


>  arch/powerpc/include/asm/archrandom.h | 24 +---
>  arch/s390/include/asm/archrandom.h|  8 
>  arch/x86/include/asm/archrandom.h | 24 
>  include/linux/random.h|  8 
>  4 files changed, 33 insertions(+), 31 deletions(-)
>
> --
> 2.17.1
>
>


Re: [PATCH 1/6] random: Mark CONFIG_ARCH_RANDOM functions __must_check

2019-10-29 Thread Ard Biesheuvel
On Mon, 28 Oct 2019 at 22:06, Richard Henderson
 wrote:
>
> We cannot use the pointer output without validating the
> success of the random read.
>
> Signed-off-by: Richard Henderson 
> ---
> Cc: Kees Cook 
> Cc: "H. Peter Anvin" 
> Cc: linux-a...@vger.kernel.org
> ---
>  include/linux/random.h | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/random.h b/include/linux/random.h
> index f189c927fdea..84947b489649 100644
> --- a/include/linux/random.h
> +++ b/include/linux/random.h
> @@ -167,11 +167,11 @@ static inline void prandom_seed_state(struct rnd_state 
> *state, u64 seed)
>  #ifdef CONFIG_ARCH_RANDOM
> >  # include <asm/archrandom.h>
>  #else
> -static inline bool arch_get_random_long(unsigned long *v)
> +static inline bool __must_check arch_get_random_long(unsigned long *v)
>  {
> return 0;

For symmetry with the other cleanups, you should probably change these
into 'return false' as well

>  }
> -static inline bool arch_get_random_int(unsigned int *v)
> +static inline bool __must_check arch_get_random_int(unsigned int *v)
>  {
> return 0;
>  }
> @@ -179,11 +179,11 @@ static inline bool arch_has_random(void)
>  {
> return 0;
>  }
> -static inline bool arch_get_random_seed_long(unsigned long *v)
> +static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
>  {
> return 0;
>  }
> -static inline bool arch_get_random_seed_int(unsigned int *v)
> +static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
>  {
> return 0;
>  }
> --
> 2.17.1
>
>
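
Following up on the inline nit above: with both changes applied, each
CONFIG_ARCH_RANDOM=n stub would end up looking like this (sketch of the
suggested result, not a quote from the final patch):

static inline bool __must_check arch_get_random_long(unsigned long *v)
{
	return false;
}

static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
	return false;
}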


Re: [PATCH v2 0/3] crypto: powerpc - convert SPE AES algorithms to skcipher API

2019-10-15 Thread Ard Biesheuvel
On Tue, 15 Oct 2019 at 04:45, Eric Biggers  wrote:
>
> This series converts the glue code for the PowerPC SPE implementations
> of AES-ECB, AES-CBC, AES-CTR, and AES-XTS from the deprecated
> "blkcipher" API to the "skcipher" API.  This is needed in order for the
> blkcipher API to be removed.
>
> Patch 1-2 are fixes.  Patch 3 is the actual conversion.
>
> Tested with:
>
> export ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu-
> make mpc85xx_defconfig
> cat >> .config << EOF
> # CONFIG_MODULES is not set
> # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
> CONFIG_DEBUG_KERNEL=y
> CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y
> CONFIG_CRYPTO_AES=y
> CONFIG_CRYPTO_CBC=y
> CONFIG_CRYPTO_CTR=y
> CONFIG_CRYPTO_ECB=y
> CONFIG_CRYPTO_XTS=y
> CONFIG_CRYPTO_AES_PPC_SPE=y
> EOF
> make olddefconfig
> make -j32
> qemu-system-ppc -M mpc8544ds -cpu e500 -nographic \
> -kernel arch/powerpc/boot/zImage \
> -append cryptomgr.fuzz_iterations=1000
>
> Note that xts-ppc-spe still fails the comparison tests due to the lack
> of ciphertext stealing support.  This is not addressed by this series.
>
> Changed since v1:
>
> - Split fixes into separate patches.
>
> - Made ppc_aes_setkey_skcipher() call ppc_aes_setkey(), rather than
>   creating a separate expand_key() function.  This keeps the code
>   shorter.
>
> Eric Biggers (3):
>   crypto: powerpc - don't unnecessarily use atomic scatterwalk
>   crypto: powerpc - don't set ivsize for AES-ECB
>   crypto: powerpc - convert SPE AES algorithms to skcipher API
>

For the series

Reviewed-by: Ard Biesheuvel 
Tested-by: Ard Biesheuvel 


>  arch/powerpc/crypto/aes-spe-glue.c | 389 -
>  crypto/Kconfig |   1 +
>  2 files changed, 166 insertions(+), 224 deletions(-)
>
> --
> 2.23.0
>


Re: [PATCH] crypto: powerpc - convert SPE AES algorithms to skcipher API

2019-10-14 Thread Ard Biesheuvel
On Mon, 14 Oct 2019 at 19:38, Eric Biggers  wrote:
>
> On Mon, Oct 14, 2019 at 10:45:22AM +0200, Ard Biesheuvel wrote:
> > Hi Eric,
> >
> > On Sat, 12 Oct 2019 at 04:32, Eric Biggers  wrote:
> > >
> > > From: Eric Biggers 
> > >
> > > Convert the glue code for the PowerPC SPE implementations of AES-ECB,
> > > AES-CBC, AES-CTR, and AES-XTS from the deprecated "blkcipher" API to the
> > > "skcipher" API.
> > >
> > > Tested with:
> > >
> > > export ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu-
> > > make mpc85xx_defconfig
> > > cat >> .config << EOF
> > > # CONFIG_MODULES is not set
> > > # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
> > > CONFIG_DEBUG_KERNEL=y
> > > CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y
> > > CONFIG_CRYPTO_AES=y
> > > CONFIG_CRYPTO_CBC=y
> > > CONFIG_CRYPTO_CTR=y
> > > CONFIG_CRYPTO_ECB=y
> > > CONFIG_CRYPTO_XTS=y
> > > CONFIG_CRYPTO_AES_PPC_SPE=y
> > > EOF
> > > make olddefconfig
> > > make -j32
> > > qemu-system-ppc -M mpc8544ds -cpu e500 -nographic \
> > > -kernel arch/powerpc/boot/zImage \
> > > -append cryptomgr.fuzz_iterations=1000
> > >
> > > Note that xts-ppc-spe still fails the comparison tests due to the lack
> > > of ciphertext stealing support.  This is not addressed by this patch.
> > >
> > > Signed-off-by: Eric Biggers 
> > > ---
> > >  arch/powerpc/crypto/aes-spe-glue.c | 416 +
> > >  crypto/Kconfig |   1 +
> > >  2 files changed, 186 insertions(+), 231 deletions(-)
> > >
> > > diff --git a/arch/powerpc/crypto/aes-spe-glue.c 
> > > b/arch/powerpc/crypto/aes-spe-glue.c
> > > index 3a4ca7d32477..374e3e51e998 100644
> > > --- a/arch/powerpc/crypto/aes-spe-glue.c
> > > +++ b/arch/powerpc/crypto/aes-spe-glue.c
> > > @@ -17,6 +17,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include <crypto/internal/skcipher.h>
> > >  #include 
> > >
> > >  /*
> > > @@ -86,17 +87,13 @@ static void spe_end(void)
> > > preempt_enable();
> > >  }
> > >
> > > -static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
> > > -   unsigned int key_len)
> > > +static int expand_key(struct ppc_aes_ctx *ctx,
> > > + const u8 *in_key, unsigned int key_len)
> > >  {
> > > -   struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
> > > -
> > > if (key_len != AES_KEYSIZE_128 &&
> > > key_len != AES_KEYSIZE_192 &&
> > > -   key_len != AES_KEYSIZE_256) {
> > > -   tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
> > > +   key_len != AES_KEYSIZE_256)
> > > return -EINVAL;
> > > -   }
> > >
> > > switch (key_len) {
> > > case AES_KEYSIZE_128:
> > > @@ -114,17 +111,40 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, 
> > > const u8 *in_key,
> > > }
> > >
> > > ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
> > > +   return 0;
> > > +}
> > >
> > > +static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
> > > +   unsigned int key_len)
> > > +{
> > > +   struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
> > > +
> > > +   if (expand_key(ctx, in_key, key_len) != 0) {
> > > +   tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
> > > +   return -EINVAL;
> > > +   }
> > > +   return 0;
> > > +}
> > > +
> > > +static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
> > > +  const u8 *in_key, unsigned int key_len)
> > > +{
> > > +   struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
> > > +
> > > +   if (expand_key(ctx, in_key, key_len) != 0) {
> > > +   crypto_skcipher_set_flags(tfm, 
> > > CRYPTO_TFM_RES_BAD_KEY_LEN);
> > > +   return -EINVAL;
> > > +   }
> > > return 0;
> > >  }
> > >
> > > -sta
