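Calling a function with 10 arguments is inefficient on many architectures: x86-64 allows for up to 6 register parameters, Arm64 for up to 8. Everything else has to be passed on the stack, forcing the compiler to emit stack manipulation instructions at every call site. Turn the vpci_add_register() inline wrapper into an out-of-line function, so that its callers pass only 6 arguments and the 10-argument call to vpci_add_register_mask() is emitted just once.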
Shrinks generated code on x86 (with gcc15) by over 250 bytes. The gains on Arm64 are a little less. Signed-off-by: Jan Beulich <[email protected]> --- Why is it, btw, that the declarations live in xen/vpci.h? These functions aren't supposed to be called from outside xen/drivers/vpci/, are they? In which case their decls may better live in a private header? --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -573,6 +573,14 @@ int vpci_add_register_mask(struct vpci * return 0; } +int vpci_add_register(struct vpci *vpci, vpci_read_t *read_handler, + vpci_write_t *write_handler, unsigned int offset, + unsigned int size, void *data) +{ + return vpci_add_register_mask(vpci, read_handler, write_handler, offset, + size, data, 0, 0, 0, 0); +} + int vpci_remove_registers(struct vpci *vpci, unsigned int start, unsigned int size) { --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -60,15 +60,11 @@ int __must_check vpci_add_register_mask( void *data, uint32_t ro_mask, uint32_t rw1c_mask, uint32_t rsvdp_mask, uint32_t rsvdz_mask); -static inline int __must_check vpci_add_register(struct vpci *vpci, - vpci_read_t *read_handler, - vpci_write_t *write_handler, - unsigned int offset, - unsigned int size, void *data) -{ - return vpci_add_register_mask(vpci, read_handler, write_handler, offset, - size, data, 0, 0, 0, 0); -} +int __must_check vpci_add_register(struct vpci *vpci, + vpci_read_t *read_handler, + vpci_write_t *write_handler, + unsigned int offset, unsigned int size, + void *data); int vpci_remove_registers(struct vpci *vpci, unsigned int start, unsigned int size);
