Update to commit 1b929c02afd3 ("Linux 6.2-rc1"). Signed-off-by: Avihai Horon <avih...@nvidia.com> Reviewed-by: Cédric Le Goater <c...@redhat.com> Reviewed-by: Michael S. Tsirkin <m...@redhat.com> --- include/standard-headers/drm/drm_fourcc.h | 63 +++- include/standard-headers/linux/ethtool.h | 81 ++++- include/standard-headers/linux/fuse.h | 20 +- .../linux/input-event-codes.h | 4 + include/standard-headers/linux/pci_regs.h | 2 + include/standard-headers/linux/virtio_blk.h | 19 ++ include/standard-headers/linux/virtio_bt.h | 8 + include/standard-headers/linux/virtio_net.h | 4 + linux-headers/asm-arm64/kvm.h | 1 + linux-headers/asm-generic/hugetlb_encode.h | 26 +- linux-headers/asm-generic/mman-common.h | 2 + linux-headers/asm-mips/mman.h | 2 + linux-headers/asm-riscv/kvm.h | 7 + linux-headers/asm-x86/kvm.h | 11 +- linux-headers/linux/kvm.h | 32 +- linux-headers/linux/psci.h | 14 + linux-headers/linux/userfaultfd.h | 4 + linux-headers/linux/vfio.h | 278 +++++++++++++++++- 18 files changed, 522 insertions(+), 56 deletions(-)
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 48b620cbef..69cab17b38 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -98,18 +98,42 @@ extern "C" { #define DRM_FORMAT_INVALID 0 /* color index */ +#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ +#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ +#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ -/* 8 bpp Red */ +/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ + +/* 2 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ + +/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ + +/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ + +/* 1 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ + +/* 2 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ + +/* 4 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ + +/* 8 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ -/* 10 bpp Red */ +/* 10 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ -/* 12 bpp Red */ +/* 12 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ -/* 16 bpp Red */ +/* 16 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ /* 16 bpp RG */ @@ -204,7 +228,9 @@ extern "C" { #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ @@ -717,6 +743,35 @@ extern "C" { */ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) +/* + * Vivante TS (tile-status) buffer modifiers. They can be combined with all of + * the color buffer tiling modifiers defined above. When TS is present it's a + * separate buffer containing the clear/compression status of each tile. The + * modifiers are defined as VIVANTE_MOD_TS_c_s, where c is the color buffer + * tile size in bytes covered by one entry in the status buffer and s is the + * number of status bits per entry. + * We reserve the top 8 bits of the Vivante modifier space for tile status + * clear/compression modifiers, as future cores might add some more TS layout + * variations. + */ +#define VIVANTE_MOD_TS_64_4 (1ULL << 48) +#define VIVANTE_MOD_TS_64_2 (2ULL << 48) +#define VIVANTE_MOD_TS_128_4 (3ULL << 48) +#define VIVANTE_MOD_TS_256_4 (4ULL << 48) +#define VIVANTE_MOD_TS_MASK (0xfULL << 48) + +/* + * Vivante compression modifiers. Those depend on a TS modifier being present + * as the TS bits get reinterpreted as compression tags instead of simple + * clear markers when compression is enabled. + */ +#define VIVANTE_MOD_COMP_DEC400 (1ULL << 52) +#define VIVANTE_MOD_COMP_MASK (0xfULL << 52) + +/* Masking out the extension bits will yield the base modifier. */ +#define VIVANTE_MOD_EXT_MASK (VIVANTE_MOD_TS_MASK | \ + VIVANTE_MOD_COMP_MASK) + /* NVIDIA frame buffer modifiers */ /* diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 4537da20cc..87176ab075 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -159,8 +159,10 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) * in its bus driver structure (e.g. pci_driver::name). Must * not be an empty string. * @version: Driver version string; may be an empty string - * @fw_version: Firmware version string; may be an empty string - * @erom_version: Expansion ROM version string; may be an empty string + * @fw_version: Firmware version string; driver defined; may be an + * empty string + * @erom_version: Expansion ROM version string; driver defined; may be + * an empty string * @bus_info: Device bus address. This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. @@ -179,10 +181,6 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) * * Users can use the %ETHTOOL_GSSET_INFO command to get the number of * strings in any string set (from Linux 2.6.34). - * - * Drivers should set at most @driver, @version, @fw_version and - * @bus_info in their get_drvinfo() implementation. The ethtool - * core fills in the other fields using other driver operations. */ struct ethtool_drvinfo { uint32_t cmd; @@ -736,6 +734,51 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are + * unknown + * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled + * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled + */ +enum ethtool_podl_pse_admin_state { + ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, + ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, + ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, +}; + +/** + * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. + * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE + * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is + * asserted true when the PoDL PSE state diagram variable mr_pse_enable is + * false" + * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is + * asserted true when either of the PSE state diagram variables + * pi_detecting or pi_classifying is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” + * is asserted true when the PoDL PSE state diagram variable pi_powered is + * true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted + * true when the PoDL PSE state diagram variable pi_sleeping is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true + * when the logical combination of the PoDL PSE state diagram variables + * pi_prebiased*!pi_sleeping is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted + * true when the PoDL PSE state diagram variable overload_held is true." + */ +enum ethtool_podl_pse_pw_d_status { + ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, + ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, + ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, + ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, + ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, + ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, + ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS @@ -1692,6 +1735,13 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92, + ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT = 93, + ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT = 94, + ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT = 95, + ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT = 96, + ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT = 97, + ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT = 98, + /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; @@ -1803,6 +1853,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_100000 100000 #define SPEED_200000 200000 #define SPEED_400000 400000 +#define SPEED_800000 800000 #define SPEED_UNKNOWN -1 @@ -1840,6 +1891,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define MASTER_SLAVE_STATE_SLAVE 3 #define MASTER_SLAVE_STATE_ERR 4 +/* These are used to throttle the rate of data on the phy interface when the + * native speed of the interface is higher than the link speed. These should + * not be used for phy interfaces which natively support multiple speeds (e.g. + * MII or SGMII). + */ +/* No rate matching performed. */ +#define RATE_MATCH_NONE 0 +/* The phy sends pause frames to throttle the MAC. */ +#define RATE_MATCH_PAUSE 1 +/* The phy asserts CRS to prevent the MAC from transmitting. */ +#define RATE_MATCH_CRS 2 +/* The MAC is programmed with a sufficiently-large IPG. */ +#define RATE_MATCH_OPEN_LOOP 3 + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 @@ -2033,8 +2098,8 @@ enum ethtool_reset_flags { * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. * @master_slave_state: Master/slave port state. + * @rate_matching: Rate adaptation performed by the PHY * @reserved: Reserved for future use; see the note on reserved space. - * @reserved1: Reserved for future use; see the note on reserved space. * @link_mode_masks: Variable length bitmaps. * * If autonegotiation is disabled, the speed and @duplex represent the @@ -2085,7 +2150,7 @@ struct ethtool_link_settings { uint8_t transceiver; uint8_t master_slave_cfg; uint8_t master_slave_state; - uint8_t reserved1[1]; + uint8_t rate_matching; uint32_t reserved[7]; uint32_t link_mode_masks[]; /* layout of link_mode_masks fields: diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index bda06258be..a1af78d989 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -194,6 +194,13 @@ * - add FUSE_SECURITY_CTX init flag * - add security context to create, mkdir, symlink, and mknod requests * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX + * + * 7.37 + * - add FUSE_TMPFILE + * + * 7.38 + * - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry + * - add FOPEN_PARALLEL_DIRECT_WRITES */ #ifndef _LINUX_FUSE_H @@ -225,7 +232,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 36 +#define FUSE_KERNEL_MINOR_VERSION 38 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -297,6 +304,7 @@ struct fuse_file_lock { * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -304,6 +312,7 @@ struct fuse_file_lock { #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) +#define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) /** * INIT request/reply flags @@ -484,6 +493,12 @@ struct fuse_file_lock { */ #define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) +/** + * notify_inval_entry flags + * FUSE_EXPIRE_ONLY + */ +#define FUSE_EXPIRE_ONLY (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -533,6 +548,7 @@ enum fuse_opcode { FUSE_SETUPMAPPING = 48, FUSE_REMOVEMAPPING = 49, FUSE_SYNCFS = 50, + FUSE_TMPFILE = 51, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -911,7 +927,7 @@ struct fuse_notify_inval_inode_out { struct fuse_notify_inval_entry_out { uint64_t parent; uint32_t namelen; - uint32_t padding; + uint32_t flags; }; struct fuse_notify_delete_out { diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 50790aee5a..f6bab08540 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -614,6 +614,9 @@ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ +#define KEY_CAMERA_ACCESS_ENABLE 0x24b /* Enables programmatic access to camera devices. (HUTRR72) */ +#define KEY_CAMERA_ACCESS_DISABLE 0x24c /* Disables programmatic access to camera devices. (HUTRR72) */ +#define KEY_CAMERA_ACCESS_TOGGLE 0x24d /* Toggles the current state of the camera access control. (HUTRR72) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ @@ -862,6 +865,7 @@ #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 +#define ABS_PROFILE 0x21 #define ABS_MISC 0x28 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 57b8e2ffb1..85ab127881 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -1058,6 +1058,7 @@ /* Precision Time Measurement */ #define PCI_PTM_CAP 0x04 /* PTM Capability */ #define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_RES 0x00000002 /* Responder capable */ #define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ #define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ #define PCI_PTM_CTRL 0x08 /* PTM Control */ @@ -1119,6 +1120,7 @@ #define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */ #define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */ #define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */ +#define PCI_DOE_CAP_SIZEOF 0x18 /* Size of DOE register block */ /* DOE Data Object - note not actually registers */ #define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h index 2dcc90826a..e81715cd70 100644 --- a/include/standard-headers/linux/virtio_blk.h +++ b/include/standard-headers/linux/virtio_blk.h @@ -40,6 +40,7 @@ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ +#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -119,6 +120,21 @@ struct virtio_blk_config { uint8_t write_zeroes_may_unmap; uint8_t unused1[3]; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ + /* + * The maximum secure erase sectors (in 512-byte sectors) for + * one segment. + */ + __virtio32 max_secure_erase_sectors; + /* + * The maximum number of secure erase segments in a + * secure erase command. + */ + __virtio32 max_secure_erase_seg; + /* Secure erase commands must be aligned to this number of sectors. */ + __virtio32 secure_erase_sector_alignment; + } QEMU_PACKED; /* @@ -153,6 +169,9 @@ struct virtio_blk_config { /* Write zeroes command */ #define VIRTIO_BLK_T_WRITE_ZEROES 13 +/* Secure erase command */ +#define VIRTIO_BLK_T_SECURE_ERASE 14 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h index 245e1eff4b..a11ecc3f92 100644 --- a/include/standard-headers/linux/virtio_bt.h +++ b/include/standard-headers/linux/virtio_bt.h @@ -9,6 +9,7 @@ #define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */ #define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */ #define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */ +#define VIRTIO_BT_F_CONFIG_V2 3 /* Use second version configuration */ enum virtio_bt_config_type { VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, @@ -28,4 +29,11 @@ struct virtio_bt_config { uint16_t msft_opcode; } QEMU_PACKED; +struct virtio_bt_config_v2 { + uint8_t type; + uint8_t alignment; + uint16_t vendor; + uint16_t msft_opcode; +}; + #endif /* _LINUX_VIRTIO_BT_H */ diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index 42c68caf71..c0e797067a 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -57,6 +57,9 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ +#define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */ +#define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ +#define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -130,6 +133,7 @@ struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */ #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ uint8_t gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 4bf2d7246e..a7cfefb3a8 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -43,6 +43,7 @@ #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h index 4f3d5aaa11..de687009bf 100644 --- a/linux-headers/asm-generic/hugetlb_encode.h +++ b/linux-headers/asm-generic/hugetlb_encode.h @@ -20,18 +20,18 @@ #define HUGETLB_FLAG_ENCODE_SHIFT 26 #define HUGETLB_FLAG_ENCODE_MASK 0x3f -#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h index 6c1aa92a92..6ce1f1ceb4 100644 --- a/linux-headers/asm-generic/mman-common.h +++ b/linux-headers/asm-generic/mman-common.h @@ -77,6 +77,8 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index 1be428663c..c6e1fc77c9 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -103,6 +103,8 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index 7351417afd..92af6f3f05 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -48,6 +48,10 @@ struct kvm_sregs { /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_config { unsigned long isa; + unsigned long zicbom_block_size; + unsigned long mvendorid; + unsigned long marchid; + unsigned long mimpid; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -98,6 +102,9 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_M, KVM_RISCV_ISA_EXT_SVPBMT, KVM_RISCV_ISA_EXT_SSTC, + KVM_RISCV_ISA_EXT_SVINVAL, + KVM_RISCV_ISA_EXT_ZIHINTPAUSE, + KVM_RISCV_ISA_EXT_ZICBOM, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 46de10a809..2747d2ce14 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -53,14 +53,6 @@ /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ @@ -214,6 +206,8 @@ struct kvm_msr_list { struct kvm_msr_filter_range { #define KVM_MSR_FILTER_READ (1 << 0) #define KVM_MSR_FILTER_WRITE (1 << 1) +#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \ + KVM_MSR_FILTER_WRITE) __u32 flags; __u32 nmsrs; /* number of msrs in bitmap */ __u32 base; /* MSR index the bitmap starts at */ @@ -224,6 +218,7 @@ struct kvm_msr_filter_range { struct kvm_msr_filter { #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) +#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY) __u32 flags; struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; }; diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index ebdafa576d..30b2795d10 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -86,14 +86,6 @@ struct kvm_debug_guest { /* *** End of deprecated interfaces *** */ -/* for KVM_CREATE_MEMORY_REGION */ -struct kvm_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ -}; - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -104,9 +96,9 @@ struct kvm_userspace_memory_region { }; /* - * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, - * other bits are reserved for kvm internal use which are defined in - * include/linux/kvm_host.h. + * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for + * userspace, other bits are reserved for kvm internal use which are defined + * in include/linux/kvm_host.h. */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) @@ -483,6 +475,9 @@ struct kvm_run { #define KVM_MSR_EXIT_REASON_INVAL (1 << 0) #define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) #define KVM_MSR_EXIT_REASON_FILTER (1 << 2) +#define KVM_MSR_EXIT_REASON_VALID_MASK (KVM_MSR_EXIT_REASON_INVAL | \ + KVM_MSR_EXIT_REASON_UNKNOWN | \ + KVM_MSR_EXIT_REASON_FILTER) __u32 reason; /* kernel -> user */ __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ @@ -1175,6 +1170,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 +#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 +#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #ifdef KVM_CAP_IRQ_ROUTING @@ -1264,6 +1262,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) struct kvm_xen_hvm_config { __u32 flags; @@ -1434,18 +1433,12 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; -/* - * ioctls for VM fds - */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -/* KVM_SET_MEMORY_ALIAS is obsolete: */ -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ @@ -1737,6 +1730,8 @@ enum pv_cmd_id { KVM_PV_UNSHARE_ALL, KVM_PV_INFO, KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, }; struct kvm_pv_cmd { @@ -1767,6 +1762,7 @@ struct kvm_xen_hvm_attr { union { __u8 long_mode; __u8 vector; + __u8 runstate_update_flag; struct { __u64 gfn; } shared_info; @@ -1807,6 +1803,8 @@ struct kvm_xen_hvm_attr { /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 #define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h index 213b2a0f70..e60dfd8907 100644 --- a/linux-headers/linux/psci.h +++ b/linux-headers/linux/psci.h @@ -48,12 +48,26 @@ #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) +#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) +#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) +#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) +#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) +#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) + #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) +#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) +#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) +#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) + #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index a3a377cd44..ba5d0df52f 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -12,6 +12,10 @@ #include <linux/types.h> +/* ioctls for /dev/userfaultfd */ +#define USERFAULTFD_IOC 0xAA +#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) + /* * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index ede44b5572..c59692ce0b 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -819,12 +819,20 @@ struct vfio_device_feature { * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P * is supported in addition to the STOP_COPY states. * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that + * PRE_COPY is supported in addition to the STOP_COPY states. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY + * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported + * in addition to the STOP_COPY states. + * * Other combinations of flags have behavior to be defined in the future. */ struct vfio_device_feature_migration { __aligned_u64 flags; #define VFIO_MIGRATION_STOP_COPY (1 << 0) #define VFIO_MIGRATION_P2P (1 << 1) +#define VFIO_MIGRATION_PRE_COPY (1 << 2) }; #define VFIO_DEVICE_FEATURE_MIGRATION 1 @@ -875,8 +883,13 @@ struct vfio_device_feature_mig_state { * RESUMING - The device is stopped and is loading a new internal state * ERROR - The device has failed and must be reset * - * And 1 optional state to support VFIO_MIGRATION_P2P: + * And optional states to support VFIO_MIGRATION_P2P: * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * And VFIO_MIGRATION_PRE_COPY: + * PRE_COPY - The device is running normally but tracking internal state + * changes + * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY: + * PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA * * The FSM takes actions on the arcs between FSM states. The driver implements * the following behavior for the FSM arcs: @@ -908,20 +921,48 @@ struct vfio_device_feature_mig_state { * * To abort a RESUMING session the device must be reset. * + * PRE_COPY -> RUNNING * RUNNING_P2P -> RUNNING * While in RUNNING the device is fully operational, the device may generate * interrupts, DMA, respond to MMIO, all vfio device regions are functional, * and the device may advance its internal state. * + * The PRE_COPY arc will terminate a data transfer session. + * + * PRE_COPY_P2P -> RUNNING_P2P * RUNNING -> RUNNING_P2P * STOP -> RUNNING_P2P * While in RUNNING_P2P the device is partially running in the P2P quiescent * state defined below. * + * The PRE_COPY_P2P arc will terminate a data transfer session. + * + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P * STOP -> STOP_COPY - * This arc begin the process of saving the device state and will return a - * new data_fd. + * PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states + * which share a data transfer session. Moving between these states alters + * what is streamed in session, but does not terminate or otherwise affect + * the associated fd. + * + * These arcs begin the process of saving the device state and will return a + * new data_fd. The migration driver may perform actions such as enabling + * dirty logging of device state when entering PRE_COPY or PER_COPY_P2P. + * + * Each arc does not change the device operation, the device remains + * RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below + * in PRE_COPY_P2P -> STOP_COPY. + * + * PRE_COPY -> PRE_COPY_P2P + * Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above. + * However, while in the PRE_COPY_P2P state, the device is partially running + * in the P2P quiescent state defined below, like RUNNING_P2P. + * + * PRE_COPY_P2P -> PRE_COPY + * This arc allows returning the device to a full RUNNING behavior while + * continuing all the behaviors of PRE_COPY. * + * PRE_COPY_P2P -> STOP_COPY * While in the STOP_COPY state the device has the same behavior as STOP * with the addition that the data transfers session continues to stream the * migration state. End of stream on the FD indicates the entire device @@ -939,6 +980,13 @@ struct vfio_device_feature_mig_state { * device state for this arc if required to prepare the device to receive the * migration data. * + * STOP_COPY -> PRE_COPY + * STOP_COPY -> PRE_COPY_P2P + * These arcs are not permitted and return error if requested. Future + * revisions of this API may define behaviors for these arcs, in this case + * support will be discoverable by a new flag in + * VFIO_DEVICE_FEATURE_MIGRATION. + * * any -> ERROR * ERROR cannot be specified as a device state, however any transition request * can be failed with an errno return and may then move the device_state into @@ -950,7 +998,7 @@ struct vfio_device_feature_mig_state { * The optional peer to peer (P2P) quiescent state is intended to be a quiescent * state for the device for the purposes of managing multiple devices within a * user context where peer-to-peer DMA between devices may be active. The - * RUNNING_P2P states must prevent the device from initiating + * RUNNING_P2P and PRE_COPY_P2P states must prevent the device from initiating * any new P2P DMA transactions. If the device can identify P2P transactions * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration * driver must complete any such outstanding operations prior to completing the @@ -963,6 +1011,8 @@ struct vfio_device_feature_mig_state { * above FSM arcs. As there are multiple paths through the FSM arcs the path * should be selected based on the following rules: * - Select the shortest path. + * - The path cannot have saving group states as interior arcs, only + * starting/end states. * Refer to vfio_mig_get_next_state() for the result of the algorithm. * * The automatic transit through the FSM arcs that make up the combination @@ -976,6 +1026,9 @@ struct vfio_device_feature_mig_state { * support them. The user can discover if these states are supported by using * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can * avoid knowing about these optional states if the kernel driver supports them. + * + * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY + * is not present. */ enum vfio_device_mig_state { VFIO_DEVICE_STATE_ERROR = 0, @@ -984,8 +1037,225 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_STOP_COPY = 3, VFIO_DEVICE_STATE_RESUMING = 4, VFIO_DEVICE_STATE_RUNNING_P2P = 5, + VFIO_DEVICE_STATE_PRE_COPY = 6, + VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, +}; + +/** + * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21) + * + * This ioctl is used on the migration data FD in the precopy phase of the + * migration data transfer. It returns an estimate of the current data sizes + * remaining to be transferred. It allows the user to judge when it is + * appropriate to leave PRE_COPY for STOP_COPY. + * + * This ioctl is valid only in PRE_COPY states and kernel driver should + * return -EINVAL from any other migration state. + * + * The vfio_precopy_info data structure returned by this ioctl provides + * estimates of data available from the device during the PRE_COPY states. + * This estimate is split into two categories, initial_bytes and + * dirty_bytes. + * + * The initial_bytes field indicates the amount of initial precopy + * data available from the device. This field should have a non-zero initial + * value and decrease as migration data is read from the device. + * It is recommended to leave PRE_COPY for STOP_COPY only after this field + * reaches zero. Leaving PRE_COPY earlier might make things slower. + * + * The dirty_bytes field tracks device state changes relative to data + * previously retrieved. This field starts at zero and may increase as + * the internal device state is modified or decrease as that modified + * state is read from the device. + * + * Userspace may use the combination of these fields to estimate the + * potential data size available during the PRE_COPY phases, as well as + * trends relative to the rate the device is dirtying its internal + * state, but these fields are not required to have any bearing relative + * to the data size available during the STOP_COPY phase. + * + * Drivers have a lot of flexibility in when and what they transfer during the + * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO. + * + * During pre-copy the migration data FD has a temporary "end of stream" that is + * reached when both initial_bytes and dirty_byte are zero. For instance, this + * may indicate that the device is idle and not currently dirtying any internal + * state. When read() is done on this temporary end of stream the kernel driver + * should return ENOMSG from read(). Userspace can wait for more data (which may + * never come) by using poll. + * + * Once in STOP_COPY the migration data FD has a permanent end of stream + * signaled in the usual way by read() always returning 0 and poll always + * returning readable. ENOMSG may not be returned in STOP_COPY. + * Support for this ioctl is mandatory if a driver claims to support + * VFIO_MIGRATION_PRE_COPY. + * + * Return: 0 on success, -1 and errno set on failure. + */ +struct vfio_precopy_info { + __u32 argsz; + __u32 flags; + __aligned_u64 initial_bytes; + __aligned_u64 dirty_bytes; +}; + +#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21) + +/* + * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power + * state with the platform-based power management. Device use of lower power + * states depends on factors managed by the runtime power management core, + * including system level support and coordinating support among dependent + * devices. Enabling device low power entry does not guarantee lower power + * usage by the device, nor is a mechanism provided through this feature to + * know the current power state of the device. If any device access happens + * (either from the host or through the vfio uAPI) when the device is in the + * low power state, then the host will move the device out of the low power + * state as necessary prior to the access. Once the access is completed, the + * device may re-enter the low power state. For single shot low power support + * with wake-up notification, see + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd + * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after + * calling LOW_POWER_EXIT. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 + +/* + * This device feature has the same behavior as + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user + * provides an eventfd for wake-up notification. When the device moves out of + * the low power state for the wake-up, the host will not allow the device to + * re-enter a low power state without a subsequent user call to one of the low + * power entry device feature IOCTLs. Access to mmap'd device regions is + * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the + * low power exit. The low power exit can happen either through LOW_POWER_EXIT + * or through any other access (where the wake-up notification has been + * generated). The access to mmap'd device regions will not trigger low power + * exit. + * + * The notification through the provided eventfd will be generated only when + * the device has entered and is resumed from a low power state after + * calling this device feature IOCTL. A device that has not entered low power + * state, as managed through the runtime power management core, will not + * generate a notification through the provided eventfd on access. Calling the + * LOW_POWER_EXIT feature is optional in the case where notification has been + * signaled on the provided eventfd that a resume from low power has occurred. + */ +struct vfio_device_low_power_entry_with_wakeup { + __s32 wakeup_eventfd; + __u32 reserved; +}; + +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as + * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. + * This device feature IOCTL may itself generate a wakeup eventfd notification + * in the latter case if the device had previously entered a low power state. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 + +/* + * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. + * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports + * DMA logging. + * + * DMA logging allows a device to internally record what DMAs the device is + * initiating and report them back to userspace. It is part of the VFIO + * migration infrastructure that allows implementing dirty page tracking + * during the pre copy phase of live migration. Only DMA WRITEs are logged, + * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. + * + * When DMA logging is started a range of IOVAs to monitor is provided and the + * device can optimize its logging to cover only the IOVA range given. Each + * DMA that the device initiates inside the range will be logged by the device + * for later retrieval. + * + * page_size is an input that hints what tracking granularity the device + * should try to achieve. If the device cannot do the hinted page size then + * it's the driver choice which page size to pick based on its support. + * On output the device will return the page size it selected. + * + * ranges is a pointer to an array of + * struct vfio_device_feature_dma_logging_range. + * + * The core kernel code guarantees to support by minimum num_ranges that fit + * into a single kernel page. User space can try higher values but should give + * up if the above can't be achieved as of some driver limitations. + * + * A single call to start device DMA logging can be issued and a matching stop + * should follow at the end. Another start is not allowed in the meantime. + */ +struct vfio_device_feature_dma_logging_control { + __aligned_u64 page_size; + __u32 num_ranges; + __u32 __reserved; + __aligned_u64 ranges; }; +struct vfio_device_feature_dma_logging_range { + __aligned_u64 iova; + __aligned_u64 length; +}; + +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 + +/* + * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started + * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START + */ +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 + +/* + * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log + * + * Query the device's DMA log for written pages within the given IOVA range. + * During querying the log is cleared for the IOVA range. + * + * bitmap is a pointer to an array of u64s that will hold the output bitmap + * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits + * is given by: + * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) + * + * The input page_size can be any power of two value and does not have to + * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver + * will format its internal logging to match the reporting page size, possibly + * by replicating bits if the internal page size is lower than requested. + * + * The LOGGING_REPORT will only set bits in the bitmap and never clear or + * perform any initialization of the user provided bitmap. + * + * If any error is returned userspace should assume that the dirty log is + * corrupted. Error recovery is to consider all memory dirty and try to + * restart the dirty tracking, or to abort/restart the whole migration. + * + * If DMA logging is not enabled, an error will be returned. + * + */ +struct vfio_device_feature_dma_logging_report { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 bitmap; +}; + +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 + +/* + * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will + * be required to complete stop copy. + * + * Note: Can be called on each device state. + */ + +struct vfio_device_feature_mig_data_size { + __aligned_u64 stop_copy_length; +}; + +#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- 2.26.3