From 8dd765a5d769c521d73931850d1c8708fbc490cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 16 Oct 2023 12:41:24 +0530 Subject: xen: Make struct privcmd_irqfd's layout architecture independent Using indirect pointers in an ioctl command argument means that the layout is architecture specific, in particular we can't use the same one from 32-bit compat tasks. The general recommendation is to have __u64 members and use u64_to_user_ptr() to access it from the kernel if we are unable to avoid the pointers altogether. Fixes: f8941e6c4c71 ("xen: privcmd: Add support for irqfd") Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/all/268a2031-63b8-4c7d-b1e5-8ab83ca80b4a@app.fastmail.com/ Signed-off-by: Viresh Kumar Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/a4ef0d4a68fc858b34a81fd3f9877d9b6898eb77.1697439990.git.viresh.kumar@linaro.org Signed-off-by: Juergen Gross --- include/uapi/xen/privcmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 375718ba4ab6..b143fafce84d 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -102,7 +102,7 @@ struct privcmd_mmap_resource { #define PRIVCMD_IRQFD_FLAG_DEASSIGN (1 << 0) struct privcmd_irqfd { - void __user *dm_op; + __u64 dm_op; __u32 size; /* Size of structure pointed by dm_op */ __u32 fd; __u32 flags; -- cgit v1.2.3 From 767e33ca47dd8ace7769e0b0c19d7b0c38b2f72d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 16 Oct 2023 12:41:25 +0530 Subject: xen: irqfd: Use _IOW instead of the internal _IOC() macro _IOC() an internal helper that we should not use in driver code. In particular, we got the data direction wrong here, which breaks a number of tools, as having "_IOC_NONE" should never be paired with a nonzero size. Use _IOW() instead. Fixes: f8941e6c4c71 ("xen: privcmd: Add support for irqfd") Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/all/268a2031-63b8-4c7d-b1e5-8ab83ca80b4a@app.fastmail.com/ Signed-off-by: Viresh Kumar Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/599ca6f1b9dd2f0e6247ea37bee3ea6827404b6d.1697439990.git.viresh.kumar@linaro.org Signed-off-by: Juergen Gross --- include/uapi/xen/privcmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index b143fafce84d..e145bca5105c 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -138,6 +138,6 @@ struct privcmd_irqfd { #define IOCTL_PRIVCMD_MMAP_RESOURCE \ _IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource)) #define IOCTL_PRIVCMD_IRQFD \ - _IOC(_IOC_NONE, 'P', 8, sizeof(struct privcmd_irqfd)) + _IOW('P', 8, struct privcmd_irqfd) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ -- cgit v1.2.3 From f0d7db7b33243d2aeeff14dbdde4ccc0687ef257 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 16 Oct 2023 12:41:27 +0530 Subject: xen: privcmd: Add support for ioeventfd Virtio guests send VIRTIO_MMIO_QUEUE_NOTIFY notification when they need to notify the backend of an update to the status of the virtqueue. The backend or another entity, polls the MMIO address for updates to know when the notification is sent. It works well if the backend does this polling by itself. But as we move towards generic backend implementations, we end up implementing this in a separate user-space program. Generally, the Virtio backends are implemented to work with the Eventfd based mechanism. In order to make such backends work with Xen, another software layer needs to do the polling and send an event via eventfd to the backend once the notification from guest is received. This results in an extra context switch. This is not a new problem in Linux though. It is present with other hypervisors like KVM, etc. as well. The generic solution implemented in the kernel for them is to provide an IOCTL call to pass the address to poll and eventfd, which lets the kernel take care of polling and raise an event on the eventfd, instead of handling this in user space (which involves an extra context switch). This patch adds similar support for xen. Inspired by existing implementations for KVM, etc.. This also copies ioreq.h header file (only struct ioreq and related macros) from Xen's source tree (Top commit 5d84f07fe6bf ("xen/pci: drop remaining uses of bool_t")). Signed-off-by: Viresh Kumar Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/b20d83efba6453037d0c099912813c79c81f7714.1697439990.git.viresh.kumar@linaro.org Signed-off-by: Juergen Gross --- include/uapi/xen/privcmd.h | 18 ++++++++++++++ include/xen/interface/hvm/ioreq.h | 51 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 include/xen/interface/hvm/ioreq.h (limited to 'include') diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index e145bca5105c..8b8c5d1420fe 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -110,6 +110,22 @@ struct privcmd_irqfd { __u8 pad[2]; }; +/* For privcmd_ioeventfd::flags */ +#define PRIVCMD_IOEVENTFD_FLAG_DEASSIGN (1 << 0) + +struct privcmd_ioeventfd { + __u64 ioreq; + __u64 ports; + __u64 addr; + __u32 addr_len; + __u32 event_fd; + __u32 vcpus; + __u32 vq; + __u32 flags; + domid_t dom; + __u8 pad[2]; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -139,5 +155,7 @@ struct privcmd_irqfd { _IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource)) #define IOCTL_PRIVCMD_IRQFD \ _IOW('P', 8, struct privcmd_irqfd) +#define IOCTL_PRIVCMD_IOEVENTFD \ + _IOW('P', 9, struct privcmd_ioeventfd) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/interface/hvm/ioreq.h b/include/xen/interface/hvm/ioreq.h new file mode 100644 index 000000000000..b02cfeae7eb5 --- /dev/null +++ b/include/xen/interface/hvm/ioreq.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. + */ + +#ifndef __XEN_PUBLIC_HVM_IOREQ_H__ +#define __XEN_PUBLIC_HVM_IOREQ_H__ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_PCI_CONFIG 2 +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq. + * + * For I/O type IOREQ_TYPE_PCI_CONFIG, the physical address is formatted + * as follows: + * + * 63....48|47..40|39..35|34..32|31........0 + * SEGMENT |BUS |DEV |FN |OFFSET + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; + uint8_t state:4; + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t df:1; + uint8_t _pad1:1; + uint8_t type; /* I/O type */ +}; + +#endif /* __XEN_PUBLIC_HVM_IOREQ_H__ */ -- cgit v1.2.3