summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/acpi/cppc_acpi.h5
-rw-r--r--include/asm-generic/div64.h14
-rw-r--r--include/asm-generic/early_ioremap.h6
-rw-r--r--include/asm-generic/mshyperv.h21
-rw-r--r--include/asm-generic/pci_iomap.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h4
-rw-r--r--include/drm/ttm/ttm_tt.h3
-rw-r--r--include/linux/bootconfig.h4
-rw-r--r--include/linux/cacheinfo.h18
-rw-r--r--include/linux/ceph/ceph_fs.h1
-rw-r--r--include/linux/cgroup-defs.h107
-rw-r--r--include/linux/cgroup.h22
-rw-r--r--include/linux/compat.h39
-rw-r--r--include/linux/compiler-clang.h13
-rw-r--r--include/linux/compiler-gcc.h11
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/compiler_attributes.h43
-rw-r--r--include/linux/compiler_types.h6
-rw-r--r--include/linux/cpu.h6
-rw-r--r--include/linux/cpufreq.h75
-rw-r--r--include/linux/cpuhotplug.h132
-rw-r--r--include/linux/damon.h268
-rw-r--r--include/linux/dax.h41
-rw-r--r--include/linux/dmaengine.h3
-rw-r--r--include/linux/energy_model.h8
-rw-r--r--include/linux/eventpoll.h18
-rw-r--r--include/linux/file.h7
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/highmem-internal.h27
-rw-r--r--include/linux/hugetlb.h9
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memory.h55
-rw-r--r--include/linux/memory_hotplug.h34
-rw-r--r--include/linux/mmap_lock.h17
-rw-r--r--include/linux/mmzone.h19
-rw-r--r--include/linux/once.h2
-rw-r--r--include/linux/overflow.h138
-rw-r--r--include/linux/page-flags.h17
-rw-r--r--include/linux/page_ext.h2
-rw-r--r--include/linux/page_idle.h6
-rw-r--r--include/linux/pagemap.h7
-rw-r--r--include/linux/pci-acpi.h3
-rw-r--r--include/linux/pci-epc.h57
-rw-r--r--include/linux/pci-epf.h16
-rw-r--r--include/linux/pci.h159
-rw-r--r--include/linux/pci_hotplug.h2
-rw-r--r--include/linux/pci_ids.h3
-rw-r--r--include/linux/platform_data/dma-dw.h21
-rw-r--r--include/linux/pwm.h2
-rw-r--r--include/linux/qcom_scm.h14
-rw-r--r--include/linux/rwsem.h12
-rw-r--r--include/linux/sched/user.h3
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/slub_def.h6
-rw-r--r--include/linux/syscalls.h3
-rw-r--r--include/linux/thermal.h7
-rw-r--r--include/linux/threads.h2
-rw-r--r--include/linux/time64.h9
-rw-r--r--include/linux/uaccess.h10
-rw-r--r--include/linux/uio.h6
-rw-r--r--include/linux/units.h10
-rw-r--r--include/linux/vdpa.h62
-rw-r--r--include/linux/vhost_iotlb.h3
-rw-r--r--include/linux/vmalloc.h3
-rw-r--r--include/net/dsa.h5
-rw-r--r--include/trace/events/damon.h43
-rw-r--r--include/trace/events/mmflags.h2
-rw-r--r--include/trace/events/page_ref.h4
-rw-r--r--include/uapi/asm-generic/unistd.h10
-rw-r--r--include/uapi/linux/cxl_mem.h2
-rw-r--r--include/uapi/linux/idxd.h24
-rw-r--r--include/uapi/linux/vduse.h306
-rw-r--r--include/uapi/linux/virtio_ids.h9
-rw-r--r--include/uapi/linux/virtio_pcidev.h5
-rw-r--r--include/uapi/linux/virtio_vsock.h3
-rw-r--r--include/uapi/misc/habanalabs.h186
76 files changed, 1510 insertions, 721 deletions
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 9f4985b4d64d..bc159a9b4a73 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -135,6 +135,7 @@ struct cppc_cpudata {
#ifdef CONFIG_ACPI_CPPC_LIB
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
+extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
@@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
return -ENOTSUPP;
}
+static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+ return -ENOTSUPP;
+}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
return -ENOTSUPP;
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index cd905b44a630..13f5aa68a455 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -57,17 +57,11 @@
/*
* If the divisor happens to be constant, we determine the appropriate
* inverse at compile time to turn the division into a few inline
- * multiplications which ought to be much faster. And yet only if compiling
- * with a sufficiently recent gcc version to perform proper 64-bit constant
- * propagation.
+ * multiplications which ought to be much faster.
*
* (It is unfortunate that gcc doesn't perform all this internally.)
*/
-#ifndef __div64_const32_is_OK
-#define __div64_const32_is_OK (__GNUC__ >= 4)
-#endif
-
#define __div64_const32(n, ___b) \
({ \
/* \
@@ -230,8 +224,7 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
is_power_of_2(__base)) { \
__rem = (n) & (__base - 1); \
(n) >>= ilog2(__base); \
- } else if (__div64_const32_is_OK && \
- __builtin_constant_p(__base) && \
+ } else if (__builtin_constant_p(__base) && \
__base != 0) { \
uint32_t __res_lo, __n_lo = (n); \
(n) = __div64_const32(n, __base); \
@@ -241,8 +234,9 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
} else if (likely(((n) >> 32) == 0)) { \
__rem = (uint32_t)(n) % __base; \
(n) = (uint32_t)(n) / __base; \
- } else \
+ } else { \
__rem = __div64_32(&(n), __base); \
+ } \
__rem; \
})
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h
index 9def22e6e2b3..9d0479f50f97 100644
--- a/include/asm-generic/early_ioremap.h
+++ b/include/asm-generic/early_ioremap.h
@@ -19,12 +19,6 @@ extern void *early_memremap_prot(resource_size_t phys_addr,
extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void early_memunmap(void *addr, unsigned long size);
-/*
- * Weak function called by early_ioremap_reset(). It does nothing, but
- * architectures may provide their own version to do any needed cleanups.
- */
-extern void early_ioremap_shutdown(void);
-
#if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU)
/* Arch-specific initialization */
extern void early_ioremap_init(void);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index c1ab6a6e72b5..d3eae6cdbacb 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -197,10 +197,12 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number)
return hv_vp_index[cpu_number];
}
-static inline int cpumask_to_vpset(struct hv_vpset *vpset,
- const struct cpumask *cpus)
+static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
+ const struct cpumask *cpus,
+ bool exclude_self)
{
int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+ int this_cpu = smp_processor_id();
/* valid_bank_mask can represent up to 64 banks */
if (hv_max_vp_index / 64 >= 64)
@@ -218,6 +220,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
* Some banks may end up being empty but this is acceptable.
*/
for_each_cpu(cpu, cpus) {
+ if (exclude_self && cpu == this_cpu)
+ continue;
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu == VP_INVAL)
return -1;
@@ -232,6 +236,19 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
return nr_bank;
}
+static inline int cpumask_to_vpset(struct hv_vpset *vpset,
+ const struct cpumask *cpus)
+{
+ return __cpumask_to_vpset(vpset, cpus, false);
+}
+
+static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset,
+ const struct cpumask *cpus)
+{
+ WARN_ON_ONCE(preemptible());
+ return __cpumask_to_vpset(vpset, cpus, true);
+}
+
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h
index d4f16dcc2ed7..df636c6d8e6c 100644
--- a/include/asm-generic/pci_iomap.h
+++ b/include/asm-generic/pci_iomap.h
@@ -52,4 +52,4 @@ static inline void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar,
}
#endif
-#endif /* __ASM_GENERIC_IO_H */
+#endif /* __ASM_GENERIC_PCI_IOMAP_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index aa50bf2959fe..f2984af2b85b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -116,11 +116,7 @@
* GCC 4.5 and later have a 32 bytes section alignment for structures.
* Except GCC 4.9, that feels the need to align on 64 bytes.
*/
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 9
-#define STRUCT_ALIGNMENT 64
-#else
#define STRUCT_ALIGNMENT 32
-#endif
#define STRUCT_ALIGN() . = ALIGN(STRUCT_ALIGNMENT)
/*
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 818680c6a8ed..b20e89d321b0 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -27,11 +27,12 @@
#ifndef _TTM_TT_H_
#define _TTM_TT_H_
+#include <linux/pagemap.h>
#include <linux/types.h>
#include <drm/ttm/ttm_caching.h>
#include <drm/ttm/ttm_kmap_iter.h>
-struct ttm_bo_device;
+struct ttm_device;
struct ttm_tt;
struct ttm_resource;
struct ttm_buffer_object;
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index abe089c27529..537e1b991f11 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -110,7 +110,7 @@ static inline __init bool xbc_node_is_leaf(struct xbc_node *node)
}
/* Tree-based key-value access APIs */
-struct xbc_node * __init xbc_node_find_child(struct xbc_node *parent,
+struct xbc_node * __init xbc_node_find_subkey(struct xbc_node *parent,
const char *key);
const char * __init xbc_node_find_value(struct xbc_node *parent,
@@ -148,7 +148,7 @@ xbc_find_value(const char *key, struct xbc_node **vnode)
*/
static inline struct xbc_node * __init xbc_find_node(const char *key)
{
- return xbc_node_find_child(NULL, key);
+ return xbc_node_find_subkey(NULL, key);
}
/**
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 4f72b47973c3..2f909ed084c6 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -79,24 +79,6 @@ struct cpu_cacheinfo {
bool cpu_map_populated;
};
-/*
- * Helpers to make sure "func" is executed on the cpu whose cache
- * attributes are being detected
- */
-#define DEFINE_SMP_CALL_CACHE_FUNCTION(func) \
-static inline void _##func(void *ret) \
-{ \
- int cpu = smp_processor_id(); \
- *(int *)ret = __##func(cpu); \
-} \
- \
-int func(unsigned int cpu) \
-{ \
- int ret; \
- smp_call_function_single(cpu, _##func, &ret, true); \
- return ret; \
-}
-
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
int init_cache_level(unsigned int cpu);
int populate_cache_leaves(unsigned int cpu);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index e41a811026f6..bc2699feddbe 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -299,6 +299,7 @@ enum {
CEPH_SESSION_FLUSHMSG_ACK,
CEPH_SESSION_FORCE_RO,
CEPH_SESSION_REJECT,
+ CEPH_SESSION_REQUEST_FLUSH_MDLOG,
};
extern const char *ceph_session_op_name(int op);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index e1c705fdfa7c..db2e147e069f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
* sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
* per-socket cgroup information except for memcg association.
*
- * On legacy hierarchies, net_prio and net_cls controllers directly set
- * attributes on each sock which can then be tested by the network layer.
- * On the default hierarchy, each sock is associated with the cgroup it was
- * created in and the networking layer can match the cgroup directly.
- *
- * To avoid carrying all three cgroup related fields separately in sock,
- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
- * On boot, sock_cgroup_data records the cgroup that the sock was created
- * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overridden to carry prioidx and/or
- * classid. The two modes are distinguished by whether the lowest bit is
- * set. Clear bit indicates cgroup pointer while set bit prioidx and
- * classid.
- *
- * While userland may start using net_prio or net_cls at any time, once
- * either is used, cgroup2 matching no longer works. There is no reason to
- * mix the two and this is in line with how legacy and v2 compatibility is
- * handled. On mode switch, cgroup references which are already being
- * pointed to by socks may be leaked. While this can be remedied by adding
- * synchronization around sock_cgroup_data, given that the number of leaked
- * cgroups is bound and highly unlikely to be high, this seems to be the
- * better trade-off.
+ * On legacy hierarchies, net_prio and net_cls controllers directly
+ * set attributes on each sock which can then be tested by the network
+ * layer. On the default hierarchy, each sock is associated with the
+ * cgroup it was created in and the networking layer can match the
+ * cgroup directly.
*/
struct sock_cgroup_data {
- union {
-#ifdef __LITTLE_ENDIAN
- struct {
- u8 is_data : 1;
- u8 no_refcnt : 1;
- u8 unused : 6;
- u8 padding;
- u16 prioidx;
- u32 classid;
- } __packed;
-#else
- struct {
- u32 classid;
- u16 prioidx;
- u8 padding;
- u8 unused : 6;
- u8 no_refcnt : 1;
- u8 is_data : 1;
- } __packed;
+ struct cgroup *cgroup; /* v2 */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ u32 classid; /* v1 */
+#endif
+#ifdef CONFIG_CGROUP_NET_PRIO
+ u16 prioidx; /* v1 */
#endif
- u64 val;
- };
};
-/*
- * There's a theoretical window where the following accessors race with
- * updaters and return part of the previous pointer as the prioidx or
- * classid. Such races are short-lived and the result isn't critical.
- */
static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
{
- /* fallback to 1 which is always the ID of the root cgroup */
- return (skcd->is_data & 1) ? skcd->prioidx : 1;
+#ifdef CONFIG_CGROUP_NET_PRIO
+ return READ_ONCE(skcd->prioidx);
+#else
+ return 1;
+#endif
}
static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{
- /* fallback to 0 which is the unconfigured default classid */
- return (skcd->is_data & 1) ? skcd->classid : 0;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ return READ_ONCE(skcd->classid);
+#else
+ return 0;
+#endif
}
-/*
- * If invoked concurrently, the updaters may clobber each other. The
- * caller is responsible for synchronization.
- */
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx)
{
- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
- if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
- return;
-
- if (!(skcd_buf.is_data & 1)) {
- skcd_buf.val = 0;
- skcd_buf.is_data = 1;
- }
-
- skcd_buf.prioidx = prioidx;
- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_PRIO
+ WRITE_ONCE(skcd->prioidx, prioidx);
+#endif
}
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid)
{
- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
- if (sock_cgroup_classid(&skcd_buf) == classid)
- return;
-
- if (!(skcd_buf.is_data & 1)) {
- skcd_buf.val = 0;
- skcd_buf.is_data = 1;
- }
-
- skcd_buf.classid = classid;
- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ WRITE_ONCE(skcd->classid, classid);
+#endif
}
#else /* CONFIG_SOCK_CGROUP_DATA */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7bf60454a313..75c151413fda 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
*/
#ifdef CONFIG_SOCK_CGROUP_DATA
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-extern spinlock_t cgroup_sk_update_lock;
-#endif
-
-void cgroup_sk_alloc_disable(void);
void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
void cgroup_sk_clone(struct sock_cgroup_data *skcd);
void cgroup_sk_free(struct sock_cgroup_data *skcd);
static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
{
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
- unsigned long v;
-
- /*
- * @skcd->val is 64bit but the following is safe on 32bit too as we
- * just need the lower ulong to be written and read atomically.
- */
- v = READ_ONCE(skcd->val);
-
- if (v & 3)
- return &cgrp_dfl_root.cgrp;
-
- return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
-#else
- return (struct cgroup *)(unsigned long)skcd->val;
-#endif
+ return skcd->cgroup;
}
#else /* CONFIG_CGROUP_DATA */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8e0598c7d1d1..1c758b0e0359 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -395,14 +395,6 @@ struct compat_kexec_segment;
struct compat_mq_attr;
struct compat_msgbuf;
-#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
-
-#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
-
-long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
- unsigned long bitmap_size);
-long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
- unsigned long bitmap_size);
void copy_siginfo_to_external32(struct compat_siginfo *to,
const struct kernel_siginfo *from);
int copy_siginfo_from_user32(kernel_siginfo_t *to,
@@ -519,8 +511,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
struct epoll_event; /* fortunately, this one is fixed-layout */
-extern void __user *compat_alloc_user_space(unsigned long len);
-
int compat_restore_altstack(const compat_stack_t __user *uss);
int __compat_save_altstack(compat_stack_t __user *, unsigned long);
#define unsafe_compat_save_altstack(uss, sp, label) do { \
@@ -807,26 +797,6 @@ asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr
/* mm/fadvise.c: No generic prototype for fadvise64_64 */
/* mm/, CONFIG_MMU only */
-asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
- compat_ulong_t mode,
- compat_ulong_t __user *nmask,
- compat_ulong_t maxnode, compat_ulong_t flags);
-asmlinkage long compat_sys_get_mempolicy(int __user *policy,
- compat_ulong_t __user *nmask,
- compat_ulong_t maxnode,
- compat_ulong_t addr,
- compat_ulong_t flags);
-asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
- compat_ulong_t maxnode);
-asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
- compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
- const compat_ulong_t __user *new_nodes);
-asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages,
- __u32 __user *pages,
- const int __user *nodes,
- int __user *status,
- int flags);
-
asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
compat_pid_t pid, int sig,
struct compat_siginfo __user *uinfo);
@@ -976,6 +946,15 @@ static inline bool in_compat_syscall(void) { return false; }
#endif /* CONFIG_COMPAT */
+#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
+
+#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
+
+long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
+ unsigned long bitmap_size);
+long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
+ unsigned long bitmap_size);
+
/*
* Some legacy ABIs like the i386 one use less than natural alignment for 64-bit
* types, and will need special compat treatment for that. Most architectures
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 49b0ac8b6fd3..3c4de9b6c6e3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -62,19 +62,6 @@
#define __no_sanitize_coverage
#endif
-/*
- * Not all versions of clang implement the type-generic versions
- * of the builtin overflow checkers. Fortunately, clang implements
- * __has_builtin allowing us to avoid awkward version
- * checks. Unfortunately, we don't know which version of gcc clang
- * pretends to be, so the macro may or may not be defined.
- */
-#if __has_builtin(__builtin_mul_overflow) && \
- __has_builtin(__builtin_add_overflow) && \
- __has_builtin(__builtin_sub_overflow)
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
#if __has_feature(shadow_call_stack)
# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb9217fc60af..bd2b881c6b63 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -43,9 +43,6 @@
#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#define __compiletime_warning(message) __attribute__((__warning__(message)))
-#define __compiletime_error(message) __attribute__((__error__(message)))
-
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
#define __latent_entropy __attribute__((latent_entropy))
#endif
@@ -98,10 +95,8 @@
#if GCC_VERSION >= 70000
#define KASAN_ABI_VERSION 5
-#elif GCC_VERSION >= 50000
+#else
#define KASAN_ABI_VERSION 4
-#elif GCC_VERSION >= 40902
-#define KASAN_ABI_VERSION 3
#endif
#if __has_attribute(__no_sanitize_address__)
@@ -128,10 +123,6 @@
#define __no_sanitize_coverage
#endif
-#if GCC_VERSION >= 50100
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
/*
* Turn individual warnings and errors on and off locally, depending
* on version.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b67261a1e3e9..3d5af56337bd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -188,6 +188,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
(typeof(ptr)) (__ptr + (off)); })
#endif
+#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0)
+
#ifndef OPTIMIZER_HIDE_VAR
/* Make the optimizer believe the variable can be manipulated arbitrarily. */
#define OPTIMIZER_HIDE_VAR(var) \
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 2487be0e7199..e6ec63403965 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -21,26 +21,6 @@
*/
/*
- * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
- * In the meantime, to support gcc < 5, we implement __has_attribute
- * by hand.
- */
-#ifndef __has_attribute
-# define __has_attribute(x) __GCC4_has_attribute_##x
-# define __GCC4_has_attribute___assume_aligned__ 1
-# define __GCC4_has_attribute___copy__ 0
-# define __GCC4_has_attribute___designated_init__ 0
-# define __GCC4_has_attribute___externally_visible__ 1
-# define __GCC4_has_attribute___no_caller_saved_registers__ 0
-# define __GCC4_has_attribute___noclone__ 1
-# define __GCC4_has_attribute___no_profile_instrument_function__ 0
-# define __GCC4_has_attribute___nonstring__ 0
-# define __GCC4_has_attribute___no_sanitize_address__ 1
-# define __GCC4_has_attribute___no_sanitize_undefined__ 1
-# define __GCC4_has_attribute___fallthrough__ 0
-#endif
-
-/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
*/
#define __alias(symbol) __attribute__((__alias__(#symbol)))
@@ -74,7 +54,6 @@
* compiler should see some alignment anyway, when the return value is
* massaged by 'flags = ptr & 3; ptr &= ~3;').
*
- * Optional: only supported since gcc >= 4.9
* Optional: not supported by icc
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
@@ -138,6 +117,17 @@
#endif
/*
+ * Optional: only supported since clang >= 14.0
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute
+ */
+#if __has_attribute(__error__)
+# define __compiletime_error(msg) __attribute__((__error__(msg)))
+#else
+# define __compiletime_error(msg)
+#endif
+
+/*
* Optional: not supported by clang
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
@@ -299,6 +289,17 @@
#define __must_check __attribute__((__warn_unused_result__))
/*
+ * Optional: only supported since clang >= 14.0
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warning-function-attribute
+ */
+#if __has_attribute(__warning__)
+# define __compiletime_warning(msg) __attribute__((__warning__(msg)))
+#else
+# define __compiletime_warning(msg)
+#endif
+
+/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
*/
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e4ea86fc584d..b6ff83a714ca 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -294,12 +294,6 @@ struct ftrace_likely_data {
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
#endif
-#ifndef __compiletime_warning
-# define __compiletime_warning(message)
-#endif
-#ifndef __compiletime_error
-# define __compiletime_error(message)
-#endif
#ifdef __OPTIMIZE__
# define __compiletime_assert(condition, msg, prefix, suffix) \
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 94a578a96202..9cf51e41e697 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -143,12 +143,6 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
#endif /* !CONFIG_HOTPLUG_CPU */
-/* Wrappers which go away once all code is converted */
-static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
-static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
-static inline void get_online_cpus(void) { cpus_read_lock(); }
-static inline void put_online_cpus(void) { cpus_read_unlock(); }
-
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
extern void thaw_secondary_cpus(void);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9fd719475fcd..ff88bb3e44fc 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -9,10 +9,14 @@
#define _LINUX_CPUFREQ_H
#include <linux/clk.h>
+#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/completion.h>
#include <linux/kobject.h>
#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pm_opp.h>
#include <linux/pm_qos.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
@@ -365,14 +369,17 @@ struct cpufreq_driver {
int (*suspend)(struct cpufreq_policy *policy);
int (*resume)(struct cpufreq_policy *policy);
- /* Will be called after the driver is fully initialized */
- void (*ready)(struct cpufreq_policy *policy);
-
struct freq_attr **attr;
/* platform specific boost support code */
bool boost_enabled;
int (*set_boost)(struct cpufreq_policy *policy, int state);
+
+ /*
+ * Set by drivers that want to register with the energy model after the
+ * policy is properly initialized, but before the governor is started.
+ */
+ void (*register_em)(struct cpufreq_policy *policy);
};
/* flags */
@@ -995,6 +1002,55 @@ static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy
return count;
}
+
+static inline int parse_perf_domain(int cpu, const char *list_name,
+ const char *cell_name)
+{
+ struct device_node *cpu_np;
+ struct of_phandle_args args;
+ int ret;
+
+ cpu_np = of_cpu_device_node_get(cpu);
+ if (!cpu_np)
+ return -ENODEV;
+
+ ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0,
+ &args);
+ if (ret < 0)
+ return ret;
+
+ of_node_put(cpu_np);
+
+ return args.args[0];
+}
+
+static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
+ const char *cell_name, struct cpumask *cpumask)
+{
+ int target_idx;
+ int cpu, ret;
+
+ ret = parse_perf_domain(pcpu, list_name, cell_name);
+ if (ret < 0)
+ return ret;
+
+ target_idx = ret;
+ cpumask_set_cpu(pcpu, cpumask);
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == pcpu)
+ continue;
+
+ ret = parse_perf_domain(pcpu, list_name, cell_name);
+ if (ret < 0)
+ continue;
+
+ if (target_idx == ret)
+ cpumask_set_cpu(cpu, cpumask);
+ }
+
+ return target_idx;
+}
#else
static inline int cpufreq_boost_trigger_state(int state)
{
@@ -1014,6 +1070,12 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
{
return false;
}
+
+static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
+ const char *cell_name, struct cpumask *cpumask)
+{
+ return -EOPNOTSUPP;
+}
#endif
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
@@ -1035,7 +1097,6 @@ void arch_set_freq_scale(const struct cpumask *cpus,
{
}
#endif
-
/* the following are really really optional */
extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
@@ -1046,4 +1107,10 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
void cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy)
+{
+ dev_pm_opp_of_register_em(get_cpu_device(policy->cpu),
+ policy->related_cpus);
+}
#endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 39cf84a30b9f..832d8a74fa59 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -22,8 +22,42 @@
* AP_ACTIVE AP_ACTIVE
*/
+/*
+ * CPU hotplug states. The state machine invokes the installed state
+ * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE
+ * during a CPU online operation. During a CPU offline operation the
+ * installed teardown callbacks are invoked in the reverse order from
+ * CPU_ONLINE - 1 down to CPUHP_OFFLINE.
+ *
+ * The state space has three sections: PREPARE, STARTING and ONLINE.
+ *
+ * PREPARE: The callbacks are invoked on a control CPU before the
+ * hotplugged CPU is started up or after the hotplugged CPU has died.
+ *
+ * STARTING: The callbacks are invoked on the hotplugged CPU from the low level
+ * hotplug startup/teardown code with interrupts disabled.
+ *
+ * ONLINE: The callbacks are invoked on the hotplugged CPU from the per CPU
+ * hotplug thread with interrupts and preemption enabled.
+ *
+ * Adding explicit states to this enum is only necessary when:
+ *
+ * 1) The state is within the STARTING section
+ *
+ * 2) The state has ordering constraints vs. other states in the
+ * same section.
+ *
+ * If neither #1 nor #2 apply, please use the dynamic state space when
+ * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE
+ * for the @state argument of the setup function.
+ *
+ * See Documentation/core-api/cpu_hotplug.rst for further information and
+ * examples.
+ */
enum cpuhp_state {
CPUHP_INVALID = -1,
+
+ /* PREPARE section invoked on a control CPU */
CPUHP_OFFLINE = 0,
CPUHP_CREATE_THREADS,
CPUHP_PERF_PREPARE,
@@ -95,6 +129,11 @@ enum cpuhp_state {
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
CPUHP_BRINGUP_CPU,
+
+ /*
+ * STARTING section invoked on the hotplugged CPU in low level
+ * bringup and teardown code.
+ */
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
CPUHP_AP_SCHED_STARTING,
@@ -155,6 +194,8 @@ enum cpuhp_state {
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU,
+
+ /* Online section invoked on the hotplugged CPU from the hotplug thread */
CPUHP_AP_ONLINE_IDLE,
CPUHP_AP_SCHED_WAIT_EMPTY,
CPUHP_AP_SMPBOOT_THREADS,
@@ -216,14 +257,15 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
int (*teardown)(unsigned int cpu),
bool multi_instance);
/**
- * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the @startup
+ * callback
* @state: The state for which the calls are installed
* @name: Name of the callback (will be used in debug output)
- * @startup: startup callback function
- * @teardown: teardown callback function
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
*
- * Installs the callback functions and invokes the startup callback on
- * the present cpus which have already reached the @state.
+ * Installs the callback functions and invokes the @startup callback on
+ * the online cpus which have already reached the @state.
*/
static inline int cpuhp_setup_state(enum cpuhp_state state,
const char *name,
@@ -233,6 +275,18 @@ static inline int cpuhp_setup_state(enum cpuhp_state state,
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
+/**
+ * cpuhp_setup_state_cpuslocked - Setup hotplug state callbacks with calling
+ * @startup callback from a cpus_read_lock()
+ * held region
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback (will be used in debug output)
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
+ *
+ * Same as cpuhp_setup_state() except that it must be invoked from within a
+ * cpus_read_lock() held region.
+ */
static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
@@ -244,14 +298,14 @@ static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
- * callbacks
+ * @startup callback
* @state: The state for which the calls are installed
* @name: Name of the callback.
- * @startup: startup callback function
- * @teardown: teardown callback function
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
*
- * Same as @cpuhp_setup_state except that no calls are executed are invoked
- * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ * Same as cpuhp_setup_state() except that the @startup callback is not
+ * invoked during installation. NOP if SMP=n or HOTPLUG_CPU=n.
*/
static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
const char *name,
@@ -262,6 +316,19 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
false);
}
+/**
+ * cpuhp_setup_state_nocalls_cpuslocked - Setup hotplug state callbacks without
+ * invoking the @startup callback from
+ * a cpus_read_lock() held region
+ * callbacks
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback.
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
+ *
+ * Same as cpuhp_setup_state_nocalls() except that it must be invoked from
+ * within a cpus_read_lock() held region.
+ */
static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
@@ -275,13 +342,13 @@ static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
* @name: Name of the callback.
- * @startup: startup callback function
- * @teardown: teardown callback function
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
*
* Sets the internal multi_instance flag and prepares a state to work as a multi
* instance callback. No callbacks are invoked at this point. The callbacks are
* invoked once an instance for this state are registered via
- * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+ * cpuhp_state_add_instance() or cpuhp_state_add_instance_nocalls()
*/
static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
const char *name,
@@ -306,9 +373,10 @@ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
* @state: The state for which the instance is installed
* @node: The node for this individual state.
*
- * Installs the instance for the @state and invokes the startup callback on
- * the present cpus which have already reached the @state. The @state must have
- * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+ * Installs the instance for the @state and invokes the registered startup
+ * callback on the online cpus which have already reached the @state. The
+ * @state must have been earlier marked as multi-instance by
+ * cpuhp_setup_state_multi().
*/
static inline int cpuhp_state_add_instance(enum cpuhp_state state,
struct hlist_node *node)
@@ -322,8 +390,9 @@ static inline int cpuhp_state_add_instance(enum cpuhp_state state,
* @state: The state for which the instance is installed
* @node: The node for this individual state.
*
- * Installs the instance for the @state The @state must have been earlier
- * marked as multi-instance by @cpuhp_setup_state_multi.
+ * Installs the instance for the @state. The @state must have been earlier
+ * marked as multi-instance by cpuhp_setup_state_multi. NOP if SMP=n or
+ * HOTPLUG_CPU=n.
*/
static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
struct hlist_node *node)
@@ -331,6 +400,17 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
return __cpuhp_state_add_instance(state, node, false);
}
+/**
+ * cpuhp_state_add_instance_nocalls_cpuslocked - Add an instance for a state
+ * without invoking the startup
+ * callback from a cpus_read_lock()
+ * held region.
+ * @state: The state for which the instance is installed
+ * @node: The node for this individual state.
+ *
+ * Same as cpuhp_state_add_instance_nocalls() except that it must be
+ * invoked from within a cpus_read_lock() held region.
+ */
static inline int
cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
struct hlist_node *node)
@@ -346,7 +426,7 @@ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
* @state: The state for which the calls are removed
*
* Removes the callback functions and invokes the teardown callback on
- * the present cpus which have already reached the @state.
+ * the online cpus which have already reached the @state.
*/
static inline void cpuhp_remove_state(enum cpuhp_state state)
{
@@ -355,7 +435,7 @@ static inline void cpuhp_remove_state(enum cpuhp_state state)
/**
* cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
- * teardown
+ * the teardown callback
* @state: The state for which the calls are removed
*/
static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
@@ -363,6 +443,14 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
__cpuhp_remove_state(state, false);
}
+/**
+ * cpuhp_remove_state_nocalls_cpuslocked - Remove hotplug state callbacks without invoking
+ * teardown from a cpus_read_lock() held region.
+ * @state: The state for which the calls are removed
+ *
+ * Same as cpuhp_remove_state nocalls() except that it must be invoked
+ * from within a cpus_read_lock() held region.
+ */
static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
{
__cpuhp_remove_state_cpuslocked(state, false);
@@ -390,8 +478,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
* @state: The state from which the instance is removed
* @node: The node for this individual state.
*
- * Removes the instance and invokes the teardown callback on the present cpus
- * which have already reached the @state.
+ * Removes the instance and invokes the teardown callback on the online cpus
+ * which have already reached @state.
*/
static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
struct hlist_node *node)
diff --git a/include/linux/damon.h b/include/linux/damon.h
new file mode 100644
index 000000000000..d68b67b8d458
--- /dev/null
+++ b/include/linux/damon.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON api
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifndef _DAMON_H_
+#define _DAMON_H_
+
+#include <linux/mutex.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+/* Minimal region size. Every damon_region is aligned by this. */
+#define DAMON_MIN_REGION PAGE_SIZE
+
+/**
+ * struct damon_addr_range - Represents an address region of [@start, @end).
+ * @start: Start address of the region (inclusive).
+ * @end: End address of the region (exclusive).
+ */
+struct damon_addr_range {
+ unsigned long start;
+ unsigned long end;
+};
+
+/**
+ * struct damon_region - Represents a monitoring target region.
+ * @ar: The address range of the region.
+ * @sampling_addr: Address of the sample for the next access check.
+ * @nr_accesses: Access frequency of this region.
+ * @list: List head for siblings.
+ */
+struct damon_region {
+ struct damon_addr_range ar;
+ unsigned long sampling_addr;
+ unsigned int nr_accesses;
+ struct list_head list;
+};
+
+/**
+ * struct damon_target - Represents a monitoring target.
+ * @id: Unique identifier for this target.
+ * @nr_regions: Number of monitoring target regions of this target.
+ * @regions_list: Head of the monitoring target regions of this target.
+ * @list: List head for siblings.
+ *
+ * Each monitoring context could have multiple targets. For example, a context
+ * for virtual memory address spaces could have multiple target processes. The
+ * @id of each target should be unique among the targets of the context. For
+ * example, in the virtual address monitoring context, it could be a pidfd or
+ * an address of an mm_struct.
+ */
+struct damon_target {
+ unsigned long id;
+ unsigned int nr_regions;
+ struct list_head regions_list;
+ struct list_head list;
+};
+
+struct damon_ctx;
+
+/**
+ * struct damon_primitive Monitoring primitives for given use cases.
+ *
+ * @init: Initialize primitive-internal data structures.
+ * @update: Update primitive-internal data structures.
+ * @prepare_access_checks: Prepare next access check of target regions.
+ * @check_accesses: Check the accesses to target regions.
+ * @reset_aggregated: Reset aggregated accesses monitoring results.
+ * @target_valid: Determine if the target is valid.
+ * @cleanup: Clean up the context.
+ *
+ * DAMON can be extended for various address spaces and usages. For this,
+ * users should register the low level primitives for their target address
+ * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread
+ * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
+ * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
+ * @check_accesses, @target_valid and @prepare_access_checks after each
+ * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each
+ * &damon_ctx.aggr_interval.
+ *
+ * @init should initialize primitive-internal data structures. For example,
+ * this could be used to construct proper monitoring target regions and link
+ * those to @damon_ctx.adaptive_targets.
+ * @update should update the primitive-internal data structures. For example,
+ * this could be used to update monitoring target regions for current status.
+ * @prepare_access_checks should manipulate the monitoring regions to be
+ * prepared for the next access check.
+ * @check_accesses should check the accesses to each region that made after the
+ * last preparation and update the number of observed accesses of each region.
+ * It should also return max number of observed accesses that made as a result
+ * of its update. The value will be used for regions adjustment threshold.
+ * @reset_aggregated should reset the access monitoring results that aggregated
+ * by @check_accesses.
+ * @target_valid should check whether the target is still valid for the
+ * monitoring.
+ * @cleanup is called from @kdamond just before its termination.
+ */
+struct damon_primitive {
+ void (*init)(struct damon_ctx *context);
+ void (*update)(struct damon_ctx *context);
+ void (*prepare_access_checks)(struct damon_ctx *context);
+ unsigned int (*check_accesses)(struct damon_ctx *context);
+ void (*reset_aggregated)(struct damon_ctx *context);
+ bool (*target_valid)(void *target);
+ void (*cleanup)(struct damon_ctx *context);
+};
+
+/*
+ * struct damon_callback Monitoring events notification callbacks.
+ *
+ * @before_start: Called before starting the monitoring.
+ * @after_sampling: Called after each sampling.
+ * @after_aggregation: Called after each aggregation.
+ * @before_terminate: Called before terminating the monitoring.
+ * @private: User private data.
+ *
+ * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
+ * @before_terminate just before starting and finishing the monitoring,
+ * respectively. Therefore, those are good places for installing and cleaning
+ * @private.
+ *
+ * The monitoring thread calls @after_sampling and @after_aggregation for each
+ * of the sampling intervals and aggregation intervals, respectively.
+ * Therefore, users can safely access the monitoring results without additional
+ * protection. For the reason, users are recommended to use these callback for
+ * the accesses to the results.
+ *
+ * If any callback returns non-zero, monitoring stops.
+ */
+struct damon_callback {
+ void *private;
+
+ int (*before_start)(struct damon_ctx *context);
+ int (*after_sampling)(struct damon_ctx *context);
+ int (*after_aggregation)(struct damon_ctx *context);
+ int (*before_terminate)(struct damon_ctx *context);
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring. This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
+ *
+ * @sample_interval: The time between access samplings.
+ * @aggr_interval: The time between monitor results aggregations.
+ * @primitive_update_interval: The time between monitoring primitive updates.
+ *
+ * For each @sample_interval, DAMON checks whether each region is accessed or
+ * not. It aggregates and keeps the access information (number of accesses to
+ * each region) for @aggr_interval time. DAMON also checks whether the target
+ * memory regions need update (e.g., by ``mmap()`` calls from the application,
+ * in case of virtual memory monitoring) and applies the changes for each
+ * @primitive_update_interval. All time intervals are in micro-seconds.
+ * Please refer to &struct damon_primitive and &struct damon_callback for more
+ * detail.
+ *
+ * @kdamond: Kernel thread who does the monitoring.
+ * @kdamond_stop: Notifies whether kdamond should stop.
+ * @kdamond_lock: Mutex for the synchronizations with @kdamond.
+ *
+ * For each monitoring context, one kernel thread for the monitoring is
+ * created. The pointer to the thread is stored in @kdamond.
+ *
+ * Once started, the monitoring thread runs until explicitly required to be
+ * terminated or every monitoring target is invalid. The validity of the
+ * targets is checked via the &damon_primitive.target_valid of @primitive. The
+ * termination can also be explicitly requested by writing non-zero to
+ * @kdamond_stop. The thread sets @kdamond to NULL when it terminates.
+ * Therefore, users can know whether the monitoring is ongoing or terminated by
+ * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from
+ * outside of the monitoring thread must be protected by @kdamond_lock.
+ *
+ * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
+ * @kdamond_lock. Accesses to other fields must be protected by themselves.
+ *
+ * @primitive: Set of monitoring primitives for given use cases.
+ * @callback: Set of callbacks for monitoring events notifications.
+ *
+ * @min_nr_regions: The minimum number of adaptive monitoring regions.
+ * @max_nr_regions: The maximum number of adaptive monitoring regions.
+ * @adaptive_targets: Head of monitoring targets (&damon_target) list.
+ */
+struct damon_ctx {
+ unsigned long sample_interval;
+ unsigned long aggr_interval;
+ unsigned long primitive_update_interval;
+
+/* private: internal use only */
+ struct timespec64 last_aggregation;
+ struct timespec64 last_primitive_update;
+
+/* public: */
+ struct task_struct *kdamond;
+ bool kdamond_stop;
+ struct mutex kdamond_lock;
+
+ struct damon_primitive primitive;
+ struct damon_callback callback;
+
+ unsigned long min_nr_regions;
+ unsigned long max_nr_regions;
+ struct list_head adaptive_targets;
+};
+
+#define damon_next_region(r) \
+ (container_of(r->list.next, struct damon_region, list))
+
+#define damon_prev_region(r) \
+ (container_of(r->list.prev, struct damon_region, list))
+
+#define damon_for_each_region(r, t) \
+ list_for_each_entry(r, &t->regions_list, list)
+
+#define damon_for_each_region_safe(r, next, t) \
+ list_for_each_entry_safe(r, next, &t->regions_list, list)
+
+#define damon_for_each_target(t, ctx) \
+ list_for_each_entry(t, &(ctx)->adaptive_targets, list)
+
+#define damon_for_each_target_safe(t, next, ctx) \
+ list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list)
+
+#ifdef CONFIG_DAMON
+
+struct damon_region *damon_new_region(unsigned long start, unsigned long end);
+inline void damon_insert_region(struct damon_region *r,
+ struct damon_region *prev, struct damon_region *next,
+ struct damon_target *t);
+void damon_add_region(struct damon_region *r, struct damon_target *t);
+void damon_destroy_region(struct damon_region *r, struct damon_target *t);
+
+struct damon_target *damon_new_target(unsigned long id);
+void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
+void damon_free_target(struct damon_target *t);
+void damon_destroy_target(struct damon_target *t);
+unsigned int damon_nr_regions(struct damon_target *t);
+
+struct damon_ctx *damon_new_ctx(void);
+void damon_destroy_ctx(struct damon_ctx *ctx);
+int damon_set_targets(struct damon_ctx *ctx,
+ unsigned long *ids, ssize_t nr_ids);
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+ unsigned long aggr_int, unsigned long primitive_upd_int,
+ unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_nr_running_ctxs(void);
+
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+
+#endif /* CONFIG_DAMON */
+
+#ifdef CONFIG_DAMON_VADDR
+
+/* Monitoring primitives for virtual memory address spaces */
+void damon_va_init(struct damon_ctx *ctx);
+void damon_va_update(struct damon_ctx *ctx);
+void damon_va_prepare_access_checks(struct damon_ctx *ctx);
+unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
+bool damon_va_target_valid(void *t);
+void damon_va_cleanup(struct damon_ctx *ctx);
+void damon_va_set_primitives(struct damon_ctx *ctx);
+
+#endif /* CONFIG_DAMON_VADDR */
+
+#endif /* _DAMON_H */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b52f084aa643..2619d94c308d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -41,7 +41,6 @@ struct dax_operations {
extern struct attribute_group dax_attribute_group;
#if IS_ENABLED(CONFIG_DAX)
-struct dax_device *dax_get_by_host(const char *host);
struct dax_device *alloc_dax(void *private, const char *host,
const struct dax_operations *ops, unsigned long flags);
void put_dax(struct dax_device *dax_dev);
@@ -58,8 +57,6 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev)
{
__set_dax_synchronous(dax_dev);
}
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
- int blocksize, sector_t start, sector_t len);
/*
* Check if given mapping is supported by the file / underlying device.
*/
@@ -73,10 +70,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
return dax_synchronous(dax_dev);
}
#else
-static inline struct dax_device *dax_get_by_host(const char *host)
-{
- return NULL;
-}
static inline struct dax_device *alloc_dax(void *private, const char *host,
const struct dax_operations *ops, unsigned long flags)
{
@@ -106,12 +99,6 @@ static inline bool dax_synchronous(struct dax_device *dax_dev)
static inline void set_dax_synchronous(struct dax_device *dax_dev)
{
}
-static inline bool dax_supported(struct dax_device *dax_dev,
- struct block_device *bdev, int blocksize, sector_t start,
- sector_t len)
-{
- return false;
-}
static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
struct dax_device *dax_dev)
{
@@ -122,22 +109,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
struct writeback_control;
int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
#if IS_ENABLED(CONFIG_FS_DAX)
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize);
-static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
-{
- return __bdev_dax_supported(bdev, blocksize);
-}
-
-bool __generic_fsdax_supported(struct dax_device *dax_dev,
+bool generic_fsdax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t sectors);
-static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
- struct block_device *bdev, int blocksize, sector_t start,
- sector_t sectors)
-{
- return __generic_fsdax_supported(dax_dev, bdev, blocksize, start,
- sectors);
-}
+
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+ int blocksize, sector_t start, sector_t len);
static inline void fs_put_dax(struct dax_device *dax_dev)
{
@@ -153,15 +130,11 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
dax_entry_t dax_lock_page(struct page *page);
void dax_unlock_page(struct page *page, dax_entry_t cookie);
#else
-static inline bool bdev_dax_supported(struct block_device *bdev,
- int blocksize)
-{
- return false;
-}
+#define generic_fsdax_supported NULL
-static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
+static inline bool dax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
- sector_t sectors)
+ sector_t len)
{
return false;
}
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 93c3ca5fdafd..e5c2c9e71bf1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -380,6 +380,7 @@ enum dma_slave_buswidth {
DMA_SLAVE_BUSWIDTH_16_BYTES = 16,
DMA_SLAVE_BUSWIDTH_32_BYTES = 32,
DMA_SLAVE_BUSWIDTH_64_BYTES = 64,
+ DMA_SLAVE_BUSWIDTH_128_BYTES = 128,
};
/**
@@ -398,7 +399,7 @@ enum dma_slave_buswidth {
* @src_addr_width: this is the width in bytes of the source (RX)
* register where DMA data shall be read. If the source
* is memory this may be ignored depending on architecture.
- * Legal values: 1, 2, 3, 4, 8, 16, 32, 64.
+ * Legal values: 1, 2, 3, 4, 8, 16, 32, 64, 128.
* @dst_addr_width: same as src_addr_width but for destination
* target (TX) mutatis mutandis.
* @src_maxburst: the maximum number of words (note: words, as in
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 1834752c5617..39dcadd492b5 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -11,7 +11,7 @@
#include <linux/types.h>
/**
- * em_perf_state - Performance state of a performance domain
+ * struct em_perf_state - Performance state of a performance domain
* @frequency: The frequency in KHz, for consistency with CPUFreq
* @power: The power consumed at this level (by 1 CPU or by a registered
* device). It can be a total power: static and dynamic.
@@ -25,7 +25,7 @@ struct em_perf_state {
};
/**
- * em_perf_domain - Performance domain
+ * struct em_perf_domain - Performance domain
* @table: List of performance states, in ascending order
* @nr_perf_states: Number of performance states
* @milliwatts: Flag indicating the power values are in milli-Watts
@@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev);
/**
* em_cpu_energy() - Estimates the energy consumed by the CPUs of a
- performance domain
+ * performance domain
* @pd : performance domain for which energy has to be estimated
* @max_util : highest utilization among CPUs of the domain
* @sum_util : sum of the utilization of all CPUs in the domain
* @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which
- might reflect reduced frequency (due to thermal)
+ * might reflect reduced frequency (due to thermal)
*
* This function must be used only for CPU devices. There is no validation,
* i.e. if the EM is a CPU type and has cpumask allocated. It is called from
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 593322c946e6..3337745d81bd 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -68,4 +68,22 @@ static inline void eventpoll_release(struct file *file) {}
#endif
+#if defined(CONFIG_ARM) && defined(CONFIG_OABI_COMPAT)
+/* ARM OABI has an incompatible struct layout and needs a special handler */
+extern struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+ struct epoll_event __user *uevent);
+#else
+static inline struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+ struct epoll_event __user *uevent)
+{
+ if (__put_user(revents, &uevent->events) ||
+ __put_user(data, &uevent->data))
+ return NULL;
+
+ return uevent+1;
+}
+#endif
+
#endif /* #ifndef _LINUX_EVENTPOLL_H */
diff --git a/include/linux/file.h b/include/linux/file.h
index 2de2e4613d7b..51e830b4fe3a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -94,6 +94,9 @@ extern void fd_install(unsigned int fd, struct file *file);
extern int __receive_fd(struct file *file, int __user *ufd,
unsigned int o_flags);
+
+extern int receive_fd(struct file *file, unsigned int o_flags);
+
static inline int receive_fd_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
@@ -101,10 +104,6 @@ static inline int receive_fd_user(struct file *file, int __user *ufd,
return -EFAULT;
return __receive_fd(file, ufd, o_flags);
}
-static inline int receive_fd(struct file *file, unsigned int o_flags)
-{
- return __receive_fd(file, NULL, o_flags);
-}
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
extern void flush_delayed_fput(void);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 37ad9a730a89..e7a633353fd2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3439,6 +3439,8 @@ extern int buffer_migrate_page_norefs(struct address_space *,
#define buffer_migrate_page_norefs NULL
#endif
+int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+ unsigned int ia_valid);
int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
extern int inode_newsize_ok(const struct inode *, loff_t offset);
void setattr_copy(struct user_namespace *, struct inode *inode,
@@ -3592,7 +3594,7 @@ int proc_nr_dentry(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_inodes(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int __init get_filesystem_list(char *buf);
+int __init list_bdev_fs_names(char *buf, size_t size);
#define __FMODE_EXEC ((__force int) FMODE_EXEC)
#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 7902c7d8b55f..4aa1031d3e4c 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *vaddr)
static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_disable();
+ else
+ preempt_disable();
+
pagefault_disable();
return __kmap_local_page_prot(page, prot);
}
@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct page *page)
static inline void *kmap_atomic_pfn(unsigned long pfn)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_disable();
+ else
+ preempt_disable();
+
pagefault_disable();
return __kmap_local_pfn_prot(pfn, kmap_prot);
}
@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void *addr)
{
kunmap_local_indexed(addr);
pagefault_enable();
- preempt_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_enable();
+ else
+ preempt_enable();
}
unsigned int __nr_free_highpages(void);
@@ -179,7 +190,10 @@ static inline void __kunmap_local(void *addr)
static inline void *kmap_atomic(struct page *page)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_disable();
+ else
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void *addr)
kunmap_flush_on_unmap(addr);
#endif
pagefault_enable();
- preempt_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_enable();
+ else
+ preempt_enable();
}
static inline unsigned int nr_free_highpages(void) { return 0; }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f7ca1a3870ea..1faebe1cd0ed 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -858,6 +858,11 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
+static inline void hugetlb_count_init(struct mm_struct *mm)
+{
+ atomic_long_set(&mm->hugetlb_usage, 0);
+}
+
static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
atomic_long_add(l, &mm->hugetlb_usage);
@@ -1042,6 +1047,10 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
return &mm->page_table_lock;
}
+static inline void hugetlb_count_init(struct mm_struct *mm)
+{
+}
+
static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b066024c62e3..34de69b3b8ba 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -118,6 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
void memblock_free_all(void);
+void memblock_free_ptr(void *ptr, size_t size);
void reset_node_managed_pages(pg_data_t *pgdat);
void reset_all_zones_managed_pages(void);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index d9a0b61cd432..7efc0a7c14c9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -23,6 +23,48 @@
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
+/**
+ * struct memory_group - a logical group of memory blocks
+ * @nid: The node id for all memory blocks inside the memory group.
+ * @blocks: List of all memory blocks belonging to this memory group.
+ * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
+ * memory group.
+ * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
+ * memory group.
+ * @is_dynamic: The memory group type: static vs. dynamic
+ * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
+ * number of pages we'll have in this static memory group.
+ * @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
+ * in which memory is added/removed in this dynamic memory group.
+ * This granularity defines the alignment of a unit in physical
+ * address space; it has to be at least as big as a single
+ * memory block.
+ *
+ * A memory group logically groups memory blocks; each memory block
+ * belongs to at most one memory group. A memory group corresponds to
+ * a memory device, such as a DIMM or a NUMA node, which spans multiple
+ * memory blocks and might even span multiple non-contiguous physical memory
+ * ranges.
+ *
+ * Modification of members after registration is serialized by memory
+ * hot(un)plug code.
+ */
+struct memory_group {
+ int nid;
+ struct list_head memory_blocks;
+ unsigned long present_kernel_pages;
+ unsigned long present_movable_pages;
+ bool is_dynamic;
+ union {
+ struct {
+ unsigned long max_pages;
+ } s;
+ struct {
+ unsigned long unit_pages;
+ } d;
+ };
+};
+
struct memory_block {
unsigned long start_section_nr;
unsigned long state; /* serialized by the dev->lock */
@@ -34,6 +76,8 @@ struct memory_block {
* lay at the beginning of the memory block.
*/
unsigned long nr_vmemmap_pages;
+ struct memory_group *group; /* group (if any) for this block */
+ struct list_head group_next; /* next block inside memory group */
};
int arch_get_memory_phys_device(unsigned long start_pfn);
@@ -86,7 +130,8 @@ static inline int memory_notify(unsigned long val, void *v)
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
- unsigned long vmemmap_pages);
+ unsigned long vmemmap_pages,
+ struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
@@ -96,6 +141,14 @@ extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int memory_group_register_static(int nid, unsigned long max_pages);
+extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
+extern int memory_group_unregister(int mgid);
+struct memory_group *memory_group_find_by_id(int mgid);
+typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
+int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
+ struct memory_group *excluded, void *arg);
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index a7fd2c3ccb77..e5a867c950b2 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -12,6 +12,7 @@ struct zone;
struct pglist_data;
struct mem_section;
struct memory_block;
+struct memory_group;
struct resource;
struct vmem_altmap;
@@ -50,6 +51,11 @@ typedef int __bitwise mhp_t;
* Only selected architectures support it with SPARSE_VMEMMAP.
*/
#define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1))
+/*
+ * The nid field specifies a memory group id (mgid) instead. The memory group
+ * implies the node id (nid).
+ */
+#define MHP_NID_IS_MGID ((__force mhp_t)BIT(2))
/*
* Extended parameters for memory hotplug:
@@ -95,13 +101,15 @@ static inline void zone_seqlock_init(struct zone *zone)
extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-extern void adjust_present_page_count(struct zone *zone, long nr_pages);
+extern void adjust_present_page_count(struct page *page,
+ struct memory_group *group,
+ long nr_pages);
/* VM interface that may be used by firmware interface */
extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
struct zone *zone);
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
- struct zone *zone);
+ struct zone *zone, struct memory_group *group);
extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
unsigned long end_pfn);
extern void __offline_isolated_pages(unsigned long start_pfn,
@@ -130,8 +138,7 @@ static inline bool movable_node_is_enabled(void)
return movable_node_enabled;
}
-extern void arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap);
+extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap);
extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
struct vmem_altmap *altmap);
@@ -292,25 +299,27 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
#ifdef CONFIG_MEMORY_HOTREMOVE
extern void try_offline_node(int nid);
-extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
-extern int remove_memory(int nid, u64 start, u64 size);
-extern void __remove_memory(int nid, u64 start, u64 size);
-extern int offline_and_remove_memory(int nid, u64 start, u64 size);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct memory_group *group);
+extern int remove_memory(u64 start, u64 size);
+extern void __remove_memory(u64 start, u64 size);
+extern int offline_and_remove_memory(u64 start, u64 size);
#else
static inline void try_offline_node(int nid) {}
-static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct memory_group *group)
{
return -EINVAL;
}
-static inline int remove_memory(int nid, u64 start, u64 size)
+static inline int remove_memory(u64 start, u64 size)
{
return -EBUSY;
}
-static inline void __remove_memory(int nid, u64 start, u64 size) {}
+static inline void __remove_memory(u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
extern void set_zone_contiguous(struct zone *zone);
@@ -339,7 +348,8 @@ extern void sparse_remove_section(struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
-extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+extern struct zone *zone_for_pfn_range(int online_type, int nid,
+ struct memory_group *group, unsigned long start_pfn,
unsigned long nr_pages);
extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
struct mhp_params *params);
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 0540f0156f58..96e113e23d04 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -101,14 +101,14 @@ static inline bool mmap_write_trylock(struct mm_struct *mm)
static inline void mmap_write_unlock(struct mm_struct *mm)
{
- up_write(&mm->mmap_lock);
__mmap_lock_trace_released(mm, true);
+ up_write(&mm->mmap_lock);
}
static inline void mmap_write_downgrade(struct mm_struct *mm)
{
- downgrade_write(&mm->mmap_lock);
__mmap_lock_trace_acquire_returned(mm, false, true);
+ downgrade_write(&mm->mmap_lock);
}
static inline void mmap_read_lock(struct mm_struct *mm)
@@ -140,23 +140,14 @@ static inline bool mmap_read_trylock(struct mm_struct *mm)
static inline void mmap_read_unlock(struct mm_struct *mm)
{
- up_read(&mm->mmap_lock);
__mmap_lock_trace_released(mm, false);
-}
-
-static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
-{
- if (mmap_read_trylock(mm)) {
- rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
- return true;
- }
- return false;
+ up_read(&mm->mmap_lock);
}
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
- up_read_non_owner(&mm->mmap_lock);
__mmap_lock_trace_released(mm, false);
+ up_read_non_owner(&mm->mmap_lock);
}
static inline void mmap_assert_locked(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1bd5f5955f9a..6a1d79d84675 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -540,6 +540,10 @@ struct zone {
* is calculated as:
* present_pages = spanned_pages - absent_pages(pages in holes);
*
+ * present_early_pages is present pages existing within the zone
+ * located on memory available since early boot, excluding hotplugged
+ * memory.
+ *
* managed_pages is present pages managed by the buddy system, which
* is calculated as (reserved_pages includes pages allocated by the
* bootmem allocator):
@@ -572,6 +576,9 @@ struct zone {
atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
+#if defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned long present_early_pages;
+#endif
#ifdef CONFIG_CMA
unsigned long cma_pages;
#endif
@@ -1525,18 +1532,6 @@ void sparse_init(void);
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
-/*
- * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
- * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
- * pfn_valid_within() should be used in this case; we optimise this away
- * when we have no holes within a MAX_ORDER_NR_PAGES block.
- */
-#ifdef CONFIG_HOLES_IN_ZONE
-#define pfn_valid_within(pfn) pfn_valid(pfn)
-#else
-#define pfn_valid_within(pfn) (1)
-#endif
-
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */
diff --git a/include/linux/once.h b/include/linux/once.h
index ae6f4eb41cbe..d361fb14ac3a 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -16,7 +16,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
* out the condition into a nop. DO_ONCE() guarantees type safety of
* arguments!
*
- * Not that the following is not equivalent ...
+ * Note that the following is not equivalent ...
*
* DO_ONCE(func, arg);
* DO_ONCE(func, arg);
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 0f12345c21fb..4669632bd72b 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -6,12 +6,9 @@
#include <linux/limits.h>
/*
- * In the fallback code below, we need to compute the minimum and
- * maximum values representable in a given type. These macros may also
- * be useful elsewhere, so we provide them outside the
- * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
- *
- * It would seem more obvious to do something like
+ * We need to compute the minimum and maximum values representable in a given
+ * type. These macros may also be useful elsewhere. It would seem more obvious
+ * to do something like:
*
* #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
* #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
@@ -54,7 +51,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
return unlikely(overflow);
}
-#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
/*
* For simplicity and code hygiene, the fallback code below insists on
* a, b and *d having the same type (similar to the min() and max()
@@ -90,134 +86,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
__builtin_mul_overflow(__a, __b, __d); \
}))
-#else
-
-
-/* Checking for unsigned overflow is relatively easy without causing UB. */
-#define __unsigned_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a + __b; \
- *__d < __a; \
-})
-#define __unsigned_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a - __b; \
- __a < __b; \
-})
-/*
- * If one of a or b is a compile-time constant, this avoids a division.
- */
-#define __unsigned_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a * __b; \
- __builtin_constant_p(__b) ? \
- __b > 0 && __a > type_max(typeof(__a)) / __b : \
- __a > 0 && __b > type_max(typeof(__b)) / __a; \
-})
-
-/*
- * For signed types, detecting overflow is much harder, especially if
- * we want to avoid UB. But the interface of these macros is such that
- * we must provide a result in *d, and in fact we must produce the
- * result promised by gcc's builtins, which is simply the possibly
- * wrapped-around value. Fortunately, we can just formally do the
- * operations in the widest relevant unsigned type (u64) and then
- * truncate the result - gcc is smart enough to generate the same code
- * with and without the (u64) casts.
- */
-
-/*
- * Adding two signed integers can overflow only if they have the same
- * sign, and overflow has happened iff the result has the opposite
- * sign.
- */
-#define __signed_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a + (u64)__b; \
- (((~(__a ^ __b)) & (*__d ^ __a)) \
- & type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Subtraction is similar, except that overflow can now happen only
- * when the signs are opposite. In this case, overflow has happened if
- * the result has the opposite sign of a.
- */
-#define __signed_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a - (u64)__b; \
- ((((__a ^ __b)) & (*__d ^ __a)) \
- & type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Signed multiplication is rather hard. gcc always follows C99, so
- * division is truncated towards 0. This means that we can write the
- * overflow check like this:
- *
- * (a > 0 && (b > MAX/a || b < MIN/a)) ||
- * (a < -1 && (b > MIN/a || b < MAX/a) ||
- * (a == -1 && b == MIN)
- *
- * The redundant casts of -1 are to silence an annoying -Wtype-limits
- * (included in -Wextra) warning: When the type is u8 or u16, the
- * __b_c_e in check_mul_overflow obviously selects
- * __unsigned_mul_overflow, but unfortunately gcc still parses this
- * code and warns about the limited range of __b.
- */
-
-#define __signed_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- typeof(a) __tmax = type_max(typeof(a)); \
- typeof(a) __tmin = type_min(typeof(a)); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a * (u64)__b; \
- (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
- (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
- (__b == (typeof(__b))-1 && __a == __tmin); \
-})
-
-
-#define check_add_overflow(a, b, d) __must_check_overflow( \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_add_overflow(a, b, d), \
- __unsigned_add_overflow(a, b, d)))
-
-#define check_sub_overflow(a, b, d) __must_check_overflow( \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_sub_overflow(a, b, d), \
- __unsigned_sub_overflow(a, b, d)))
-
-#define check_mul_overflow(a, b, d) __must_check_overflow( \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_mul_overflow(a, b, d), \
- __unsigned_mul_overflow(a, b, d)))
-
-#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
-
/** check_shl_overflow() - Calculate a left-shifted value and check overflow
*
* @a: Value to be shifted
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1ace27c4a8e0..a558d67ee86f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -131,7 +131,7 @@ enum pageflags {
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
PG_young,
PG_idle,
#endif
@@ -178,6 +178,8 @@ enum pageflags {
PG_reported = PG_uptodate,
};
+#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
+
#ifndef __GENERATING_BOUNDS_H
static inline unsigned long _compound_head(const struct page *page)
@@ -439,7 +441,7 @@ PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
#endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
TESTPAGEFLAG(Young, young, PF_ANY)
SETPAGEFLAG(Young, young, PF_ANY)
TESTCLEARFLAG(Young, young, PF_ANY)
@@ -778,6 +780,15 @@ static inline int PageSlabPfmemalloc(struct page *page)
return PageActive(page);
}
+/*
+ * A version of PageSlabPfmemalloc() for opportunistic checks where the page
+ * might have been freed under us and not be a PageSlab anymore.
+ */
+static inline int __PageSlabPfmemalloc(struct page *page)
+{
+ return PageActive(page);
+}
+
static inline void SetPageSlabPfmemalloc(struct page *page)
{
VM_BUG_ON_PAGE(!PageSlab(page), page);
@@ -822,7 +833,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
* alloc-free cycle to prevent from reusing the page.
*/
#define PAGE_FLAGS_CHECK_AT_PREP \
- (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+ (PAGEFLAGS_MASK & ~__PG_HWPOISON)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index aff81ba31bd8..fabb2e1e087f 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -19,7 +19,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
PAGE_EXT_OWNER_ALLOCATED,
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
#endif
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 1e894d34bdce..d8a6aecf99cb 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -6,7 +6,7 @@
#include <linux/page-flags.h>
#include <linux/page_ext.h>
-#ifdef CONFIG_IDLE_PAGE_TRACKING
+#ifdef CONFIG_PAGE_IDLE_FLAG
#ifdef CONFIG_64BIT
static inline bool page_is_young(struct page *page)
@@ -106,7 +106,7 @@ static inline void clear_page_idle(struct page *page)
}
#endif /* CONFIG_64BIT */
-#else /* !CONFIG_IDLE_PAGE_TRACKING */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
static inline bool page_is_young(struct page *page)
{
@@ -135,6 +135,6 @@ static inline void clear_page_idle(struct page *page)
{
}
-#endif /* CONFIG_IDLE_PAGE_TRACKING */
+#endif /* CONFIG_PAGE_IDLE_FLAG */
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5dcf446f42e5..62db6b0176b9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -521,18 +521,17 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
*/
static inline pgoff_t page_to_index(struct page *page)
{
- pgoff_t pgoff;
+ struct page *head;
if (likely(!PageTransTail(page)))
return page->index;
+ head = compound_head(page);
/*
* We don't initialize ->index for tail pages: calculate based on
* head page
*/
- pgoff = compound_head(page)->index;
- pgoff += page - compound_head(page);
- return pgoff;
+ return head->index + page - head;
}
extern pgoff_t hugetlb_basepage_index(struct page *page);
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 5ba475ca9078..f16de399d2de 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -122,6 +122,9 @@ static inline void pci_acpi_add_edr_notifier(struct pci_dev *pdev) { }
static inline void pci_acpi_remove_edr_notifier(struct pci_dev *pdev) { }
#endif /* CONFIG_PCIE_EDR */
+int pci_acpi_set_companion_lookup_hook(struct acpi_device *(*func)(struct pci_dev *));
+void pci_acpi_clear_companion_lookup_hook(void);
+
#else /* CONFIG_ACPI */
static inline void acpi_pci_add_bus(struct pci_bus *bus) { }
static inline void acpi_pci_remove_bus(struct pci_bus *bus) { }
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 50a649d33e68..a48778e1a4ee 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -62,31 +62,32 @@ pci_epc_interface_string(enum pci_epc_interface_type type)
* @owner: the module owner containing the ops
*/
struct pci_epc_ops {
- int (*write_header)(struct pci_epc *epc, u8 func_no,
+ int (*write_header)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_header *hdr);
- int (*set_bar)(struct pci_epc *epc, u8 func_no,
+ int (*set_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_bar *epf_bar);
- void (*clear_bar)(struct pci_epc *epc, u8 func_no,
+ void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_bar *epf_bar);
- int (*map_addr)(struct pci_epc *epc, u8 func_no,
+ int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t addr, u64 pci_addr, size_t size);
- void (*unmap_addr)(struct pci_epc *epc, u8 func_no,
+ void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t addr);
- int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts);
- int (*get_msi)(struct pci_epc *epc, u8 func_no);
- int (*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts,
- enum pci_barno, u32 offset);
- int (*get_msix)(struct pci_epc *epc, u8 func_no);
- int (*raise_irq)(struct pci_epc *epc, u8 func_no,
+ int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ u8 interrupts);
+ int (*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+ int (*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ u16 interrupts, enum pci_barno, u32 offset);
+ int (*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+ int (*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
enum pci_epc_irq_type type, u16 interrupt_num);
- int (*map_msi_irq)(struct pci_epc *epc, u8 func_no,
+ int (*map_msi_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr, u8 interrupt_num,
u32 entry_size, u32 *msi_data,
u32 *msi_addr_offset);
int (*start)(struct pci_epc *epc);
void (*stop)(struct pci_epc *epc);
const struct pci_epc_features* (*get_features)(struct pci_epc *epc,
- u8 func_no);
+ u8 func_no, u8 vfunc_no);
struct module *owner;
};
@@ -128,6 +129,8 @@ struct pci_epc_mem {
* single window.
* @num_windows: number of windows supported by device
* @max_functions: max number of functions that can be configured in this EPC
+ * @max_vfs: Array indicating the maximum number of virtual functions that can
+ * be associated with each physical function
* @group: configfs group representing the PCI EPC device
* @lock: mutex to protect pci_epc ops
* @function_num_map: bitmap to manage physical function number
@@ -141,6 +144,7 @@ struct pci_epc {
struct pci_epc_mem *mem;
unsigned int num_windows;
u8 max_functions;
+ u8 *max_vfs;
struct config_group *group;
/* mutex to protect against concurrent access of EP controller */
struct mutex lock;
@@ -208,31 +212,32 @@ void pci_epc_linkup(struct pci_epc *epc);
void pci_epc_init_notify(struct pci_epc *epc);
void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
enum pci_epc_interface_type type);
-int pci_epc_write_header(struct pci_epc *epc, u8 func_no,
+int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_header *hdr);
-int pci_epc_set_bar(struct pci_epc *epc, u8 func_no,
+int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_bar *epf_bar);
-void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no,
+void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct pci_epf_bar *epf_bar);
-int pci_epc_map_addr(struct pci_epc *epc, u8 func_no,
+int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr,
u64 pci_addr, size_t size);
-void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no,
+void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr);
-int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts);
-int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
-int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
- enum pci_barno, u32 offset);
-int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
-int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no,
+int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ u8 interrupts);
+int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+ u16 interrupts, enum pci_barno, u32 offset);
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr, u8 interrupt_num,
u32 entry_size, u32 *msi_data, u32 *msi_addr_offset);
-int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
+int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
enum pci_epc_irq_type type, u16 interrupt_num);
int pci_epc_start(struct pci_epc *epc);
void pci_epc_stop(struct pci_epc *epc);
const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
- u8 func_no);
+ u8 func_no, u8 vfunc_no);
enum pci_barno
pci_epc_get_first_free_bar(const struct pci_epc_features *epc_features);
enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 8292420426f3..009a07147c61 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -121,8 +121,10 @@ struct pci_epf_bar {
* @bar: represents the BAR of EPF device
* @msi_interrupts: number of MSI interrupts required by this function
* @msix_interrupts: number of MSI-X interrupts required by this function
- * @func_no: unique function number within this endpoint device
+ * @func_no: unique (physical) function number within this endpoint device
+ * @vfunc_no: unique virtual function number within a physical function
* @epc: the EPC device to which this EPF device is bound
+ * @epf_pf: the physical EPF device to which this virtual EPF device is bound
* @driver: the EPF driver to which this EPF device is bound
* @list: to add pci_epf as a list of PCI endpoint functions to pci_epc
* @nb: notifier block to notify EPF of any EPC events (like linkup)
@@ -133,6 +135,10 @@ struct pci_epf_bar {
* @sec_epc_bar: represents the BAR of EPF device associated with secondary EPC
* @sec_epc_func_no: unique (physical) function number within the secondary EPC
* @group: configfs group associated with the EPF device
+ * @is_bound: indicates if bind notification to function driver has been invoked
+ * @is_vf: true - virtual function, false - physical function
+ * @vfunction_num_map: bitmap to manage virtual function number
+ * @pci_vepf: list of virtual endpoint functions associated with this function
*/
struct pci_epf {
struct device dev;
@@ -142,8 +148,10 @@ struct pci_epf {
u8 msi_interrupts;
u16 msix_interrupts;
u8 func_no;
+ u8 vfunc_no;
struct pci_epc *epc;
+ struct pci_epf *epf_pf;
struct pci_epf_driver *driver;
struct list_head list;
struct notifier_block nb;
@@ -156,6 +164,10 @@ struct pci_epf {
struct pci_epf_bar sec_epc_bar[6];
u8 sec_epc_func_no;
struct config_group *group;
+ unsigned int is_bound;
+ unsigned int is_vf;
+ unsigned long vfunction_num_map;
+ struct list_head pci_vepf;
};
/**
@@ -199,4 +211,6 @@ int pci_epf_bind(struct pci_epf *epf);
void pci_epf_unbind(struct pci_epf *epf);
struct config_group *pci_epf_type_add_cfs(struct pci_epf *epf,
struct config_group *group);
+int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf);
+void pci_epf_remove_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf);
#endif /* __LINUX_PCI_EPF_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 34d7d94ddf6d..cd8aa6fce204 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -49,6 +49,12 @@
PCI_STATUS_SIG_TARGET_ABORT | \
PCI_STATUS_PARITY)
+/* Number of reset methods used in pci_reset_fn_methods array in pci.c */
+#define PCI_NUM_RESET_METHODS 7
+
+#define PCI_RESET_PROBE true
+#define PCI_RESET_DO_RESET false
+
/*
* The PCI interface treats multi-function devices as independent
* devices. The slot/function address of each device is encoded
@@ -288,21 +294,14 @@ enum pci_bus_speed {
enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
-struct pci_cap_saved_data {
- u16 cap_nr;
- bool cap_extended;
- unsigned int size;
- u32 data[];
-};
-
-struct pci_cap_saved_state {
- struct hlist_node next;
- struct pci_cap_saved_data cap;
+struct pci_vpd {
+ struct mutex lock;
+ unsigned int len;
+ u8 cap;
};
struct irq_affinity;
struct pcie_link_state;
-struct pci_vpd;
struct pci_sriov;
struct pci_p2pdma;
struct rcec_ea;
@@ -333,6 +332,7 @@ struct pci_dev {
struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */
struct pci_dev *rcec; /* Associated RCEC device */
#endif
+ u32 devcap; /* PCIe Device Capabilities */
u8 pcie_cap; /* PCIe capability offset */
u8 msi_cap; /* MSI capability offset */
u8 msix_cap; /* MSI-X capability offset */
@@ -388,6 +388,7 @@ struct pci_dev {
supported from root to here */
u16 l1ss; /* L1SS Capability pointer */
#endif
+ unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */
unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */
pci_channel_state_t error_state; /* Current connectivity state */
@@ -427,7 +428,6 @@ struct pci_dev {
unsigned int state_saved:1;
unsigned int is_physfn:1;
unsigned int is_virtfn:1;
- unsigned int reset_fn:1;
unsigned int is_hotplug_bridge:1;
unsigned int shpc_managed:1; /* SHPC owned by shpchp */
unsigned int is_thunderbolt:1; /* Thunderbolt controller */
@@ -473,7 +473,7 @@ struct pci_dev {
#ifdef CONFIG_PCI_MSI
const struct attribute_group **msi_irq_groups;
#endif
- struct pci_vpd *vpd;
+ struct pci_vpd vpd;
#ifdef CONFIG_PCIE_DPC
u16 dpc_cap;
unsigned int dpc_rp_extensions:1;
@@ -505,6 +505,9 @@ struct pci_dev {
char *driver_override; /* Driver name to force a match */
unsigned long priv_flags; /* Private flags for the PCI driver */
+
+ /* These methods index pci_reset_fn_methods[] */
+ u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
};
static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
@@ -526,6 +529,16 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
return (pdev->error_state != pci_channel_io_normal);
}
+/*
+ * Currently in ACPI spec, for each PCI host bridge, PCI Segment
+ * Group number is limited to a 16-bit value, therefore (int)-1 is
+ * not a valid PCI domain number, and can be used as a sentinel
+ * value indicating ->domain_nr is not set by the driver (and
+ * CONFIG_PCI_DOMAINS_GENERIC=y archs will set it with
+ * pci_bus_find_domain_nr()).
+ */
+#define PCI_DOMAIN_NR_NOT_SET (-1)
+
struct pci_host_bridge {
struct device dev;
struct pci_bus *bus; /* Root bus */
@@ -533,6 +546,7 @@ struct pci_host_bridge {
struct pci_ops *child_ops;
void *sysdata;
int busnr;
+ int domain_nr;
struct list_head windows; /* resource_entry */
struct list_head dma_ranges; /* dma ranges resource list */
u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */
@@ -1257,7 +1271,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
enum pci_bus_speed *speed,
enum pcie_link_width *width);
void pcie_print_link_status(struct pci_dev *dev);
-bool pcie_has_flr(struct pci_dev *dev);
+int pcie_reset_flr(struct pci_dev *dev, bool probe);
int pcie_flr(struct pci_dev *dev);
int __pci_reset_function_locked(struct pci_dev *dev);
int pci_reset_function(struct pci_dev *dev);
@@ -1307,12 +1321,6 @@ int pci_load_saved_state(struct pci_dev *dev,
struct pci_saved_state *state);
int pci_load_and_free_saved_state(struct pci_dev *dev,
struct pci_saved_state **state);
-struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap);
-struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
- u16 cap);
-int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size);
-int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
- u16 cap, unsigned int size);
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
@@ -1779,8 +1787,9 @@ static inline void pci_disable_device(struct pci_dev *dev) { }
static inline int pcim_enable_device(struct pci_dev *pdev) { return -EIO; }
static inline int pci_assign_resource(struct pci_dev *dev, int i)
{ return -EBUSY; }
-static inline int __pci_register_driver(struct pci_driver *drv,
- struct module *owner)
+static inline int __must_check __pci_register_driver(struct pci_driver *drv,
+ struct module *owner,
+ const char *mod_name)
{ return 0; }
static inline int pci_register_driver(struct pci_driver *drv)
{ return 0; }
@@ -1920,9 +1929,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma);
#define pci_resource_end(dev, bar) ((dev)->resource[(bar)].end)
#define pci_resource_flags(dev, bar) ((dev)->resource[(bar)].flags)
#define pci_resource_len(dev,bar) \
- ((pci_resource_start((dev), (bar)) == 0 && \
- pci_resource_end((dev), (bar)) == \
- pci_resource_start((dev), (bar))) ? 0 : \
+ ((pci_resource_end((dev), (bar)) == 0) ? 0 : \
\
(pci_resource_end((dev), (bar)) - \
pci_resource_start((dev), (bar)) + 1))
@@ -2289,20 +2296,6 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask);
#define PCI_VPD_LRDT_RO_DATA PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RO_DATA)
#define PCI_VPD_LRDT_RW_DATA PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RW_DATA)
-/* Small Resource Data Type Tag Item Names */
-#define PCI_VPD_STIN_END 0x0f /* End */
-
-#define PCI_VPD_SRDT_END (PCI_VPD_STIN_END << 3)
-
-#define PCI_VPD_SRDT_TIN_MASK 0x78
-#define PCI_VPD_SRDT_LEN_MASK 0x07
-#define PCI_VPD_LRDT_TIN_MASK 0x7f
-
-#define PCI_VPD_LRDT_TAG_SIZE 3
-#define PCI_VPD_SRDT_TAG_SIZE 1
-
-#define PCI_VPD_INFO_FLD_HDR_SIZE 3
-
#define PCI_VPD_RO_KEYWORD_PARTNO "PN"
#define PCI_VPD_RO_KEYWORD_SERIALNO "SN"
#define PCI_VPD_RO_KEYWORD_MFR_ID "MN"
@@ -2310,83 +2303,45 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask);
#define PCI_VPD_RO_KEYWORD_CHKSUM "RV"
/**
- * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length
- * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
- *
- * Returns the extracted Large Resource Data Type length.
- */
-static inline u16 pci_vpd_lrdt_size(const u8 *lrdt)
-{
- return (u16)lrdt[1] + ((u16)lrdt[2] << 8);
-}
-
-/**
- * pci_vpd_lrdt_tag - Extracts the Large Resource Data Type Tag Item
- * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
- *
- * Returns the extracted Large Resource Data Type Tag item.
- */
-static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt)
-{
- return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK);
-}
-
-/**
- * pci_vpd_srdt_size - Extracts the Small Resource Data Type length
- * @srdt: Pointer to the beginning of the Small Resource Data Type tag
- *
- * Returns the extracted Small Resource Data Type length.
- */
-static inline u8 pci_vpd_srdt_size(const u8 *srdt)
-{
- return (*srdt) & PCI_VPD_SRDT_LEN_MASK;
-}
-
-/**
- * pci_vpd_srdt_tag - Extracts the Small Resource Data Type Tag Item
- * @srdt: Pointer to the beginning of the Small Resource Data Type tag
+ * pci_vpd_alloc - Allocate buffer and read VPD into it
+ * @dev: PCI device
+ * @size: pointer to field where VPD length is returned
*
- * Returns the extracted Small Resource Data Type Tag Item.
+ * Returns pointer to allocated buffer or an ERR_PTR in case of failure
*/
-static inline u8 pci_vpd_srdt_tag(const u8 *srdt)
-{
- return ((*srdt) & PCI_VPD_SRDT_TIN_MASK) >> 3;
-}
+void *pci_vpd_alloc(struct pci_dev *dev, unsigned int *size);
/**
- * pci_vpd_info_field_size - Extracts the information field length
- * @info_field: Pointer to the beginning of an information field header
+ * pci_vpd_find_id_string - Locate id string in VPD
+ * @buf: Pointer to buffered VPD data
+ * @len: The length of the buffer area in which to search
+ * @size: Pointer to field where length of id string is returned
*
- * Returns the extracted information field length.
+ * Returns the index of the id string or -ENOENT if not found.
*/
-static inline u8 pci_vpd_info_field_size(const u8 *info_field)
-{
- return info_field[2];
-}
+int pci_vpd_find_id_string(const u8 *buf, unsigned int len, unsigned int *size);
/**
- * pci_vpd_find_tag - Locates the Resource Data Type tag provided
- * @buf: Pointer to buffered vpd data
- * @len: The length of the vpd buffer
- * @rdt: The Resource Data Type to search for
+ * pci_vpd_find_ro_info_keyword - Locate info field keyword in VPD RO section
+ * @buf: Pointer to buffered VPD data
+ * @len: The length of the buffer area in which to search
+ * @kw: The keyword to search for
+ * @size: Pointer to field where length of found keyword data is returned
*
- * Returns the index where the Resource Data Type was found or
- * -ENOENT otherwise.
+ * Returns the index of the information field keyword data or -ENOENT if
+ * not found.
*/
-int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt);
+int pci_vpd_find_ro_info_keyword(const void *buf, unsigned int len,
+ const char *kw, unsigned int *size);
/**
- * pci_vpd_find_info_keyword - Locates an information field keyword in the VPD
- * @buf: Pointer to buffered vpd data
- * @off: The offset into the buffer at which to begin the search
- * @len: The length of the buffer area, relative to off, in which to search
- * @kw: The keyword to search for
+ * pci_vpd_check_csum - Check VPD checksum
+ * @buf: Pointer to buffered VPD data
+ * @len: VPD size
*
- * Returns the index where the information field keyword was found or
- * -ENOENT otherwise.
+ * Returns 1 if VPD has no checksum, otherwise 0 or an errno
*/
-int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
- unsigned int len, const char *kw);
+int pci_vpd_check_csum(const void *buf, unsigned int len);
/* PCI <-> OF binding helpers */
#ifdef CONFIG_OF
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 2dac431d94ac..3a10d6ec3ee7 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -44,7 +44,7 @@ struct hotplug_slot_ops {
int (*get_attention_status) (struct hotplug_slot *slot, u8 *value);
int (*get_latch_status) (struct hotplug_slot *slot, u8 *value);
int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value);
- int (*reset_slot) (struct hotplug_slot *slot, int probe);
+ int (*reset_slot) (struct hotplug_slot *slot, bool probe);
};
/**
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06eccef155ad..011f2f1ea5bb 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2453,7 +2453,8 @@
#define PCI_VENDOR_ID_TDI 0x192E
#define PCI_DEVICE_ID_TDI_EHCI 0x0101
-#define PCI_VENDOR_ID_FREESCALE 0x1957
+#define PCI_VENDOR_ID_FREESCALE 0x1957 /* duplicate: NXP */
+#define PCI_VENDOR_ID_NXP 0x1957 /* duplicate: FREESCALE */
#define PCI_DEVICE_ID_MPC8308 0xc006
#define PCI_DEVICE_ID_MPC8315E 0x00b4
#define PCI_DEVICE_ID_MPC8315 0x00b5
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index b34a094b2258..860ba4bc5ead 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -41,36 +41,39 @@ struct dw_dma_slave {
/**
* struct dw_dma_platform_data - Controller configuration parameters
+ * @nr_masters: Number of AHB masters supported by the controller
* @nr_channels: Number of channels supported by hardware (max 8)
* @chan_allocation_order: Allocate channels starting from 0 or 7
* @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
* @block_size: Maximum block size supported by the controller
- * @nr_masters: Number of AHB masters supported by the controller
* @data_width: Maximum data width supported by hardware per AHB master
* (in bytes, power of 2)
* @multi_block: Multi block transfers supported by hardware per channel.
* @max_burst: Maximum value of burst transaction size supported by hardware
* per channel (in units of CTL.SRC_TR_WIDTH/CTL.DST_TR_WIDTH).
* @protctl: Protection control signals setting per channel.
+ * @quirks: Optional platform quirks.
*/
struct dw_dma_platform_data {
- unsigned int nr_channels;
+ u32 nr_masters;
+ u32 nr_channels;
#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */
#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */
- unsigned char chan_allocation_order;
+ u32 chan_allocation_order;
#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */
#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */
- unsigned char chan_priority;
- unsigned int block_size;
- unsigned char nr_masters;
- unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
- unsigned char multi_block[DW_DMA_MAX_NR_CHANNELS];
+ u32 chan_priority;
+ u32 block_size;
+ u32 data_width[DW_DMA_MAX_NR_MASTERS];
+ u32 multi_block[DW_DMA_MAX_NR_CHANNELS];
u32 max_burst[DW_DMA_MAX_NR_CHANNELS];
#define CHAN_PROTCTL_PRIVILEGED BIT(0)
#define CHAN_PROTCTL_BUFFERABLE BIT(1)
#define CHAN_PROTCTL_CACHEABLE BIT(2)
#define CHAN_PROTCTL_MASK GENMASK(2, 0)
- unsigned char protctl;
+ u32 protctl;
+#define DW_DMA_QUIRK_XBAR_PRESENT BIT(0)
+ u32 quirks;
};
#endif /* _PLATFORM_DATA_DMA_DW_H */
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index a0b7e43049d5..725c9b784e60 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -404,7 +404,7 @@ int pwm_set_chip_data(struct pwm_device *pwm, void *data);
void *pwm_get_chip_data(struct pwm_device *pwm);
int pwmchip_add(struct pwm_chip *chip);
-int pwmchip_remove(struct pwm_chip *chip);
+void pwmchip_remove(struct pwm_chip *chip);
int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip);
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 0165824c5128..c0475d1c9885 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -109,6 +109,12 @@ extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
u32 *resp);
extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
+
+extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version);
+extern int qcom_scm_lmh_profile_change(u32 profile_id);
+extern bool qcom_scm_lmh_dcvsh_available(void);
+
#else
#include <linux/errno.h>
@@ -170,5 +176,13 @@ static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
{ return -ENODEV; }
+
+static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version)
+ { return -ENODEV; }
+
+static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; }
+
+static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; }
#endif
#endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 426e98e0b675..352c6127cb90 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -142,22 +142,14 @@ struct rw_semaphore {
#define DECLARE_RWSEM(lockname) \
struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name,
struct lock_class_key *key);
-#else
-static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
- struct lock_class_key *key)
-{
-}
-#endif
#define init_rwsem(sem) \
do { \
static struct lock_class_key __key; \
\
- init_rwbase_rt(&(sem)->rwbase); \
- __rwsem_init((sem), #sem, &__key); \
+ __init_rwsem((sem), #sem, &__key); \
} while (0)
static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 2462f7d07695..00ed419dd464 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/percpu_counter.h>
#include <linux/refcount.h>
#include <linux/ratelimit.h>
@@ -13,7 +14,7 @@
struct user_struct {
refcount_t __count; /* reference count */
#ifdef CONFIG_EPOLL
- atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+ struct percpu_counter epoll_watches; /* The number of file descriptors currently watched */
#endif
unsigned long unix_inflight; /* How many files in flight in unix sockets */
atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db3e825..841e2f0f5240 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1940,7 +1940,7 @@ static inline void __skb_insert(struct sk_buff *newsk,
WRITE_ONCE(newsk->prev, prev);
WRITE_ONCE(next->prev, newsk);
WRITE_ONCE(prev->next, newsk);
- list->qlen++;
+ WRITE_ONCE(list->qlen, list->qlen + 1);
}
static inline void __skb_queue_splice(const struct sk_buff_head *list,
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index dcde82a4434c..85499f0586b0 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
#include <linux/kfence.h>
#include <linux/kobject.h>
#include <linux/reciprocal_div.h>
+#include <linux/local_lock.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -40,6 +41,10 @@ enum stat_item {
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
+/*
+ * When changing the layout, make sure freelist and tid are still compatible
+ * with this_cpu_cmpxchg_double() alignment requirements.
+ */
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
@@ -47,6 +52,7 @@ struct kmem_cache_cpu {
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
#endif
+ local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 60a3ab0ad2cc..252243c7783d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1373,6 +1373,9 @@ long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
unsigned int nsops,
const struct old_timespec32 __user *timeout);
+long __do_semtimedop(int semid, struct sembuf *tsems, unsigned int nsops,
+ const struct timespec64 *timeout,
+ struct ipc_namespace *ns);
int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index d296f3b88fb9..c314893970b3 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -285,7 +285,7 @@ struct thermal_zone_params {
};
/**
- * struct thermal_zone_of_device_ops - scallbacks for handling DT based zones
+ * struct thermal_zone_of_device_ops - callbacks for handling DT based zones
*
* Mandatory:
* @get_temp: a pointer to a function that reads the sensor temperature.
@@ -404,12 +404,13 @@ static inline void thermal_zone_device_unregister(
struct thermal_zone_device *tz)
{ }
static inline struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
+thermal_cooling_device_register(const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
{ return ERR_PTR(-ENODEV); }
static inline struct thermal_cooling_device *
thermal_of_cooling_device_register(struct device_node *np,
- char *type, void *devdata, const struct thermal_cooling_device_ops *ops)
+ const char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
{ return ERR_PTR(-ENODEV); }
static inline struct thermal_cooling_device *
devm_thermal_of_cooling_device_register(struct device *dev,
diff --git a/include/linux/threads.h b/include/linux/threads.h
index 18d5a74bcc3d..c34173e6c5f1 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -38,7 +38,7 @@
* Define a minimum number of pids per cpu. Heuristically based
* on original pid max of 32k for 32 cpus. Also, increase the
* minimum settable value for pid_max on the running system based
- * on similar defaults. See kernel/pid.c:pidmap_init() for details.
+ * on similar defaults. See kernel/pid.c:pid_idr_init() for details.
*/
#define PIDS_PER_CPU_DEFAULT 1024
#define PIDS_PER_CPU_MIN 8
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 5117cb5b5656..81b9686a2079 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -25,7 +25,9 @@ struct itimerspec64 {
#define TIME64_MIN (-TIME64_MAX - 1)
#define KTIME_MAX ((s64)~((u64)1 << 63))
+#define KTIME_MIN (-KTIME_MAX - 1)
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+#define KTIME_SEC_MIN (KTIME_MIN / NSEC_PER_SEC)
/*
* Limits for settimeofday():
@@ -124,10 +126,13 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts)
*/
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
- /* Prevent multiplication overflow */
- if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ /* Prevent multiplication overflow / underflow */
+ if (ts->tv_sec >= KTIME_SEC_MAX)
return KTIME_MAX;
+ if (ts->tv_sec <= KTIME_SEC_MIN)
+ return KTIME_MIN;
+
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index c05e903cef02..ac0394087f7d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -200,16 +200,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
n = _copy_to_user(to, from, n);
return n;
}
-#ifdef CONFIG_COMPAT
-static __always_inline unsigned long __must_check
-copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
- might_fault();
- if (access_ok(to, n) && access_ok(from, n))
- n = raw_copy_in_user(to, from, n);
- return n;
-}
-#endif
#ifndef copy_mc_to_kernel
/*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 82c3c3e819e0..5265024e8b90 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -47,6 +47,7 @@ struct iov_iter {
};
loff_t xarray_start;
};
+ size_t truncated;
};
static inline enum iter_type iov_iter_type(const struct iov_iter *i)
@@ -254,8 +255,10 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
* conversion in assignement is by definition greater than all
* values of size_t, including old i->count.
*/
- if (i->count > count)
+ if (i->count > count) {
+ i->truncated += i->count - count;
i->count = count;
+ }
}
/*
@@ -264,6 +267,7 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
*/
static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
{
+ i->truncated -= count - i->count;
i->count = count;
}
diff --git a/include/linux/units.h b/include/linux/units.h
index 4a25e0cc8fb3..681fc652e3d7 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -20,9 +20,13 @@
#define PICO 1000000000000ULL
#define FEMTO 1000000000000000ULL
-#define MILLIWATT_PER_WATT 1000L
-#define MICROWATT_PER_MILLIWATT 1000L
-#define MICROWATT_PER_WATT 1000000L
+#define HZ_PER_KHZ 1000UL
+#define KHZ_PER_MHZ 1000UL
+#define HZ_PER_MHZ 1000000UL
+
+#define MILLIWATT_PER_WATT 1000UL
+#define MICROWATT_PER_MILLIWATT 1000UL
+#define MICROWATT_PER_WATT 1000000UL
#define ABSOLUTE_ZERO_MILLICELSIUS -273150
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 8cfe49d201dd..3972ab765de1 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -43,17 +43,17 @@ struct vdpa_vq_state_split {
* @last_used_idx: used index
*/
struct vdpa_vq_state_packed {
- u16 last_avail_counter:1;
- u16 last_avail_idx:15;
- u16 last_used_counter:1;
- u16 last_used_idx:15;
+ u16 last_avail_counter:1;
+ u16 last_avail_idx:15;
+ u16 last_used_counter:1;
+ u16 last_used_idx:15;
};
struct vdpa_vq_state {
- union {
- struct vdpa_vq_state_split split;
- struct vdpa_vq_state_packed packed;
- };
+ union {
+ struct vdpa_vq_state_split split;
+ struct vdpa_vq_state_packed packed;
+ };
};
struct vdpa_mgmt_dev;
@@ -65,6 +65,7 @@ struct vdpa_mgmt_dev;
* @config: the configuration ops for this device.
* @index: device index
* @features_valid: were features initialized? for legacy guests
+ * @use_va: indicate whether virtual address must be used by this device
* @nvqs: maximum number of supported virtqueues
* @mdev: management device pointer; caller must setup when registering device as part
* of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
@@ -75,6 +76,7 @@ struct vdpa_device {
const struct vdpa_config_ops *config;
unsigned int index;
bool features_valid;
+ bool use_va;
int nvqs;
struct vdpa_mgmt_dev *mdev;
};
@@ -90,6 +92,16 @@ struct vdpa_iova_range {
};
/**
+ * Corresponding file area for device memory mapping
+ * @file: vma->vm_file for the mapping
+ * @offset: mapping offset in the vm_file
+ */
+struct vdpa_map_file {
+ struct file *file;
+ u64 offset;
+};
+
+/**
* struct vdpa_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* operations unless it is mentioned to be optional in the following
@@ -131,7 +143,7 @@ struct vdpa_iova_range {
* @vdev: vdpa device
* @idx: virtqueue index
* @state: pointer to returned state (last_avail_idx)
- * @get_vq_notification: Get the notification area for a virtqueue
+ * @get_vq_notification: Get the notification area for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns the notifcation area
@@ -171,6 +183,9 @@ struct vdpa_iova_range {
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
+ * @reset: Reset device
+ * @vdev: vdpa device
+ * Returns integer: success (0) or error (< 0)
* @get_config_size: Get the size of the configuration space
* @vdev: vdpa device
* Returns size_t: configuration size
@@ -255,6 +270,7 @@ struct vdpa_config_ops {
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
+ int (*reset)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
@@ -266,7 +282,7 @@ struct vdpa_config_ops {
/* DMA ops */
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
- u64 pa, u32 perm);
+ u64 pa, u32 perm, void *opaque);
int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
/* Free device resources */
@@ -275,7 +291,8 @@ struct vdpa_config_ops {
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
- size_t size, const char *name);
+ size_t size, const char *name,
+ bool use_va);
/**
* vdpa_alloc_device - allocate and initilaize a vDPA device
@@ -285,15 +302,16 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
* @parent: the parent device
* @config: the bus operations that is supported by this device
* @name: name of the vdpa device
+ * @use_va: indicate whether virtual address must be used by this device
*
* Return allocated data structure or ERR_PTR upon error
*/
-#define vdpa_alloc_device(dev_struct, member, parent, config, name) \
+#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \
container_of(__vdpa_alloc_device( \
parent, config, \
sizeof(dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
- dev_struct, member)), name), \
+ dev_struct, member)), name, use_va), \
dev_struct, member)
int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
@@ -348,27 +366,27 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
return vdev->dma_dev;
}
-static inline void vdpa_reset(struct vdpa_device *vdev)
+static inline int vdpa_reset(struct vdpa_device *vdev)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = false;
- ops->set_status(vdev, 0);
+ return ops->reset(vdev);
}
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = true;
- return ops->set_features(vdev, features);
+ return ops->set_features(vdev, features);
}
-
-static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
- void *buf, unsigned int len)
+static inline void vdpa_get_config(struct vdpa_device *vdev,
+ unsigned int offset, void *buf,
+ unsigned int len)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
/*
* Config accesses aren't supposed to trigger before features are set.
diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h
index 6b09b786a762..2d0e2f52f938 100644
--- a/include/linux/vhost_iotlb.h
+++ b/include/linux/vhost_iotlb.h
@@ -17,6 +17,7 @@ struct vhost_iotlb_map {
u32 perm;
u32 flags_padding;
u64 __subtree_last;
+ void *opaque;
};
#define VHOST_IOTLB_FLAG_RETIRE 0x1
@@ -29,6 +30,8 @@ struct vhost_iotlb {
unsigned int flags;
};
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, u64 start, u64 last,
+ u64 addr, unsigned int perm, void *opaque);
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
u64 addr, unsigned int perm);
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 2644425b6dce..671d402c3778 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -225,9 +225,6 @@ static inline bool is_vm_area_hugepages(const void *addr)
}
#ifdef CONFIG_MMU
-int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift);
void vunmap_range(unsigned long addr, unsigned long end);
static inline void set_vm_flush_reset_perms(void *addr)
{
diff --git a/include/net/dsa.h b/include/net/dsa.h
index f9a17145255a..258867eff230 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -447,6 +447,11 @@ static inline bool dsa_port_is_user(struct dsa_port *dp)
return dp->type == DSA_PORT_TYPE_USER;
}
+static inline bool dsa_port_is_unused(struct dsa_port *dp)
+{
+ return dp->type == DSA_PORT_TYPE_UNUSED;
+}
+
static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
{
return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
new file mode 100644
index 000000000000..2f422f4f1fb9
--- /dev/null
+++ b/include/trace/events/damon.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM damon
+
+#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DAMON_H
+
+#include <linux/damon.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(damon_aggregated,
+
+ TP_PROTO(struct damon_target *t, struct damon_region *r,
+ unsigned int nr_regions),
+
+ TP_ARGS(t, r, nr_regions),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, target_id)
+ __field(unsigned int, nr_regions)
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ __field(unsigned int, nr_accesses)
+ ),
+
+ TP_fast_assign(
+ __entry->target_id = t->id;
+ __entry->nr_regions = nr_regions;
+ __entry->start = r->ar.start;
+ __entry->end = r->ar.end;
+ __entry->nr_accesses = r->nr_accesses;
+ ),
+
+ TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u",
+ __entry->target_id, __entry->nr_regions,
+ __entry->start, __entry->end, __entry->nr_accesses)
+);
+
+#endif /* _TRACE_DAMON_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 0b53e855c4ac..116ed4d5d0f8 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -75,7 +75,7 @@
#define IF_HAVE_PG_HWPOISON(flag,string)
#endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
#define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string}
#else
#define IF_HAVE_PG_IDLE(flag,string)
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index 5d2ea93956ce..8a99c1cd417b 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -38,7 +38,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_template,
TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d",
__entry->pfn,
- show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)),
+ show_page_flags(__entry->flags & PAGEFLAGS_MASK),
__entry->count,
__entry->mapcount, __entry->mapping, __entry->mt,
__entry->val)
@@ -88,7 +88,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_and_test_template,
TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d ret=%d",
__entry->pfn,
- show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)),
+ show_page_flags(__entry->flags & PAGEFLAGS_MASK),
__entry->count,
__entry->mapcount, __entry->mapping, __entry->mt,
__entry->val, __entry->ret)
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 14c8fe863c6d..1c5fb86d455a 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
#define __NR_remap_file_pages 234
__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
#define __NR_mbind 235
-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
#define __NR_get_mempolicy 236
-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
#define __NR_set_mempolicy 237
-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
#define __NR_migrate_pages 238
-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
#define __NR_move_pages 239
-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
#endif
#define __NR_rt_tgsigqueueinfo 240
diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
index f6e8a005b113..8d206f27bb6d 100644
--- a/include/uapi/linux/cxl_mem.h
+++ b/include/uapi/linux/cxl_mem.h
@@ -50,7 +50,7 @@ enum { CXL_CMDS };
#define ___C(a, b) { b }
static const struct {
const char *name;
-} cxl_command_names[] = { CXL_CMDS };
+} cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS };
/*
* Here's how this actually breaks out:
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index edc346a77c91..c750eac09fc9 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -9,6 +9,30 @@
#include <stdint.h>
#endif
+/* Driver command error status */
+enum idxd_scmd_stat {
+ IDXD_SCMD_DEV_ENABLED = 0x80000010,
+ IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020,
+ IDXD_SCMD_WQ_ENABLED = 0x80000021,
+ IDXD_SCMD_DEV_DMA_ERR = 0x80020000,
+ IDXD_SCMD_WQ_NO_GRP = 0x80030000,
+ IDXD_SCMD_WQ_NO_NAME = 0x80040000,
+ IDXD_SCMD_WQ_NO_SVM = 0x80050000,
+ IDXD_SCMD_WQ_NO_THRESH = 0x80060000,
+ IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000,
+ IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000,
+ IDXD_SCMD_PERCPU_ERR = 0x80090000,
+ IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000,
+ IDXD_SCMD_CDEV_ERR = 0x800b0000,
+ IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000,
+ IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
+ IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
+ IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
+};
+
+#define IDXD_SCMD_SOFTERR_MASK 0x80000000
+#define IDXD_SCMD_SOFTERR_SHIFT 16
+
/* Descriptor flags */
#define IDXD_OP_FLAG_FENCE 0x0001
#define IDXD_OP_FLAG_BOF 0x0002
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
new file mode 100644
index 000000000000..7cfe1c1280c0
--- /dev/null
+++ b/include/uapi/linux/vduse.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_VDUSE_H_
+#define _UAPI_VDUSE_H_
+
+#include <linux/types.h>
+
+#define VDUSE_BASE 0x81
+
+/* The ioctls for control device (/dev/vduse/control) */
+
+#define VDUSE_API_VERSION 0
+
+/*
+ * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
+ * This is used for future extension.
+ */
+#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64)
+
+/* Set the version of VDUSE API that userspace supported. */
+#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64)
+
+/**
+ * struct vduse_dev_config - basic configuration of a VDUSE device
+ * @name: VDUSE device name, needs to be NUL terminated
+ * @vendor_id: virtio vendor id
+ * @device_id: virtio device id
+ * @features: virtio features
+ * @vq_num: the number of virtqueues
+ * @vq_align: the allocation alignment of virtqueue's metadata
+ * @reserved: for future use, needs to be initialized to zero
+ * @config_size: the size of the configuration space
+ * @config: the buffer of the configuration space
+ *
+ * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
+ */
+struct vduse_dev_config {
+#define VDUSE_NAME_MAX 256
+ char name[VDUSE_NAME_MAX];
+ __u32 vendor_id;
+ __u32 device_id;
+ __u64 features;
+ __u32 vq_num;
+ __u32 vq_align;
+ __u32 reserved[13];
+ __u32 config_size;
+ __u8 config[];
+};
+
+/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */
+#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
+
+/*
+ * Destroy a VDUSE device. Make sure there are no more references
+ * to the char device (/dev/vduse/$NAME).
+ */
+#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+
+/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
+
+/**
+ * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last]
+ * @offset: the mmap offset on returned file descriptor
+ * @start: start of the IOVA region
+ * @last: last of the IOVA region
+ * @perm: access permission of the IOVA region
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region.
+ */
+struct vduse_iotlb_entry {
+ __u64 offset;
+ __u64 start;
+ __u64 last;
+#define VDUSE_ACCESS_RO 0x1
+#define VDUSE_ACCESS_WO 0x2
+#define VDUSE_ACCESS_RW 0x3
+ __u8 perm;
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return the corresponding file descriptor. Return -EINVAL means the
+ * IOVA region doesn't exist. Caller should set start and last fields.
+ */
+#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry)
+
+/*
+ * Get the negotiated virtio features. It's a subset of the features in
+ * struct vduse_dev_config which can be accepted by virtio driver. It's
+ * only valid after FEATURES_OK status bit is set.
+ */
+#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)
+
+/**
+ * struct vduse_config_data - data used to update configuration space
+ * @offset: the offset from the beginning of configuration space
+ * @length: the length to write to configuration space
+ * @buffer: the buffer used to write from
+ *
+ * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
+ * configuration space.
+ */
+struct vduse_config_data {
+ __u32 offset;
+ __u32 length;
+ __u8 buffer[];
+};
+
+/* Set device configuration space */
+#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
+
+/*
+ * Inject a config interrupt. It's usually used to notify virtio driver
+ * that device configuration space has changed.
+ */
+#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)
+
+/**
+ * struct vduse_vq_config - basic configuration of a virtqueue
+ * @index: virtqueue index
+ * @max_size: the max size of virtqueue
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
+ */
+struct vduse_vq_config {
+ __u32 index;
+ __u16 max_size;
+ __u16 reserved[13];
+};
+
+/*
+ * Setup the specified virtqueue. Make sure all virtqueues have been
+ * configured before the device is attached to vDPA bus.
+ */
+#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config)
+
+/**
+ * struct vduse_vq_state_split - split virtqueue state
+ * @avail_index: available index
+ */
+struct vduse_vq_state_split {
+ __u16 avail_index;
+};
+
+/**
+ * struct vduse_vq_state_packed - packed virtqueue state
+ * @last_avail_counter: last driver ring wrap counter observed by device
+ * @last_avail_idx: device available index
+ * @last_used_counter: device ring wrap counter
+ * @last_used_idx: used index
+ */
+struct vduse_vq_state_packed {
+ __u16 last_avail_counter;
+ __u16 last_avail_idx;
+ __u16 last_used_counter;
+ __u16 last_used_idx;
+};
+
+/**
+ * struct vduse_vq_info - information of a virtqueue
+ * @index: virtqueue index
+ * @num: the size of virtqueue
+ * @desc_addr: address of desc area
+ * @driver_addr: address of driver area
+ * @device_addr: address of device area
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ * @ready: ready status of virtqueue
+ *
+ * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information.
+ */
+struct vduse_vq_info {
+ __u32 index;
+ __u32 num;
+ __u64 desc_addr;
+ __u64 driver_addr;
+ __u64 device_addr;
+ union {
+ struct vduse_vq_state_split split;
+ struct vduse_vq_state_packed packed;
+ };
+ __u8 ready;
+};
+
+/* Get the specified virtqueue's information. Caller should set index field. */
+#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info)
+
+/**
+ * struct vduse_vq_eventfd - eventfd configuration for a virtqueue
+ * @index: virtqueue index
+ * @fd: eventfd, -1 means de-assigning the eventfd
+ *
+ * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd.
+ */
+struct vduse_vq_eventfd {
+ __u32 index;
+#define VDUSE_EVENTFD_DEASSIGN -1
+ int fd;
+};
+
+/*
+ * Setup kick eventfd for specified virtqueue. The kick eventfd is used
+ * by VDUSE kernel module to notify userspace to consume the avail vring.
+ */
+#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd)
+
+/*
+ * Inject an interrupt for specific virtqueue. It's used to notify virtio driver
+ * to consume the used vring.
+ */
+#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
+
+/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
+
+/**
+ * enum vduse_req_type - request type
+ * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace
+ * @VDUSE_SET_STATUS: set the device status
+ * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
+ * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
+ */
+enum vduse_req_type {
+ VDUSE_GET_VQ_STATE,
+ VDUSE_SET_STATUS,
+ VDUSE_UPDATE_IOTLB,
+};
+
+/**
+ * struct vduse_vq_state - virtqueue state
+ * @index: virtqueue index
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ */
+struct vduse_vq_state {
+ __u32 index;
+ union {
+ struct vduse_vq_state_split split;
+ struct vduse_vq_state_packed packed;
+ };
+};
+
+/**
+ * struct vduse_dev_status - device status
+ * @status: device status
+ */
+struct vduse_dev_status {
+ __u8 status;
+};
+
+/**
+ * struct vduse_iova_range - IOVA range [start, last]
+ * @start: start of the IOVA range
+ * @last: last of the IOVA range
+ */
+struct vduse_iova_range {
+ __u64 start;
+ __u64 last;
+};
+
+/**
+ * struct vduse_dev_request - control request
+ * @type: request type
+ * @request_id: request id
+ * @reserved: for future use
+ * @vq_state: virtqueue state, only index field is available
+ * @s: device status
+ * @iova: IOVA range for updating
+ * @padding: padding
+ *
+ * Structure used by read(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_request {
+ __u32 type;
+ __u32 request_id;
+ __u32 reserved[4];
+ union {
+ struct vduse_vq_state vq_state;
+ struct vduse_dev_status s;
+ struct vduse_iova_range iova;
+ __u32 padding[32];
+ };
+};
+
+/**
+ * struct vduse_dev_response - response to control request
+ * @request_id: corresponding request id
+ * @result: the result of request
+ * @reserved: for future use, needs to be initialized to zero
+ * @vq_state: virtqueue state
+ * @padding: padding
+ *
+ * Structure used by write(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_response {
+ __u32 request_id;
+#define VDUSE_REQ_RESULT_OK 0x00
+#define VDUSE_REQ_RESULT_FAILED 0x01
+ __u32 result;
+ __u32 reserved[4];
+ union {
+ struct vduse_vq_state vq_state;
+ __u32 padding[32];
+ };
+};
+
+#endif /* _UAPI_VDUSE_H_ */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 50d352f5e86f..80d76b75bccd 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -54,9 +54,18 @@
#define VIRTIO_ID_SOUND 25 /* virtio sound */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
+#define VIRTIO_ID_RPMB 28 /* virtio rpmb */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */
+#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */
#define VIRTIO_ID_SCMI 32 /* virtio SCMI */
+#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/
#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */
+#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */
+#define VIRTIO_ID_CAN 36 /* virtio can */
+#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */
+#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */
+#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */
#define VIRTIO_ID_BT 40 /* virtio bluetooth */
#define VIRTIO_ID_GPIO 41 /* virtio gpio */
diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h
index 89daa88bcfef..668b07ce515b 100644
--- a/include/uapi/linux/virtio_pcidev.h
+++ b/include/uapi/linux/virtio_pcidev.h
@@ -9,13 +9,14 @@
/**
* enum virtio_pcidev_ops - virtual PCI device operations
+ * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors
* @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8;
* the @data field should be filled in by the device (in little endian).
* @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8;
* the @data field contains the data to write (in little endian).
- * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable;
+ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable;
* the @data field should be filled in by the device (in little endian).
- * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable;
+ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable;
* the @data field contains the data to write (in little endian).
* @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but
* the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE)
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 3dd3555b2740..64738838bee5 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -97,7 +97,8 @@ enum virtio_vsock_shutdown {
/* VIRTIO_VSOCK_OP_RW flags values */
enum virtio_vsock_rw {
- VIRTIO_VSOCK_SEQ_EOR = 1,
+ VIRTIO_VSOCK_SEQ_EOM = 1,
+ VIRTIO_VSOCK_SEQ_EOR = 2,
};
#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a47a731e4527..7cc2a0f3f2f5 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -276,7 +276,17 @@ enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
HL_DEVICE_STATUS_MALFUNCTION,
- HL_DEVICE_STATUS_NEEDS_RESET
+ HL_DEVICE_STATUS_NEEDS_RESET,
+ HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+ HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+};
+
+enum hl_server_type {
+ HL_SERVER_TYPE_UNKNOWN = 0,
+ HL_SERVER_GAUDI_HLS1 = 1,
+ HL_SERVER_GAUDI_HLS1H = 2,
+ HL_SERVER_GAUDI_TYPE1 = 3,
+ HL_SERVER_GAUDI_TYPE2 = 4
};
/* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
+/**
+ * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
+ * @sram_base_address: The first SRAM physical base address that is free to be
+ * used by the user.
+ * @dram_base_address: The first DRAM virtual or physical base address that is
+ * free to be used by the user.
+ * @dram_size: The DRAM size that is available to the user.
+ * @sram_size: The SRAM size that is available to the user.
+ * @num_of_events: The number of events that can be received from the f/w. This
+ * is needed so the user can what is the size of the h/w events
+ * array he needs to pass to the kernel when he wants to fetch
+ * the event counters.
+ * @device_id: PCI device ID of the ASIC.
+ * @module_id: Module ID of the ASIC for mezzanine cards in servers
+ * (From OCP spec).
+ * @first_available_interrupt_id: The first available interrupt ID for the user
+ * to be used when it works with user interrupts.
+ * @server_type: Server type that the Gaudi ASIC is currently installed in.
+ * The value is according to enum hl_server_type
+ * @cpld_version: CPLD version on the board.
+ * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
+ * in some ASICs.
+ * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
+ * for Goya/Gaudi only.
+ * @dram_enabled: Whether the DRAM is enabled.
+ * @cpucp_version: The CPUCP f/w version.
+ * @card_name: The card name as passed by the f/w.
+ * @dram_page_size: The DRAM physical page size.
+ */
struct hl_info_hw_ip_info {
__u64 sram_base_address;
__u64 dram_base_address;
__u64 dram_size;
__u32 sram_size;
__u32 num_of_events;
- __u32 device_id; /* PCI Device ID */
- __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+ __u32 device_id;
+ __u32 module_id;
__u32 reserved;
__u16 first_available_interrupt_id;
- __u16 reserved2;
+ __u16 server_type;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
- __u64 reserved3;
+ __u64 reserved2;
__u64 dram_page_size;
};
@@ -628,12 +670,21 @@ struct hl_cs_chunk {
__u64 cb_handle;
/* Relevant only when HL_CS_FLAGS_WAIT or
- * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set
* This holds address of array of u64 values that contain
- * signal CS sequence numbers. The wait described by this job
- * will listen on all those signals (wait event per signal)
+ * signal CS sequence numbers. The wait described by
+ * this job will listen on all those signals
+ * (wait event per signal)
*/
__u64 signal_seq_arr;
+
+ /*
+ * Relevant only when HL_CS_FLAGS_WAIT or
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set
+ * along with HL_CS_FLAGS_ENCAP_SIGNALS.
+ * This is the CS sequence which has the encapsulated signals.
+ */
+ __u64 encaps_signal_seq;
};
/* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
* Number of entries in signal_seq_arr
*/
__u32 num_signal_seq_arr;
+
+ /* Relevant only when HL_CS_FLAGS_WAIT or
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
+ * with HL_CS_FLAGS_ENCAP_SIGNALS
+ * This set the signals range that the user want to wait for
+ * out of the whole reserved signals range.
+ * e.g if the signals range is 20, and user don't want
+ * to wait for signal 8, so he set this offset to 7, then
+ * he call the API again with 9 and so on till 20.
+ */
+ __u32 encaps_signal_offset;
};
/* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400
+/*
+ * The encapsulated signals CS is merged into the existing CS ioctls.
+ * In order to use this feature need to follow the below procedure:
+ * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
+ * the output of this API will be the SOB offset from CFG_BASE.
+ * this address will be used to patch CB cmds to do the signaling for this
+ * SOB by incrementing it's value.
+ * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
+ * CS type, note that this might fail if out-of-sync happened to the SOB
+ * value, in case other signaling request to the same SOB occurred between
+ * reserve-unreserve calls.
+ * 2. Use the staged CS to do the encapsulated signaling jobs.
+ * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
+ * field. This offset allows app to wait on part of the reserved signals.
+ * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
+ * to wait for the encapsulated signals.
+ */
+#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800
+#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000
+#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000
+
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
@@ -690,10 +774,35 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
- /* Sequence number of a staged submission CS
- * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
- */
- __u64 seq;
+ union {
+ /*
+ * Sequence number of a staged submission CS
+ * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
+ * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
+ */
+ __u64 seq;
+
+ /*
+ * Encapsulated signals handle id
+ * Valid for two flows:
+ * 1. CS with encapsulated signals:
+ * when HL_CS_FLAGS_STAGED_SUBMISSION and
+ * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ * and HL_CS_FLAGS_ENCAP_SIGNALS are set.
+ * 2. unreserve signals:
+ * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
+ */
+ __u32 encaps_sig_handle_id;
+
+ /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+ struct {
+ /* Encapsulated signals number */
+ __u32 encaps_signals_count;
+
+ /* Encapsulated signals queue index (stream) */
+ __u32 encaps_signals_q_idx;
+ };
+ };
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
};
struct hl_cs_out {
+ union {
+ /*
+ * seq holds the sequence number of the CS to pass to wait
+ * ioctl. All values are valid except for 0 and ULLONG_MAX
+ */
+ __u64 seq;
+
+ /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+ struct {
+ /* This is the resereved signal handle id */
+ __u32 handle_id;
+
+ /* This is the signals count */
+ __u32 count;
+ };
+ };
+
+ /* HL_CS_STATUS */
+ __u32 status;
+
/*
- * seq holds the sequence number of the CS to pass to wait ioctl. All
- * values are valid except for 0 and ULLONG_MAX
+ * SOB base address offset
+ * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
*/
- __u64 seq;
- /* HL_CS_STATUS_* */
- __u32 status;
- __u32 pad;
+ __u32 sob_base_addr_offset;
};
union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {
#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+
+#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
struct hl_wait_cs_in {
union {
struct {
- /* Command submission sequence number */
+ /*
+ * In case of wait_cs holds the CS sequence number.
+ * In case of wait for multi CS hold a user pointer to
+ * an array of CS sequence numbers
+ */
__u64 seq;
/* Absolute timeout to wait for command submission
* in microseconds
@@ -767,12 +900,17 @@ struct hl_wait_cs_in {
/* Context ID - Currently not in use */
__u32 ctx_id;
+
/* HL_WAIT_CS_FLAGS_*
* If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
* interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
* not to specify an interrupt id ,set mask to all 1s.
*/
__u32 flags;
+
+ /* Multi CS API info- valid entries in multi-CS array */
+ __u8 seq_arr_len;
+ __u8 pad[7];
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
@@ -789,8 +927,15 @@ struct hl_wait_cs_out {
__u32 status;
/* HL_WAIT_CS_STATUS_FLAG* */
__u32 flags;
- /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+ /*
+ * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
+ * for wait_cs: timestamp of CS completion
+ * for wait_multi_cs: timestamp of FIRST CS completion
+ */
__s64 timestamp_nsec;
+ /* multi CS completion bitmap */
+ __u32 cs_completion_map;
+ __u32 pad;
};
union hl_wait_cs_args {
@@ -813,6 +958,7 @@ union hl_wait_cs_args {
#define HL_MEM_CONTIGUOUS 0x1
#define HL_MEM_SHARED 0x2
#define HL_MEM_USERPTR 0x4
+#define HL_MEM_FORCE_HINT 0x8
struct hl_mem_in {
union {