From 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 14 Sep 2021 10:33:51 +0800 Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog Currently if a function ptr in struct_ops has a return value, its caller will get a random return value from it, because the return value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped. So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline to save and return the return value of struct_ops prog if ret_size of the function ptr is greater than 0. Also restricting the flag to be used alone. Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f4c16f19f83e..020a7d5bf470 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -578,11 +578,12 @@ struct btf_func_model { * programs only. Should not be used with normal calls and indirect calls. */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) - /* Store IP address of the caller on the trampoline stack, * so it's available for trampoline's programs. */ #define BPF_TRAMP_F_IP_ARG BIT(3) +/* Return the return value of fentry prog. Only used by bpf_struct_ops. */ +#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 -- cgit v1.2.3 From e840f42a49925707fca90e6c7a4095118fdb8c4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 19 Sep 2021 14:09:49 +0100 Subject: KVM: arm64: Fix PMU probe ordering Russell reported that since 5.13, KVM's probing of the PMU has started to fail on his HW. As it turns out, there is an implicit ordering dependency between the architectural PMU probing code and and KVM's own probing. If, due to probe ordering reasons, KVM probes before the PMU driver, it will fail to detect the PMU and prevent it from being advertised to guests as well as the VMM. Obviously, this is one probing too many, and we should be able to deal with any ordering. Add a callback from the PMU code into KVM to advertise the registration of a host CPU PMU, allowing for any probing order. Fixes: 5421db1be3b1 ("KVM: arm64: Divorce the perf code from oprofile helpers") Reported-by: "Russell King (Oracle)" Tested-by: Russell King (Oracle) Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/YUYRKVflRtUytzy5@shell.armlinux.org.uk Cc: stable@vger.kernel.org --- include/linux/perf/arm_pmu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 505480217cf1..2512e2f9cd4e 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +#ifdef CONFIG_KVM +void kvm_host_pmu_init(struct arm_pmu *pmu); +#else +#define kvm_host_pmu_init(x) do { } while(0) +#endif + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- cgit v1.2.3 From c86a2d9058c5a4a05d20ef89e699b7a6b2c89da6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 17 Sep 2021 00:27:05 +0200 Subject: cpumask: Omit terminating null byte in cpumap_print_{list,bitmask}_to_buf The changes in the patch series [1] introduced a terminating null byte when reading from cpulist or cpumap sysfs files, for example: $ xxd /sys/devices/system/node/node0/cpulist 00000000: 302d 310a 00 0-1.. Before this change, the output looked as follows: $ xxd /sys/devices/system/node/node0/cpulist 00000000: 302d 310a 0-1. Fix this regression by excluding the terminating null byte from the returned length in cpumap_print_list_to_buf and cpumap_print_bitmask_to_buf. [1] https://lore.kernel.org/all/20210806110251.560-1-song.bao.hua@hisilicon.com/ Fixes: 1fae562983ca ("cpumask: introduce cpumap_print_list/bitmask_to_buf to support large bitmask and list") Acked-by: Barry Song Acked-by: Yury Norov Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20210916222705.13554-1-tklauser@distanz.ch Signed-off-by: Greg Kroah-Hartman --- include/linux/cpumask.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5d4d07a9e1ed..1e7399fc69c0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -996,14 +996,15 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied. + * Returns the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), - nr_cpu_ids, off, count); + nr_cpu_ids, off, count) - 1; } /** @@ -1018,7 +1019,7 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), - nr_cpu_ids, off, count); + nr_cpu_ids, off, count) - 1; } #if NR_CPUS <= BITS_PER_LONG -- cgit v1.2.3 From 4eeef2424153e79910d65248b5e1abf137d050e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Sep 2021 11:32:19 -0700 Subject: KVM: x86: Query vcpu->vcpu_idx directly and drop its accessor Read vcpu->vcpu_idx directly instead of bouncing through the one-line wrapper, kvm_vcpu_get_idx(), and drop the wrapper. The wrapper is a remnant of the original implementation and serves no purpose; remove it before it gains more users. Back when kvm_vcpu_get_idx() was added by commit 497d72d80a78 ("KVM: Add kvm_vcpu_get_idx to get vcpu index in kvm->vcpus"), the implementation was more than just a simple wrapper as vcpu->vcpu_idx did not exist and retrieving the index meant walking over the vCPU array to find the given vCPU. When vcpu_idx was introduced by commit 8750e72a79dd ("KVM: remember position in kvm->vcpus array"), the helper was left behind, likely to avoid extra thrash (but even then there were only two users, the original arm usage having been removed at some point in the past). No functional change intended. Suggested-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Reviewed-by: Vitaly Kuznetsov Message-Id: <20210910183220.2397812-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 041ca7f15ea4..000ea73dd324 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -721,11 +721,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } -static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) -{ - return vcpu->vcpu_idx; -} - #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + slots->used_slots; memslot++) \ -- cgit v1.2.3 From 6bc6db000295332bae2c1e8815d7450b72923d23 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 18 Sep 2021 08:56:29 +0800 Subject: KVM: Remove tlbs_dirty There is no user of tlbs_dirty. Signed-off-by: Lai Jiangshan Signed-off-by: Paolo Bonzini Message-Id: <20210918005636.3675-4-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 000ea73dd324..0f18df7fe874 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -608,7 +608,6 @@ struct kvm { unsigned long mmu_notifier_range_start; unsigned long mmu_notifier_range_end; #endif - long tlbs_dirty; struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; -- cgit v1.2.3 From 5501765a02a6c324f78581e6bb8209d054fe13ae Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 15 Sep 2021 10:09:38 -0700 Subject: driver core: fw_devlink: Add support for FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD If a parent device is also a supplier to a child device, fw_devlink=on by design delays the probe() of the child device until the probe() of the parent finishes successfully. However, some drivers of such parent devices (where parent is also a supplier) expect the child device to finish probing successfully as soon as they are added using device_add() and before the probe() of the parent device has completed successfully. One example of such a case is discussed in the link mentioned below. Add a flag to make fw_devlink=on not enforce these supplier-consumer relationships, so these drivers can continue working. Link: https://lore.kernel.org/netdev/CAGETcx_uj0V4DChME-gy5HGKTYnxLBX=TH2rag29f_p=UcG+Tg@mail.gmail.com/ Fixes: ea718c699055 ("Revert "Revert "driver core: Set fw_devlink=on by default""") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210915170940.617415-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 59828516ebaf..9f4ad719bfe3 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -22,10 +22,15 @@ struct device; * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. * NOT_DEVICE: The fwnode will never be populated as a struct device. * INITIALIZED: The hardware corresponding to fwnode has been initialized. + * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its + * driver needs its child devices to be bound with + * their respective drivers as soon as they are + * added. */ -#define FWNODE_FLAG_LINKS_ADDED BIT(0) -#define FWNODE_FLAG_NOT_DEVICE BIT(1) -#define FWNODE_FLAG_INITIALIZED BIT(2) +#define FWNODE_FLAG_LINKS_ADDED BIT(0) +#define FWNODE_FLAG_NOT_DEVICE BIT(1) +#define FWNODE_FLAG_INITIALIZED BIT(2) +#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) struct fwnode_handle { struct fwnode_handle *secondary; -- cgit v1.2.3 From f792565326825ed806626da50c6f9a928f1079c1 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Sep 2021 12:43:13 -0700 Subject: perf/core: fix userpage->time_enabled of inactive events Users of rdpmc rely on the mmapped user page to calculate accurate time_enabled. Currently, userpage->time_enabled is only updated when the event is added to the pmu. As a result, inactive event (due to counter multiplexing) does not have accurate userpage->time_enabled. This can be reproduced with something like: /* open 20 task perf_event "cycles", to create multiplexing */ fd = perf_event_open(); /* open task perf_event "cycles" */ userpage = mmap(fd); /* use mmap and rdmpc */ while (true) { time_enabled_mmap = xxx; /* use logic in perf_event_mmap_page */ time_enabled_read = read(fd).time_enabled; if (time_enabled_mmap > time_enabled_read) BUG(); } Fix this by updating userpage for inactive events in merge_sched_in. Suggested-by: Peter Zijlstra (Intel) Reported-and-tested-by: Lucian Grijincu Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210929194313.2398474-1-songliubraving@fb.com --- include/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fe156a8170aa..9b60bb89d86a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -683,7 +683,9 @@ struct perf_event { /* * timestamp shadows the actual context timing but it can * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in. + * context time as it was when the event was last scheduled in, + * or when ctx_sched_in failed to schedule the event because we + * run out of PMC. * * ctx_time already accounts for ctx->timestamp. Therefore to * compute ctx_time for a sample, simply add perf_clock(). -- cgit v1.2.3 From 83d40a61046f73103b4e5d8f1310261487ff63b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Sep 2021 15:31:11 +0200 Subject: sched: Always inline is_percpu_thread() vmlinux.o: warning: objtool: check_preemption_disabled()+0x81: call to is_percpu_thread() leaves .noinstr.text section Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210928084218.063371959@infradead.org --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 39039ce8ac4c..c1a927ddec64 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1720,7 +1720,7 @@ extern struct pid *cad_pid; #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -static inline bool is_percpu_thread(void) +static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && -- cgit v1.2.3