summaryrefslogtreecommitdiff
path: root/kernel/static_call.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-12 13:58:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-12 13:58:15 -0700
commitdd502a81077a5f3b3e19fa9a1accffdcab5ad5bc (patch)
treee993e8d13deadff82ec22d34e68860c71e5443f7 /kernel/static_call.c
parent34eb62d868d729e9a252aa497277081fb652eeed (diff)
parent69e0ad37c9f32d5aa1beb02aab4ec0cd055be013 (diff)
Merge tag 'core-static_call-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull static call support from Ingo Molnar: "This introduces static_call(), which is the idea of static_branch() applied to indirect function calls. Remove a data load (indirection) by modifying the text. They give the flexibility of function pointers, but with better performance. (This is especially important for cases where retpolines would otherwise be used, as retpolines can be pretty slow.) API overview: DECLARE_STATIC_CALL(name, func); DEFINE_STATIC_CALL(name, func); DEFINE_STATIC_CALL_NULL(name, typename); static_call(name)(args...); static_call_cond(name)(args...); static_call_update(name, func); x86 is supported via text patching, otherwise basic indirect calls are used, with function pointers. There's a second variant using inline code patching, inspired by jump-labels, implemented on x86 as well. The new APIs are utilized in the x86 perf code, a heavy user of function pointers, where static calls speed up the PMU handler by 4.2% (!). The generic implementation is not really excercised on other architectures, outside of the trivial test_static_call_init() self-test" * tag 'core-static_call-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) static_call: Fix return type of static_call_init tracepoint: Fix out of sync data passing by static caller tracepoint: Fix overly long tracepoint names x86/perf, static_call: Optimize x86_pmu methods tracepoint: Optimize using static_call() static_call: Allow early init static_call: Add some validation static_call: Handle tail-calls static_call: Add static_call_cond() x86/alternatives: Teach text_poke_bp() to emulate RET static_call: Add simple self-test for static calls x86/static_call: Add inline static call implementation for x86-64 x86/static_call: Add out-of-line static call implementation static_call: Avoid kprobes on inline static_call()s static_call: Add inline static call infrastructure static_call: Add basic static call infrastructure compiler.h: Make __ADDRESSABLE() symbol truly unique jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved() module: Properly propagate MODULE_STATE_COMING failure module: Fix up module_notifier return values ...
Diffstat (limited to 'kernel/static_call.c')
-rw-r--r--kernel/static_call.c482
1 files changed, 482 insertions, 0 deletions
diff --git a/kernel/static_call.c b/kernel/static_call.c
new file mode 100644
index 000000000000..84565c2a41b8
--- /dev/null
+++ b/kernel/static_call.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/static_call.h>
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/processor.h>
+#include <asm/sections.h>
+
+extern struct static_call_site __start_static_call_sites[],
+ __stop_static_call_sites[];
+
+static bool static_call_initialized;
+
+/* mutex to protect key modules/sites */
+static DEFINE_MUTEX(static_call_mutex);
+
+static void static_call_lock(void)
+{
+ mutex_lock(&static_call_mutex);
+}
+
+static void static_call_unlock(void)
+{
+ mutex_unlock(&static_call_mutex);
+}
+
+static inline void *static_call_addr(struct static_call_site *site)
+{
+ return (void *)((long)site->addr + (long)&site->addr);
+}
+
+
+static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+{
+ return (struct static_call_key *)
+ (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS);
+}
+
+/* These assume the key is word-aligned. */
+static inline bool static_call_is_init(struct static_call_site *site)
+{
+ return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT;
+}
+
+static inline bool static_call_is_tail(struct static_call_site *site)
+{
+ return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL;
+}
+
+static inline void static_call_set_init(struct static_call_site *site)
+{
+ site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) -
+ (long)&site->key;
+}
+
+static int static_call_site_cmp(const void *_a, const void *_b)
+{
+ const struct static_call_site *a = _a;
+ const struct static_call_site *b = _b;
+ const struct static_call_key *key_a = static_call_key(a);
+ const struct static_call_key *key_b = static_call_key(b);
+
+ if (key_a < key_b)
+ return -1;
+
+ if (key_a > key_b)
+ return 1;
+
+ return 0;
+}
+
+static void static_call_site_swap(void *_a, void *_b, int size)
+{
+ long delta = (unsigned long)_a - (unsigned long)_b;
+ struct static_call_site *a = _a;
+ struct static_call_site *b = _b;
+ struct static_call_site tmp = *a;
+
+ a->addr = b->addr - delta;
+ a->key = b->key - delta;
+
+ b->addr = tmp.addr + delta;
+ b->key = tmp.key + delta;
+}
+
+static inline void static_call_sort_entries(struct static_call_site *start,
+ struct static_call_site *stop)
+{
+ sort(start, stop - start, sizeof(struct static_call_site),
+ static_call_site_cmp, static_call_site_swap);
+}
+
+static inline bool static_call_key_has_mods(struct static_call_key *key)
+{
+ return !(key->type & 1);
+}
+
+static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
+{
+ if (!static_call_key_has_mods(key))
+ return NULL;
+
+ return key->mods;
+}
+
+static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
+{
+ if (static_call_key_has_mods(key))
+ return NULL;
+
+ return (struct static_call_site *)(key->type & ~1);
+}
+
+void __static_call_update(struct static_call_key *key, void *tramp, void *func)
+{
+ struct static_call_site *site, *stop;
+ struct static_call_mod *site_mod, first;
+
+ cpus_read_lock();
+ static_call_lock();
+
+ if (key->func == func)
+ goto done;
+
+ key->func = func;
+
+ arch_static_call_transform(NULL, tramp, func, false);
+
+ /*
+ * If uninitialized, we'll not update the callsites, but they still
+ * point to the trampoline and we just patched that.
+ */
+ if (WARN_ON_ONCE(!static_call_initialized))
+ goto done;
+
+ first = (struct static_call_mod){
+ .next = static_call_key_next(key),
+ .mod = NULL,
+ .sites = static_call_key_sites(key),
+ };
+
+ for (site_mod = &first; site_mod; site_mod = site_mod->next) {
+ struct module *mod = site_mod->mod;
+
+ if (!site_mod->sites) {
+ /*
+ * This can happen if the static call key is defined in
+ * a module which doesn't use it.
+ *
+ * It also happens in the has_mods case, where the
+ * 'first' entry has no sites associated with it.
+ */
+ continue;
+ }
+
+ stop = __stop_static_call_sites;
+
+#ifdef CONFIG_MODULES
+ if (mod) {
+ stop = mod->static_call_sites +
+ mod->num_static_call_sites;
+ }
+#endif
+
+ for (site = site_mod->sites;
+ site < stop && static_call_key(site) == key; site++) {
+ void *site_addr = static_call_addr(site);
+
+ if (static_call_is_init(site)) {
+ /*
+ * Don't write to call sites which were in
+ * initmem and have since been freed.
+ */
+ if (!mod && system_state >= SYSTEM_RUNNING)
+ continue;
+ if (mod && !within_module_init((unsigned long)site_addr, mod))
+ continue;
+ }
+
+ if (!kernel_text_address((unsigned long)site_addr)) {
+ WARN_ONCE(1, "can't patch static call site at %pS",
+ site_addr);
+ continue;
+ }
+
+ arch_static_call_transform(site_addr, NULL, func,
+ static_call_is_tail(site));
+ }
+ }
+
+done:
+ static_call_unlock();
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+
+static int __static_call_init(struct module *mod,
+ struct static_call_site *start,
+ struct static_call_site *stop)
+{
+ struct static_call_site *site;
+ struct static_call_key *key, *prev_key = NULL;
+ struct static_call_mod *site_mod;
+
+ if (start == stop)
+ return 0;
+
+ static_call_sort_entries(start, stop);
+
+ for (site = start; site < stop; site++) {
+ void *site_addr = static_call_addr(site);
+
+ if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
+ (!mod && init_section_contains(site_addr, 1)))
+ static_call_set_init(site);
+
+ key = static_call_key(site);
+ if (key != prev_key) {
+ prev_key = key;
+
+ /*
+ * For vmlinux (!mod) avoid the allocation by storing
+ * the sites pointer in the key itself. Also see
+ * __static_call_update()'s @first.
+ *
+ * This allows architectures (eg. x86) to call
+ * static_call_init() before memory allocation works.
+ */
+ if (!mod) {
+ key->sites = site;
+ key->type |= 1;
+ goto do_transform;
+ }
+
+ site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+ if (!site_mod)
+ return -ENOMEM;
+
+ /*
+ * When the key has a direct sites pointer, extract
+ * that into an explicit struct static_call_mod, so we
+ * can have a list of modules.
+ */
+ if (static_call_key_sites(key)) {
+ site_mod->mod = NULL;
+ site_mod->next = NULL;
+ site_mod->sites = static_call_key_sites(key);
+
+ key->mods = site_mod;
+
+ site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+ if (!site_mod)
+ return -ENOMEM;
+ }
+
+ site_mod->mod = mod;
+ site_mod->sites = site;
+ site_mod->next = static_call_key_next(key);
+ key->mods = site_mod;
+ }
+
+do_transform:
+ arch_static_call_transform(site_addr, NULL, key->func,
+ static_call_is_tail(site));
+ }
+
+ return 0;
+}
+
+static int addr_conflict(struct static_call_site *site, void *start, void *end)
+{
+ unsigned long addr = (unsigned long)static_call_addr(site);
+
+ if (addr <= (unsigned long)end &&
+ addr + CALL_INSN_SIZE > (unsigned long)start)
+ return 1;
+
+ return 0;
+}
+
+static int __static_call_text_reserved(struct static_call_site *iter_start,
+ struct static_call_site *iter_stop,
+ void *start, void *end)
+{
+ struct static_call_site *iter = iter_start;
+
+ while (iter < iter_stop) {
+ if (addr_conflict(iter, start, end))
+ return 1;
+ iter++;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_MODULES
+
+static int __static_call_mod_text_reserved(void *start, void *end)
+{
+ struct module *mod;
+ int ret;
+
+ preempt_disable();
+ mod = __module_text_address((unsigned long)start);
+ WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
+ if (!try_module_get(mod))
+ mod = NULL;
+ preempt_enable();
+
+ if (!mod)
+ return 0;
+
+ ret = __static_call_text_reserved(mod->static_call_sites,
+ mod->static_call_sites + mod->num_static_call_sites,
+ start, end);
+
+ module_put(mod);
+
+ return ret;
+}
+
+static int static_call_add_module(struct module *mod)
+{
+ return __static_call_init(mod, mod->static_call_sites,
+ mod->static_call_sites + mod->num_static_call_sites);
+}
+
+static void static_call_del_module(struct module *mod)
+{
+ struct static_call_site *start = mod->static_call_sites;
+ struct static_call_site *stop = mod->static_call_sites +
+ mod->num_static_call_sites;
+ struct static_call_key *key, *prev_key = NULL;
+ struct static_call_mod *site_mod, **prev;
+ struct static_call_site *site;
+
+ for (site = start; site < stop; site++) {
+ key = static_call_key(site);
+ if (key == prev_key)
+ continue;
+
+ prev_key = key;
+
+ for (prev = &key->mods, site_mod = key->mods;
+ site_mod && site_mod->mod != mod;
+ prev = &site_mod->next, site_mod = site_mod->next)
+ ;
+
+ if (!site_mod)
+ continue;
+
+ *prev = site_mod->next;
+ kfree(site_mod);
+ }
+}
+
+static int static_call_module_notify(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+ int ret = 0;
+
+ cpus_read_lock();
+ static_call_lock();
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ ret = static_call_add_module(mod);
+ if (ret) {
+ WARN(1, "Failed to allocate memory for static calls");
+ static_call_del_module(mod);
+ }
+ break;
+ case MODULE_STATE_GOING:
+ static_call_del_module(mod);
+ break;
+ }
+
+ static_call_unlock();
+ cpus_read_unlock();
+
+ return notifier_from_errno(ret);
+}
+
+static struct notifier_block static_call_module_nb = {
+ .notifier_call = static_call_module_notify,
+};
+
+#else
+
+static inline int __static_call_mod_text_reserved(void *start, void *end)
+{
+ return 0;
+}
+
+#endif /* CONFIG_MODULES */
+
+int static_call_text_reserved(void *start, void *end)
+{
+ int ret = __static_call_text_reserved(__start_static_call_sites,
+ __stop_static_call_sites, start, end);
+
+ if (ret)
+ return ret;
+
+ return __static_call_mod_text_reserved(start, end);
+}
+
+int __init static_call_init(void)
+{
+ int ret;
+
+ if (static_call_initialized)
+ return 0;
+
+ cpus_read_lock();
+ static_call_lock();
+ ret = __static_call_init(NULL, __start_static_call_sites,
+ __stop_static_call_sites);
+ static_call_unlock();
+ cpus_read_unlock();
+
+ if (ret) {
+ pr_err("Failed to allocate memory for static_call!\n");
+ BUG();
+ }
+
+ static_call_initialized = true;
+
+#ifdef CONFIG_MODULES
+ register_module_notifier(&static_call_module_nb);
+#endif
+ return 0;
+}
+early_initcall(static_call_init);
+
+#ifdef CONFIG_STATIC_CALL_SELFTEST
+
+static int func_a(int x)
+{
+ return x+1;
+}
+
+static int func_b(int x)
+{
+ return x+2;
+}
+
+DEFINE_STATIC_CALL(sc_selftest, func_a);
+
+static struct static_call_data {
+ int (*func)(int);
+ int val;
+ int expect;
+} static_call_data [] __initdata = {
+ { NULL, 2, 3 },
+ { func_b, 2, 4 },
+ { func_a, 2, 3 }
+};
+
+static int __init test_static_call_init(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) {
+ struct static_call_data *scd = &static_call_data[i];
+
+ if (scd->func)
+ static_call_update(sc_selftest, scd->func);
+
+ WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
+ }
+
+ return 0;
+}
+early_initcall(test_static_call_init);
+
+#endif /* CONFIG_STATIC_CALL_SELFTEST */