summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/configs/tiny.config1
-rw-r--r--kernel/module/main.c94
-rw-r--r--kernel/rcu/tree_stall.h26
-rw-r--r--kernel/sched/ext_idle.c37
-rw-r--r--kernel/trace/ftrace.c2
-rw-r--r--kernel/trace/ring_buffer.c4
-rw-r--r--kernel/trace/trace_events_filter.c192
7 files changed, 280 insertions, 76 deletions
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index b753695c5a8f..5dd0f0a34a73 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -2,3 +2,4 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KERNEL_XZ=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
+CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 3d64e69cc03e..413ac6ea3702 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -170,6 +170,30 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
}
/*
+ * Like strncmp(), except s/-/_/g as per scripts/Makefile.lib:name-fix-token rule.
+ */
+static int mod_strncmp(const char *str_a, const char *str_b, size_t n)
+{
+ for (int i = 0; i < n; i++) {
+ char a = str_a[i];
+ char b = str_b[i];
+ int d;
+
+ if (a == '-') a = '_';
+ if (b == '-') b = '_';
+
+ d = a - b;
+ if (d)
+ return d;
+
+ if (!a)
+ break;
+ }
+
+ return 0;
+}
+
+/*
* A thread that wants to hold a reference to a module only while it
* is running can call this to safely exit.
*/
@@ -1083,6 +1107,46 @@ static char *get_modinfo(const struct load_info *info, const char *tag)
return get_next_modinfo(info, tag, NULL);
}
+/**
+ * verify_module_namespace() - does @modname have access to this symbol's @namespace
+ * @namespace: export symbol namespace
+ * @modname: module name
+ *
+ * If @namespace is prefixed with "module:" to indicate it is a module namespace
+ * then test if @modname matches any of the comma separated patterns.
+ *
+ * The patterns only support tail-glob.
+ */
+static bool verify_module_namespace(const char *namespace, const char *modname)
+{
+ size_t len, modlen = strlen(modname);
+ const char *prefix = "module:";
+ const char *sep;
+ bool glob;
+
+ if (!strstarts(namespace, prefix))
+ return false;
+
+ for (namespace += strlen(prefix); *namespace; namespace = sep) {
+ sep = strchrnul(namespace, ',');
+ len = sep - namespace;
+
+ glob = false;
+ if (sep[-1] == '*') {
+ len--;
+ glob = true;
+ }
+
+ if (*sep)
+ sep++;
+
+ if (mod_strncmp(namespace, modname, len) == 0 && (glob || len == modlen))
+ return true;
+ }
+
+ return false;
+}
+
static int verify_namespace_is_imported(const struct load_info *info,
const struct kernel_symbol *sym,
struct module *mod)
@@ -1092,6 +1156,10 @@ static int verify_namespace_is_imported(const struct load_info *info,
namespace = kernel_symbol_namespace(sym);
if (namespace && namespace[0]) {
+
+ if (verify_module_namespace(namespace, mod->name))
+ return 0;
+
for_each_modinfo_entry(imported_namespace, info, "import_ns") {
if (strcmp(namespace, imported_namespace) == 0)
return 0;
@@ -1658,15 +1726,30 @@ static void module_license_taint_check(struct module *mod, const char *license)
}
}
-static void setup_modinfo(struct module *mod, struct load_info *info)
+static int setup_modinfo(struct module *mod, struct load_info *info)
{
const struct module_attribute *attr;
+ char *imported_namespace;
int i;
for (i = 0; (attr = modinfo_attrs[i]); i++) {
if (attr->setup)
attr->setup(mod, get_modinfo(info, attr->attr.name));
}
+
+ for_each_modinfo_entry(imported_namespace, info, "import_ns") {
+ /*
+ * 'module:' prefixed namespaces are implicit, disallow
+ * explicit imports.
+ */
+ if (strstarts(imported_namespace, "module:")) {
+ pr_err("%s: module tries to import module namespace: %s\n",
+ mod->name, imported_namespace);
+ return -EPERM;
+ }
+ }
+
+ return 0;
}
static void free_modinfo(struct module *mod)
@@ -3323,7 +3406,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto free_unload;
/* Set up MODINFO_ATTR fields */
- setup_modinfo(mod, info);
+ err = setup_modinfo(mod, info);
+ if (err)
+ goto free_modinfo;
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(mod, info);
@@ -3386,11 +3471,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto sysfs_cleanup;
}
+ if (codetag_load_module(mod))
+ goto sysfs_cleanup;
+
/* Get rid of temporary copy. */
free_copy(info, flags);
- codetag_load_module(mod);
-
/* Done! */
trace_module_load(mod);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 56b21219442b..486c00536207 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -20,6 +20,28 @@
int sysctl_panic_on_rcu_stall __read_mostly;
int sysctl_max_rcu_stall_to_panic __read_mostly;
+#ifdef CONFIG_SYSFS
+
+static unsigned int rcu_stall_count;
+
+static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", rcu_stall_count);
+}
+
+static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
+
+static __init int kernel_rcu_stall_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_rcu_stall_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
@@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
if (kvm_check_and_clear_guest_paused())
return;
+#ifdef CONFIG_SYSFS
+ ++rcu_stall_count;
+#endif
+
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 66da03cc0b33..6d29d3cbc670 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -138,6 +138,7 @@ found:
goto retry;
}
+#ifdef CONFIG_NUMA
/*
* Tracks nodes that have not yet been visited when searching for an idle
* CPU across all available nodes.
@@ -186,6 +187,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, i
return cpu;
}
+#else
+static inline s32
+pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags)
+{
+ return -EBUSY;
+}
+#endif
/*
* Find an idle CPU in the system, starting from @node.
@@ -447,11 +455,18 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr;
int node = scx_cpu_node_if_enabled(prev_cpu);
+ bool is_prev_allowed;
s32 cpu;
preempt_disable();
/*
+ * Check whether @prev_cpu is still within the allowed set. If not,
+ * we can still try selecting a nearby CPU.
+ */
+ is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
+
+ /*
* Determine the subset of CPUs usable by @p within @cpus_allowed.
*/
if (allowed != p->cpus_ptr) {
@@ -465,21 +480,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
cpu = -EBUSY;
goto out_enable;
}
-
- /*
- * If @prev_cpu is not in the allowed CPUs, skip topology
- * optimizations and try to pick any idle CPU usable by the
- * task.
- *
- * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
- * the current node, as it may optimize some waker->wakee
- * workloads.
- */
- if (!cpumask_test_cpu(prev_cpu, allowed)) {
- node = scx_cpu_node_if_enabled(smp_processor_id());
- cpu = scx_pick_idle_cpu(allowed, node, flags);
- goto out_enable;
- }
}
/*
@@ -525,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
* then avoid a migration.
*/
cpu = smp_processor_id();
- if (cpus_share_cache(cpu, prev_cpu) &&
+ if (is_prev_allowed && cpus_share_cache(cpu, prev_cpu) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
@@ -562,7 +562,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
/*
* Keep using @prev_cpu if it's part of a fully idle core.
*/
- if (cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
+ if (is_prev_allowed &&
+ cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
@@ -611,7 +612,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
/*
* Use @prev_cpu if it's idle.
*/
- if (scx_idle_test_and_clear_cpu(prev_cpu)) {
+ if (is_prev_allowed && scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a7291685902e..4203fad56b6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -188,7 +188,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
op->saved_func(ip, parent_ip, op, fregs);
}
-static void ftrace_sync_ipi(void *data)
+void ftrace_sync_ipi(void *data)
{
/* Probably not needed, but do it anyway */
smp_rmb();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e24509bd0af5..00fc38d70e86 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6795,7 +6795,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
old_size = buffer->subbuf_size;
/* prevent another thread from changing buffer sizes */
- mutex_lock(&buffer->mutex);
+ guard(mutex)(&buffer->mutex);
atomic_inc(&buffer->record_disabled);
/* Make sure all commits have finished */
@@ -6900,7 +6900,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
}
atomic_dec(&buffer->record_disabled);
- mutex_unlock(&buffer->mutex);
return 0;
@@ -6909,7 +6908,6 @@ error:
buffer->subbuf_size = old_size;
atomic_dec(&buffer->record_disabled);
- mutex_unlock(&buffer->mutex);
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2048560264bb..ea8b364b6818 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1250,7 +1250,9 @@ static void append_filter_err(struct trace_array *tr,
static inline struct event_filter *event_filter(struct trace_event_file *file)
{
- return file->filter;
+ return rcu_dereference_protected(file->filter,
+ lockdep_is_held(&event_mutex));
+
}
/* caller must hold event_mutex */
@@ -1320,7 +1322,7 @@ void free_event_filter(struct event_filter *filter)
static inline void __remove_filter(struct trace_event_file *file)
{
filter_disable(file);
- remove_filter_string(file->filter);
+ remove_filter_string(event_filter(file));
}
static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
@@ -1335,22 +1337,139 @@ static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
}
}
+struct filter_list {
+ struct list_head list;
+ struct event_filter *filter;
+};
+
+struct filter_head {
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+
+static void free_filter_list(struct rcu_head *rhp)
+{
+ struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu);
+ struct filter_list *filter_item, *tmp;
+
+ list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) {
+ __free_filter(filter_item->filter);
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
+ kfree(filter_list);
+}
+
+static void free_filter_list_tasks(struct rcu_head *rhp)
+{
+ call_rcu(rhp, free_filter_list);
+}
+
+/*
+ * The tracepoint_synchronize_unregister() is a double rcu call.
+ * It calls synchronize_rcu_tasks_trace() followed by synchronize_rcu().
+ * Instead of waiting for it, simply call these via the call_rcu*()
+ * variants.
+ */
+static void delay_free_filter(struct filter_head *head)
+{
+ call_rcu_tasks_trace(&head->rcu, free_filter_list_tasks);
+}
+
+static void try_delay_free_filter(struct event_filter *filter)
+{
+ struct filter_head *head;
+ struct filter_list *item;
+
+ head = kmalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ goto free_now;
+
+ INIT_LIST_HEAD(&head->list);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ kfree(head);
+ goto free_now;
+ }
+
+ item->filter = filter;
+ list_add_tail(&item->list, &head->list);
+ delay_free_filter(head);
+ return;
+
+ free_now:
+ /* Make sure the filter is not being used */
+ tracepoint_synchronize_unregister();
+ __free_filter(filter);
+}
+
static inline void __free_subsystem_filter(struct trace_event_file *file)
{
- __free_filter(file->filter);
+ __free_filter(event_filter(file));
file->filter = NULL;
}
+static inline void event_set_filter(struct trace_event_file *file,
+ struct event_filter *filter)
+{
+ rcu_assign_pointer(file->filter, filter);
+}
+
+static inline void event_clear_filter(struct trace_event_file *file)
+{
+ RCU_INIT_POINTER(file->filter, NULL);
+}
+
static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
- struct trace_array *tr)
+ struct trace_array *tr,
+ struct event_filter *filter)
{
struct trace_event_file *file;
+ struct filter_head *head;
+ struct filter_list *item;
+
+ head = kmalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ goto free_now;
+
+ INIT_LIST_HEAD(&head->list);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ kfree(head);
+ goto free_now;
+ }
+
+ item->filter = filter;
+ list_add_tail(&item->list, &head->list);
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
continue;
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ goto free_now;
+ item->filter = event_filter(file);
+ list_add_tail(&item->list, &head->list);
+ event_clear_filter(file);
+ }
+
+ delay_free_filter(head);
+ return;
+ free_now:
+ tracepoint_synchronize_unregister();
+
+ if (head)
+ free_filter_list(&head->rcu);
+
+ list_for_each_entry(file, &tr->events, list) {
+ if (file->system != dir || !file->filter)
+ continue;
__free_subsystem_filter(file);
}
+ __free_filter(filter);
}
int filter_assign_type(const char *type)
@@ -2120,22 +2239,6 @@ static inline void event_set_filtered_flag(struct trace_event_file *file)
trace_buffered_event_enable();
}
-static inline void event_set_filter(struct trace_event_file *file,
- struct event_filter *filter)
-{
- rcu_assign_pointer(file->filter, filter);
-}
-
-static inline void event_clear_filter(struct trace_event_file *file)
-{
- RCU_INIT_POINTER(file->filter, NULL);
-}
-
-struct filter_list {
- struct list_head list;
- struct event_filter *filter;
-};
-
static int process_system_preds(struct trace_subsystem_dir *dir,
struct trace_array *tr,
struct filter_parse_error *pe,
@@ -2144,11 +2247,16 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
struct trace_event_file *file;
struct filter_list *filter_item;
struct event_filter *filter = NULL;
- struct filter_list *tmp;
- LIST_HEAD(filter_list);
+ struct filter_head *filter_list;
bool fail = true;
int err;
+ filter_list = kmalloc(sizeof(*filter_list), GFP_KERNEL);
+ if (!filter_list)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&filter_list->list);
+
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
@@ -2175,7 +2283,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
if (!filter_item)
goto fail_mem;
- list_add_tail(&filter_item->list, &filter_list);
+ list_add_tail(&filter_item->list, &filter_list->list);
/*
* Regardless of if this returned an error, we still
* replace the filter for the call.
@@ -2195,31 +2303,22 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
* Do a synchronize_rcu() and to ensure all calls are
* done with them before we free them.
*/
- tracepoint_synchronize_unregister();
- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
- __free_filter(filter_item->filter);
- list_del(&filter_item->list);
- kfree(filter_item);
- }
+ delay_free_filter(filter_list);
return 0;
fail:
/* No call succeeded */
- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
- list_del(&filter_item->list);
- kfree(filter_item);
- }
+ free_filter_list(&filter_list->rcu);
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL;
fail_mem:
__free_filter(filter);
+
/* If any call succeeded, we still need to sync */
if (!fail)
- tracepoint_synchronize_unregister();
- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
- __free_filter(filter_item->filter);
- list_del(&filter_item->list);
- kfree(filter_item);
- }
+ delay_free_filter(filter_list);
+ else
+ free_filter_list(&filter_list->rcu);
+
return -ENOMEM;
}
@@ -2361,9 +2460,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
event_clear_filter(file);
- /* Make sure the filter is not being used */
- tracepoint_synchronize_unregister();
- __free_filter(filter);
+ try_delay_free_filter(filter);
return 0;
}
@@ -2387,11 +2484,8 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
event_set_filter(file, filter);
- if (tmp) {
- /* Make sure the call is done with the filter */
- tracepoint_synchronize_unregister();
- __free_filter(tmp);
- }
+ if (tmp)
+ try_delay_free_filter(tmp);
}
return err;
@@ -2417,9 +2511,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
filter = system->filter;
system->filter = NULL;
/* Ensure all filters are no longer used */
- tracepoint_synchronize_unregister();
- filter_free_subsystem_filters(dir, tr);
- __free_filter(filter);
+ filter_free_subsystem_filters(dir, tr, filter);
return 0;
}