diff options
author | Guillaume Knispel <guillaume.knispel@supersonicimagine.com> | 2017-09-08 16:17:55 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-08 18:26:51 -0700 |
commit | 0cfb6aee70bddbef6ec796b255f588ce0e126766 (patch) | |
tree | f83b1e24d55b1aef178169f1d747d404d665be83 /ipc/util.c | |
parent | e4243b8062c13b8cb3d91695dc353cb9e6a0da25 (diff) |
ipc: optimize semget/shmget/msgget for lots of keys
ipc_findkey() used to scan all objects to look for the wanted key. This
is slow when using a high number of keys. This change adds an rhashtable
of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n).
This change gives a 865% improvement of benchmark reaim.jobs_per_min on a
56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1]
Other (more micro) benchmark results, by the author: On an i5 laptop, the
following loop executed right after a reboot took, without and with this
change:
for (int i = 0, k=0x424242; i < KEYS; ++i)
semget(k++, 1, IPC_CREAT | 0600);
total total max single max single
KEYS without with call without call with
1 3.5 4.9 µs 3.5 4.9
10 7.6 8.6 µs 3.7 4.7
32 16.2 15.9 µs 4.3 5.3
100 72.9 41.8 µs 3.7 4.7
1000 5,630.0 502.0 µs * *
10000 1,340,000.0 7,240.0 µs * *
31900 17,600,000.0 22,200.0 µs * *
*: unreliable measure: high variance
The duration for a lookup-only usage was obtained by the same loop once
the keys are present:
total total max single max single
KEYS without with call without call with
1 2.1 2.5 µs 2.1 2.5
10 4.5 4.8 µs 2.2 2.3
32 13.0 10.8 µs 2.3 2.8
100 82.9 25.1 µs * 2.3
1000 5,780.0 217.0 µs * *
10000 1,470,000.0 2,520.0 µs * *
31900 17,400,000.0 7,810.0 µs * *
Finally, executing each semget() in a new process gave, when still
summing only the durations of these syscalls:
creation:
total total
KEYS without with
1 3.7 5.0 µs
10 32.9 36.7 µs
32 125.0 109.0 µs
100 523.0 353.0 µs
1000 20,300.0 3,280.0 µs
10000 2,470,000.0 46,700.0 µs
31900 27,800,000.0 219,000.0 µs
lookup-only:
total total
KEYS without with
1 2.5 2.7 µs
10 25.4 24.4 µs
32 106.0 72.6 µs
100 591.0 352.0 µs
1000 22,400.0 2,250.0 µs
10000 2,510,000.0 25,700.0 µs
31900 28,200,000.0 115,000.0 µs
[1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop
Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debix
Signed-off-by: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
Reviewed-by: Marc Pardo <marc.pardo@supersonicimagine.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
Cc: Marc Pardo <marc.pardo@supersonicimagine.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc/util.c')
-rw-r--r-- | ipc/util.c | 100 |
1 files changed, 76 insertions, 24 deletions
diff --git a/ipc/util.c b/ipc/util.c index 069bb22c9f64..78755873cc5b 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -83,27 +83,46 @@ struct ipc_proc_iface { */ static int __init ipc_init(void) { - sem_init(); - msg_init(); + int err_sem, err_msg; + + err_sem = sem_init(); + WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem); + err_msg = msg_init(); + WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg); shm_init(); - return 0; + + return err_msg ? err_msg : err_sem; } device_initcall(ipc_init); +static const struct rhashtable_params ipc_kht_params = { + .head_offset = offsetof(struct kern_ipc_perm, khtnode), + .key_offset = offsetof(struct kern_ipc_perm, key), + .key_len = FIELD_SIZEOF(struct kern_ipc_perm, key), + .locks_mul = 1, + .automatic_shrinking = true, +}; + /** * ipc_init_ids - initialise ipc identifiers * @ids: ipc identifier set * * Set up the sequence range to use for the ipc identifier range (limited - * below IPCMNI) then initialise the ids idr. + * below IPCMNI) then initialise the keys hashtable and ids idr. */ -void ipc_init_ids(struct ipc_ids *ids) +int ipc_init_ids(struct ipc_ids *ids) { + int err; ids->in_use = 0; ids->seq = 0; ids->next_id = -1; init_rwsem(&ids->rwsem); + err = rhashtable_init(&ids->key_ht, &ipc_kht_params); + if (err) + return err; idr_init(&ids->ipcs_idr); + ids->tables_initialized = true; + return 0; } #ifdef CONFIG_PROC_FS @@ -147,28 +166,20 @@ void __init ipc_init_proc_interface(const char *path, const char *header, * Returns the locked pointer to the ipc structure if found or NULL * otherwise. If key is found ipc points to the owning ipc structure * - * Called with ipc_ids.rwsem held. + * Called with writer ipc_ids.rwsem held. */ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) { - struct kern_ipc_perm *ipc; - int next_id; - int total; - - for (total = 0, next_id = 0; total < ids->in_use; next_id++) { - ipc = idr_find(&ids->ipcs_idr, next_id); - - if (ipc == NULL) - continue; + struct kern_ipc_perm *ipcp = NULL; - if (ipc->key != key) { - total++; - continue; - } + if (likely(ids->tables_initialized)) + ipcp = rhashtable_lookup_fast(&ids->key_ht, &key, + ipc_kht_params); + if (ipcp) { rcu_read_lock(); - ipc_lock_object(ipc); - return ipc; + ipc_lock_object(ipcp); + return ipcp; } return NULL; @@ -221,13 +232,13 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) { kuid_t euid; kgid_t egid; - int id; + int id, err; int next_id = ids->next_id; if (size > IPCMNI) size = IPCMNI; - if (ids->in_use >= size) + if (!ids->tables_initialized || ids->in_use >= size) return -ENOSPC; idr_preload(GFP_KERNEL); @@ -246,6 +257,15 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); idr_preload_end(); + + if (id >= 0 && new->key != IPC_PRIVATE) { + err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode, + ipc_kht_params); + if (err < 0) { + idr_remove(&ids->ipcs_idr, id); + id = err; + } + } if (id < 0) { spin_unlock(&new->lock); rcu_read_unlock(); @@ -377,6 +397,20 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, return err; } +/** + * ipc_kht_remove - remove an ipc from the key hashtable + * @ids: ipc identifier set + * @ipcp: ipc perm structure containing the key to remove + * + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held + * before this function is called, and remain locked on the exit. + */ +static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) +{ + if (ipcp->key != IPC_PRIVATE) + rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, + ipc_kht_params); +} /** * ipc_rmid - remove an ipc identifier @@ -391,10 +425,25 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) int lid = ipcid_to_idx(ipcp->id); idr_remove(&ids->ipcs_idr, lid); + ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; } +/** + * ipc_set_key_private - switch the key of an existing ipc to IPC_PRIVATE + * @ids: ipc identifier set + * @ipcp: ipc perm structure containing the key to modify + * + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held + * before this function is called, and remain locked on the exit. + */ +void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) +{ + ipc_kht_remove(ids, ipcp); + ipcp->key = IPC_PRIVATE; +} + int ipc_rcu_getref(struct kern_ipc_perm *ptr) { return refcount_inc_not_zero(&ptr->refcount); @@ -485,7 +534,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out) } /** - * ipc_obtain_object + * ipc_obtain_object_idr * @ids: ipc identifier set * @id: ipc id to look for * @@ -499,6 +548,9 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id) struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); + if (unlikely(!ids->tables_initialized)) + return ERR_PTR(-EINVAL); + out = idr_find(&ids->ipcs_idr, lid); if (!out) return ERR_PTR(-EINVAL); |