diff options
Diffstat (limited to 'net/xfrm')
-rw-r--r-- | net/xfrm/Kconfig | 12 | ||||
-rw-r--r-- | net/xfrm/Makefile | 7 | ||||
-rw-r--r-- | net/xfrm/xfrm_algo.c | 729 | ||||
-rw-r--r-- | net/xfrm/xfrm_input.c | 89 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 1367 | ||||
-rw-r--r-- | net/xfrm/xfrm_state.c | 1037 | ||||
-rw-r--r-- | net/xfrm/xfrm_user.c | 1253 |
7 files changed, 4494 insertions, 0 deletions
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig new file mode 100644 index 000000000000..58ca6a972c48 --- /dev/null +++ b/net/xfrm/Kconfig @@ -0,0 +1,12 @@ +# +# XFRM configuration +# +config XFRM_USER + tristate "IPsec user configuration interface" + depends on INET && XFRM + ---help--- + Support for IPsec user configuration interface used + by native Linux tools. + + If unsure, say Y. + diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile new file mode 100644 index 000000000000..693aac1aa833 --- /dev/null +++ b/net/xfrm/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the XFRM subsystem. +# + +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM_USER) += xfrm_user.o + diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c new file mode 100644 index 000000000000..080aae243ce0 --- /dev/null +++ b/net/xfrm/xfrm_algo.c @@ -0,0 +1,729 @@ +/* + * xfrm algorithm interface + * + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/pfkeyv2.h> +#include <linux/crypto.h> +#include <net/xfrm.h> +#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) +#include <net/ah.h> +#endif +#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) +#include <net/esp.h> +#endif +#include <asm/scatterlist.h> + +/* + * Algorithms supported by IPsec. These entries contain properties which + * are used in key negotiation and xfrm processing, and are used to verify + * that instantiated crypto transforms have correct parameters for IPsec + * purposes. + */ +static struct xfrm_algo_desc aalg_list[] = { +{ + .name = "digest_null", + + .uinfo = { + .auth = { + .icv_truncbits = 0, + .icv_fullbits = 0, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_NULL, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 0, + .sadb_alg_maxbits = 0 + } +}, +{ + .name = "md5", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_AALG_MD5HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } +}, +{ + .name = "sha1", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 160, + } + }, + + .desc = { + .sadb_alg_id = SADB_AALG_SHA1HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 160 + } +}, +{ + .name = "sha256", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 256, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 256, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "ripemd160", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 160, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 160 + } +}, +}; + +static struct xfrm_algo_desc ealg_list[] = { +{ + .name = "cipher_null", + + .uinfo = { + .encr = { + .blockbits = 8, + .defkeybits = 0, + } + }, + + .desc = { + .sadb_alg_id = SADB_EALG_NULL, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 0, + .sadb_alg_maxbits = 0 + } +}, +{ + .name = "des", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 64, + } + }, + + .desc = { + .sadb_alg_id = SADB_EALG_DESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 64, + .sadb_alg_maxbits = 64 + } +}, +{ + .name = "des3_ede", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 192, + } + }, + + .desc = { + .sadb_alg_id = SADB_EALG_3DESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 192, + .sadb_alg_maxbits = 192 + } +}, +{ + .name = "cast128", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_CASTCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 40, + .sadb_alg_maxbits = 128 + } +}, +{ + .name = "blowfish", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 40, + .sadb_alg_maxbits = 448 + } +}, +{ + .name = "aes", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "serpent", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_SERPENTCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256, + } +}, +{ + .name = "twofish", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +}; + +static struct xfrm_algo_desc calg_list[] = { +{ + .name = "deflate", + .uinfo = { + .comp = { + .threshold = 90, + } + }, + .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } +}, +{ + .name = "lzs", + .uinfo = { + .comp = { + .threshold = 90, + } + }, + .desc = { .sadb_alg_id = SADB_X_CALG_LZS } +}, +{ + .name = "lzjh", + .uinfo = { + .comp = { + .threshold = 50, + } + }, + .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } +}, +}; + +static inline int aalg_entries(void) +{ + return ARRAY_SIZE(aalg_list); +} + +static inline int ealg_entries(void) +{ + return ARRAY_SIZE(ealg_list); +} + +static inline int calg_entries(void) +{ + return ARRAY_SIZE(calg_list); +} + +/* Todo: generic iterators */ +struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) +{ + int i; + + for (i = 0; i < aalg_entries(); i++) { + if (aalg_list[i].desc.sadb_alg_id == alg_id) { + if (aalg_list[i].available) + return &aalg_list[i]; + else + break; + } + } + return NULL; +} +EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); + +struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) +{ + int i; + + for (i = 0; i < ealg_entries(); i++) { + if (ealg_list[i].desc.sadb_alg_id == alg_id) { + if (ealg_list[i].available) + return &ealg_list[i]; + else + break; + } + } + return NULL; +} +EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); + +struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) +{ + int i; + + for (i = 0; i < calg_entries(); i++) { + if (calg_list[i].desc.sadb_alg_id == alg_id) { + if (calg_list[i].available) + return &calg_list[i]; + else + break; + } + } + return NULL; +} +EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); + +static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, + int entries, char *name, + int probe) +{ + int i, status; + + if (!name) + return NULL; + + for (i = 0; i < entries; i++) { + if (strcmp(name, list[i].name)) + continue; + + if (list[i].available) + return &list[i]; + + if (!probe) + break; + + status = crypto_alg_available(name, 0); + if (!status) + break; + + list[i].available = status; + return &list[i]; + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) +{ + return xfrm_get_byname(aalg_list, aalg_entries(), name, probe); +} +EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); + +struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) +{ + return xfrm_get_byname(ealg_list, ealg_entries(), name, probe); +} +EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); + +struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) +{ + return xfrm_get_byname(calg_list, calg_entries(), name, probe); +} +EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); + +struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) +{ + if (idx >= aalg_entries()) + return NULL; + + return &aalg_list[idx]; +} +EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx); + +struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx) +{ + if (idx >= ealg_entries()) + return NULL; + + return &ealg_list[idx]; +} +EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx); + +/* + * Probe for the availability of crypto algorithms, and set the available + * flag for any algorithms found on the system. This is typically called by + * pfkey during userspace SA add, update or register. + */ +void xfrm_probe_algs(void) +{ +#ifdef CONFIG_CRYPTO + int i, status; + + BUG_ON(in_softirq()); + + for (i = 0; i < aalg_entries(); i++) { + status = crypto_alg_available(aalg_list[i].name, 0); + if (aalg_list[i].available != status) + aalg_list[i].available = status; + } + + for (i = 0; i < ealg_entries(); i++) { + status = crypto_alg_available(ealg_list[i].name, 0); + if (ealg_list[i].available != status) + ealg_list[i].available = status; + } + + for (i = 0; i < calg_entries(); i++) { + status = crypto_alg_available(calg_list[i].name, 0); + if (calg_list[i].available != status) + calg_list[i].available = status; + } +#endif +} +EXPORT_SYMBOL_GPL(xfrm_probe_algs); + +int xfrm_count_auth_supported(void) +{ + int i, n; + + for (i = 0, n = 0; i < aalg_entries(); i++) + if (aalg_list[i].available) + n++; + return n; +} +EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); + +int xfrm_count_enc_supported(void) +{ + int i, n; + + for (i = 0, n = 0; i < ealg_entries(); i++) + if (ealg_list[i].available) + n++; + return n; +} +EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); + +/* Move to common area: it is shared with AH. */ + +void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, + int offset, int len, icv_update_fn_t icv_update) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + struct scatterlist sg; + + /* Checksum header. */ + if (copy > 0) { + if (copy > len) + copy = len; + + sg.page = virt_to_page(skb->data + offset); + sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; + sg.length = copy; + + icv_update(tfm, &sg, 1); + + if ((len -= copy) == 0) + return; + offset += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + + sg.page = frag->page; + sg.offset = frag->page_offset + offset-start; + sg.length = copy; + + icv_update(tfm, &sg, 1); + + if (!(len -= copy)) + return; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + skb_icv_walk(list, tfm, offset-start, copy, icv_update); + if ((len -= copy) == 0) + return; + offset += copy; + } + start = end; + } + } + if (len) + BUG(); +} +EXPORT_SYMBOL_GPL(skb_icv_walk); + +#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) + +/* Looking generic it is not used in another places. */ + +int +skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + int elt = 0; + + if (copy > 0) { + if (copy > len) + copy = len; + sg[elt].page = virt_to_page(skb->data + offset); + sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; + sg[elt].length = copy; + elt++; + if ((len -= copy) == 0) + return elt; + offset += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + sg[elt].page = frag->page; + sg[elt].offset = frag->page_offset+offset-start; + sg[elt].length = copy; + elt++; + if (!(len -= copy)) + return elt; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + elt += skb_to_sgvec(list, sg+elt, offset - start, copy); + if ((len -= copy) == 0) + return elt; + offset += copy; + } + start = end; + } + } + if (len) + BUG(); + return elt; +} +EXPORT_SYMBOL_GPL(skb_to_sgvec); + +/* Check that skb data bits are writable. If they are not, copy data + * to newly created private area. If "tailbits" is given, make sure that + * tailbits bytes beyond current end of skb are writable. + * + * Returns amount of elements of scatterlist to load for subsequent + * transformations and pointer to writable trailer skb. + */ + +int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) +{ + int copyflag; + int elt; + struct sk_buff *skb1, **skb_p; + + /* If skb is cloned or its head is paged, reallocate + * head pulling out all the pages (pages are considered not writable + * at the moment even if they are anonymous). + */ + if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && + __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) + return -ENOMEM; + + /* Easy case. Most of packets will go this way. */ + if (!skb_shinfo(skb)->frag_list) { + /* A little of trouble, not enough of space for trailer. + * This should not happen, when stack is tuned to generate + * good frames. OK, on miss we reallocate and reserve even more + * space, 128 bytes is fair. */ + + if (skb_tailroom(skb) < tailbits && + pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) + return -ENOMEM; + + /* Voila! */ + *trailer = skb; + return 1; + } + + /* Misery. We are in troubles, going to mincer fragments... */ + + elt = 1; + skb_p = &skb_shinfo(skb)->frag_list; + copyflag = 0; + + while ((skb1 = *skb_p) != NULL) { + int ntail = 0; + + /* The fragment is partially pulled by someone, + * this can happen on input. Copy it and everything + * after it. */ + + if (skb_shared(skb1)) + copyflag = 1; + + /* If the skb is the last, worry about trailer. */ + + if (skb1->next == NULL && tailbits) { + if (skb_shinfo(skb1)->nr_frags || + skb_shinfo(skb1)->frag_list || + skb_tailroom(skb1) < tailbits) + ntail = tailbits + 128; + } + + if (copyflag || + skb_cloned(skb1) || + ntail || + skb_shinfo(skb1)->nr_frags || + skb_shinfo(skb1)->frag_list) { + struct sk_buff *skb2; + + /* Fuck, we are miserable poor guys... */ + if (ntail == 0) + skb2 = skb_copy(skb1, GFP_ATOMIC); + else + skb2 = skb_copy_expand(skb1, + skb_headroom(skb1), + ntail, + GFP_ATOMIC); + if (unlikely(skb2 == NULL)) + return -ENOMEM; + + if (skb1->sk) + skb_set_owner_w(skb, skb1->sk); + + /* Looking around. Are we still alive? + * OK, link new skb, drop old one */ + + skb2->next = skb1->next; + *skb_p = skb2; + kfree_skb(skb1); + skb1 = skb2; + } + elt++; + *trailer = skb1; + skb_p = &skb1->next; + } + + return elt; +} +EXPORT_SYMBOL_GPL(skb_cow_data); + +void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +{ + if (tail != skb) { + skb->data_len += len; + skb->len += len; + } + return skb_put(tail, len); +} +EXPORT_SYMBOL_GPL(pskb_put); +#endif diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c new file mode 100644 index 000000000000..c58a6f05a0b6 --- /dev/null +++ b/net/xfrm/xfrm_input.c @@ -0,0 +1,89 @@ +/* + * xfrm_input.c + * + * Changes: + * YOSHIFUJI Hideaki @USAGI + * Split up af-specific portion + * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/xfrm.h> + +static kmem_cache_t *secpath_cachep; + +void __secpath_destroy(struct sec_path *sp) +{ + int i; + for (i = 0; i < sp->len; i++) + xfrm_state_put(sp->x[i].xvec); + kmem_cache_free(secpath_cachep, sp); +} +EXPORT_SYMBOL(__secpath_destroy); + +struct sec_path *secpath_dup(struct sec_path *src) +{ + struct sec_path *sp; + + sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC); + if (!sp) + return NULL; + + sp->len = 0; + if (src) { + int i; + + memcpy(sp, src, sizeof(*sp)); + for (i = 0; i < sp->len; i++) + xfrm_state_hold(sp->x[i].xvec); + } + atomic_set(&sp->refcnt, 1); + return sp; +} +EXPORT_SYMBOL(secpath_dup); + +/* Fetch spi and seq from ipsec header */ + +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +{ + int offset, offset_seq; + + switch (nexthdr) { + case IPPROTO_AH: + offset = offsetof(struct ip_auth_hdr, spi); + offset_seq = offsetof(struct ip_auth_hdr, seq_no); + break; + case IPPROTO_ESP: + offset = offsetof(struct ip_esp_hdr, spi); + offset_seq = offsetof(struct ip_esp_hdr, seq_no); + break; + case IPPROTO_COMP: + if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) + return -EINVAL; + *spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2))); + *seq = 0; + return 0; + default: + return 1; + } + + if (!pskb_may_pull(skb, 16)) + return -EINVAL; + + *spi = *(u32*)(skb->h.raw + offset); + *seq = *(u32*)(skb->h.raw + offset_seq); + return 0; +} +EXPORT_SYMBOL(xfrm_parse_spi); + +void __init xfrm_input_init(void) +{ + secpath_cachep = kmem_cache_create("secpath_cache", + sizeof(struct sec_path), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!secpath_cachep) + panic("XFRM: failed to allocate secpath_cache\n"); +} diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c new file mode 100644 index 000000000000..80828078733d --- /dev/null +++ b/net/xfrm/xfrm_policy.c @@ -0,0 +1,1367 @@ +/* + * xfrm_policy.c + * + * Changes: + * Mitsuru KANDA @USAGI + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * Kazunori MIYAZAWA @USAGI + * YOSHIFUJI Hideaki + * Split up af-specific portion + * Derek Atkins <derek@ihtfp.com> Add the post_input processor + * + */ + +#include <asm/bug.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/notifier.h> +#include <linux/netdevice.h> +#include <linux/module.h> +#include <net/xfrm.h> +#include <net/ip.h> + +DECLARE_MUTEX(xfrm_cfg_sem); +EXPORT_SYMBOL(xfrm_cfg_sem); + +static DEFINE_RWLOCK(xfrm_policy_lock); + +struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_list); + +static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; + +static kmem_cache_t *xfrm_dst_cache; + +static struct work_struct xfrm_policy_gc_work; +static struct list_head xfrm_policy_gc_list = + LIST_HEAD_INIT(xfrm_policy_gc_list); +static DEFINE_SPINLOCK(xfrm_policy_gc_lock); + +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); + +int xfrm_register_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct xfrm_type_map *typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + write_lock(&typemap->lock); + if (likely(typemap->map[type->proto] == NULL)) + typemap->map[type->proto] = type; + else + err = -EEXIST; + write_unlock(&typemap->lock); + xfrm_policy_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_type); + +int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct xfrm_type_map *typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + write_lock(&typemap->lock); + if (unlikely(typemap->map[type->proto] != type)) + err = -ENOENT; + else + typemap->map[type->proto] = NULL; + write_unlock(&typemap->lock); + xfrm_policy_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type); + +struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_type_map *typemap; + struct xfrm_type *type; + int modload_attempted = 0; + +retry: + afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_map; + + read_lock(&typemap->lock); + type = typemap->map[proto]; + if (unlikely(type && !try_module_get(type->owner))) + type = NULL; + read_unlock(&typemap->lock); + if (!type && !modload_attempted) { + xfrm_policy_put_afinfo(afinfo); + request_module("xfrm-type-%d-%d", + (int) family, (int) proto); + modload_attempted = 1; + goto retry; + } + + xfrm_policy_put_afinfo(afinfo); + return type; +} +EXPORT_SYMBOL(xfrm_get_type); + +int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, + unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + if (likely(afinfo->dst_lookup != NULL)) + err = afinfo->dst_lookup(dst, fl); + else + err = -EINVAL; + xfrm_policy_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_dst_lookup); + +void xfrm_put_type(struct xfrm_type *type) +{ + module_put(type->owner); +} + +static inline unsigned long make_jiffies(long secs) +{ + if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) + return MAX_SCHEDULE_TIMEOUT-1; + else + return secs*HZ; +} + +static void xfrm_policy_timer(unsigned long data) +{ + struct xfrm_policy *xp = (struct xfrm_policy*)data; + unsigned long now = (unsigned long)xtime.tv_sec; + long next = LONG_MAX; + int warn = 0; + int dir; + + read_lock(&xp->lock); + + if (xp->dead) + goto out; + + dir = xp->index & 7; + + if (xp->lft.hard_add_expires_seconds) { + long tmo = xp->lft.hard_add_expires_seconds + + xp->curlft.add_time - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (xp->lft.hard_use_expires_seconds) { + long tmo = xp->lft.hard_use_expires_seconds + + (xp->curlft.use_time ? : xp->curlft.add_time) - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (xp->lft.soft_add_expires_seconds) { + long tmo = xp->lft.soft_add_expires_seconds + + xp->curlft.add_time - now; + if (tmo <= 0) { + warn = 1; + tmo = XFRM_KM_TIMEOUT; + } + if (tmo < next) + next = tmo; + } + if (xp->lft.soft_use_expires_seconds) { + long tmo = xp->lft.soft_use_expires_seconds + + (xp->curlft.use_time ? : xp->curlft.add_time) - now; + if (tmo <= 0) { + warn = 1; + tmo = XFRM_KM_TIMEOUT; + } + if (tmo < next) + next = tmo; + } + + if (warn) + km_policy_expired(xp, dir, 0); + if (next != LONG_MAX && + !mod_timer(&xp->timer, jiffies + make_jiffies(next))) + xfrm_pol_hold(xp); + +out: + read_unlock(&xp->lock); + xfrm_pol_put(xp); + return; + +expired: + read_unlock(&xp->lock); + km_policy_expired(xp, dir, 1); + xfrm_policy_delete(xp, dir); + xfrm_pol_put(xp); +} + + +/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 + * SPD calls. + */ + +struct xfrm_policy *xfrm_policy_alloc(int gfp) +{ + struct xfrm_policy *policy; + + policy = kmalloc(sizeof(struct xfrm_policy), gfp); + + if (policy) { + memset(policy, 0, sizeof(struct xfrm_policy)); + atomic_set(&policy->refcnt, 1); + rwlock_init(&policy->lock); + init_timer(&policy->timer); + policy->timer.data = (unsigned long)policy; + policy->timer.function = xfrm_policy_timer; + } + return policy; +} +EXPORT_SYMBOL(xfrm_policy_alloc); + +/* Destroy xfrm_policy: descendant resources must be released to this moment. */ + +void __xfrm_policy_destroy(struct xfrm_policy *policy) +{ + if (!policy->dead) + BUG(); + + if (policy->bundles) + BUG(); + + if (del_timer(&policy->timer)) + BUG(); + + kfree(policy); +} +EXPORT_SYMBOL(__xfrm_policy_destroy); + +static void xfrm_policy_gc_kill(struct xfrm_policy *policy) +{ + struct dst_entry *dst; + + while ((dst = policy->bundles) != NULL) { + policy->bundles = dst->next; + dst_free(dst); + } + + if (del_timer(&policy->timer)) + atomic_dec(&policy->refcnt); + + if (atomic_read(&policy->refcnt) > 1) + flow_cache_flush(); + + xfrm_pol_put(policy); +} + +static void xfrm_policy_gc_task(void *data) +{ + struct xfrm_policy *policy; + struct list_head *entry, *tmp; + struct list_head gc_list = LIST_HEAD_INIT(gc_list); + + spin_lock_bh(&xfrm_policy_gc_lock); + list_splice_init(&xfrm_policy_gc_list, &gc_list); + spin_unlock_bh(&xfrm_policy_gc_lock); + + list_for_each_safe(entry, tmp, &gc_list) { + policy = list_entry(entry, struct xfrm_policy, list); + xfrm_policy_gc_kill(policy); + } +} + +/* Rule must be locked. Release descentant resources, announce + * entry dead. The rule must be unlinked from lists to the moment. + */ + +static void xfrm_policy_kill(struct xfrm_policy *policy) +{ + int dead; + + write_lock_bh(&policy->lock); + dead = policy->dead; + policy->dead = 1; + write_unlock_bh(&policy->lock); + + if (unlikely(dead)) { + WARN_ON(1); + return; + } + + spin_lock(&xfrm_policy_gc_lock); + list_add(&policy->list, &xfrm_policy_gc_list); + spin_unlock(&xfrm_policy_gc_lock); + + schedule_work(&xfrm_policy_gc_work); +} + +/* Generate new index... KAME seems to generate them ordered by cost + * of an absolute inpredictability of ordering of rules. This will not pass. */ +static u32 xfrm_gen_index(int dir) +{ + u32 idx; + struct xfrm_policy *p; + static u32 idx_generator; + + for (;;) { + idx = (idx_generator | dir); + idx_generator += 8; + if (idx == 0) + idx = 8; + for (p = xfrm_policy_list[dir]; p; p = p->next) { + if (p->index == idx) + break; + } + if (!p) + return idx; + } +} + +int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) +{ + struct xfrm_policy *pol, **p; + struct xfrm_policy *delpol = NULL; + struct xfrm_policy **newpos = NULL; + + write_lock_bh(&xfrm_policy_lock); + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { + if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { + if (excl) { + write_unlock_bh(&xfrm_policy_lock); + return -EEXIST; + } + *p = pol->next; + delpol = pol; + if (policy->priority > pol->priority) + continue; + } else if (policy->priority >= pol->priority) { + p = &pol->next; + continue; + } + if (!newpos) + newpos = p; + if (delpol) + break; + p = &pol->next; + } + if (newpos) + p = newpos; + xfrm_pol_hold(policy); + policy->next = *p; + *p = policy; + atomic_inc(&flow_cache_genid); + policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + policy->curlft.add_time = (unsigned long)xtime.tv_sec; + policy->curlft.use_time = 0; + if (!mod_timer(&policy->timer, jiffies + HZ)) + xfrm_pol_hold(policy); + write_unlock_bh(&xfrm_policy_lock); + + if (delpol) { + xfrm_policy_kill(delpol); + } + return 0; +} +EXPORT_SYMBOL(xfrm_policy_insert); + +struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, + int delete) +{ + struct xfrm_policy *pol, **p; + + write_lock_bh(&xfrm_policy_lock); + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { + xfrm_pol_hold(pol); + if (delete) + *p = pol->next; + break; + } + } + write_unlock_bh(&xfrm_policy_lock); + + if (pol && delete) { + atomic_inc(&flow_cache_genid); + xfrm_policy_kill(pol); + } + return pol; +} +EXPORT_SYMBOL(xfrm_policy_bysel); + +struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +{ + struct xfrm_policy *pol, **p; + + write_lock_bh(&xfrm_policy_lock); + for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) { + if (pol->index == id) { + xfrm_pol_hold(pol); + if (delete) + *p = pol->next; + break; + } + } + write_unlock_bh(&xfrm_policy_lock); + + if (pol && delete) { + atomic_inc(&flow_cache_genid); + xfrm_policy_kill(pol); + } + return pol; +} +EXPORT_SYMBOL(xfrm_policy_byid); + +void xfrm_policy_flush(void) +{ + struct xfrm_policy *xp; + int dir; + + write_lock_bh(&xfrm_policy_lock); + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { + while ((xp = xfrm_policy_list[dir]) != NULL) { + xfrm_policy_list[dir] = xp->next; + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(xp); + + write_lock_bh(&xfrm_policy_lock); + } + } + atomic_inc(&flow_cache_genid); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_flush); + +int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), + void *data) +{ + struct xfrm_policy *xp; + int dir; + int count = 0; + int error = 0; + + read_lock_bh(&xfrm_policy_lock); + for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { + for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + count++; + } + + if (count == 0) { + error = -ENOENT; + goto out; + } + + for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { + for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + error = func(xp, dir%XFRM_POLICY_MAX, --count, data); + if (error) + goto out; + } + } + +out: + read_unlock_bh(&xfrm_policy_lock); + return error; +} +EXPORT_SYMBOL(xfrm_policy_walk); + +/* Find policy to apply to this flow. */ + +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + + read_lock_bh(&xfrm_policy_lock); + for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + struct xfrm_selector *sel = &pol->selector; + int match; + + if (pol->family != family) + continue; + + match = xfrm_selector_match(sel, fl, family); + if (match) { + xfrm_pol_hold(pol); + break; + } + } + read_unlock_bh(&xfrm_policy_lock); + if ((*objp = (void *) pol) != NULL) + *obj_refp = &pol->refcnt; +} + +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +{ + struct xfrm_policy *pol; + + read_lock_bh(&xfrm_policy_lock); + if ((pol = sk->sk_policy[dir]) != NULL) { + int match = xfrm_selector_match(&pol->selector, fl, + sk->sk_family); + if (match) + xfrm_pol_hold(pol); + else + pol = NULL; + } + read_unlock_bh(&xfrm_policy_lock); + return pol; +} + +static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) +{ + pol->next = xfrm_policy_list[dir]; + xfrm_policy_list[dir] = pol; + xfrm_pol_hold(pol); +} + +static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir) +{ + struct xfrm_policy **polp; + + for (polp = &xfrm_policy_list[dir]; + *polp != NULL; polp = &(*polp)->next) { + if (*polp == pol) { + *polp = pol->next; + return pol; + } + } + return NULL; +} + +void xfrm_policy_delete(struct xfrm_policy *pol, int dir) +{ + write_lock_bh(&xfrm_policy_lock); + pol = __xfrm_policy_unlink(pol, dir); + write_unlock_bh(&xfrm_policy_lock); + if (pol) { + if (dir < XFRM_POLICY_MAX) + atomic_inc(&flow_cache_genid); + xfrm_policy_kill(pol); + } +} + +int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) +{ + struct xfrm_policy *old_pol; + + write_lock_bh(&xfrm_policy_lock); + old_pol = sk->sk_policy[dir]; + sk->sk_policy[dir] = pol; + if (pol) { + pol->curlft.add_time = (unsigned long)xtime.tv_sec; + pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); + } + if (old_pol) + __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); + write_unlock_bh(&xfrm_policy_lock); + + if (old_pol) { + xfrm_policy_kill(old_pol); + } + return 0; +} + +static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +{ + struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + + if (newp) { + newp->selector = old->selector; + newp->lft = old->lft; + newp->curlft = old->curlft; + newp->action = old->action; + newp->flags = old->flags; + newp->xfrm_nr = old->xfrm_nr; + newp->index = old->index; + memcpy(newp->xfrm_vec, old->xfrm_vec, + newp->xfrm_nr*sizeof(struct xfrm_tmpl)); + write_lock_bh(&xfrm_policy_lock); + __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); + write_unlock_bh(&xfrm_policy_lock); + xfrm_pol_put(newp); + } + return newp; +} + +int __xfrm_sk_clone_policy(struct sock *sk) +{ + struct xfrm_policy *p0 = sk->sk_policy[0], + *p1 = sk->sk_policy[1]; + + sk->sk_policy[0] = sk->sk_policy[1] = NULL; + if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) + return -ENOMEM; + if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) + return -ENOMEM; + return 0; +} + +/* Resolve list of templates for the flow, given policy. */ + +static int +xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + int nx; + int i, error; + xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); + xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + + for (nx=0, i = 0; i < policy->xfrm_nr; i++) { + struct xfrm_state *x; + xfrm_address_t *remote = daddr; + xfrm_address_t *local = saddr; + struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; + + if (tmpl->mode) { + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + } + + x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); + + if (x && x->km.state == XFRM_STATE_VALID) { + xfrm[nx++] = x; + daddr = remote; + saddr = local; + continue; + } + if (x) { + error = (x->km.state == XFRM_STATE_ERROR ? + -EINVAL : -EAGAIN); + xfrm_state_put(x); + } + + if (!tmpl->optional) + goto fail; + } + return nx; + +fail: + for (nx--; nx>=0; nx--) + xfrm_state_put(xfrm[nx]); + return error; +} + +/* Check that the bundle accepts the flow and its components are + * still valid. + */ + +static struct dst_entry * +xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) +{ + struct dst_entry *x; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return ERR_PTR(-EINVAL); + x = afinfo->find_bundle(fl, policy); + xfrm_policy_put_afinfo(afinfo); + return x; +} + +/* Allocate chain of dst_entry's, attach known xfrm's, calculate + * all the metrics... Shortly, bundle a bundle. + */ + +static int +xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, + struct flowi *fl, struct dst_entry **dst_p, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); + xfrm_policy_put_afinfo(afinfo); + return err; +} + +static inline int policy_to_flow_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + switch (dir) { + default: + case XFRM_POLICY_IN: + return FLOW_DIR_IN; + case XFRM_POLICY_OUT: + return FLOW_DIR_OUT; + case XFRM_POLICY_FWD: + return FLOW_DIR_FWD; + }; +} + +static int stale_bundle(struct dst_entry *dst); + +/* Main function: finds/creates a bundle for given flow. + * + * At the moment we eat a raw IP route. Mostly to speed up lookups + * on interfaces with disabled IPsec. + */ +int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, + struct sock *sk, int flags) +{ + struct xfrm_policy *policy; + struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct dst_entry *dst, *dst_orig = *dst_p; + int nx = 0; + int err; + u32 genid; + u16 family = dst_orig->ops->family; +restart: + genid = atomic_read(&flow_cache_genid); + policy = NULL; + if (sk && sk->sk_policy[1]) + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + + if (!policy) { + /* To accelerate a bit... */ + if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + return 0; + + policy = flow_cache_lookup(fl, family, + policy_to_flow_dir(XFRM_POLICY_OUT), + xfrm_policy_lookup); + } + + if (!policy) + return 0; + + policy->curlft.use_time = (unsigned long)xtime.tv_sec; + + switch (policy->action) { + case XFRM_POLICY_BLOCK: + /* Prohibit the flow */ + xfrm_pol_put(policy); + return -EPERM; + + case XFRM_POLICY_ALLOW: + if (policy->xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pol_put(policy); + return 0; + } + + /* Try to find matching bundle. + * + * LATER: help from flow cache. It is optional, this + * is required only for output policy. + */ + dst = xfrm_find_bundle(fl, policy, family); + if (IS_ERR(dst)) { + xfrm_pol_put(policy); + return PTR_ERR(dst); + } + + if (dst) + break; + + nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + + if (unlikely(nx<0)) { + err = nx; + if (err == -EAGAIN && flags) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&km_waitq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + set_current_state(TASK_RUNNING); + remove_wait_queue(&km_waitq, &wait); + + nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + + if (nx == -EAGAIN && signal_pending(current)) { + err = -ERESTART; + goto error; + } + if (nx == -EAGAIN || + genid != atomic_read(&flow_cache_genid)) { + xfrm_pol_put(policy); + goto restart; + } + err = nx; + } + if (err < 0) + goto error; + } + if (nx == 0) { + /* Flow passes not transformed. */ + xfrm_pol_put(policy); + return 0; + } + + dst = dst_orig; + err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family); + + if (unlikely(err)) { + int i; + for (i=0; i<nx; i++) + xfrm_state_put(xfrm[i]); + goto error; + } + + write_lock_bh(&policy->lock); + if (unlikely(policy->dead || stale_bundle(dst))) { + /* Wow! While we worked on resolving, this + * policy has gone. Retry. It is not paranoia, + * we just cannot enlist new bundle to dead object. + * We can't enlist stable bundles either. + */ + write_unlock_bh(&policy->lock); + + xfrm_pol_put(policy); + if (dst) + dst_free(dst); + goto restart; + } + dst->next = policy->bundles; + policy->bundles = dst; + dst_hold(dst); + write_unlock_bh(&policy->lock); + } + *dst_p = dst; + dst_release(dst_orig); + xfrm_pol_put(policy); + return 0; + +error: + dst_release(dst_orig); + xfrm_pol_put(policy); + *dst_p = NULL; + return err; +} +EXPORT_SYMBOL(xfrm_lookup); + +/* When skb is transformed back to its "native" form, we have to + * check policy restrictions. At the moment we make this in maximally + * stupid way. Shame on me. :-) Of course, connected sockets must + * have policy cached at them. + */ + +static inline int +xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, + unsigned short family) +{ + if (xfrm_state_kern(x)) + return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family); + return x->id.proto == tmpl->id.proto && + (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && + (x->props.reqid == tmpl->reqid || !tmpl->reqid) && + x->props.mode == tmpl->mode && + (tmpl->aalgos & (1<<x->props.aalgo)) && + !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); +} + +static inline int +xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, + unsigned short family) +{ + int idx = start; + + if (tmpl->optional) { + if (!tmpl->mode) + return start; + } else + start = -1; + for (; idx < sp->len; idx++) { + if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family)) + return ++idx; + if (sp->x[idx].xvec->props.mode) + break; + } + return start; +} + +static int +_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + afinfo->decode_session(skb, fl); + xfrm_policy_put_afinfo(afinfo); + return 0; +} + +static inline int secpath_has_tunnel(struct sec_path *sp, int k) +{ + for (; k < sp->len; k++) { + if (sp->x[k].xvec->props.mode) + return 1; + } + + return 0; +} + +int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, + unsigned short family) +{ + struct xfrm_policy *pol; + struct flowi fl; + + if (_decode_session(skb, &fl, family) < 0) + return 0; + + /* First, check used SA against their selectors. */ + if (skb->sp) { + int i; + + for (i=skb->sp->len-1; i>=0; i--) { + struct sec_decap_state *xvec = &(skb->sp->x[i]); + if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) + return 0; + + /* If there is a post_input processor, try running it */ + if (xvec->xvec->type->post_input && + (xvec->xvec->type->post_input)(xvec->xvec, + &(xvec->decap), + skb) != 0) + return 0; + } + } + + pol = NULL; + if (sk && sk->sk_policy[dir]) + pol = xfrm_sk_policy_lookup(sk, dir, &fl); + + if (!pol) + pol = flow_cache_lookup(&fl, family, + policy_to_flow_dir(dir), + xfrm_policy_lookup); + + if (!pol) + return !skb->sp || !secpath_has_tunnel(skb->sp, 0); + + pol->curlft.use_time = (unsigned long)xtime.tv_sec; + + if (pol->action == XFRM_POLICY_ALLOW) { + struct sec_path *sp; + static struct sec_path dummy; + int i, k; + + if ((sp = skb->sp) == NULL) + sp = &dummy; + + /* For each tunnel xfrm, find the first matching tmpl. + * For each tmpl before that, find corresponding xfrm. + * Order is _important_. Later we will implement + * some barriers, but at the moment barriers + * are implied between each two transformations. + */ + for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + if (k < 0) + goto reject; + } + + if (secpath_has_tunnel(sp, k)) + goto reject; + + xfrm_pol_put(pol); + return 1; + } + +reject: + xfrm_pol_put(pol); + return 0; +} +EXPORT_SYMBOL(__xfrm_policy_check); + +int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) +{ + struct flowi fl; + + if (_decode_session(skb, &fl, family) < 0) + return 0; + + return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; +} +EXPORT_SYMBOL(__xfrm_route_forward); + +/* Optimize later using cookies and generation ids. */ + +static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) +{ + if (!stale_bundle(dst)) + return dst; + + return NULL; +} + +static int stale_bundle(struct dst_entry *dst) +{ + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); +} + +static void xfrm_dst_destroy(struct dst_entry *dst) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + dst_release(xdst->route); + + if (!dst->xfrm) + return; + xfrm_state_put(dst->xfrm); + dst->xfrm = NULL; +} + +static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int unregister) +{ + if (!unregister) + return; + + while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { + dst->dev = &loopback_dev; + dev_hold(&loopback_dev); + dev_put(dev); + } +} + +static void xfrm_link_failure(struct sk_buff *skb) +{ + /* Impossible. Such dst must be popped before reaches point of failure. */ + return; +} + +static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +{ + if (dst) { + if (dst->obsolete) { + dst_release(dst); + dst = NULL; + } + } + return dst; +} + +static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) +{ + int i; + struct xfrm_policy *pol; + struct dst_entry *dst, **dstp, *gc_list = NULL; + + read_lock_bh(&xfrm_policy_lock); + for (i=0; i<2*XFRM_POLICY_MAX; i++) { + for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + } + read_unlock_bh(&xfrm_policy_lock); + + while (gc_list) { + dst = gc_list; + gc_list = dst->next; + dst_free(dst); + } +} + +static int unused_bundle(struct dst_entry *dst) +{ + return !atomic_read(&dst->__refcnt); +} + +static void __xfrm_garbage_collect(void) +{ + xfrm_prune_bundles(unused_bundle); +} + +int xfrm_flush_bundles(void) +{ + xfrm_prune_bundles(stale_bundle); + return 0; +} + +void xfrm_init_pmtu(struct dst_entry *dst) +{ + do { + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + u32 pmtu, route_mtu_cached; + + pmtu = dst_mtu(dst->child); + xdst->child_mtu_cached = pmtu; + + pmtu = xfrm_state_mtu(dst->xfrm, pmtu); + + route_mtu_cached = dst_mtu(xdst->route); + xdst->route_mtu_cached = route_mtu_cached; + + if (pmtu > route_mtu_cached) + pmtu = route_mtu_cached; + + dst->metrics[RTAX_MTU-1] = pmtu; + } while ((dst = dst->next)); +} + +EXPORT_SYMBOL(xfrm_init_pmtu); + +/* Check that the bundle accepts the flow and its components are + * still valid. + */ + +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +{ + struct dst_entry *dst = &first->u.dst; + struct xfrm_dst *last; + u32 mtu; + + if (!dst_check(dst->path, 0) || + (dst->dev && !netif_running(dst->dev))) + return 0; + + last = NULL; + + do { + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + + if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) + return 0; + if (dst->xfrm->km.state != XFRM_STATE_VALID) + return 0; + + mtu = dst_mtu(dst->child); + if (xdst->child_mtu_cached != mtu) { + last = xdst; + xdst->child_mtu_cached = mtu; + } + + if (!dst_check(xdst->route, 0)) + return 0; + mtu = dst_mtu(xdst->route); + if (xdst->route_mtu_cached != mtu) { + last = xdst; + xdst->route_mtu_cached = mtu; + } + + dst = dst->child; + } while (dst->xfrm); + + if (likely(!last)) + return 1; + + mtu = last->child_mtu_cached; + for (;;) { + dst = &last->u.dst; + + mtu = xfrm_state_mtu(dst->xfrm, mtu); + if (mtu > last->route_mtu_cached) + mtu = last->route_mtu_cached; + dst->metrics[RTAX_MTU-1] = mtu; + + if (last == first) + break; + + last = last->u.next; + last->child_mtu_cached = mtu; + } + + return 1; +} + +EXPORT_SYMBOL(xfrm_bundle_ok); + +/* Well... that's _TASK_. We need to scan through transformation + * list and figure out what mss tcp should generate in order to + * final datagram fit to mtu. Mama mia... :-) + * + * Apparently, some easy way exists, but we used to choose the most + * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta. + * + * Consider this function as something like dark humour. :-) + */ +static int xfrm_get_mss(struct dst_entry *dst, u32 mtu) +{ + int res = mtu - dst->header_len; + + for (;;) { + struct dst_entry *d = dst; + int m = res; + + do { + struct xfrm_state *x = d->xfrm; + if (x) { + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_VALID && + x->type && x->type->get_max_size) + m = x->type->get_max_size(d->xfrm, m); + else + m += x->props.header_len; + spin_unlock_bh(&x->lock); + } + } while ((d = d->child) != NULL); + + if (m <= mtu) + break; + res -= (m - mtu); + if (res < 88) + return mtu; + } + + return res + dst->header_len; +} + +int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_policy_afinfo_lock); + if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else { + struct dst_ops *dst_ops = afinfo->dst_ops; + if (likely(dst_ops->kmem_cachep == NULL)) + dst_ops->kmem_cachep = xfrm_dst_cache; + if (likely(dst_ops->check == NULL)) + dst_ops->check = xfrm_dst_check; + if (likely(dst_ops->destroy == NULL)) + dst_ops->destroy = xfrm_dst_destroy; + if (likely(dst_ops->ifdown == NULL)) + dst_ops->ifdown = xfrm_dst_ifdown; + if (likely(dst_ops->negative_advice == NULL)) + dst_ops->negative_advice = xfrm_negative_advice; + if (likely(dst_ops->link_failure == NULL)) + dst_ops->link_failure = xfrm_link_failure; + if (likely(dst_ops->get_mss == NULL)) + dst_ops->get_mss = xfrm_get_mss; + if (likely(afinfo->garbage_collect == NULL)) + afinfo->garbage_collect = __xfrm_garbage_collect; + xfrm_policy_afinfo[afinfo->family] = afinfo; + } + write_unlock(&xfrm_policy_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_policy_register_afinfo); + +int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_policy_afinfo_lock); + if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else { + struct dst_ops *dst_ops = afinfo->dst_ops; + xfrm_policy_afinfo[afinfo->family] = NULL; + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->destroy = NULL; + dst_ops->ifdown = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; + dst_ops->get_mss = NULL; + afinfo->garbage_collect = NULL; + } + } + write_unlock(&xfrm_policy_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); + +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + read_lock(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[family]; + if (likely(afinfo != NULL)) + read_lock(&afinfo->lock); + read_unlock(&xfrm_policy_afinfo_lock); + return afinfo; +} + +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + if (unlikely(afinfo == NULL)) + return; + read_unlock(&afinfo->lock); +} + +static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_DOWN: + xfrm_flush_bundles(); + } + return NOTIFY_DONE; +} + +static struct notifier_block xfrm_dev_notifier = { + xfrm_dev_event, + NULL, + 0 +}; + +static void __init xfrm_policy_init(void) +{ + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + sizeof(struct xfrm_dst), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!xfrm_dst_cache) + panic("XFRM: failed to allocate xfrm_dst_cache\n"); + + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); + register_netdevice_notifier(&xfrm_dev_notifier); +} + +void __init xfrm_init(void) +{ + xfrm_state_init(); + xfrm_policy_init(); + xfrm_input_init(); +} + diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c new file mode 100644 index 000000000000..1db59f11f37d --- /dev/null +++ b/net/xfrm/xfrm_state.c @@ -0,0 +1,1037 @@ +/* + * xfrm_state.c + * + * Changes: + * Mitsuru KANDA @USAGI + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * YOSHIFUJI Hideaki @USAGI + * Split up af-specific functions + * Derek Atkins <derek@ihtfp.com> + * Add UDP Encapsulation + * + */ + +#include <linux/workqueue.h> +#include <net/xfrm.h> +#include <linux/pfkeyv2.h> +#include <linux/ipsec.h> +#include <linux/module.h> +#include <asm/uaccess.h> + +/* Each xfrm_state may be linked to two tables: + + 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) + 2. Hash table by daddr to find what SAs exist for given + destination/tunnel endpoint. (output) + */ + +static DEFINE_SPINLOCK(xfrm_state_lock); + +/* Hash table to find appropriate SA towards given target (endpoint + * of tunnel or destination of transport mode) allowed by selector. + * + * Main use is finding SA after policy selected tunnel or transport mode. + * Also, it can be used by ah/esp icmp error handler to find offending SA. + */ +static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; + +DECLARE_WAIT_QUEUE_HEAD(km_waitq); +EXPORT_SYMBOL(km_waitq); + +static DEFINE_RWLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; + +static struct work_struct xfrm_state_gc_work; +static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); +static DEFINE_SPINLOCK(xfrm_state_gc_lock); + +static int xfrm_state_gc_flush_bundles; + +static void __xfrm_state_delete(struct xfrm_state *x); + +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); + +static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); +static void km_state_expired(struct xfrm_state *x, int hard); + +static void xfrm_state_gc_destroy(struct xfrm_state *x) +{ + if (del_timer(&x->timer)) + BUG(); + if (x->aalg) + kfree(x->aalg); + if (x->ealg) + kfree(x->ealg); + if (x->calg) + kfree(x->calg); + if (x->encap) + kfree(x->encap); + if (x->type) { + x->type->destructor(x); + xfrm_put_type(x->type); + } + kfree(x); +} + +static void xfrm_state_gc_task(void *data) +{ + struct xfrm_state *x; + struct list_head *entry, *tmp; + struct list_head gc_list = LIST_HEAD_INIT(gc_list); + + if (xfrm_state_gc_flush_bundles) { + xfrm_state_gc_flush_bundles = 0; + xfrm_flush_bundles(); + } + + spin_lock_bh(&xfrm_state_gc_lock); + list_splice_init(&xfrm_state_gc_list, &gc_list); + spin_unlock_bh(&xfrm_state_gc_lock); + + list_for_each_safe(entry, tmp, &gc_list) { + x = list_entry(entry, struct xfrm_state, bydst); + xfrm_state_gc_destroy(x); + } + wake_up(&km_waitq); +} + +static inline unsigned long make_jiffies(long secs) +{ + if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) + return MAX_SCHEDULE_TIMEOUT-1; + else + return secs*HZ; +} + +static void xfrm_timer_handler(unsigned long data) +{ + struct xfrm_state *x = (struct xfrm_state*)data; + unsigned long now = (unsigned long)xtime.tv_sec; + long next = LONG_MAX; + int warn = 0; + + spin_lock(&x->lock); + if (x->km.state == XFRM_STATE_DEAD) + goto out; + if (x->km.state == XFRM_STATE_EXPIRED) + goto expired; + if (x->lft.hard_add_expires_seconds) { + long tmo = x->lft.hard_add_expires_seconds + + x->curlft.add_time - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (x->lft.hard_use_expires_seconds) { + long tmo = x->lft.hard_use_expires_seconds + + (x->curlft.use_time ? : now) - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (x->km.dying) + goto resched; + if (x->lft.soft_add_expires_seconds) { + long tmo = x->lft.soft_add_expires_seconds + + x->curlft.add_time - now; + if (tmo <= 0) + warn = 1; + else if (tmo < next) + next = tmo; + } + if (x->lft.soft_use_expires_seconds) { + long tmo = x->lft.soft_use_expires_seconds + + (x->curlft.use_time ? : now) - now; + if (tmo <= 0) + warn = 1; + else if (tmo < next) + next = tmo; + } + + if (warn) + km_state_expired(x, 0); +resched: + if (next != LONG_MAX && + !mod_timer(&x->timer, jiffies + make_jiffies(next))) + xfrm_state_hold(x); + goto out; + +expired: + if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { + x->km.state = XFRM_STATE_EXPIRED; + wake_up(&km_waitq); + next = 2; + goto resched; + } + if (x->id.spi != 0) + km_state_expired(x, 1); + __xfrm_state_delete(x); + +out: + spin_unlock(&x->lock); + xfrm_state_put(x); +} + +struct xfrm_state *xfrm_state_alloc(void) +{ + struct xfrm_state *x; + + x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC); + + if (x) { + memset(x, 0, sizeof(struct xfrm_state)); + atomic_set(&x->refcnt, 1); + atomic_set(&x->tunnel_users, 0); + INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->byspi); + init_timer(&x->timer); + x->timer.function = xfrm_timer_handler; + x->timer.data = (unsigned long)x; + x->curlft.add_time = (unsigned long)xtime.tv_sec; + x->lft.soft_byte_limit = XFRM_INF; + x->lft.soft_packet_limit = XFRM_INF; + x->lft.hard_byte_limit = XFRM_INF; + x->lft.hard_packet_limit = XFRM_INF; + spin_lock_init(&x->lock); + } + return x; +} +EXPORT_SYMBOL(xfrm_state_alloc); + +void __xfrm_state_destroy(struct xfrm_state *x) +{ + BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + + spin_lock_bh(&xfrm_state_gc_lock); + list_add(&x->bydst, &xfrm_state_gc_list); + spin_unlock_bh(&xfrm_state_gc_lock); + schedule_work(&xfrm_state_gc_work); +} +EXPORT_SYMBOL(__xfrm_state_destroy); + +static void __xfrm_state_delete(struct xfrm_state *x) +{ + if (x->km.state != XFRM_STATE_DEAD) { + x->km.state = XFRM_STATE_DEAD; + spin_lock(&xfrm_state_lock); + list_del(&x->bydst); + atomic_dec(&x->refcnt); + if (x->id.spi) { + list_del(&x->byspi); + atomic_dec(&x->refcnt); + } + spin_unlock(&xfrm_state_lock); + if (del_timer(&x->timer)) + atomic_dec(&x->refcnt); + + /* The number two in this test is the reference + * mentioned in the comment below plus the reference + * our caller holds. A larger value means that + * there are DSTs attached to this xfrm_state. + */ + if (atomic_read(&x->refcnt) > 2) { + xfrm_state_gc_flush_bundles = 1; + schedule_work(&xfrm_state_gc_work); + } + + /* All xfrm_state objects are created by xfrm_state_alloc. + * The xfrm_state_alloc call gives a reference, and that + * is what we are dropping here. + */ + atomic_dec(&x->refcnt); + } +} + +void xfrm_state_delete(struct xfrm_state *x) +{ + spin_lock_bh(&x->lock); + __xfrm_state_delete(x); + spin_unlock_bh(&x->lock); +} +EXPORT_SYMBOL(xfrm_state_delete); + +void xfrm_state_flush(u8 proto) +{ + int i; + struct xfrm_state *x; + + spin_lock_bh(&xfrm_state_lock); + for (i = 0; i < XFRM_DST_HSIZE; i++) { +restart: + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (!xfrm_state_kern(x) && + (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_state_hold(x); + spin_unlock_bh(&xfrm_state_lock); + + xfrm_state_delete(x); + xfrm_state_put(x); + + spin_lock_bh(&xfrm_state_lock); + goto restart; + } + } + } + spin_unlock_bh(&xfrm_state_lock); + wake_up(&km_waitq); +} +EXPORT_SYMBOL(xfrm_state_flush); + +static int +xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -1; + afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + xfrm_state_put_afinfo(afinfo); + return 0; +} + +struct xfrm_state * +xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, + struct flowi *fl, struct xfrm_tmpl *tmpl, + struct xfrm_policy *pol, int *err, + unsigned short family) +{ + unsigned h = xfrm_dst_hash(daddr, family); + struct xfrm_state *x, *x0; + int acquire_in_progress = 0; + int error = 0; + struct xfrm_state *best = NULL; + struct xfrm_state_afinfo *afinfo; + + afinfo = xfrm_state_get_afinfo(family); + if (afinfo == NULL) { + *err = -EAFNOSUPPORT; + return NULL; + } + + spin_lock_bh(&xfrm_state_lock); + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == tmpl->reqid && + xfrm_state_addr_check(x, daddr, saddr, family) && + tmpl->mode == x->props.mode && + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) { + /* Resolution logic: + 1. There is a valid state with matching selector. + Done. + 2. Valid state with inappropriate selector. Skip. + + Entering area of "sysdeps". + + 3. If state is not valid, selector is temporary, + it selects only session which triggered + previous resolution. Key manager will do + something to install a state with proper + selector. + */ + if (x->km.state == XFRM_STATE_VALID) { + if (!xfrm_selector_match(&x->sel, fl, family)) + continue; + if (!best || + best->km.dying > x->km.dying || + (best->km.dying == x->km.dying && + best->curlft.add_time < x->curlft.add_time)) + best = x; + } else if (x->km.state == XFRM_STATE_ACQ) { + acquire_in_progress = 1; + } else if (x->km.state == XFRM_STATE_ERROR || + x->km.state == XFRM_STATE_EXPIRED) { + if (xfrm_selector_match(&x->sel, fl, family)) + error = -ESRCH; + } + } + } + + x = best; + if (!x && !error && !acquire_in_progress) { + x0 = afinfo->state_lookup(&tmpl->id.daddr, tmpl->id.spi, tmpl->id.proto); + if (x0 != NULL) { + xfrm_state_put(x0); + error = -EEXIST; + goto out; + } + x = xfrm_state_alloc(); + if (x == NULL) { + error = -ENOMEM; + goto out; + } + /* Initialize temporary selector matching only + * to current session. */ + xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + + if (km_query(x, tmpl, pol) == 0) { + x->km.state = XFRM_STATE_ACQ; + list_add_tail(&x->bydst, xfrm_state_bydst+h); + xfrm_state_hold(x); + if (x->id.spi) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + } + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); + x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x->timer); + } else { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + error = -ESRCH; + } + } +out: + if (x) + xfrm_state_hold(x); + else + *err = acquire_in_progress ? -EAGAIN : error; + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} + +static void __xfrm_state_insert(struct xfrm_state *x) +{ + unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + + list_add(&x->bydst, xfrm_state_bydst+h); + xfrm_state_hold(x); + + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + + if (!mod_timer(&x->timer, jiffies + HZ)) + xfrm_state_hold(x); + + wake_up(&km_waitq); +} + +void xfrm_state_insert(struct xfrm_state *x) +{ + spin_lock_bh(&xfrm_state_lock); + __xfrm_state_insert(x); + spin_unlock_bh(&xfrm_state_lock); +} +EXPORT_SYMBOL(xfrm_state_insert); + +static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); + +int xfrm_state_add(struct xfrm_state *x) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_state *x1; + int family; + int err; + + family = x->props.family; + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + + x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + if (x1) { + xfrm_state_put(x1); + x1 = NULL; + err = -EEXIST; + goto out; + } + + if (x->km.seq) { + x1 = __xfrm_find_acq_byseq(x->km.seq); + if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { + xfrm_state_put(x1); + x1 = NULL; + } + } + + if (!x1) + x1 = afinfo->find_acq( + x->props.mode, x->props.reqid, x->id.proto, + &x->id.daddr, &x->props.saddr, 0); + + __xfrm_state_insert(x); + err = 0; + +out: + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + + if (x1) { + xfrm_state_delete(x1); + xfrm_state_put(x1); + } + + return err; +} +EXPORT_SYMBOL(xfrm_state_add); + +int xfrm_state_update(struct xfrm_state *x) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_state *x1; + int err; + + afinfo = xfrm_state_get_afinfo(x->props.family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + + err = -ESRCH; + if (!x1) + goto out; + + if (xfrm_state_kern(x1)) { + xfrm_state_put(x1); + err = -EEXIST; + goto out; + } + + if (x1->km.state == XFRM_STATE_ACQ) { + __xfrm_state_insert(x); + x = NULL; + } + err = 0; + +out: + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + + if (err) + return err; + + if (!x) { + xfrm_state_delete(x1); + xfrm_state_put(x1); + return 0; + } + + err = -EINVAL; + spin_lock_bh(&x1->lock); + if (likely(x1->km.state == XFRM_STATE_VALID)) { + if (x->encap && x1->encap) + memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); + x1->km.dying = 0; + + if (!mod_timer(&x1->timer, jiffies + HZ)) + xfrm_state_hold(x1); + if (x1->curlft.use_time) + xfrm_state_check_expire(x1); + + err = 0; + } + spin_unlock_bh(&x1->lock); + + xfrm_state_put(x1); + + return err; +} +EXPORT_SYMBOL(xfrm_state_update); + +int xfrm_state_check_expire(struct xfrm_state *x) +{ + if (!x->curlft.use_time) + x->curlft.use_time = (unsigned long)xtime.tv_sec; + + if (x->km.state != XFRM_STATE_VALID) + return -EINVAL; + + if (x->curlft.bytes >= x->lft.hard_byte_limit || + x->curlft.packets >= x->lft.hard_packet_limit) { + km_state_expired(x, 1); + if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) + xfrm_state_hold(x); + return -EINVAL; + } + + if (!x->km.dying && + (x->curlft.bytes >= x->lft.soft_byte_limit || + x->curlft.packets >= x->lft.soft_packet_limit)) + km_state_expired(x, 0); + return 0; +} +EXPORT_SYMBOL(xfrm_state_check_expire); + +static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +{ + int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) + - skb_headroom(skb); + + if (nhead > 0) + return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + + /* Check tail too... */ + return 0; +} + +int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = xfrm_state_check_expire(x); + if (err < 0) + goto err; + err = xfrm_state_check_space(x, skb); +err: + return err; +} +EXPORT_SYMBOL(xfrm_state_check); + +struct xfrm_state * +xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, + unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup(daddr, spi, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup); + +struct xfrm_state * +xfrm_find_acq(u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, + int create, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_find_acq); + +/* Silly enough, but I'm lazy to build resolution list */ + +static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +{ + int i; + struct xfrm_state *x; + + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + xfrm_state_hold(x); + return x; + } + } + } + return NULL; +} + +struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +{ + struct xfrm_state *x; + + spin_lock_bh(&xfrm_state_lock); + x = __xfrm_find_acq_byseq(seq); + spin_unlock_bh(&xfrm_state_lock); + return x; +} +EXPORT_SYMBOL(xfrm_find_acq_byseq); + +u32 xfrm_get_acqseq(void) +{ + u32 res; + static u32 acqseq; + static DEFINE_SPINLOCK(acqseq_lock); + + spin_lock_bh(&acqseq_lock); + res = (++acqseq ? : ++acqseq); + spin_unlock_bh(&acqseq_lock); + return res; +} +EXPORT_SYMBOL(xfrm_get_acqseq); + +void +xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) +{ + u32 h; + struct xfrm_state *x0; + + if (x->id.spi) + return; + + if (minspi == maxspi) { + x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + if (x0) { + xfrm_state_put(x0); + return; + } + x->id.spi = minspi; + } else { + u32 spi = 0; + minspi = ntohl(minspi); + maxspi = ntohl(maxspi); + for (h=0; h<maxspi-minspi+1; h++) { + spi = minspi + net_random()%(maxspi-minspi+1); + x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); + if (x0 == NULL) { + x->id.spi = htonl(spi); + break; + } + xfrm_state_put(x0); + } + } + if (x->id.spi) { + spin_lock_bh(&xfrm_state_lock); + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + spin_unlock_bh(&xfrm_state_lock); + wake_up(&km_waitq); + } +} +EXPORT_SYMBOL(xfrm_alloc_spi); + +int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), + void *data) +{ + int i; + struct xfrm_state *x; + int count = 0; + int err = 0; + + spin_lock_bh(&xfrm_state_lock); + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + count++; + } + } + if (count == 0) { + err = -ENOENT; + goto out; + } + + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + continue; + err = func(x, --count, data); + if (err) + goto out; + } + } +out: + spin_unlock_bh(&xfrm_state_lock); + return err; +} +EXPORT_SYMBOL(xfrm_state_walk); + +int xfrm_replay_check(struct xfrm_state *x, u32 seq) +{ + u32 diff; + + seq = ntohl(seq); + + if (unlikely(seq == 0)) + return -EINVAL; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= x->props.replay_window) { + x->stats.replay_window++; + return -EINVAL; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(xfrm_replay_check); + +void xfrm_replay_advance(struct xfrm_state *x, u32 seq) +{ + u32 diff; + + seq = ntohl(seq); + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } +} +EXPORT_SYMBOL(xfrm_replay_advance); + +static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); +static DEFINE_RWLOCK(xfrm_km_lock); + +static void km_state_expired(struct xfrm_state *x, int hard) +{ + struct xfrm_mgr *km; + + if (hard) + x->km.state = XFRM_STATE_EXPIRED; + else + x->km.dying = 1; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) + km->notify(x, hard); + read_unlock(&xfrm_km_lock); + + if (hard) + wake_up(&km_waitq); +} + +static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) +{ + int err = -EINVAL; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + err = km->acquire(x, t, pol, XFRM_POLICY_OUT); + if (!err) + break; + } + read_unlock(&xfrm_km_lock); + return err; +} + +int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport) +{ + int err = -EINVAL; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->new_mapping) + err = km->new_mapping(x, ipaddr, sport); + if (!err) + break; + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_new_mapping); + +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard) +{ + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) + if (km->notify_policy) + km->notify_policy(pol, dir, hard); + read_unlock(&xfrm_km_lock); + + if (hard) + wake_up(&km_waitq); +} + +int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) +{ + int err; + u8 *data; + struct xfrm_mgr *km; + struct xfrm_policy *pol = NULL; + + if (optlen <= 0 || optlen > PAGE_SIZE) + return -EMSGSIZE; + + data = kmalloc(optlen, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(data, optval, optlen)) + goto out; + + err = -EINVAL; + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + pol = km->compile_policy(sk->sk_family, optname, data, + optlen, &err); + if (err >= 0) + break; + } + read_unlock(&xfrm_km_lock); + + if (err >= 0) { + xfrm_sk_policy_insert(sk, err, pol); + xfrm_pol_put(pol); + err = 0; + } + +out: + kfree(data); + return err; +} +EXPORT_SYMBOL(xfrm_user_policy); + +int xfrm_register_km(struct xfrm_mgr *km) +{ + write_lock_bh(&xfrm_km_lock); + list_add_tail(&km->list, &xfrm_km_list); + write_unlock_bh(&xfrm_km_lock); + return 0; +} +EXPORT_SYMBOL(xfrm_register_km); + +int xfrm_unregister_km(struct xfrm_mgr *km) +{ + write_lock_bh(&xfrm_km_lock); + list_del(&km->list); + write_unlock_bh(&xfrm_km_lock); + return 0; +} +EXPORT_SYMBOL(xfrm_unregister_km); + +int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_state_afinfo_lock); + if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else { + afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_byspi = xfrm_state_byspi; + xfrm_state_afinfo[afinfo->family] = afinfo; + } + write_unlock(&xfrm_state_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_state_register_afinfo); + +int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_state_afinfo_lock); + if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else { + xfrm_state_afinfo[afinfo->family] = NULL; + afinfo->state_byspi = NULL; + afinfo->state_bydst = NULL; + } + } + write_unlock(&xfrm_state_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_state_unregister_afinfo); + +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + read_lock(&xfrm_state_afinfo_lock); + afinfo = xfrm_state_afinfo[family]; + if (likely(afinfo != NULL)) + read_lock(&afinfo->lock); + read_unlock(&xfrm_state_afinfo_lock); + return afinfo; +} + +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +{ + if (unlikely(afinfo == NULL)) + return; + read_unlock(&afinfo->lock); +} + +/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ +void xfrm_state_delete_tunnel(struct xfrm_state *x) +{ + if (x->tunnel) { + struct xfrm_state *t = x->tunnel; + + if (atomic_read(&t->tunnel_users) == 2) + xfrm_state_delete(t); + atomic_dec(&t->tunnel_users); + xfrm_state_put(t); + x->tunnel = NULL; + } +} +EXPORT_SYMBOL(xfrm_state_delete_tunnel); + +int xfrm_state_mtu(struct xfrm_state *x, int mtu) +{ + int res = mtu; + + res -= x->props.header_len; + + for (;;) { + int m = res; + + if (m < 68) + return 68; + + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_VALID && + x->type && x->type->get_max_size) + m = x->type->get_max_size(x, m); + else + m += x->props.header_len; + spin_unlock_bh(&x->lock); + + if (m <= mtu) + break; + res -= (m - mtu); + } + + return res; +} + +EXPORT_SYMBOL(xfrm_state_mtu); + +void __init xfrm_state_init(void) +{ + int i; + + for (i=0; i<XFRM_DST_HSIZE; i++) { + INIT_LIST_HEAD(&xfrm_state_bydst[i]); + INIT_LIST_HEAD(&xfrm_state_byspi[i]); + } + INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); +} + diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c new file mode 100644 index 000000000000..63661b0fd736 --- /dev/null +++ b/net/xfrm/xfrm_user.c @@ -0,0 +1,1253 @@ +/* xfrm_user.c: User interface to configure xfrm engine. + * + * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * + * Changes: + * Mitsuru KANDA @USAGI + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/socket.h> +#include <linux/string.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/pfkeyv2.h> +#include <linux/ipsec.h> +#include <linux/init.h> +#include <linux/security.h> +#include <net/sock.h> +#include <net/xfrm.h> +#include <asm/uaccess.h> + +static struct sock *xfrm_nl; + +static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) +{ + struct rtattr *rt = xfrma[type - 1]; + struct xfrm_algo *algp; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) + return -EINVAL; + + algp = RTA_DATA(rt); + switch (type) { + case XFRMA_ALG_AUTH: + if (!algp->alg_key_len && + strcmp(algp->alg_name, "digest_null") != 0) + return -EINVAL; + break; + + case XFRMA_ALG_CRYPT: + if (!algp->alg_key_len && + strcmp(algp->alg_name, "cipher_null") != 0) + return -EINVAL; + break; + + case XFRMA_ALG_COMP: + /* Zero length keys are legal. */ + break; + + default: + return -EINVAL; + }; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + +static int verify_encap_tmpl(struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_ENCAP - 1]; + struct xfrm_encap_tmpl *encap; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap)) + return -EINVAL; + + return 0; +} + +static int verify_newsa_info(struct xfrm_usersa_info *p, + struct rtattr **xfrma) +{ + int err; + + err = -EINVAL; + switch (p->family) { + case AF_INET: + break; + + case AF_INET6: +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + break; +#else + err = -EAFNOSUPPORT; + goto out; +#endif + + default: + goto out; + }; + + err = -EINVAL; + switch (p->id.proto) { + case IPPROTO_AH: + if (!xfrma[XFRMA_ALG_AUTH-1] || + xfrma[XFRMA_ALG_CRYPT-1] || + xfrma[XFRMA_ALG_COMP-1]) + goto out; + break; + + case IPPROTO_ESP: + if ((!xfrma[XFRMA_ALG_AUTH-1] && + !xfrma[XFRMA_ALG_CRYPT-1]) || + xfrma[XFRMA_ALG_COMP-1]) + goto out; + break; + + case IPPROTO_COMP: + if (!xfrma[XFRMA_ALG_COMP-1] || + xfrma[XFRMA_ALG_AUTH-1] || + xfrma[XFRMA_ALG_CRYPT-1]) + goto out; + break; + + default: + goto out; + }; + + if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) + goto out; + if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) + goto out; + if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) + goto out; + if ((err = verify_encap_tmpl(xfrma))) + goto out; + + err = -EINVAL; + switch (p->mode) { + case 0: + case 1: + break; + + default: + goto out; + }; + + err = 0; + +out: + return err; +} + +static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, + struct xfrm_algo_desc *(*get_byname)(char *, int), + struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + struct xfrm_algo *p, *ualg; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = RTA_DATA(rta); + + algo = get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); + *algpp = p; + return 0; +} + +static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + struct xfrm_encap_tmpl *p, *uencap; + + if (!rta) + return 0; + + uencap = RTA_DATA(rta); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, uencap, sizeof(*p)); + *encapp = p; + return 0; +} + +static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) +{ + memcpy(&x->id, &p->id, sizeof(x->id)); + memcpy(&x->sel, &p->sel, sizeof(x->sel)); + memcpy(&x->lft, &p->lft, sizeof(x->lft)); + x->props.mode = p->mode; + x->props.replay_window = p->replay_window; + x->props.reqid = p->reqid; + x->props.family = p->family; + x->props.saddr = p->saddr; + x->props.flags = p->flags; +} + +static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = xfrm_state_alloc(); + int err = -ENOMEM; + + if (!x) + goto error_no_put; + + copy_from_user_state(x, p); + + if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, + xfrm_aalg_get_byname, + xfrma[XFRMA_ALG_AUTH-1]))) + goto error; + if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, + xfrm_ealg_get_byname, + xfrma[XFRMA_ALG_CRYPT-1]))) + goto error; + if ((err = attach_one_algo(&x->calg, &x->props.calgo, + xfrm_calg_get_byname, + xfrma[XFRMA_ALG_COMP-1]))) + goto error; + if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) + goto error; + + err = -ENOENT; + x->type = xfrm_get_type(x->id.proto, x->props.family); + if (x->type == NULL) + goto error; + + err = x->type->init_state(x, NULL); + if (err) + goto error; + + x->curlft.add_time = (unsigned long) xtime.tv_sec; + x->km.state = XFRM_STATE_VALID; + x->km.seq = p->seq; + + return x; + +error: + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); +error_no_put: + *errp = err; + return NULL; +} + +static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct xfrm_state *x; + int err; + + err = verify_newsa_info(p, (struct rtattr **) xfrma); + if (err) + return err; + + x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); + if (!x) + return err; + + if (nlh->nlmsg_type == XFRM_MSG_NEWSA) + err = xfrm_state_add(x); + else + err = xfrm_state_update(x); + + if (err < 0) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + } + + return err; +} + +static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_state *x; + struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + if (x == NULL) + return -ESRCH; + + if (xfrm_state_kern(x)) { + xfrm_state_put(x); + return -EPERM; + } + + xfrm_state_delete(x); + xfrm_state_put(x); + + return 0; +} + +static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) +{ + memcpy(&p->id, &x->id, sizeof(p->id)); + memcpy(&p->sel, &x->sel, sizeof(p->sel)); + memcpy(&p->lft, &x->lft, sizeof(p->lft)); + memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); + memcpy(&p->stats, &x->stats, sizeof(p->stats)); + p->saddr = x->props.saddr; + p->mode = x->props.mode; + p->replay_window = x->props.replay_window; + p->reqid = x->props.reqid; + p->family = x->props.family; + p->flags = x->props.flags; + p->seq = x->km.seq; +} + +struct xfrm_dump_info { + struct sk_buff *in_skb; + struct sk_buff *out_skb; + u32 nlmsg_seq; + u16 nlmsg_flags; + int start_idx; + int this_idx; +}; + +static int dump_one_state(struct xfrm_state *x, int count, void *ptr) +{ + struct xfrm_dump_info *sp = ptr; + struct sk_buff *in_skb = sp->in_skb; + struct sk_buff *skb = sp->out_skb; + struct xfrm_usersa_info *p; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + if (sp->this_idx < sp->start_idx) + goto out; + + nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, + sp->nlmsg_seq, + XFRM_MSG_NEWSA, sizeof(*p)); + nlh->nlmsg_flags = sp->nlmsg_flags; + + p = NLMSG_DATA(nlh); + copy_to_user_state(x, p); + + if (x->aalg) + RTA_PUT(skb, XFRMA_ALG_AUTH, + sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); + if (x->ealg) + RTA_PUT(skb, XFRMA_ALG_CRYPT, + sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); + if (x->calg) + RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + + if (x->encap) + RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + + nlh->nlmsg_len = skb->tail - b; +out: + sp->this_idx++; + return 0; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct xfrm_dump_info info; + + info.in_skb = cb->skb; + info.out_skb = skb; + info.nlmsg_seq = cb->nlh->nlmsg_seq; + info.nlmsg_flags = NLM_F_MULTI; + info.this_idx = 0; + info.start_idx = cb->args[0]; + (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); + cb->args[0] = info.this_idx; + + return skb->len; +} + +static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, + struct xfrm_state *x, u32 seq) +{ + struct xfrm_dump_info info; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + info.in_skb = in_skb; + info.out_skb = skb; + info.nlmsg_seq = seq; + info.nlmsg_flags = 0; + info.this_idx = info.start_idx = 0; + + if (dump_one_state(x, 0, &info)) { + kfree_skb(skb); + return NULL; + } + + return skb; +} + +static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_state *x; + struct sk_buff *resp_skb; + int err; + + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + err = -ESRCH; + if (x == NULL) + goto out_noput; + + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); + if (IS_ERR(resp_skb)) { + err = PTR_ERR(resp_skb); + } else { + err = netlink_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid, MSG_DONTWAIT); + } + xfrm_state_put(x); +out_noput: + return err; +} + +static int verify_userspi_info(struct xfrm_userspi_info *p) +{ + switch (p->info.id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + break; + + case IPPROTO_COMP: + /* IPCOMP spi is 16-bits. */ + if (p->max >= 0x10000) + return -EINVAL; + break; + + default: + return -EINVAL; + }; + + if (p->min > p->max) + return -EINVAL; + + return 0; +} + +static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_state *x; + struct xfrm_userspi_info *p; + struct sk_buff *resp_skb; + xfrm_address_t *daddr; + int family; + int err; + + p = NLMSG_DATA(nlh); + err = verify_userspi_info(p); + if (err) + goto out_noput; + + family = p->info.family; + daddr = &p->info.id.daddr; + + x = NULL; + if (p->info.seq) { + x = xfrm_find_acq_byseq(p->info.seq); + if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + xfrm_state_put(x); + x = NULL; + } + } + + if (!x) + x = xfrm_find_acq(p->info.mode, p->info.reqid, + p->info.id.proto, daddr, + &p->info.saddr, 1, + family); + err = -ENOENT; + if (x == NULL) + goto out_noput; + + resp_skb = ERR_PTR(-ENOENT); + + spin_lock_bh(&x->lock); + if (x->km.state != XFRM_STATE_DEAD) { + xfrm_alloc_spi(x, htonl(p->min), htonl(p->max)); + if (x->id.spi) + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); + } + spin_unlock_bh(&x->lock); + + if (IS_ERR(resp_skb)) { + err = PTR_ERR(resp_skb); + goto out; + } + + err = netlink_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid, MSG_DONTWAIT); + +out: + xfrm_state_put(x); +out_noput: + return err; +} + +static int verify_policy_dir(__u8 dir) +{ + switch (dir) { + case XFRM_POLICY_IN: + case XFRM_POLICY_OUT: + case XFRM_POLICY_FWD: + break; + + default: + return -EINVAL; + }; + + return 0; +} + +static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) +{ + switch (p->share) { + case XFRM_SHARE_ANY: + case XFRM_SHARE_SESSION: + case XFRM_SHARE_USER: + case XFRM_SHARE_UNIQUE: + break; + + default: + return -EINVAL; + }; + + switch (p->action) { + case XFRM_POLICY_ALLOW: + case XFRM_POLICY_BLOCK: + break; + + default: + return -EINVAL; + }; + + switch (p->sel.family) { + case AF_INET: + break; + + case AF_INET6: +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + break; +#else + return -EAFNOSUPPORT; +#endif + + default: + return -EINVAL; + }; + + return verify_policy_dir(p->dir); +} + +static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, + int nr) +{ + int i; + + xp->xfrm_nr = nr; + for (i = 0; i < nr; i++, ut++) { + struct xfrm_tmpl *t = &xp->xfrm_vec[i]; + + memcpy(&t->id, &ut->id, sizeof(struct xfrm_id)); + memcpy(&t->saddr, &ut->saddr, + sizeof(xfrm_address_t)); + t->reqid = ut->reqid; + t->mode = ut->mode; + t->share = ut->share; + t->optional = ut->optional; + t->aalgos = ut->aalgos; + t->ealgos = ut->ealgos; + t->calgos = ut->calgos; + } +} + +static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct xfrm_user_tmpl *utmpl; + int nr; + + if (!rt) { + pol->xfrm_nr = 0; + } else { + nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + + if (nr > XFRM_MAX_DEPTH) + return -EINVAL; + + copy_templates(pol, RTA_DATA(rt), nr); + } + return 0; +} + +static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) +{ + xp->priority = p->priority; + xp->index = p->index; + memcpy(&xp->selector, &p->sel, sizeof(xp->selector)); + memcpy(&xp->lft, &p->lft, sizeof(xp->lft)); + xp->action = p->action; + xp->flags = p->flags; + xp->family = p->sel.family; + /* XXX xp->share = p->share; */ +} + +static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) +{ + memcpy(&p->sel, &xp->selector, sizeof(p->sel)); + memcpy(&p->lft, &xp->lft, sizeof(p->lft)); + memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); + p->priority = xp->priority; + p->index = xp->index; + p->sel.family = xp->family; + p->dir = dir; + p->action = xp->action; + p->flags = xp->flags; + p->share = XFRM_SHARE_ANY; /* XXX xp->share */ +} + +static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) +{ + struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + int err; + + if (!xp) { + *errp = -ENOMEM; + return NULL; + } + + copy_from_user_policy(xp, p); + err = copy_from_user_tmpl(xp, xfrma); + if (err) { + *errp = err; + kfree(xp); + xp = NULL; + } + + return xp; +} + +static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); + struct xfrm_policy *xp; + int err; + int excl; + + err = verify_newpolicy_info(p); + if (err) + return err; + + xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); + if (!xp) + return err; + + excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; + err = xfrm_policy_insert(p->dir, xp, excl); + if (err) { + kfree(xp); + return err; + } + + xfrm_pol_put(xp); + + return 0; +} + +static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) +{ + struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH]; + int i; + + if (xp->xfrm_nr == 0) + return 0; + + for (i = 0; i < xp->xfrm_nr; i++) { + struct xfrm_user_tmpl *up = &vec[i]; + struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + + memcpy(&up->id, &kp->id, sizeof(up->id)); + up->family = xp->family; + memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); + up->reqid = kp->reqid; + up->mode = kp->mode; + up->share = kp->share; + up->optional = kp->optional; + up->aalgos = kp->aalgos; + up->ealgos = kp->ealgos; + up->calgos = kp->calgos; + } + RTA_PUT(skb, XFRMA_TMPL, + (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), + vec); + + return 0; + +rtattr_failure: + return -1; +} + +static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) +{ + struct xfrm_dump_info *sp = ptr; + struct xfrm_userpolicy_info *p; + struct sk_buff *in_skb = sp->in_skb; + struct sk_buff *skb = sp->out_skb; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + if (sp->this_idx < sp->start_idx) + goto out; + + nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, + sp->nlmsg_seq, + XFRM_MSG_NEWPOLICY, sizeof(*p)); + p = NLMSG_DATA(nlh); + nlh->nlmsg_flags = sp->nlmsg_flags; + + copy_to_user_policy(xp, p, dir); + if (copy_to_user_tmpl(xp, skb) < 0) + goto nlmsg_failure; + + nlh->nlmsg_len = skb->tail - b; +out: + sp->this_idx++; + return 0; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct xfrm_dump_info info; + + info.in_skb = cb->skb; + info.out_skb = skb; + info.nlmsg_seq = cb->nlh->nlmsg_seq; + info.nlmsg_flags = NLM_F_MULTI; + info.this_idx = 0; + info.start_idx = cb->args[0]; + (void) xfrm_policy_walk(dump_one_policy, &info); + cb->args[0] = info.this_idx; + + return skb->len; +} + +static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, + struct xfrm_policy *xp, + int dir, u32 seq) +{ + struct xfrm_dump_info info; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + info.in_skb = in_skb; + info.out_skb = skb; + info.nlmsg_seq = seq; + info.nlmsg_flags = 0; + info.this_idx = info.start_idx = 0; + + if (dump_one_policy(xp, dir, 0, &info) < 0) { + kfree_skb(skb); + return NULL; + } + + return skb; +} + +static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_policy *xp; + struct xfrm_userpolicy_id *p; + int err; + int delete; + + p = NLMSG_DATA(nlh); + delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; + + err = verify_policy_dir(p->dir); + if (err) + return err; + + if (p->index) + xp = xfrm_policy_byid(p->dir, p->index, delete); + else + xp = xfrm_policy_bysel(p->dir, &p->sel, delete); + if (xp == NULL) + return -ENOENT; + + if (!delete) { + struct sk_buff *resp_skb; + + resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq); + if (IS_ERR(resp_skb)) { + err = PTR_ERR(resp_skb); + } else { + err = netlink_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid, + MSG_DONTWAIT); + } + } + + xfrm_pol_put(xp); + + return err; +} + +static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + + xfrm_state_flush(p->proto); + return 0; +} + +static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + xfrm_policy_flush(); + return 0; +} + +static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = { + NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)), /* NEW SA */ + NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* DEL SA */ + NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* GET SA */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* DEL POLICY */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* GET POLICY */ + NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)), /* ALLOC SPI */ + NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)), /* ACQUIRE */ + NLMSG_LENGTH(sizeof(struct xfrm_user_expire)), /* EXPIRE */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */ + NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)), /* UPD SA */ + NLMSG_LENGTH(sizeof(struct xfrm_user_polexpire)), /* POLEXPIRE */ + NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)), /* FLUSH SA */ + NLMSG_LENGTH(0), /* FLUSH POLICY */ +}; + +static struct xfrm_link { + int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); + int (*dump)(struct sk_buff *, struct netlink_callback *); +} xfrm_dispatch[] = { + { .doit = xfrm_add_sa, }, + { .doit = xfrm_del_sa, }, + { + .doit = xfrm_get_sa, + .dump = xfrm_dump_sa, + }, + { .doit = xfrm_add_policy }, + { .doit = xfrm_get_policy }, + { + .doit = xfrm_get_policy, + .dump = xfrm_dump_policy, + }, + { .doit = xfrm_alloc_userspi }, + {}, + {}, + { .doit = xfrm_add_policy }, + { .doit = xfrm_add_sa, }, + {}, + { .doit = xfrm_flush_sa }, + { .doit = xfrm_flush_policy }, +}; + +static int xfrm_done(struct netlink_callback *cb) +{ + return 0; +} + +static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) +{ + struct rtattr *xfrma[XFRMA_MAX]; + struct xfrm_link *link; + int type, min_len; + + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) + return 0; + + type = nlh->nlmsg_type; + + /* A control message: ignore them */ + if (type < XFRM_MSG_BASE) + return 0; + + /* Unknown message: reply with EINVAL */ + if (type > XFRM_MSG_MAX) + goto err_einval; + + type -= XFRM_MSG_BASE; + link = &xfrm_dispatch[type]; + + /* All operations require privileges, even GET */ + if (security_netlink_recv(skb)) { + *errp = -EPERM; + return -1; + } + + if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) { + u32 rlen; + + if (link->dump == NULL) + goto err_einval; + + if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, + link->dump, + xfrm_done)) != 0) { + return -1; + } + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return -1; + } + + memset(xfrma, 0, sizeof(xfrma)); + + if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) + goto err_einval; + + if (nlh->nlmsg_len > min_len) { + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); + + while (RTA_OK(attr, attrlen)) { + unsigned short flavor = attr->rta_type; + if (flavor) { + if (flavor > XFRMA_MAX) + goto err_einval; + xfrma[flavor - 1] = attr; + } + attr = RTA_NEXT(attr, attrlen); + } + } + + if (link->doit == NULL) + goto err_einval; + *errp = link->doit(skb, nlh, (void **) &xfrma); + + return *errp; + +err_einval: + *errp = -EINVAL; + return -1; +} + +static int xfrm_user_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr *nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + u32 rlen; + + nlh = (struct nlmsghdr *) skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) { + if (err == 0) + return -1; + netlink_ack(skb, nlh, err); + } else if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +static void xfrm_netlink_rcv(struct sock *sk, int len) +{ + do { + struct sk_buff *skb; + + down(&xfrm_cfg_sem); + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (xfrm_user_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, + skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); + } + + up(&xfrm_cfg_sem); + + } while (xfrm_nl && xfrm_nl->sk_receive_queue.qlen); +} + +static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard) +{ + struct xfrm_user_expire *ue; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE, + sizeof(*ue)); + ue = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + copy_to_user_state(x, &ue->state); + ue->hard = (hard != 0) ? 1 : 0; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_state_notify(struct xfrm_state *x, int hard) +{ + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_expire(skb, x, hard) < 0) + BUG(); + + NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; + + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); +} + +static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_tmpl *xt, struct xfrm_policy *xp, + int dir) +{ + struct xfrm_user_acquire *ua; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + __u32 seq = xfrm_get_acqseq(); + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, + sizeof(*ua)); + ua = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + memcpy(&ua->id, &x->id, sizeof(ua->id)); + memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); + memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); + copy_to_user_policy(xp, &ua->policy, dir); + ua->aalgos = xt->aalgos; + ua->ealgos = xt->ealgos; + ua->calgos = xt->calgos; + ua->seq = x->km.seq = seq; + + if (copy_to_user_tmpl(xp, skb) < 0) + goto nlmsg_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, + struct xfrm_policy *xp, int dir) +{ + struct sk_buff *skb; + size_t len; + + len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_acquire(skb, x, xt, xp, dir) < 0) + BUG(); + + NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; + + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); +} + +/* User gives us xfrm_user_policy_info followed by an array of 0 + * or more templates. + */ +static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, + u8 *data, int len, int *dir) +{ + struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; + struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); + struct xfrm_policy *xp; + int nr; + + switch (family) { + case AF_INET: + if (opt != IP_XFRM_POLICY) { + *dir = -EOPNOTSUPP; + return NULL; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + if (opt != IPV6_XFRM_POLICY) { + *dir = -EOPNOTSUPP; + return NULL; + } + break; +#endif + default: + *dir = -EINVAL; + return NULL; + } + + *dir = -EINVAL; + + if (len < sizeof(*p) || + verify_newpolicy_info(p)) + return NULL; + + nr = ((len - sizeof(*p)) / sizeof(*ut)); + if (nr > XFRM_MAX_DEPTH) + return NULL; + + xp = xfrm_policy_alloc(GFP_KERNEL); + if (xp == NULL) { + *dir = -ENOBUFS; + return NULL; + } + + copy_from_user_policy(xp, p); + copy_templates(xp, ut, nr); + + *dir = p->dir; + + return xp; +} + +static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, + int dir, int hard) +{ + struct xfrm_user_polexpire *upe; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); + upe = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + copy_to_user_policy(xp, &upe->pol, dir); + if (copy_to_user_tmpl(xp, skb) < 0) + goto nlmsg_failure; + upe->hard = !!hard; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard) +{ + struct sk_buff *skb; + size_t len; + + len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_polexpire(skb, xp, dir, hard) < 0) + BUG(); + + NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; + + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); +} + +static struct xfrm_mgr netlink_mgr = { + .id = "netlink", + .notify = xfrm_send_state_notify, + .acquire = xfrm_send_acquire, + .compile_policy = xfrm_compile_policy, + .notify_policy = xfrm_send_policy_notify, +}; + +static int __init xfrm_user_init(void) +{ + printk(KERN_INFO "Initializing IPsec netlink socket\n"); + + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + if (xfrm_nl == NULL) + return -ENOMEM; + + xfrm_register_km(&netlink_mgr); + + return 0; +} + +static void __exit xfrm_user_exit(void) +{ + xfrm_unregister_km(&netlink_mgr); + sock_release(xfrm_nl->sk_socket); +} + +module_init(xfrm_user_init); +module_exit(xfrm_user_exit); +MODULE_LICENSE("GPL"); |