summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-05-11 10:50:19 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-11 10:50:19 -0400
commit0198e09c4bdd7bce00c451c51a86a239c356a315 (patch)
tree0f787a635911a4c854638de7ff36cb93a32ca5aa /include
parent80ba92fa1a92dea128283f69f55b02242e213650 (diff)
parentaffb9792f1d99e1e4d64411e147b648d65f2576e (diff)
Merge branch 'kernel_socket_netns'
Eric W. Biederman says: ==================== Cleanup the kernel sockets. Right now the situtation for allocating kernel sockets is a mess. - sock_create_kern does not take a namespace parameter. - kernel sockets must not reference count a network namespace and keep it alive or else we will have a reference counting loop. - The way we avoid the reference counting loop with sk_change_net and sk_release_kernel are major hacks. This patchset addresses this mess by fixing sock_create_kern to do everything necessary to create a kernel socket. None of the current users of kernel sockets need the network namespace reference counted. Either kernel sockets are network namespace aware (and using the current hacks) or kernel sockets are limited to the initial network namespace in which case it does not matter. This patchset starts by addressing tun which should be using normal userspace sockets like macvtap. Then sock_create_kern is fixed to take a network namespace. Then the in kernel status of sockets are passed through to sk_alloc. Then sk_alloc is fixed to not reference count the network namespace of kernel sockets. Then the callers of sock_create_kern are fixed up to stop using hacks. Then netlink which uses it's own flavor of sock_create_kern is fixed. Finally the hacks that are sk_change_net and sk_release_kernel are removed. When it is all done the code is easier to follow, easier to use, easier to maintain and shorter by about 70 lines. ==================== Reported-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/if_pppox.h2
-rw-r--r--include/linux/net.h3
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/llc_conn.h2
-rw-r--r--include/net/sock.h21
6 files changed, 8 insertions, 24 deletions
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 66a7d7600f43..b49cf923becc 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po)
struct module;
struct pppox_proto {
- int (*create)(struct net *net, struct socket *sock);
+ int (*create)(struct net *net, struct socket *sock, int kern);
int (*ioctl)(struct socket *sock, unsigned int cmd,
unsigned long arg);
struct module *owner;
diff --git a/include/linux/net.h b/include/linux/net.h
index 738ea48be889..04aa06852771 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -38,7 +38,6 @@ struct net;
#define SOCK_NOSPACE 2
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
-#define SOCK_EXTERNALLY_ALLOCATED 5
#ifndef ARCH_HAS_SOCKET_TYPES
/**
@@ -208,7 +207,7 @@ void sock_unregister(int family);
int __sock_create(struct net *net, int family, int type, int proto,
struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
-int sock_create_kern(int family, int type, int proto, struct socket **res);
+int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
void sock_release(struct socket *sock);
int sock_sendmsg(struct socket *sock, struct msghdr *msg);
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 172632dd9930..db639a4c5ab8 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work);
struct sock *__vsock_create(struct net *net,
struct socket *sock,
struct sock *parent,
- gfp_t priority, unsigned short type);
+ gfp_t priority, unsigned short type, int kern);
/**** TRANSPORT ****/
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 4a92423eefa5..279f83591971 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -41,7 +41,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
static inline void inet_ctl_sock_destroy(struct sock *sk)
{
- sk_release_kernel(sk);
+ sock_release(sk->sk_socket);
}
#endif
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index 0134681acc4c..fe994d2e5286 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb)
}
struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority,
- struct proto *prot);
+ struct proto *prot, int kern);
void llc_sk_free(struct sock *sk);
void llc_sk_reset(struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3a4898ec8c67..d882f4c8e438 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -184,6 +184,7 @@ struct sock_common {
unsigned char skc_reuse:4;
unsigned char skc_reuseport:1;
unsigned char skc_ipv6only:1;
+ unsigned char skc_net_refcnt:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
@@ -323,6 +324,7 @@ struct sock {
#define sk_reuse __sk_common.skc_reuse
#define sk_reuseport __sk_common.skc_reuseport
#define sk_ipv6only __sk_common.skc_ipv6only
+#define sk_net_refcnt __sk_common.skc_net_refcnt
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
@@ -1514,9 +1516,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
- struct proto *prot);
+ struct proto *prot, int kern);
void sk_free(struct sock *sk);
-void sk_release_kernel(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
@@ -2192,22 +2193,6 @@ void sock_net_set(struct sock *sk, struct net *net)
write_pnet(&sk->sk_net, net);
}
-/*
- * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
- * They should not hold a reference to a namespace in order to allow
- * to stop it.
- * Sockets after sk_change_net should be released using sk_release_kernel
- */
-static inline void sk_change_net(struct sock *sk, struct net *net)
-{
- struct net *current_net = sock_net(sk);
-
- if (!net_eq(current_net, net)) {
- put_net(current_net);
- sock_net_set(sk, net);
- }
-}
-
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (skb->sk) {