summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c18
-rw-r--r--net/802/stp.c4
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/caif/caif_config_util.c13
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/caif_socket.c45
-rw-r--r--net/caif/cfcnfg.c17
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/caif/cfdbgl.c14
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/compat.c10
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/fib_rules.c21
-rw-r--r--net/core/filter.c68
-rw-r--r--net/core/iovec.c20
-rw-r--r--net/core/net-sysfs.c20
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/pktgen.c41
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/sysctl_net_core.c3
-rw-r--r--net/dccp/ccid.h34
-rw-r--r--net/dccp/ccids/ccid2.c23
-rw-r--r--net/dccp/ccids/ccid2.h5
-rw-r--r--net/dccp/ccids/ccid3.c12
-rw-r--r--net/dccp/dccp.h5
-rw-r--r--net/dccp/output.c209
-rw-r--r--net/dccp/proto.c21
-rw-r--r--net/dccp/timer.c27
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/sysctl_net_decnet.c4
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_hash.c54
-rw-r--r--net/ipv4/fib_lookup.h5
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c27
-rw-r--r--net/ipv4/inetpeer.c138
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c40
-rw-r--r--net/ipv4/proc.c8
-rw-r--r--net/ipv4/protocol.c8
-rw-r--r--net/ipv4/route.c75
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tunnel4.c29
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c16
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile5
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c5
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/protocol.c8
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/tunnel6.c24
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/l2tp/l2tp_core.c53
-rw-r--r--net/l2tp/l2tp_core.h33
-rw-r--r--net/l2tp/l2tp_debugfs.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c6
-rw-r--r--net/netfilter/xt_TPROXY.c10
-rw-r--r--net/netfilter/xt_socket.c19
-rw-r--r--net/netlink/af_netlink.c65
-rw-r--r--net/packet/af_packet.c3
-rw-r--r--net/rds/loop.c4
-rw-r--r--net/rds/message.c7
-rw-r--r--net/rds/rdma.c126
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rds/tcp.c6
-rw-r--r--net/sched/cls_basic.c4
-rw-r--r--net/sched/cls_cgroup.c2
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/socket.c4
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/x25/x25_facilities.c8
-rw-r--r--net/x25/x25_in.c2
95 files changed, 967 insertions, 655 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 941f2a324d3a..c1df2dad8c6b 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -346,8 +346,8 @@ int garp_request_join(const struct net_device *dev,
const struct garp_application *appl,
const void *data, u8 len, u8 type)
{
- struct garp_port *port = dev->garp_port;
- struct garp_applicant *app = port->applicants[appl->type];
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
+ struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
struct garp_attr *attr;
spin_lock_bh(&app->lock);
@@ -366,8 +366,8 @@ void garp_request_leave(const struct net_device *dev,
const struct garp_application *appl,
const void *data, u8 len, u8 type)
{
- struct garp_port *port = dev->garp_port;
- struct garp_applicant *app = port->applicants[appl->type];
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
+ struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
struct garp_attr *attr;
spin_lock_bh(&app->lock);
@@ -546,11 +546,11 @@ static int garp_init_port(struct net_device *dev)
static void garp_release_port(struct net_device *dev)
{
- struct garp_port *port = dev->garp_port;
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
unsigned int i;
for (i = 0; i <= GARP_APPLICATION_MAX; i++) {
- if (port->applicants[i])
+ if (rtnl_dereference(port->applicants[i]))
return;
}
rcu_assign_pointer(dev->garp_port, NULL);
@@ -565,7 +565,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
ASSERT_RTNL();
- if (!dev->garp_port) {
+ if (!rtnl_dereference(dev->garp_port)) {
err = garp_init_port(dev);
if (err < 0)
goto err1;
@@ -601,8 +601,8 @@ EXPORT_SYMBOL_GPL(garp_init_applicant);
void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
{
- struct garp_port *port = dev->garp_port;
- struct garp_applicant *app = port->applicants[appl->type];
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
+ struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
ASSERT_RTNL();
diff --git a/net/802/stp.c b/net/802/stp.c
index 53c8f77f0ccd..978c30b1b36b 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -21,8 +21,8 @@
#define GARP_ADDR_MAX 0x2F
#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN)
-static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
-static const struct stp_proto *stp_proto __read_mostly;
+static const struct stp_proto __rcu *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
+static const struct stp_proto __rcu *stp_proto __read_mostly;
static struct llc_sap *sap __read_mostly;
static unsigned int sap_registered;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 05b867e43757..52077ca22072 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -112,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
ASSERT_RTNL();
- grp = real_dev->vlgrp;
+ grp = rtnl_dereference(real_dev->vlgrp);
BUG_ON(!grp);
/* Take it out of our own structures, but be sure to interlock with
@@ -177,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
struct vlan_group *grp, *ngrp = NULL;
int err;
- grp = real_dev->vlgrp;
+ grp = rtnl_dereference(real_dev->vlgrp);
if (!grp) {
ngrp = grp = vlan_group_alloc(real_dev);
if (!grp)
@@ -385,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
}
- grp = dev->vlgrp;
+ grp = rtnl_dereference(dev->vlgrp);
if (!grp)
goto out;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 26eaebf4aaa9..bb86d2932394 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
ax25_cb *ax25;
int err = 0;
+ memset(fsa, 0, sizeof(fsa));
lock_sock(sk);
ax25 = ax25_sk(sk);
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_ax25.sax25_family = AF_AX25;
fsa->fsa_ax25.sax25_call = ax25->dest_addr;
- fsa->fsa_ax25.sax25_ndigis = 0;
if (ax25->digipeat != NULL) {
ndigi = ax25->digipeat->ndigi;
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3a..d522d8c1703e 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
{
struct dev_info *dev_info;
enum cfcnfg_phy_preference pref;
+ int res;
+
memset(l, 0, sizeof(*l));
- l->priority = s->priority;
+ /* In caif protocol low value is high priority */
+ l->priority = CAIF_PRIO_MAX - s->priority + 1;
- if (s->link_name[0] != '\0')
- l->phyid = cfcnfg_get_named(cnfg, s->link_name);
+ if (s->ifindex != 0){
+ res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
+ if (res < 0)
+ return res;
+ l->phyid = res;
+ }
else {
switch (s->link_selector) {
case CAIF_LINK_HIGH_BANDW:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index b99369a055d1..a42a408306e4 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -307,6 +307,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
case NETDEV_UNREGISTER:
caifd = caif_get(dev);
+ if (caifd == NULL)
+ break;
netdev_info(dev, "unregister\n");
atomic_set(&caifd->state, what);
caif_device_destroy(dev);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2eca2dd0000f..1bf0cf503796 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -716,8 +716,7 @@ static int setsockopt(struct socket *sock,
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
- int prio, linksel;
- struct ifreq ifreq;
+ int linksel;
if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
return -ENOPROTOOPT;
@@ -735,33 +734,6 @@ static int setsockopt(struct socket *sock,
release_sock(&cf_sk->sk);
return 0;
- case SO_PRIORITY:
- if (lvl != SOL_SOCKET)
- goto bad_sol;
- if (ol < sizeof(int))
- return -EINVAL;
- if (copy_from_user(&prio, ov, sizeof(int)))
- return -EINVAL;
- lock_sock(&(cf_sk->sk));
- cf_sk->conn_req.priority = prio;
- release_sock(&cf_sk->sk);
- return 0;
-
- case SO_BINDTODEVICE:
- if (lvl != SOL_SOCKET)
- goto bad_sol;
- if (ol < sizeof(struct ifreq))
- return -EINVAL;
- if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
- return -EFAULT;
- lock_sock(&(cf_sk->sk));
- strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
- sizeof(cf_sk->conn_req.link_name));
- cf_sk->conn_req.link_name
- [sizeof(cf_sk->conn_req.link_name)-1] = 0;
- release_sock(&cf_sk->sk);
- return 0;
-
case CAIFSO_REQ_PARAM:
if (lvl != SOL_CAIF)
goto bad_sol;
@@ -880,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
sk->sk_state = CAIF_CONNECTING;
+ /* Check priority value comming from socket */
+ /* if priority value is out of range it will be ajusted */
+ if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
+ cf_sk->conn_req.priority = CAIF_PRIO_MAX;
+ else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
+ cf_sk->conn_req.priority = CAIF_PRIO_MIN;
+ else
+ cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
+
+ /*ifindex = id of the interface.*/
+ cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
+
dbfs_atomic_inc(&cnt.num_connect_req);
cf_sk->layer.receive = caif_sktrecv_cb;
err = caif_connect_client(&cf_sk->conn_req,
@@ -905,6 +889,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
cf_sk->maxframe = mtu - (headroom + tailroom);
if (cf_sk->maxframe < 1) {
pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
+ err = -ENODEV;
goto out;
}
@@ -1142,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
set_rx_flow_on(cf_sk);
/* Set default options on configuration */
- cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
+ cf_sk->sk.sk_priority= CAIF_PRIO_NORMAL;
cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
cf_sk->conn_req.protocol = protocol;
/* Increase the number of sockets created. */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 41adafd18914..21ede141018a 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -173,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
return NULL;
}
-int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
+
+int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
{
int i;
-
- /* Try to match with specified name */
- for (i = 0; i < MAX_PHY_LAYERS; i++) {
- if (cnfg->phy_layers[i].frm_layer != NULL
- && strcmp(cnfg->phy_layers[i].phy_layer->name,
- name) == 0)
- return cnfg->phy_layers[i].frm_layer->id;
- }
- return 0;
+ for (i = 0; i < MAX_PHY_LAYERS; i++)
+ if (cnfg->phy_layers[i].frm_layer != NULL &&
+ cnfg->phy_layers[i].ifindex == ifi)
+ return i;
+ return -ENODEV;
}
int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 08f267a109aa..3cd8f978e309 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -361,11 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
struct cfctrl_request_info *p, *tmp;
struct cfctrl *ctrl = container_obj(layr);
spin_lock(&ctrl->info_list_lock);
- pr_warn("enter\n");
list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
if (p->client_layer == adap_layer) {
- pr_warn("cancel req :%d\n", p->sequence_no);
+ pr_debug("cancel req :%d\n", p->sequence_no);
list_del(&p->list);
kfree(p);
}
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 496fda9ac66f..11a2af4c162a 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -12,6 +12,8 @@
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -38,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
{
+ struct cfsrvl *service = container_obj(layr);
+ struct caif_payload_info *info;
+ int ret;
+
+ if (!cfsrvl_ready(service, &ret))
+ return ret;
+
+ /* Add info for MUX-layer to route the packet out */
+ info = cfpkt_info(pkt);
+ info->channel_id = service->layer.id;
+ info->dev_info = &service->dev_info;
+
return layr->dn->transmit(layr->dn, pkt);
}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index bde8481e8d25..e2fb5fa75795 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -193,7 +193,7 @@ out:
static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
{
- caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size);
+ caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
/* Add info for MUX-layer to route the packet out. */
cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
diff --git a/net/compat.c b/net/compat.c
index 63d260e81472..3649d5895361 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
compat_size_t len;
if (get_user(len, &uiov32->iov_len) ||
- get_user(buf, &uiov32->iov_base)) {
- tot_len = -EFAULT;
- break;
- }
+ get_user(buf, &uiov32->iov_base))
+ return -EFAULT;
+
+ if (len > INT_MAX - tot_len)
+ len = INT_MAX - tot_len;
+
tot_len += len;
kiov->iov_base = compat_ptr(buf);
kiov->iov_len = (__kernel_size_t) len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 78b5a89b0f40..0dd54a69dace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1685,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
- return ((features & NETIF_F_GEN_CSUM) ||
- ((features & NETIF_F_IP_CSUM) &&
+ return ((features & NETIF_F_NO_CSUM) ||
+ ((features & NETIF_F_V4_CSUM) &&
protocol == htons(ETH_P_IP)) ||
- ((features & NETIF_F_IPV6_CSUM) &&
+ ((features & NETIF_F_V6_CSUM) &&
protocol == htons(ETH_P_IPV6)) ||
((features & NETIF_F_FCOE_CRC) &&
protocol == htons(ETH_P_FCOE)));
@@ -1696,22 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
+ __be16 protocol = skb->protocol;
int features = dev->features;
- if (vlan_tx_tag_present(skb))
+ if (vlan_tx_tag_present(skb)) {
features &= dev->vlan_features;
-
- if (can_checksum_protocol(features, skb->protocol))
- return true;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ } else if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- if (can_checksum_protocol(dev->features & dev->vlan_features,
- veh->h_vlan_encapsulated_proto))
- return true;
+ protocol = veh->h_vlan_encapsulated_proto;
+ features &= dev->vlan_features;
}
- return false;
+ return can_checksum_protocol(features, protocol);
}
/**
@@ -2135,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
} else {
struct sock *sk = skb->sk;
queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
+ if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
queue_index = 0;
if (dev->real_num_tx_queues > 1)
@@ -2213,7 +2209,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
static DEFINE_PER_CPU(int, xmit_recursion);
-#define RECURSION_LIMIT 3
+#define RECURSION_LIMIT 10
/**
* dev_queue_xmit - transmit a buffer
@@ -2413,7 +2409,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
#ifdef CONFIG_RPS
/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
/*
@@ -2425,7 +2421,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp)
{
struct netdev_rx_queue *rxqueue;
- struct rps_map *map = NULL;
+ struct rps_map *map;
struct rps_dev_flow_table *flow_table;
struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
@@ -2444,15 +2440,15 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
} else
rxqueue = dev->_rx;
- if (rxqueue->rps_map) {
- map = rcu_dereference(rxqueue->rps_map);
- if (map && map->len == 1) {
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map) {
+ if (map->len == 1) {
tcpu = map->cpus[0];
if (cpu_online(tcpu))
cpu = tcpu;
goto done;
}
- } else if (!rxqueue->rps_flow_table) {
+ } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
goto done;
}
@@ -5416,7 +5412,7 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
WARN_ON(rcu_dereference_raw(dev->ip_ptr));
- WARN_ON(dev->ip6_ptr);
+ WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
if (dev->destructor)
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f1..b99c7c7ffce2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
+ .priority = -10, /* must be called after other network notifiers */
};
void __init dst_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc3f253ba6c..82a4369ae150 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -351,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref == rule->target) {
- rule->ctarget = r;
+ RCU_INIT_POINTER(rule->ctarget, r);
break;
}
}
- if (rule->ctarget == NULL)
+ if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
unresolved = 1;
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
@@ -373,6 +373,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
fib_rule_get(rule);
+ if (last)
+ list_add_rcu(&rule->list, &last->list);
+ else
+ list_add_rcu(&rule->list, &ops->rules_list);
+
if (ops->unresolved_rules) {
/*
* There are unresolved goto rules in the list, check if
@@ -381,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
r->target == rule->pref) {
- BUG_ON(r->ctarget != NULL);
+ BUG_ON(rtnl_dereference(r->ctarget) != NULL);
rcu_assign_pointer(r->ctarget, rule);
if (--ops->unresolved_rules == 0)
break;
@@ -395,11 +400,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (unresolved)
ops->unresolved_rules++;
- if (last)
- list_add_rcu(&rule->list, &last->list);
- else
- list_add_rcu(&rule->list, &ops->rules_list);
-
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
*/
if (ops->nr_goto_rules > 0) {
list_for_each_entry(tmp, &ops->rules_list, list) {
- if (tmp->ctarget == rule) {
+ if (rtnl_dereference(tmp->ctarget) == rule) {
rcu_assign_pointer(tmp->ctarget, NULL);
ops->unresolved_rules++;
}
@@ -545,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
- if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+ if (rule->action == FR_ACT_GOTO &&
+ rcu_dereference_raw(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 7adf50352918..23e9b2a6b4c8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -89,8 +89,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns,
- filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
rcu_read_unlock_bh();
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
*/
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
- struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ unsigned long memvalid = 0;
u32 tmp;
int k;
int pc;
+ BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
- fentry = &filter[pc];
+ const struct sock_filter *fentry = &filter[pc];
+ u32 f_k = fentry->k;
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += fentry->k;
+ A += f_k;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= fentry->k;
+ A -= f_k;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= fentry->k;
+ A *= f_k;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= fentry->k;
+ A /= f_k;
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= fentry->k;
+ A &= f_k;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= fentry->k;
+ A |= f_k;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= fentry->k;
+ A <<= f_k;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= fentry->k;
+ A >>= f_k;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += fentry->k;
+ pc += f_k;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A > f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A >= f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A == f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A & f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = fentry->k;
+ k = f_k;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = fentry->k;
+ k = f_k;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = fentry->k;
+ k = f_k;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + fentry->k;
+ k = X + f_k;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + fentry->k;
+ k = X + f_k;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + fentry->k;
+ k = X + f_k;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ ptr = load_pointer(skb, f_k, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = fentry->k;
+ A = f_k;
continue;
case BPF_S_LDX_IMM:
- X = fentry->k;
+ X = f_k;
continue;
case BPF_S_LD_MEM:
- A = mem[fentry->k];
+ A = (memvalid & (1UL << f_k)) ?
+ mem[f_k] : 0;
continue;
case BPF_S_LDX_MEM:
- X = mem[fentry->k];
+ X = (memvalid & (1UL << f_k)) ?
+ mem[f_k] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -276,14 +280,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return fentry->k;
+ return f_k;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- mem[fentry->k] = A;
+ memvalid |= 1UL << f_k;
+ mem[f_k] = A;
continue;
case BPF_S_STX:
- mem[fentry->k] = X;
+ memvalid |= 1UL << f_k;
+ mem[f_k] = X;
continue;
default:
WARN_ON(1);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 72aceb1fe4fa..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,10 +35,9 @@
* in any case.
*/
-long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
{
- int size, ct;
- long err;
+ int size, ct, err;
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
@@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
err = 0;
for (ct = 0; ct < m->msg_iovlen; ct++) {
- err += iov[ct].iov_len;
- /*
- * Goal is not to verify user data, but to prevent returning
- * negative value, which is interpreted as errno.
- * Overflow is still possible, but it is harmless.
- */
- if (err < 0)
- return -EMSGSIZE;
+ size_t len = iov[ct].iov_len;
+
+ if (len > INT_MAX - err) {
+ len = INT_MAX - err;
+ iov[ct].iov_len = len;
+ }
+ err += len;
}
return err;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b143173e3eb2..a5ff5a89f376 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -598,7 +598,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
spin_lock(&rps_map_lock);
- old_map = queue->rps_map;
+ old_map = rcu_dereference_protected(queue->rps_map,
+ lockdep_is_held(&rps_map_lock));
rcu_assign_pointer(queue->rps_map, map);
spin_unlock(&rps_map_lock);
@@ -677,7 +678,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
table = NULL;
spin_lock(&rps_dev_flow_lock);
- old_table = queue->rps_flow_table;
+ old_table = rcu_dereference_protected(queue->rps_flow_table,
+ lockdep_is_held(&rps_dev_flow_lock));
rcu_assign_pointer(queue->rps_flow_table, table);
spin_unlock(&rps_dev_flow_lock);
@@ -705,13 +707,17 @@ static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
struct netdev_rx_queue *first = queue->first;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
- if (queue->rps_map)
- call_rcu(&queue->rps_map->rcu, rps_map_release);
- if (queue->rps_flow_table)
- call_rcu(&queue->rps_flow_table->rcu,
- rps_dev_flow_table_release);
+ map = rcu_dereference_raw(queue->rps_map);
+ if (map)
+ call_rcu(&map->rcu, rps_map_release);
+
+ flow_table = rcu_dereference_raw(queue->rps_flow_table);
+ if (flow_table)
+ call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
if (atomic_dec_and_test(&first->count))
kfree(first);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..3f860261c5ee 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -42,7 +42,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id == 0);
- ng = old_ng = net->gen;
+ old_ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&net_mutex));
+ ng = old_ng;
if (old_ng->len >= id)
goto assign;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2c0df0f95b3d..33bc3823ac6f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -771,10 +771,10 @@ done:
static unsigned long num_arg(const char __user * user_buffer,
unsigned long maxlen, unsigned long *num)
{
- int i = 0;
+ int i;
*num = 0;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -789,9 +789,9 @@ static unsigned long num_arg(const char __user * user_buffer,
static int strn_len(const char __user * user_buffer, unsigned int maxlen)
{
- int i = 0;
+ int i;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -846,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
{
struct seq_file *seq = file->private_data;
struct pktgen_dev *pkt_dev = seq->private;
- int i = 0, max, len;
+ int i, max, len;
char name[16], valstr[32];
unsigned long value = 0;
char *pg_result = NULL;
@@ -860,13 +860,13 @@ static ssize_t pktgen_if_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- tmp = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ tmp = count_trail_chars(user_buffer, max);
if (tmp < 0) {
pr_warning("illegal format\n");
return tmp;
}
- i += tmp;
+ i = tmp;
/* Read variable name */
@@ -887,10 +887,11 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- char tb[count + 1];
- if (copy_from_user(tb, user_buffer, count))
+ size_t copy = min_t(size_t, count, 1023);
+ char tb[copy + 1];
+ if (copy_from_user(tb, user_buffer, copy))
return -EFAULT;
- tb[count] = 0;
+ tb[copy] = 0;
printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
(unsigned long)count, tb);
}
@@ -1764,7 +1765,7 @@ static ssize_t pktgen_thread_write(struct file *file,
{
struct seq_file *seq = file->private_data;
struct pktgen_thread *t = seq->private;
- int i = 0, max, len, ret;
+ int i, max, len, ret;
char name[40];
char *pg_result;
@@ -1773,12 +1774,12 @@ static ssize_t pktgen_thread_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- len = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ len = count_trail_chars(user_buffer, max);
if (len < 0)
return len;
- i += len;
+ i = len;
/* Read variable name */
@@ -1975,7 +1976,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
const char *ifname)
{
char b[IFNAMSIZ+5];
- int i = 0;
+ int i;
for (i = 0; ifname[i] != '@'; i++) {
if (i == IFNAMSIZ)
@@ -2519,8 +2520,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
{
if (pkt_dev->cflows) {
/* let go of the SAs if we have them */
- int i = 0;
- for (; i < pkt_dev->cflows; i++) {
+ int i;
+ for (i = 0; i < pkt_dev->cflows; i++) {
struct xfrm_state *x = pkt_dev->flows[i].x;
if (x) {
xfrm_state_put(x);
@@ -2611,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2975,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
skb = __netdev_alloc_skb(odev,
pkt_dev->cur_pkt_size + 64
diff --git a/net/core/sock.c b/net/core/sock.c
index 11db43632df8..fb6080111461 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1225,7 +1225,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sock_reset_flag(newsk, SOCK_DONE);
skb_queue_head_init(&newsk->sk_error_queue);
- filter = newsk->sk_filter;
+ filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
sk_filter_charge(newsk, filter);
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
- int allocated;
+ long allocated;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_add_return(amt, prot->memory_allocated);
+ allocated = atomic_long_add_return(amt, prot->memory_allocated);
/* Under limit. */
if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
prot->memory_allocated);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+ seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+ proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..385b6095fdc4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rps_sock_flow_table;
+ orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 117fb093dcaf..75c3582a7678 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -134,13 +134,41 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
+/*
+ * Congestion control of queued data packets via CCID decision.
+ *
+ * The TX CCID performs its congestion-control by indicating whether and when a
+ * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
+ * The following modes are supported via the symbolic constants below:
+ * - timer-based pacing (CCID returns a delay value in milliseconds);
+ * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
+ */
+
+enum ccid_dequeueing_decision {
+ CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */
+ CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */
+ CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */
+ CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */
+ CCID_PACKET_ERR = 0xF0000, /* error condition */
+};
+
+static inline int ccid_packet_dequeue_eval(const int return_code)
+{
+ if (return_code < 0)
+ return CCID_PACKET_ERR;
+ if (return_code == 0)
+ return CCID_PACKET_SEND_AT_ONCE;
+ if (return_code <= CCID_PACKET_DELAY_MAX)
+ return CCID_PACKET_DELAY;
+ return return_code;
+}
+
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
- int rc = 0;
if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
- rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
- return rc;
+ return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
+ return CCID_PACKET_SEND_AT_ONCE;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d850e291f87c..6576eae9e779 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -78,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- if (hc->tx_pipe < hc->tx_cwnd)
- return 0;
-
- return 1; /* XXX CCID should dequeue when ready instead of polling */
+ if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
+ return CCID_PACKET_WILL_DEQUEUE_LATER;
+ return CCID_PACKET_SEND_AT_ONCE;
}
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -115,6 +112,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -129,8 +127,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
if (hc->tx_rto > DCCP_RTO_MAX)
hc->tx_rto = DCCP_RTO_MAX;
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
/* adjust pipe, cwnd etc */
hc->tx_ssthresh = hc->tx_cwnd / 2;
if (hc->tx_ssthresh < 2)
@@ -146,6 +142,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
hc->tx_rpseq = 0;
hc->tx_rpdupack = -1;
ccid2_change_l_ack_ratio(sk, 1);
+
+ /* if we were blocked before, we may now send cwnd=1 packet */
+ if (sender_was_blocked)
+ tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ /* restart backed-off timer */
+ sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
out:
bh_unlock_sock(sk);
sock_put(sk);
@@ -434,6 +436,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
u64 ackno, seqno;
struct ccid2_seq *seqp;
unsigned char *vector;
@@ -631,6 +634,10 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
sk_stop_timer(sk, &hc->tx_rtotimer);
else
sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
+
+ /* check if incoming Acks allow pending packets to be sent */
+ if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
+ tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 9731c2dc1487..25cb6b216eda 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -81,6 +81,11 @@ struct ccid2_hc_tx_sock {
u64 tx_high_ack;
};
+static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
+{
+ return hc->tx_pipe >= hc->tx_cwnd;
+}
+
struct ccid2_hc_rx_sock {
int rx_data;
};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3060a60ed5ab..3d604e1349c0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -268,11 +268,11 @@ out:
sock_put(sk);
}
-/*
- * returns
- * > 0: delay (in msecs) that should pass before actually sending
- * = 0: can send immediately
- * < 0: error condition; do not send packet
+/**
+ * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
+ * @skb: next packet candidate to send on @sk
+ * This function uses the convention of ccid_packet_dequeue_eval() and
+ * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
*/
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
@@ -348,7 +348,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
/* set the nominal send time for the next following packet */
hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
- return 0;
+ return CCID_PACKET_SEND_AT_ONCE;
}
static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3eb264b60823..a8ed459508b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,8 +243,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
-extern void dccp_write_xmit(struct sock *sk, int block);
-extern void dccp_write_space(struct sock *sk);
+extern void dccp_write_xmit(struct sock *sk);
+extern void dccp_write_space(struct sock *sk);
+extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
extern void dccp_init_xmit_timers(struct sock *sk);
static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index a988fe9ffcba..45b91853f5ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -209,108 +209,150 @@ void dccp_write_space(struct sock *sk)
}
/**
- * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * dccp_wait_for_ccid - Await CCID send permission
* @sk: socket to wait for
- * @skb: current skb to pass on for waiting
- * @delay: sleep timeout in milliseconds (> 0)
- * This function is called by default when the socket is closed, and
- * when a non-zero linger time is set on the socket. For consistency
+ * @delay: timeout in jiffies
+ * This is used by CCIDs which need to delay the send time in process context.
*/
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
+static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
{
- struct dccp_sock *dp = dccp_sk(sk);
DEFINE_WAIT(wait);
- unsigned long jiffdelay;
- int rc;
+ long remaining;
+
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ sk->sk_write_pending++;
+ release_sock(sk);
+
+ remaining = schedule_timeout(delay);
+
+ lock_sock(sk);
+ sk->sk_write_pending--;
+ finish_wait(sk_sleep(sk), &wait);
+
+ if (signal_pending(current) || sk->sk_err)
+ return -1;
+ return remaining;
+}
+
+/**
+ * dccp_xmit_packet - Send data packet under control of CCID
+ * Transmits next-queued payload and informs CCID to account for the packet.
+ */
+static void dccp_xmit_packet(struct sock *sk)
+{
+ int err, len;
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
- do {
- dccp_pr_debug("delayed send by %d msec\n", delay);
- jiffdelay = msecs_to_jiffies(delay);
+ if (unlikely(skb == NULL))
+ return;
+ len = skb->len;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (sk->sk_state == DCCP_PARTOPEN) {
+ const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+ /*
+ * See 8.1.5 - Handshake Completion.
+ *
+ * For robustness we resend Confirm options until the client has
+ * entered OPEN. During the initial feature negotiation, the MPS
+ * is smaller than usual, reduced by the Change/Confirm options.
+ */
+ if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+ DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+ dccp_send_ack(sk);
+ dccp_feat_list_purge(&dp->dccps_featneg);
+ }
- sk->sk_write_pending++;
- release_sock(sk);
- schedule_timeout(jiffdelay);
- lock_sock(sk);
- sk->sk_write_pending--;
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ inet_csk(sk)->icsk_rto,
+ DCCP_RTO_MAX);
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+ } else if (dccp_ack_pending(sk)) {
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+ } else {
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
+ }
+
+ err = dccp_transmit_skb(sk, skb);
+ if (err)
+ dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+ /*
+ * Register this one as sent even if an error occurred. To the remote
+ * end a local packet drop is indistinguishable from network loss, i.e.
+ * any local drop will eventually be reported via receiver feedback.
+ */
+ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+}
- if (sk->sk_err)
- goto do_error;
- if (signal_pending(current))
- goto do_interrupted;
+/**
+ * dccp_flush_write_queue - Drain queue at end of connection
+ * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
+ * happen that the TX queue is not empty at the end of a connection. We give the
+ * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
+ * returns with a non-empty write queue, it will be purged later.
+ */
+void dccp_flush_write_queue(struct sock *sk, long *time_budget)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct sk_buff *skb;
+ long delay, rc;
+ while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
- } while ((delay = rc) > 0);
-out:
- finish_wait(sk_sleep(sk), &wait);
- return rc;
-
-do_error:
- rc = -EPIPE;
- goto out;
-do_interrupted:
- rc = -EINTR;
- goto out;
+
+ switch (ccid_packet_dequeue_eval(rc)) {
+ case CCID_PACKET_WILL_DEQUEUE_LATER:
+ /*
+ * If the CCID determines when to send, the next sending
+ * time is unknown or the CCID may not even send again
+ * (e.g. remote host crashes or lost Ack packets).
+ */
+ DCCP_WARN("CCID did not manage to send all packets\n");
+ return;
+ case CCID_PACKET_DELAY:
+ delay = msecs_to_jiffies(rc);
+ if (delay > *time_budget)
+ return;
+ rc = dccp_wait_for_ccid(sk, delay);
+ if (rc < 0)
+ return;
+ *time_budget -= (delay - rc);
+ /* check again if we can send now */
+ break;
+ case CCID_PACKET_SEND_AT_ONCE:
+ dccp_xmit_packet(sk);
+ break;
+ case CCID_PACKET_ERR:
+ skb_dequeue(&sk->sk_write_queue);
+ kfree_skb(skb);
+ dccp_pr_debug("packet discarded due to err=%ld\n", rc);
+ }
+ }
}
-void dccp_write_xmit(struct sock *sk, int block)
+void dccp_write_xmit(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
while ((skb = skb_peek(&sk->sk_write_queue))) {
- int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
- if (err > 0) {
- if (!block) {
- sk_reset_timer(sk, &dp->dccps_xmit_timer,
- msecs_to_jiffies(err)+jiffies);
- break;
- } else
- err = dccp_wait_for_ccid(sk, skb, err);
- if (err && err != -EINTR)
- DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
- }
+ int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
- skb_dequeue(&sk->sk_write_queue);
- if (err == 0) {
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- const int len = skb->len;
-
- if (sk->sk_state == DCCP_PARTOPEN) {
- const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
- /*
- * See 8.1.5 - Handshake Completion.
- *
- * For robustness we resend Confirm options until the client has
- * entered OPEN. During the initial feature negotiation, the MPS
- * is smaller than usual, reduced by the Change/Confirm options.
- */
- if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
- DCCP_WARN("Payload too large (%d) for featneg.\n", len);
- dccp_send_ack(sk);
- dccp_feat_list_purge(&dp->dccps_featneg);
- }
-
- inet_csk_schedule_ack(sk);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- inet_csk(sk)->icsk_rto,
- DCCP_RTO_MAX);
- dcb->dccpd_type = DCCP_PKT_DATAACK;
- } else if (dccp_ack_pending(sk))
- dcb->dccpd_type = DCCP_PKT_DATAACK;
- else
- dcb->dccpd_type = DCCP_PKT_DATA;
-
- err = dccp_transmit_skb(sk, skb);
- ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
- if (err)
- DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
- err);
- } else {
- dccp_pr_debug("packet discarded due to err=%d\n", err);
+ switch (ccid_packet_dequeue_eval(rc)) {
+ case CCID_PACKET_WILL_DEQUEUE_LATER:
+ return;
+ case CCID_PACKET_DELAY:
+ sk_reset_timer(sk, &dp->dccps_xmit_timer,
+ jiffies + msecs_to_jiffies(rc));
+ return;
+ case CCID_PACKET_SEND_AT_ONCE:
+ dccp_xmit_packet(sk);
+ break;
+ case CCID_PACKET_ERR:
+ skb_dequeue(&sk->sk_write_queue);
kfree_skb(skb);
+ dccp_pr_debug("packet discarded due to err=%d\n", rc);
}
}
}
@@ -622,7 +664,6 @@ void dccp_send_close(struct sock *sk, const int active)
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
if (active) {
- dccp_write_xmit(sk, 1);
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, prio));
/*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7e5fc04eb6d1..ef343d53fcea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -726,7 +726,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto out_discard;
skb_queue_tail(&sk->sk_write_queue, skb);
- dccp_write_xmit(sk,0);
+ /*
+ * The xmit_timer is set if the TX CCID is rate-based and will expire
+ * when congestion control permits to release further packets into the
+ * network. Window-based CCIDs do not use this timer.
+ */
+ if (!timer_pending(&dp->dccps_xmit_timer))
+ dccp_write_xmit(sk);
out_release:
release_sock(sk);
return rc ? : len;
@@ -951,9 +957,22 @@ void dccp_close(struct sock *sk, long timeout)
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
} else if (sk->sk_state != DCCP_CLOSED) {
+ /*
+ * Normal connection termination. May need to wait if there are
+ * still packets in the TX queue that are delayed by the CCID.
+ */
+ dccp_flush_write_queue(sk, &timeout);
dccp_terminate_connection(sk);
}
+ /*
+ * Flush write queue. This may be necessary in several cases:
+ * - we have been closed by the peer but still have application data;
+ * - abortive termination (unread data or zero linger time),
+ * - normal termination but queue could not be flushed within time limit
+ */
+ __skb_queue_purge(&sk->sk_write_queue);
+
sk_stream_wait_close(sk, timeout);
adjudge_to_death:
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1a9aa05d4dc4..7587870b7040 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
sock_put(sk);
}
-/* Transmit-delay timer: used by the CCIDs to delay actual send time */
-static void dccp_write_xmit_timer(unsigned long data)
+/**
+ * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
+ * See the comments above %ccid_dequeueing_decision for supported modes.
+ */
+static void dccp_write_xmitlet(unsigned long data)
{
struct sock *sk = (struct sock *)data;
- struct dccp_sock *dp = dccp_sk(sk);
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
- sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+ sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
else
- dccp_write_xmit(sk, 0);
+ dccp_write_xmit(sk);
bh_unlock_sock(sk);
- sock_put(sk);
}
-static void dccp_init_write_xmit_timer(struct sock *sk)
+static void dccp_write_xmit_timer(unsigned long data)
{
- struct dccp_sock *dp = dccp_sk(sk);
-
- setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
- (unsigned long)sk);
+ dccp_write_xmitlet(data);
+ sock_put((struct sock *)data);
}
void dccp_init_xmit_timers(struct sock *sk)
{
- dccp_init_write_xmit_timer(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
+ setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+ (unsigned long)sk);
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..a76b78de679f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
static DEFINE_RWLOCK(dn_hash_lock);
static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
-static atomic_t decnet_memory_allocated;
+static atomic_long_t decnet_memory_allocated;
static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e23288..28f8b5e5f73b 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
/* Reasonable defaults, I hope, based on tcp's defaults */
-int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
.data = &sysctl_decnet_mem,
.maxlen = sizeof(sysctl_decnet_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "decnet_rmem",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 36e27c2107de..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
hlist_del(node);
fib_table_flush(tb);
- kfree(tb);
+ fib_free_table(tb);
}
}
kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 43e1c594ce8f..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -120,11 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
struct fib_node *f;
hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
- struct hlist_head __rcu *new_head;
+ struct hlist_head *new_head;
hlist_del_rcu(&f->fn_hash);
- new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+ new_head = rcu_dereference_protected(fz->fz_hash, 1) +
+ fn_hash(f->fn_key, fz);
hlist_add_head_rcu(&f->fn_hash, new_head);
}
}
@@ -179,8 +180,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
memcpy(&nfz, fz, sizeof(nfz));
write_seqlock_bh(&fz->fz_lock);
- old_ht = fz->fz_hash;
- nfz.fz_hash = ht;
+ old_ht = rcu_dereference_protected(fz->fz_hash, 1);
+ RCU_INIT_POINTER(nfz.fz_hash, ht);
nfz.fz_hashmask = new_hashmask;
nfz.fz_divisor = new_divisor;
fn_rebuild_zone(&nfz, old_ht, old_divisor);
@@ -236,7 +237,7 @@ fn_new_zone(struct fn_hash *table, int z)
seqlock_init(&fz->fz_lock);
fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
fz->fz_hashmask = fz->fz_divisor - 1;
- fz->fz_hash = fz->fz_embedded_hash;
+ RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
fz->fz_order = z;
fz->fz_revorder = 32 - z;
fz->fz_mask = inet_make_mask(z);
@@ -272,7 +273,7 @@ int fib_table_lookup(struct fib_table *tb,
for (fz = rcu_dereference(t->fn_zone_list);
fz != NULL;
fz = rcu_dereference(fz->fz_next)) {
- struct hlist_head __rcu *head;
+ struct hlist_head *head;
struct hlist_node *node;
struct fib_node *f;
__be32 k;
@@ -282,7 +283,7 @@ int fib_table_lookup(struct fib_table *tb,
seq = read_seqbegin(&fz->fz_lock);
k = fz_key(flp->fl4_dst, fz);
- head = &fz->fz_hash[fn_hash(k, fz)];
+ head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
hlist_for_each_entry_rcu(f, node, head, fn_hash) {
if (f->fn_key != k)
continue;
@@ -311,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
struct fib_info *last_resort;
struct fn_hash *t = (struct fn_hash *)tb->tb_data;
struct fn_zone *fz = t->fn_zones[0];
+ struct hlist_head *head;
if (fz == NULL)
return;
@@ -320,7 +322,8 @@ void fib_table_select_default(struct fib_table *tb,
order = -1;
rcu_read_lock();
- hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
+ head = rcu_dereference(fz->fz_hash);
+ hlist_for_each_entry_rcu(f, node, head, fn_hash) {
struct fib_alias *fa;
list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
@@ -374,7 +377,7 @@ out:
/* Insert node F to FZ. */
static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
{
- struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
hlist_add_head_rcu(&f->fn_hash, head);
}
@@ -382,7 +385,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
/* Return the node in FZ matching KEY. */
static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
{
- struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
struct hlist_node *node;
struct fib_node *f;
@@ -662,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
static int fn_flush_list(struct fn_zone *fz, int idx)
{
- struct hlist_head *head = &fz->fz_hash[idx];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
struct hlist_node *node, *n;
struct fib_node *f;
int found = 0;
@@ -713,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
return found;
}
+void fib_free_table(struct fib_table *tb)
+{
+ struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+ struct fn_zone *fz, *next;
+
+ next = table->fn_zone_list;
+ while (next != NULL) {
+ fz = next;
+ next = fz->fz_next;
+
+ if (fz->fz_hash != fz->fz_embedded_hash)
+ fz_hash_free(fz->fz_hash, fz->fz_divisor);
+
+ kfree(fz);
+ }
+
+ kfree(tb);
+}
static inline int
fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -761,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
struct fn_zone *fz)
{
int h, s_h;
+ struct hlist_head *head = rcu_dereference(fz->fz_hash);
- if (fz->fz_hash == NULL)
+ if (head == NULL)
return skb->len;
s_h = cb->args[3];
for (h = s_h; h < fz->fz_divisor; h++) {
- if (hlist_empty(&fz->fz_hash[h]))
+ if (hlist_empty(head + h))
continue;
- if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) {
+ if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
cb->args[3] = h;
return -1;
}
@@ -872,7 +894,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
if (!iter->zone->fz_nent)
continue;
- iter->hash_head = iter->zone->fz_hash;
+ iter->hash_head = rcu_dereference(iter->zone->fz_hash);
maxslot = iter->zone->fz_divisor;
for (iter->bucket = 0; iter->bucket < maxslot;
@@ -957,7 +979,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
goto out;
iter->bucket = 0;
- iter->hash_head = iter->zone->fz_hash;
+ iter->hash_head = rcu_dereference(iter->zone->fz_hash);
hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
list_for_each_entry(fa, &fn->fn_alias, fa_list) {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c8..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
{
- if (res->fi != NULL)
- fib_info_put(res->fi);
+ /* we used to play games with refcounts, but we now use RCU */
res->fi = fi;
- if (fi != NULL)
- atomic_inc(&fi->fib_clntref);
}
#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b14450895102..200eb538fbb3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
return found;
}
+void fib_free_table(struct fib_table *tb)
+{
+ kfree(tb);
+}
+
void fib_table_select_default(struct fib_table *tb,
const struct flowi *flp,
struct fib_result *res)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index caea6885fdbd..c6933f2ea310 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -22,7 +22,7 @@
#include <net/gre.h>
-static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
+static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -51,7 +51,8 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
goto err_out;
spin_lock(&gre_proto_lock);
- if (gre_proto[version] != proto)
+ if (rcu_dereference_protected(gre_proto[version],
+ lockdep_is_held(&gre_proto_lock)) != proto)
goto err_out_unlock;
rcu_assign_pointer(gre_proto[version], NULL);
spin_unlock(&gre_proto_lock);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
(void) ip_mc_leave_src(sk, iml, in_dev);
- if (in_dev != NULL) {
+ if (in_dev != NULL)
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
- in_dev_put(in_dev);
- }
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba8042665849..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
struct inet_sock *inet = inet_sk(sk);
entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
entry.dport = ntohs(inet->inet_dport);
entry.userlocks = sk->sk_userlocks;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- struct rtattr *bc = NULL;
+ const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
- bc = (struct rtattr *)(r + 1);
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(RTA_DATA(bc),
- RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc),
+ nla_len(bc), &entry))
continue;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
#define node_height(x) x->avl_height
#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
static const struct inet_peer peer_fake_node = {
- .avl_left = peer_avl_empty,
- .avl_right = peer_avl_empty,
+ .avl_left = peer_avl_empty_rcu,
+ .avl_right = peer_avl_empty_rcu,
.avl_height = 0
};
static struct {
- struct inet_peer *root;
+ struct inet_peer __rcu *root;
spinlock_t lock;
int total;
} peers = {
- .root = peer_avl_empty,
+ .root = peer_avl_empty_rcu,
.lock = __SPIN_LOCK_UNLOCKED(peers.lock),
.total = 0,
};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
*/
#define lookup(_daddr, _stack) \
({ \
- struct inet_peer *u, **v; \
+ struct inet_peer *u; \
+ struct inet_peer __rcu **v; \
\
stackptr = _stack; \
*stackptr++ = &peers.root; \
- for (u = peers.root; u != peer_avl_empty; ) { \
+ for (u = rcu_dereference_protected(peers.root, \
+ lockdep_is_held(&peers.lock)); \
+ u != peer_avl_empty; ) { \
if (_daddr == u->v4daddr) \
break; \
if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
else \
v = &u->avl_right; \
*stackptr++ = v; \
- u = *v; \
+ u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
} \
u; \
})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
/* Called with local BH disabled and the pool lock held. */
#define lookup_rightempty(start) \
({ \
- struct inet_peer *u, **v; \
+ struct inet_peer *u; \
+ struct inet_peer __rcu **v; \
*stackptr++ = &start->avl_left; \
v = &start->avl_left; \
- for (u = *v; u->avl_right != peer_avl_empty; ) { \
+ for (u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
+ u->avl_right != peer_avl_empty_rcu; ) { \
v = &u->avl_right; \
*stackptr++ = v; \
- u = *v; \
+ u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
} \
u; \
})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
* Variable names are the proof of operation correctness.
* Look into mm/map_avl.c for more detail description of the ideas.
*/
-static void peer_avl_rebalance(struct inet_peer **stack[],
- struct inet_peer ***stackend)
+static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
+ struct inet_peer __rcu ***stackend)
{
- struct inet_peer **nodep, *node, *l, *r;
+ struct inet_peer __rcu **nodep;
+ struct inet_peer *node, *l, *r;
int lh, rh;
while (stackend > stack) {
nodep = *--stackend;
- node = *nodep;
- l = node->avl_left;
- r = node->avl_right;
+ node = rcu_dereference_protected(*nodep,
+ lockdep_is_held(&peers.lock));
+ l = rcu_dereference_protected(node->avl_left,
+ lockdep_is_held(&peers.lock));
+ r = rcu_dereference_protected(node->avl_right,
+ lockdep_is_held(&peers.lock));
lh = node_height(l);
rh = node_height(r);
if (lh > rh + 1) { /* l: RH+2 */
struct inet_peer *ll, *lr, *lrl, *lrr;
int lrh;
- ll = l->avl_left;
- lr = l->avl_right;
+ ll = rcu_dereference_protected(l->avl_left,
+ lockdep_is_held(&peers.lock));
+ lr = rcu_dereference_protected(l->avl_right,
+ lockdep_is_held(&peers.lock));
lrh = node_height(lr);
if (lrh <= node_height(ll)) { /* ll: RH+1 */
- node->avl_left = lr; /* lr: RH or RH+1 */
- node->avl_right = r; /* r: RH */
+ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
+ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = lrh + 1; /* RH+1 or RH+2 */
- l->avl_left = ll; /* ll: RH+1 */
- l->avl_right = node; /* node: RH+1 or RH+2 */
+ RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
+ RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
l->avl_height = node->avl_height + 1;
- *nodep = l;
+ RCU_INIT_POINTER(*nodep, l);
} else { /* ll: RH, lr: RH+1 */
- lrl = lr->avl_left; /* lrl: RH or RH-1 */
- lrr = lr->avl_right; /* lrr: RH or RH-1 */
- node->avl_left = lrr; /* lrr: RH or RH-1 */
- node->avl_right = r; /* r: RH */
+ lrl = rcu_dereference_protected(lr->avl_left,
+ lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
+ lrr = rcu_dereference_protected(lr->avl_right,
+ lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
+ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
+ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = rh + 1; /* node: RH+1 */
- l->avl_left = ll; /* ll: RH */
- l->avl_right = lrl; /* lrl: RH or RH-1 */
+ RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
+ RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
l->avl_height = rh + 1; /* l: RH+1 */
- lr->avl_left = l; /* l: RH+1 */
- lr->avl_right = node; /* node: RH+1 */
+ RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
+ RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
lr->avl_height = rh + 2;
- *nodep = lr;
+ RCU_INIT_POINTER(*nodep, lr);
}
} else if (rh > lh + 1) { /* r: LH+2 */
struct inet_peer *rr, *rl, *rlr, *rll;
int rlh;
- rr = r->avl_right;
- rl = r->avl_left;
+ rr = rcu_dereference_protected(r->avl_right,
+ lockdep_is_held(&peers.lock));
+ rl = rcu_dereference_protected(r->avl_left,
+ lockdep_is_held(&peers.lock));
rlh = node_height(rl);
if (rlh <= node_height(rr)) { /* rr: LH+1 */
- node->avl_right = rl; /* rl: LH or LH+1 */
- node->avl_left = l; /* l: LH */
+ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
+ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = rlh + 1; /* LH+1 or LH+2 */
- r->avl_right = rr; /* rr: LH+1 */
- r->avl_left = node; /* node: LH+1 or LH+2 */
+ RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
+ RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
r->avl_height = node->avl_height + 1;
- *nodep = r;
+ RCU_INIT_POINTER(*nodep, r);
} else { /* rr: RH, rl: RH+1 */
- rlr = rl->avl_right; /* rlr: LH or LH-1 */
- rll = rl->avl_left; /* rll: LH or LH-1 */
- node->avl_right = rll; /* rll: LH or LH-1 */
- node->avl_left = l; /* l: LH */
+ rlr = rcu_dereference_protected(rl->avl_right,
+ lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
+ rll = rcu_dereference_protected(rl->avl_left,
+ lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
+ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
+ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = lh + 1; /* node: LH+1 */
- r->avl_right = rr; /* rr: LH */
- r->avl_left = rlr; /* rlr: LH or LH-1 */
+ RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
+ RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
r->avl_height = lh + 1; /* r: LH+1 */
- rl->avl_right = r; /* r: LH+1 */
- rl->avl_left = node; /* node: LH+1 */
+ RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
+ RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
rl->avl_height = lh + 2;
- *nodep = rl;
+ RCU_INIT_POINTER(*nodep, rl);
}
} else {
node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
#define link_to_pool(n) \
do { \
n->avl_height = 1; \
- n->avl_left = peer_avl_empty; \
- n->avl_right = peer_avl_empty; \
- smp_wmb(); /* lockless readers can catch us now */ \
- **--stackptr = n; \
+ n->avl_left = peer_avl_empty_rcu; \
+ n->avl_right = peer_avl_empty_rcu; \
+ /* lockless readers can catch us now */ \
+ rcu_assign_pointer(**--stackptr, n); \
peer_avl_rebalance(stack, stackptr); \
} while (0)
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
* We use refcnt=-1 to alert lockless readers this entry is deleted.
*/
if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
- struct inet_peer **stack[PEER_MAXDEPTH];
- struct inet_peer ***stackptr, ***delp;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH];
+ struct inet_peer __rcu ***stackptr, ***delp;
if (lookup(p->v4daddr, stack) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
- if (p->avl_left == peer_avl_empty) {
+ if (p->avl_left == peer_avl_empty_rcu) {
*delp[0] = p->avl_right;
--stackptr;
} else {
/* look for a node to insert instead of p */
struct inet_peer *t;
t = lookup_rightempty(p);
- BUG_ON(*stackptr[-1] != t);
+ BUG_ON(rcu_dereference_protected(*stackptr[-1],
+ lockdep_is_held(&peers.lock)) != t);
**--stackptr = t->avl_left;
/* t is removed, t->v4daddr > x->v4daddr for any
* x in p->avl_left subtree.
* Put t in the old place of p. */
- *delp[0] = t;
+ RCU_INIT_POINTER(*delp[0], t);
t->avl_left = p->avl_left;
t->avl_right = p->avl_right;
t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
struct inet_peer *inet_getpeer(__be32 daddr, int create)
{
struct inet_peer *p;
- struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
/* Look up for the address quickly, lockless.
* Because of a concurrent writer, we might not find an existing entry.
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ffcbe369b7..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1072,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
ipgre_tunnel_unlink(ign, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
t->parms.i_key = p.i_key;
@@ -1324,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@@ -1335,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
tunnel->hlen = sizeof(struct iphdr) + 4;
dev_hold(dev);
- rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
}
@@ -1382,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
if ((err = register_netdev(ign->fb_tunnel_dev)))
goto err_reg_dev;
+ rcu_assign_pointer(ign->tunnels_wc[0],
+ netdev_priv(ign->fb_tunnel_dev));
return 0;
err_reg_dev:
- free_netdev(ign->fb_tunnel_dev);
+ ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
but receiver should be enough clever f.e. to forward mtrace requests,
sent to multicast group to reach destination designated router.
*/
-struct ip_ra_chain *ip_ra_chain;
+struct ip_ra_chain __rcu *ip_ra_chain;
static DEFINE_SPINLOCK(ip_ra_lock);
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
int ip_ra_control(struct sock *sk, unsigned char on,
void (*destructor)(struct sock *))
{
- struct ip_ra_chain *ra, *new_ra, **rap;
+ struct ip_ra_chain *ra, *new_ra;
+ struct ip_ra_chain __rcu **rap;
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
spin_lock_bh(&ip_ra_lock);
- for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
+ for (rap = &ip_ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&ip_ra_lock))) != NULL;
+ rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
spin_unlock_bh(&ip_ra_lock);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e9b816e6cd73..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -676,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
}
t = netdev_priv(dev);
ipip_tunnel_unlink(ipn, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace0..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d80..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e43..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
return rcu_dereference(nf_nat_protos[protonum]);
}
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
- const struct nf_nat_protocol *p;
-
- rcu_read_lock();
- p = __nf_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &nf_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
- module_put(p->me);
-}
-
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
+static const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+ const struct nf_nat_protocol *p;
+
+ rcu_read_lock();
+ p = __nf_nat_proto_find(protonum);
+ if (!try_module_get(p->me))
+ p = &nf_nat_unknown_protocol;
+ rcu_read_unlock();
+
+ return p;
+}
+
+static void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+ module_put(p->me);
+}
+
static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..1b48eb1ed453 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
local_bh_enable();
socket_seq_show(seq);
- seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+ seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d mem %d\n",
+ atomic_long_read(&tcp_memory_allocated));
+ seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_read(&udp_memory_allocated));
+ atomic_long_read(&udp_memory_allocated));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c24411c..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
+const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
/*
* Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int hash = protocol & (MAX_INET_PROTOS - 1);
- return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
+ return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+ NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet_add_protocol);
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
+ ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+ prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
*/
struct rt_hash_bucket {
- struct rtable *chain;
+ struct rtable __rcu *chain;
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- if (!rt_hash_table[st->bucket].chain)
+ if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
continue;
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
{
struct rt_cache_iter_state *st = seq->private;
- r = r->dst.rt_next;
+ r = rcu_dereference_bh(r->dst.rt_next);
while (!r) {
rcu_read_unlock_bh();
do {
if (--st->bucket < 0)
return NULL;
- } while (!rt_hash_table[st->bucket].chain);
+ } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
rcu_read_lock_bh();
- r = rt_hash_table[st->bucket].chain;
+ r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
}
- return rcu_dereference_bh(r);
+ return r;
}
static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
cond_resched();
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_raw(rt_hash_table[i].chain);
if (!rth)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
- struct rtable ** prev, * p;
+ struct rtable __rcu **prev;
+ struct rtable *p;
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
- for (tail = rth; tail; tail = tail->dst.rt_next)
+ for (tail = rth; tail;
+ tail = rcu_dereference_protected(tail->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
- for (p = *prev; p; p = next) {
- next = p->dst.rt_next;
+ for (p = rcu_dereference_protected(*prev,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ p != NULL;
+ p = next) {
+ next = rcu_dereference_protected(p->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
} else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
}
}
#else
- rth = rt_hash_table[i].chain;
- rt_hash_table[i].chain = NULL;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
- next = rth->dst.rt_next;
+ next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
}
}
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
while (aux != rth) {
if (compare_hash_inputs(&aux->fl, &rth->fl))
return 0;
- aux = aux->dst.rt_next;
+ aux = rcu_dereference_protected(aux->dst.rt_next, 1);
}
return ONE;
}
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
samples++;
- if (*rthp == NULL)
+ if (rcu_dereference_raw(*rthp) == NULL)
continue;
length = 0;
spin_lock_bh(rt_hash_lock_addr(i));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
prefetch(rth->dst.rt_next);
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
static unsigned long last_gc;
static int rover;
static int equilibrium;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long now = jiffies;
int goal;
int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
k = (k + 1) & rt_hash_mask;
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
while (rth) {
length += has_noalias(head, rth);
- rth = rth->dst.rt_next;
+ rth = rcu_dereference_protected(rth->dst.rt_next, 1);
}
return length >> FRACT_BITS;
}
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb, int ifindex)
{
- struct rtable *rth, **rthp;
+ struct rtable *rth, *cand;
+ struct rtable __rcu **rthp, **candp;
unsigned long now;
- struct rtable *cand, **candp;
u32 min_score;
int chain_length;
int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
static void rt_del(unsigned hash, struct rtable *rt)
{
- struct rtable **rthp, *aux;
+ struct rtable __rcu **rthp;
+ struct rtable *aux;
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
- while ((aux = *rthp) != NULL) {
+ while ((aux = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->dst.rt_next;
rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
{
int i, k;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
rt_genid(net));
- rthp=&rt_hash_table[hash].chain;
+ rthp = &rt_hash_table[hash].chain;
while ((rth = rcu_dereference(*rthp)) != NULL) {
struct rtable *rt;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..e91911d7aae2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "tcp_wmem",
@@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..081419969485 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
-atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < 8 || val > MAX_TCP_WINDOW) {
+ if (val < 64 || val > MAX_TCP_WINDOW) {
err = -EINVAL;
break;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353d..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
sizeof(struct sk_buff);
- if (sk->sk_sndbuf < 3 * sndmem)
- sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+ if (sk->sk_sndbuf < 3 * sndmem) {
+ sk->sk_sndbuf = 3 * sndmem;
+ if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+ sk->sk_sndbuf = sysctl_tcp_wmem[2];
+ }
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2a0a37..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
#include <net/protocol.h>
#include <net/xfrm.h>
-static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
-static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
-static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
+static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
{
- struct xfrm_tunnel **pprev;
+ struct xfrm_tunnel __rcu **pprev;
+ struct xfrm_tunnel *t;
+
int ret = -EEXIST;
int priority = handler->priority;
mutex_lock(&tunnel4_mutex);
- for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
- if ((*pprev)->priority > priority)
+ for (pprev = fam_handlers(family);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel4_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
break;
- if ((*pprev)->priority == priority)
+ if (t->priority == priority)
goto err;
}
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
{
- struct xfrm_tunnel **pprev;
+ struct xfrm_tunnel __rcu **pprev;
+ struct xfrm_tunnel *t;
int ret = -ENOENT;
mutex_lock(&tunnel4_mutex);
- for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
- if (*pprev == handler) {
+ for (pprev = fam_handlers(family);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel4_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
*pprev = handler->next;
ret = 0;
break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b3f7e8cf18ac..5e0a3a582a59 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
-int sysctl_udp_mem[3] __read_mostly;
+long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);
-atomic_t udp_memory_allocated;
+atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter) {
+ if (rcu_dereference_raw(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ec7a91d9e865..e048ec62d109 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -836,7 +836,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
+ unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
unsigned long regen_advance;
int tmp_plen;
int ret = 0;
@@ -886,12 +886,13 @@ retry:
goto out;
}
memcpy(&addr.s6_addr[8], idev->rndid, 8);
+ age = (jiffies - ifp->tstamp) / HZ;
tmp_valid_lft = min_t(__u32,
ifp->valid_lft,
- idev->cnf.temp_valid_lft);
+ idev->cnf.temp_valid_lft + age);
tmp_prefered_lft = min_t(__u32,
ifp->prefered_lft,
- idev->cnf.temp_prefered_lft -
+ idev->cnf.temp_prefered_lft + age -
idev->cnf.max_desync_factor);
tmp_plen = ifp->prefix_len;
max_addresses = idev->cnf.max_addresses;
@@ -1426,8 +1427,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
- if (addrconf_dad_end(ifp))
+ if (addrconf_dad_end(ifp)) {
+ in6_ifa_put(ifp);
return;
+ }
if (net_ratelimit())
printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
@@ -2021,10 +2024,11 @@ ok:
ipv6_ifa_notify(0, ift);
}
- if (create && in6_dev->cnf.use_tempaddr > 0) {
+ if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
/*
* When a new public address is created as described in [ADDRCONF],
- * also create a new temporary address.
+ * also create a new temporary address. Also create a temporary
+ * address if it's enabled but no temporary address currently exists.
*/
read_unlock_bh(&in6_dev->lock);
ipv6_create_tempaddr(ifp, NULL);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c2c0f89397b1..2a59610c2a58 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1284,6 +1284,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
t = netdev_priv(dev);
ip6_tnl_unlink(ip6n, t);
+ synchronize_net();
err = ip6_tnl_change(t, &p);
ip6_tnl_link(ip6n, t);
netdev_state_change(dev);
@@ -1371,6 +1372,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0553867a317f..d1770e061c08 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,6 +343,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
+ if (!capable(CAP_NET_ADMIN)) {
+ retv = -EPERM;
+ break;
+ }
if (optlen < sizeof(int))
goto e_inval;
/* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 44d2eeac089b..448464844a25 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,10 +5,15 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
+config NF_DEFRAG_IPV6
+ tristate
+ default n
+
config NF_CONNTRACK_IPV6
tristate "IPv6 connection tracking support"
depends on INET && IPV6 && NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV6
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3f8e4a3d83ce..0a432c9b0795 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -12,11 +12,14 @@ obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
-nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+# defrag
+nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 51df035897e7..455582384ece 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 489d71b844ac..3a3f129a44cb 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -625,21 +625,24 @@ int nf_ct_frag6_init(void)
inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
+#ifdef CONFIG_SYSCTL
nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
nf_ct_frag6_sysctl_table);
if (!nf_ct_frag6_sysctl_header) {
inet_frags_fini(&nf_frags);
return -ENOMEM;
}
+#endif
return 0;
}
void nf_ct_frag6_cleanup(void)
{
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_frag6_sysctl_header);
nf_ct_frag6_sysctl_header = NULL;
-
+#endif
inet_frags_fini(&nf_frags);
nf_init_frags.low_thresh = 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d082eaeefa25..24b3558b8e67 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,8 @@ static const struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_SENTINEL
};
@@ -134,6 +136,8 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9bb936ae2452..9a7978fdc02a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,13 +25,14 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
int hash = protocol & (MAX_INET_PROTOS - 1);
- return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
+ return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet6_add_protocol);
@@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 45e6efb7f171..86c39526ba5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -373,7 +373,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
- if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
+ if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c7ba3149633f..0f2766453759 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+ (FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25661f968f3f..fc328339be99 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2741,6 +2741,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
kfree(net->ipv6.ip6_prohibit_entry);
kfree(net->ipv6.ip6_blk_hole_entry);
#endif
+ dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
static struct pernet_operations ip6_route_net_ops = {
@@ -2832,5 +2833,6 @@ void ip6_route_cleanup(void)
xfrm6_fini();
fib6_gc_cleanup();
unregister_pernet_subsys(&ip6_route_net_ops);
+ dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 367a6cc584cc..d6bfaec3bbbf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -963,6 +963,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
}
t = netdev_priv(dev);
ipip6_tunnel_unlink(sitn, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index d9864725d0c6..4f3cec12aa85 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,23 +30,26 @@
#include <net/protocol.h>
#include <net/xfrm.h>
-static struct xfrm6_tunnel *tunnel6_handlers __read_mostly;
-static struct xfrm6_tunnel *tunnel46_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
static DEFINE_MUTEX(tunnel6_mutex);
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
{
- struct xfrm6_tunnel **pprev;
+ struct xfrm6_tunnel __rcu **pprev;
+ struct xfrm6_tunnel *t;
int ret = -EEXIST;
int priority = handler->priority;
mutex_lock(&tunnel6_mutex);
for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
- *pprev; pprev = &(*pprev)->next) {
- if ((*pprev)->priority > priority)
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel6_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
break;
- if ((*pprev)->priority == priority)
+ if (t->priority == priority)
goto err;
}
@@ -65,14 +68,17 @@ EXPORT_SYMBOL(xfrm6_tunnel_register);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
{
- struct xfrm6_tunnel **pprev;
+ struct xfrm6_tunnel __rcu **pprev;
+ struct xfrm6_tunnel *t;
int ret = -ENOENT;
mutex_lock(&tunnel6_mutex);
for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
- *pprev; pprev = &(*pprev)->next) {
- if (*pprev == handler) {
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel6_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
*pprev = handler->next;
ret = 0;
break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c84dad432114..91def93bec85 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -527,7 +527,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter) {
+ if (rcu_dereference_raw(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1712af1c7b3f..c64ce0a0bb03 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,10 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
+static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+
static inline struct l2tp_net *l2tp_pernet(struct net *net)
{
BUG_ON(!net);
@@ -118,6 +122,34 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
return net_generic(net, l2tp_net_id);
}
+
+/* Tunnel reference counts. Incremented per session that is added to
+ * the tunnel.
+ */
+static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
+{
+ atomic_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
+{
+ if (atomic_dec_and_test(&tunnel->ref_count))
+ l2tp_tunnel_free(tunnel);
+}
+#ifdef L2TP_REFCNT_DEBUG
+#define l2tp_tunnel_inc_refcount(_t) do { \
+ printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
+ l2tp_tunnel_inc_refcount_1(_t); \
+ } while (0)
+#define l2tp_tunnel_dec_refcount(_t) do { \
+ printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
+ l2tp_tunnel_dec_refcount_1(_t); \
+ } while (0)
+#else
+#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
+#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
+#endif
+
/* Session hash global list for L2TPv3.
* The session_id SHOULD be random according to RFC3931, but several
* L2TP implementations use incrementing session_ids. So we do a real
@@ -699,8 +731,8 @@ EXPORT_SYMBOL(l2tp_recv_common);
* Returns 1 if the packet was not a good data packet and could not be
* forwarded. All such packets are passed up to userspace to deal with.
*/
-int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
- int (*payload_hook)(struct sk_buff *skb))
+static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
+ int (*payload_hook)(struct sk_buff *skb))
{
struct l2tp_session *session = NULL;
unsigned char *ptr, *optr;
@@ -812,7 +844,6 @@ error:
return 1;
}
-EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
/* UDP encapsulation receive handler. See net/ipv4/udp.c.
* Return codes:
@@ -922,7 +953,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
return bufp - optr;
}
-int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
+static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
+ size_t data_len)
{
struct l2tp_tunnel *tunnel = session->tunnel;
unsigned int len = skb->len;
@@ -970,7 +1002,6 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
return 0;
}
-EXPORT_SYMBOL_GPL(l2tp_xmit_core);
/* Automatically called when the skb is freed.
*/
@@ -1089,7 +1120,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
* The tunnel context is deleted only when all session sockets have been
* closed.
*/
-void l2tp_tunnel_destruct(struct sock *sk)
+static void l2tp_tunnel_destruct(struct sock *sk)
{
struct l2tp_tunnel *tunnel;
@@ -1128,11 +1159,10 @@ void l2tp_tunnel_destruct(struct sock *sk)
end:
return;
}
-EXPORT_SYMBOL(l2tp_tunnel_destruct);
/* When the tunnel is closed, all the attached sessions need to go too.
*/
-void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
{
int hash;
struct hlist_node *walk;
@@ -1193,12 +1223,11 @@ again:
}
write_unlock_bh(&tunnel->hlist_lock);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
/* Really kill the tunnel.
* Come here only when all sessions have been cleared from the tunnel.
*/
-void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
@@ -1217,7 +1246,6 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
atomic_dec(&l2tp_tunnel_count);
kfree(tunnel);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
/* Create a socket for the tunnel, if one isn't set up by
* userspace. This is used for static tunnels where there is no
@@ -1512,7 +1540,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_len parameters are set.
*/
-void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1525,7 +1553,6 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
}
}
-EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index f0f318edd3f1..a16a48e79fab 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -231,48 +231,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
extern int l2tp_session_delete(struct l2tp_session *session);
-extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
extern void l2tp_session_free(struct l2tp_session *session);
extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
-extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
-extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
-extern void l2tp_tunnel_destruct(struct sock *sk);
-extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
-{
- atomic_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
-{
- if (atomic_dec_and_test(&tunnel->ref_count))
- l2tp_tunnel_free(tunnel);
-}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_tunnel_inc_refcount(_t) do { \
- printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
- l2tp_tunnel_inc_refcount_1(_t); \
- } while (0)
-#define l2tp_tunnel_dec_refcount(_t) do { \
- printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
- l2tp_tunnel_dec_refcount_1(_t); \
- } while (0)
-#else
-#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
-#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
-#endif
-
/* Session reference counts. Incremented when code obtains a reference
* to a session.
*/
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d4..b8dbae82fab8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
struct seq_file *seq;
int rc = -ENOMEM;
- pd = kzalloc(GFP_KERNEL, sizeof(*pd));
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (pd == NULL)
goto out;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 1c770c0644d1..0bf6a59545ab 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -576,7 +576,7 @@ out:
return copied;
}
-struct proto l2tp_ip_prot = {
+static struct proto l2tp_ip_prot = {
.name = "L2TP/IP",
.owner = THIS_MODULE,
.init = l2tp_ip_open,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 43288259f4a1..1534f2b44caf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -525,6 +525,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
@@ -927,6 +928,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
This option adds a `socket' match, which can be used to match
packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1eacf8d9966a..27a5ea6b6a0f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1312,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
if (!hash) {
*vmalloced = 1;
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
- hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL);
}
if (hash && nulls)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d92958023..dc7bb74110df 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
for (i = 0; i < MAX_NF_CT_PROTO; i++)
proto_array[i] = &nf_conntrack_l4proto_generic;
+
+ /* Before making proto_array visible to lockless readers,
+ * we must make sure its content is committed to memory.
+ */
+ smp_wmb();
+
nf_ct_protos[l4proto->l3proto] = proto_array;
} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
&nf_conntrack_l4proto_generic) {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 19c482caf30b..640678f47a2a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -21,7 +21,9 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -172,7 +174,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_TPROXY_HAVE_IPV6
static inline const struct in6_addr *
tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
@@ -372,7 +374,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
.hooks = 1 << NF_INET_PRE_ROUTING,
.me = THIS_MODULE,
},
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_TPROXY_HAVE_IPV6
{
.name = "TPROXY",
.family = NFPROTO_IPV6,
@@ -391,7 +393,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
static int __init tproxy_tg_init(void)
{
nf_defrag_ipv4_enable();
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_TPROXY_HAVE_IPV6
nf_defrag_ipv6_enable();
#endif
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2dbd4c857735..00d6ae838303 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -14,7 +14,6 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>
@@ -22,7 +21,12 @@
#include <net/inet_sock.h>
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#define XT_SOCKET_HAVE_IPV6 1
+#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
#include <linux/netfilter/xt_socket.h>
@@ -186,12 +190,12 @@ socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
return socket_match(skb, par, par->matchinfo);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_SOCKET_HAVE_IPV6
static int
extract_icmp6_fields(const struct sk_buff *skb,
unsigned int outside_hdrlen,
- u8 *protocol,
+ int *protocol,
struct in6_addr **raddr,
struct in6_addr **laddr,
__be16 *rport,
@@ -248,8 +252,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
struct sock *sk;
struct in6_addr *daddr, *saddr;
__be16 dport, sport;
- int thoff;
- u8 tproto;
+ int thoff, tproto;
const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
@@ -301,7 +304,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu "
+ pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
"(orig %pI6:%hu) sock %p\n",
tproto, saddr, ntohs(sport),
daddr, ntohs(dport),
@@ -331,7 +334,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_SOCKET_HAVE_IPV6
{
.name = "socket",
.revision = 1,
@@ -348,7 +351,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
static int __init socket_mt_init(void)
{
nf_defrag_ipv4_enable();
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_SOCKET_HAVE_IPV6
nf_defrag_ipv6_enable();
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index cd96ed3ccee4..478181d53c55 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,9 +83,9 @@ struct netlink_sock {
struct module *module;
};
-struct listeners_rcu_head {
- struct rcu_head rcu_head;
- void *ptr;
+struct listeners {
+ struct rcu_head rcu;
+ unsigned long masks[0];
};
#define NETLINK_KERNEL_SOCKET 0x1
@@ -119,7 +119,7 @@ struct nl_pid_hash {
struct netlink_table {
struct nl_pid_hash hash;
struct hlist_head mc_list;
- unsigned long *listeners;
+ struct listeners __rcu *listeners;
unsigned int nl_nonroot;
unsigned int groups;
struct mutex *cb_mutex;
@@ -338,7 +338,7 @@ netlink_update_listeners(struct sock *sk)
if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
mask |= nlk_sk(sk)->groups[i];
}
- tbl->listeners[i] = mask;
+ tbl->listeners->masks[i] = mask;
}
/* this function is only called with the netlink table "grabbed", which
* makes sure updates are visible before bind or setsockopt return. */
@@ -936,7 +936,7 @@ EXPORT_SYMBOL(netlink_unicast);
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
- unsigned long *listeners;
+ struct listeners *listeners;
BUG_ON(!netlink_is_kernel(sk));
@@ -944,7 +944,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
if (group - 1 < nl_table[sk->sk_protocol].groups)
- res = test_bit(group - 1, listeners);
+ res = test_bit(group - 1, listeners->masks);
rcu_read_unlock();
@@ -1498,7 +1498,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
struct socket *sock;
struct sock *sk;
struct netlink_sock *nlk;
- unsigned long *listeners = NULL;
+ struct listeners *listeners = NULL;
BUG_ON(!nl_table);
@@ -1523,8 +1523,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
if (groups < 32)
groups = 32;
- listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
- GFP_KERNEL);
+ listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
if (!listeners)
goto out_sock_release;
@@ -1541,7 +1540,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
netlink_table_grab();
if (!nl_table[unit].registered) {
nl_table[unit].groups = groups;
- nl_table[unit].listeners = listeners;
+ rcu_assign_pointer(nl_table[unit].listeners, listeners);
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
@@ -1572,43 +1571,28 @@ netlink_kernel_release(struct sock *sk)
EXPORT_SYMBOL(netlink_kernel_release);
-static void netlink_free_old_listeners(struct rcu_head *rcu_head)
+static void listeners_free_rcu(struct rcu_head *head)
{
- struct listeners_rcu_head *lrh;
-
- lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
- kfree(lrh->ptr);
+ kfree(container_of(head, struct listeners, rcu));
}
int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
- unsigned long *listeners, *old = NULL;
- struct listeners_rcu_head *old_rcu_head;
+ struct listeners *new, *old;
struct netlink_table *tbl = &nl_table[sk->sk_protocol];
if (groups < 32)
groups = 32;
if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
- listeners = kzalloc(NLGRPSZ(groups) +
- sizeof(struct listeners_rcu_head),
- GFP_ATOMIC);
- if (!listeners)
+ new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
+ if (!new)
return -ENOMEM;
- old = tbl->listeners;
- memcpy(listeners, old, NLGRPSZ(tbl->groups));
- rcu_assign_pointer(tbl->listeners, listeners);
- /*
- * Free the old memory after an RCU grace period so we
- * don't leak it. We use call_rcu() here in order to be
- * able to call this function from atomic contexts. The
- * allocation of this memory will have reserved enough
- * space for struct listeners_rcu_head at the end.
- */
- old_rcu_head = (void *)(tbl->listeners +
- NLGRPLONGS(tbl->groups));
- old_rcu_head->ptr = old;
- call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
+ old = rcu_dereference_raw(tbl->listeners);
+ memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
+ rcu_assign_pointer(tbl->listeners, new);
+
+ call_rcu(&old->rcu, listeners_free_rcu);
}
tbl->groups = groups;
@@ -2104,18 +2088,17 @@ static void __net_exit netlink_net_exit(struct net *net)
static void __init netlink_add_usersock_entry(void)
{
- unsigned long *listeners;
+ struct listeners *listeners;
int groups = 32;
- listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
- GFP_KERNEL);
+ listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
if (!listeners)
- panic("netlink_add_usersock_entry: Cannot allocate listneres\n");
+ panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
netlink_table_grab();
nl_table[NETLINK_USERSOCK].groups = groups;
- nl_table[NETLINK_USERSOCK].listeners = listeners;
+ rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
nl_table[NETLINK_USERSOCK].registered = 1;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27b9d46..0856a13cb53d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1719,7 +1719,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
if (dev)
- strlcpy(uaddr->sa_data, dev->name, 15);
+ strncpy(uaddr->sa_data, dev->name, 14);
else
memset(uaddr->sa_data, 0, 14);
rcu_read_unlock();
@@ -1742,6 +1742,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_family = AF_PACKET;
sll->sll_ifindex = po->ifindex;
sll->sll_protocol = po->num;
+ sll->sll_pkttype = 0;
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
if (dev) {
diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156b426f..aeec1d483b17 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
static void rds_loop_conn_free(void *arg)
{
struct rds_loop_connection *lc = arg;
+ unsigned long flags;
+
rdsdebug("lc %p\n", lc);
+ spin_lock_irqsave(&loop_conns_lock, flags);
list_del(&lc->loop_node);
+ spin_unlock_irqrestore(&loop_conns_lock, flags);
kfree(lc);
}
diff --git a/net/rds/message.c b/net/rds/message.c
index a84545dae370..1fd3d29023d7 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -224,6 +224,9 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
WARN_ON(!nents);
+ if (rm->m_used_sgs + nents > rm->m_total_sgs)
+ return NULL;
+
sg_ret = &sg_first[rm->m_used_sgs];
sg_init_table(sg_ret, nents);
rm->m_used_sgs += nents;
@@ -246,6 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
rm->data.op_nents = ceil(total_len, PAGE_SIZE);
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+ if (!rm->data.op_sg) {
+ rds_message_put(rm);
+ return ERR_PTR(-ENOMEM);
+ }
for (i = 0; i < rm->data.op_nents; ++i) {
sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1a41debca1ce..8920f2a83327 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
/*
- * Count the number of pages needed to describe an incoming iovec.
+ * Count the number of pages needed to describe an incoming iovec array.
*/
-static int rds_rdma_pages(struct rds_rdma_args *args)
+static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
+{
+ int tot_pages = 0;
+ unsigned int nr_pages;
+ unsigned int i;
+
+ /* figure out the number of pages in the vector */
+ for (i = 0; i < nr_iovecs; i++) {
+ nr_pages = rds_pages_in_vec(&iov[i]);
+ if (nr_pages == 0)
+ return -EINVAL;
+
+ tot_pages += nr_pages;
+
+ /*
+ * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+ * so tot_pages cannot overflow without first going negative.
+ */
+ if (tot_pages < 0)
+ return -EINVAL;
+ }
+
+ return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
{
struct rds_iovec vec;
struct rds_iovec __user *local_vec;
- unsigned int tot_pages = 0;
+ int tot_pages = 0;
unsigned int nr_pages;
unsigned int i;
@@ -502,14 +527,16 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
return -EINVAL;
tot_pages += nr_pages;
- }
- return tot_pages;
-}
+ /*
+ * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+ * so tot_pages cannot overflow without first going negative.
+ */
+ if (tot_pages < 0)
+ return -EINVAL;
+ }
-int rds_rdma_extra_size(struct rds_rdma_args *args)
-{
- return rds_rdma_pages(args) * sizeof(struct scatterlist);
+ return tot_pages * sizeof(struct scatterlist);
}
/*
@@ -520,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg)
{
struct rds_rdma_args *args;
- struct rds_iovec vec;
struct rm_rdma_op *op = &rm->rdma;
int nr_pages;
unsigned int nr_bytes;
struct page **pages = NULL;
- struct rds_iovec __user *local_vec;
- unsigned int nr;
+ struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
+ int iov_size;
unsigned int i, j;
int ret = 0;
@@ -546,9 +572,26 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
goto out;
}
- nr_pages = rds_rdma_pages(args);
- if (nr_pages < 0)
+ /* Check whether to allocate the iovec area */
+ iov_size = args->nr_local * sizeof(struct rds_iovec);
+ if (args->nr_local > UIO_FASTIOV) {
+ iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
+ if (!iovs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ nr_pages = rds_rdma_pages(iovs, args->nr_local);
+ if (nr_pages < 0) {
+ ret = -EINVAL;
goto out;
+ }
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
@@ -564,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
op->op_recverr = rs->rs_recverr;
WARN_ON(!nr_pages);
op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
+ if (!op->op_sg) {
+ ret = -ENOMEM;
+ goto out;
+ }
if (op->op_notify || op->op_recverr) {
/* We allocate an uninitialized notifier here, because
@@ -597,50 +644,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
(unsigned long long)args->remote_vec.addr,
op->op_rkey);
- local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
for (i = 0; i < args->nr_local; i++) {
- if (copy_from_user(&vec, &local_vec[i],
- sizeof(struct rds_iovec))) {
- ret = -EFAULT;
- goto out;
- }
-
- nr = rds_pages_in_vec(&vec);
- if (nr == 0) {
- ret = -EINVAL;
- goto out;
- }
+ struct rds_iovec *iov = &iovs[i];
+ /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */
+ unsigned int nr = rds_pages_in_vec(iov);
- rs->rs_user_addr = vec.addr;
- rs->rs_user_bytes = vec.bytes;
+ rs->rs_user_addr = iov->addr;
+ rs->rs_user_bytes = iov->bytes;
/* If it's a WRITE operation, we want to pin the pages for reading.
* If it's a READ operation, we need to pin the pages for writing.
*/
- ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
+ ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
if (ret < 0)
goto out;
- rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n",
- nr_bytes, nr, vec.bytes, vec.addr);
+ rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
+ nr_bytes, nr, iov->bytes, iov->addr);
- nr_bytes += vec.bytes;
+ nr_bytes += iov->bytes;
for (j = 0; j < nr; j++) {
- unsigned int offset = vec.addr & ~PAGE_MASK;
+ unsigned int offset = iov->addr & ~PAGE_MASK;
struct scatterlist *sg;
sg = &op->op_sg[op->op_nents + j];
sg_set_page(sg, pages[j],
- min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
+ min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
offset);
- rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n",
- sg->offset, sg->length, vec.addr, vec.bytes);
+ rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
+ sg->offset, sg->length, iov->addr, iov->bytes);
- vec.addr += sg->length;
- vec.bytes -= sg->length;
+ iov->addr += sg->length;
+ iov->bytes -= sg->length;
}
op->op_nents += nr;
@@ -655,13 +692,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
}
op->op_bytes = nr_bytes;
- ret = 0;
out:
+ if (iovs != iovstack)
+ sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
kfree(pages);
if (ret)
rds_rdma_free_op(op);
-
- rds_stats_inc(s_send_rdma);
+ else
+ rds_stats_inc(s_send_rdma);
return ret;
}
@@ -773,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
rm->atomic.op_active = 1;
rm->atomic.op_recverr = rs->rs_recverr;
rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+ if (!rm->atomic.op_sg) {
+ ret = -ENOMEM;
+ goto err;
+ }
/* verify 8 byte-aligned */
if (args->local_addr & 0x7) {
diff --git a/net/rds/send.c b/net/rds/send.c
index 0bc9db17a87d..35b9c2e9caf1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -973,6 +973,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
/* Attach data to the rm */
if (payload_len) {
rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+ if (!rm->data.op_sg) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
if (ret)
goto out;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6cf2d10..8e0a32001c90 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
+
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ list_del(&tc->t_tcp_node);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
kmem_cache_free(rds_tcp_conn_slab, tc);
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..f23d9155b1ef 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+ goto nla_put_failure;
+
return skb->len;
nla_put_failure:
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb1..d49c40fb7e09 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
.populate = cgrp_populate,
#ifdef CONFIG_NET_CLS_CGROUP
.subsys_id = net_cls_subsys_id,
-#else
-#define net_cls_subsys_id net_cls_subsys.subsys_id
#endif
.module = THIS_MODULE,
};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..ea8f566e720c 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
{
- textsearch_destroy(EM_TEXT_PRIV(m)->config);
+ if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
+ textsearch_destroy(EM_TEXT_PRIV(m)->config);
}
static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ef29c74d85e..e58f9476f29c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
struct kmem_cache *sctp_chunk_cachep __read_mostly;
struct kmem_cache *sctp_bucket_cachep __read_mostly;
-int sysctl_sctp_mem[3];
+long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e34ca9cc1167..6bd554323a34 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
extern struct kmem_cache *sctp_bucket_cachep;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
static int sctp_memory_pressure;
-static atomic_t sctp_memory_allocated;
+static atomic_long_t sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c0..50cb57f0919e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
.data = &sysctl_sctp_mem,
.maxlen = sizeof(sysctl_sctp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "sctp_rmem",
diff --git a/net/socket.c b/net/socket.c
index abf3e2561521..2808b4db46ee 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1652,6 +1652,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
struct iovec iov;
int fput_needed;
+ if (len > INT_MAX)
+ len = INT_MAX;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
@@ -1709,6 +1711,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
int err, err2;
int fput_needed;
+ if (size > INT_MAX)
+ size = INT_MAX;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 33217fc3d697..e9f0d5004483 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -396,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct tipc_sock *tsock = tipc_sk(sock->sk);
+ memset(addr, 0, sizeof(*addr));
if (peer) {
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754b..3a8c4c419cd4 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -134,15 +134,15 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
case X25_FAC_CLASS_D:
switch (*p) {
case X25_FAC_CALLING_AE:
- if (p[1] > X25_MAX_DTE_FACIL_LEN)
- break;
+ if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+ return 0;
dte_facs->calling_len = p[2];
memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLING_AE;
break;
case X25_FAC_CALLED_AE:
- if (p[1] > X25_MAX_DTE_FACIL_LEN)
- break;
+ if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+ return 0;
dte_facs->called_len = p[2];
memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLED_AE;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efac..f729f022be69 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
&x25->vc_facil_mask);
if (len > 0)
skb_pull(skb, len);
+ else
+ return -1;
/*
* Copy any Call User Data.
*/