summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorOtavio Salvador <otavio@ossystems.com.br>2016-10-05 16:23:28 -0300
committerOtavio Salvador <otavio@ossystems.com.br>2016-10-05 16:23:28 -0300
commita7f858238e7c8e8c4b9040aac0eb28bb0b6ab9dc (patch)
treef87a76eb101d6b9ea2016c404982991345d95dc8 /net
parent5bc73053cc0a6a0e5c5f953f8b774854316b87a9 (diff)
parent04cb720142764ebf3786eba1feb8fc4b6ef87fcf (diff)
Merge tag 'v4.1.33' into 4.1-2.0.x-imx
Linux 4.1.33 * tag 'v4.1.33': (1760 commits) Linux 4.1.33 Revert "ARC: mm: don't loose PTE_SPECIAL in pte_modify()" x86/AMD: Apply erratum 665 on machines without a BIOS fix x86/paravirt: Do not trace _paravirt_ident_*() functions ovl: listxattr: use strnlen() ovl: remove posix_acl_default from workdir serial: 8250: added acces i/o products quad and octal serial cards sysfs: correctly handle read offset on PREALLOC attrs sysfs: correctly handle short reads on PREALLOC attrs. kernfs: don't depend on d_find_any_alias() when generating notifications dm crypt: fix free of bad values after tfm allocation failure dm crypt: fix error with too large bios dm log writes: fix check of kthread_run() return value dm log writes: fix bug with too large bios dm log writes: move IO accounting earlier to fix error path NFSv4.x: Fix a refcount leak in nfs_callback_up_net xfs: prevent dropping ioend completions during buftarg wait xfs: fix superblock inprogress check USB: serial: option: add WeTelecom 0x6802 and 0x6803 products USB: avoid left shift by -1 ... Signed-off-by: Otavio Salvador <otavio@ossystems.com.br>
Diffstat (limited to 'net')
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/ax25/ax25_ip.c15
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c10
-rw-r--r--net/batman-adv/hard-interface.h12
-rw-r--r--net/batman-adv/network-coding.c19
-rw-r--r--net/batman-adv/originator.c151
-rw-r--r--net/batman-adv/originator.h1
-rw-r--r--net/batman-adv/routing.c9
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c8
-rw-r--r--net/batman-adv/translation-table.c28
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/sco.c3
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_ioctl.c5
-rw-r--r--net/bridge/br_stp_if.c5
-rw-r--r--net/ceph/messenger.c91
-rw-r--r--net/ceph/osdmap.c156
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/dst.c3
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/flow_dissector.c13
-rw-r--r--net/core/rtnetlink.c22
-rw-r--r--net/core/scm.c7
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c5
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/dccp/minisocks.c4
-rw-r--r--net/decnet/af_decnet.c3
-rw-r--r--net/decnet/dn_route.c9
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/fib_frontend.c10
-rw-r--r--net/ipv4/fou.c3
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/inet_timewait_sock.c16
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c3
-rw-r--r--net/ipv4/netfilter/arp_tables.c336
-rw-r--r--net/ipv4/netfilter/ip_tables.c371
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c12
-rw-r--r--net/ipv4/ping.c4
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c89
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_cubic.c22
-rw-r--r--net/ipv4/tcp_input.c54
-rw-r--r--net/ipv4/tcp_ipv4.c18
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c13
-rw-r--r--net/ipv4/tcp_output.c29
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/udp_tunnel.c2
-rw-r--r--net/ipv4/xfrm4_policy.c46
-rw-r--r--net/ipv6/addrconf.c36
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/exthdrs_core.c6
-rw-r--r--net/ipv6/ip6_flowlabel.c5
-rw-r--r--net/ipv6/ip6_gre.c10
-rw-r--r--net/ipv6/ip6_output.c11
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/ndisc.c16
-rw-r--r--net/ipv6/netfilter/ip6_tables.c365
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/tcp_ipv6.c9
-rw-r--r--net/ipv6/udp.c10
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c2
-rw-r--r--net/ipv6/xfrm6_policy.c53
-rw-r--r--net/irda/af_irda.c10
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/l2tp/l2tp_netlink.c18
-rw-r--r--net/llc/af_llc.c1
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/ibss.c23
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c3
-rw-r--r--net/mac80211/mesh.c15
-rw-r--r--net/mac80211/mesh.h4
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c5
-rw-r--r--net/mac80211/rx.c42
-rw-r--r--net/mac80211/scan.c12
-rw-r--r--net/mac80211/sta_info.c37
-rw-r--r--net/mac80211/tx.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c37
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/x_tables.c295
-rw-r--r--net/netlabel/netlabel_kapi.c12
-rw-r--r--net/netlink/af_netlink.c9
-rw-r--r--net/openvswitch/actions.c4
-rw-r--r--net/openvswitch/datapath.c5
-rw-r--r--net/packet/af_packet.c38
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/rfkill/core.c16
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c9
-rw-r--r--net/sched/sch_dsmark.c11
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_generic.c9
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_netem.c74
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c10
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c15
-rw-r--r--net/sctp/ipv6.c10
-rw-r--r--net/sctp/protocol.c47
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_sideeffect.c42
-rw-r--r--net/sctp/sm_statefuns.c6
-rw-r--r--net/sctp/socket.c18
-rw-r--r--net/sctp/sysctl.c2
-rw-r--r--net/socket.c39
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/switchdev/switchdev.c15
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tipc/subscr.c7
-rw-r--r--net/unix/af_unix.c113
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c17
-rw-r--r--net/vmw_vsock/af_vsock.c21
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/wext-core.c77
-rw-r--r--net/x25/x25_facilities.c1
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_output.c2
-rw-r--r--net/xfrm/xfrm_policy.c38
152 files changed, 2095 insertions, 1594 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 330c1f4a5a0b..a64884bbf0ce 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -806,6 +806,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
ax25_cb *ax25;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7c646bb2c6f7..98d206b22653 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -229,8 +229,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
}
#endif
+static bool ax25_validate_header(const char *header, unsigned int len)
+{
+ ax25_digi digi;
+
+ if (!len)
+ return false;
+
+ if (header[0])
+ return true;
+
+ return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
+ NULL);
+}
+
const struct header_ops ax25_header_ops = {
.create = ax25_hard_header,
+ .validate = ax25_validate_header,
};
EXPORT_SYMBOL(ax25_header_ops);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ac4b96eccade..bd3357e69c5c 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -112,21 +112,17 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw)
}
/* finally deinitialize the claim */
-static void batadv_claim_free_rcu(struct rcu_head *rcu)
+static void batadv_claim_release(struct batadv_bla_claim *claim)
{
- struct batadv_bla_claim *claim;
-
- claim = container_of(rcu, struct batadv_bla_claim, rcu);
-
batadv_backbone_gw_free_ref(claim->backbone_gw);
- kfree(claim);
+ kfree_rcu(claim, rcu);
}
/* free a claim, call claim_free_rcu if its the last reference */
static void batadv_claim_free_ref(struct batadv_bla_claim *claim)
{
if (atomic_dec_and_test(&claim->refcount))
- call_rcu(&claim->rcu, batadv_claim_free_rcu);
+ batadv_claim_release(claim);
}
/**
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 1918cd50b62e..b6bff9c1877a 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -64,18 +64,6 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
}
-/**
- * batadv_hardif_free_ref_now - decrement the hard interface refcounter and
- * possibly free it (without rcu callback)
- * @hard_iface: the hard interface to free
- */
-static inline void
-batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface)
-{
- if (atomic_dec_and_test(&hard_iface->refcount))
- batadv_hardif_free_rcu(&hard_iface->rcu);
-}
-
static inline struct batadv_hard_iface *
batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
{
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index a449195c5b2b..2fbd3a6bde9a 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -175,28 +175,25 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
}
/**
- * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
- * its refcount on the orig_node
- * @rcu: rcu pointer of the nc node
+ * batadv_nc_node_release - release nc_node from lists and queue for free after
+ * rcu grace period
+ * @nc_node: the nc node to free
*/
-static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
+static void batadv_nc_node_release(struct batadv_nc_node *nc_node)
{
- struct batadv_nc_node *nc_node;
-
- nc_node = container_of(rcu, struct batadv_nc_node, rcu);
batadv_orig_node_free_ref(nc_node->orig_node);
- kfree(nc_node);
+ kfree_rcu(nc_node, rcu);
}
/**
- * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
- * frees it
+ * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly
+ * release it
* @nc_node: the nc node to free
*/
static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
{
if (atomic_dec_and_test(&nc_node->refcount))
- call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
+ batadv_nc_node_release(nc_node);
}
/**
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index dfae97408628..77ea1d4de2ba 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -150,86 +150,58 @@ err:
}
/**
- * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object
- * @rcu: rcu pointer of the neigh_ifinfo object
- */
-static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_neigh_ifinfo *neigh_ifinfo;
-
- neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu);
-
- if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing);
-
- kfree(neigh_ifinfo);
-}
-
-/**
- * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free
- * the neigh_ifinfo (without rcu callback)
+ * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
+ * free after rcu grace period
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
static void
-batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo)
+batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
- if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu);
+ if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
+ batadv_hardif_free_ref(neigh_ifinfo->if_outgoing);
+
+ kfree_rcu(neigh_ifinfo, rcu);
}
/**
- * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free
+ * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release
* the neigh_ifinfo
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo)
{
if (atomic_dec_and_test(&neigh_ifinfo->refcount))
- call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu);
+ batadv_neigh_ifinfo_release(neigh_ifinfo);
}
/**
- * batadv_neigh_node_free_rcu - free the neigh_node
- * @rcu: rcu pointer of the neigh_node
+ * batadv_neigh_node_release - release neigh_node from lists and queue for
+ * free after rcu grace period
+ * @neigh_node: neigh neighbor to free
*/
-static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
+static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node)
{
struct hlist_node *node_tmp;
- struct batadv_neigh_node *neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
- neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
-
hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
&neigh_node->ifinfo_list, list) {
- batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
+ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
}
- batadv_hardif_free_ref_now(neigh_node->if_incoming);
+ batadv_hardif_free_ref(neigh_node->if_incoming);
- kfree(neigh_node);
-}
-
-/**
- * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter
- * and possibly free it (without rcu callback)
- * @neigh_node: neigh neighbor to free
- */
-static void
-batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node)
-{
- if (atomic_dec_and_test(&neigh_node->refcount))
- batadv_neigh_node_free_rcu(&neigh_node->rcu);
+ kfree_rcu(neigh_node, rcu);
}
/**
* batadv_neigh_node_free_ref - decrement the neighbors refcounter
- * and possibly free it
+ * and possibly release it
* @neigh_node: neigh neighbor to free
*/
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
{
if (atomic_dec_and_test(&neigh_node->refcount))
- call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu);
+ batadv_neigh_node_release(neigh_node);
}
/**
@@ -495,108 +467,99 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
}
/**
- * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
- * @rcu: rcu pointer of the orig_ifinfo object
+ * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
+ * free after rcu grace period
+ * @orig_ifinfo: the orig_ifinfo object to release
*/
-static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
+static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo)
{
- struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *router;
- orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
-
if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
- batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
+ batadv_hardif_free_ref(orig_ifinfo->if_outgoing);
/* this is the last reference to this object */
router = rcu_dereference_protected(orig_ifinfo->router, true);
if (router)
- batadv_neigh_node_free_ref_now(router);
- kfree(orig_ifinfo);
+ batadv_neigh_node_free_ref(router);
+
+ kfree_rcu(orig_ifinfo, rcu);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo (without rcu callback)
+ * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release
+ * the orig_ifinfo
* @orig_ifinfo: the orig_ifinfo object to release
*/
-static void
-batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo)
+void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
{
if (atomic_dec_and_test(&orig_ifinfo->refcount))
- batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu);
+ batadv_orig_ifinfo_release(orig_ifinfo);
}
/**
- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
- * the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
+ * batadv_orig_node_free_rcu - free the orig_node
+ * @rcu: rcu pointer of the orig_node
*/
-void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
+static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
{
- if (atomic_dec_and_test(&orig_ifinfo->refcount))
- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
+ struct batadv_orig_node *orig_node;
+
+ orig_node = container_of(rcu, struct batadv_orig_node, rcu);
+
+ batadv_mcast_purge_orig(orig_node);
+
+ batadv_frag_purge_orig(orig_node, NULL);
+
+ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+
+ kfree(orig_node->tt_buff);
+ kfree(orig_node);
}
-static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
+/**
+ * batadv_orig_node_release - release orig_node from lists and queue for
+ * free after rcu grace period
+ * @orig_node: the orig node to free
+ */
+static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
- struct batadv_orig_node *orig_node;
struct batadv_orig_ifinfo *orig_ifinfo;
- orig_node = container_of(rcu, struct batadv_orig_node, rcu);
-
spin_lock_bh(&orig_node->neigh_list_lock);
/* for all neighbors towards this originator ... */
hlist_for_each_entry_safe(neigh_node, node_tmp,
&orig_node->neigh_list, list) {
hlist_del_rcu(&neigh_node->list);
- batadv_neigh_node_free_ref_now(neigh_node);
+ batadv_neigh_node_free_ref(neigh_node);
}
hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
&orig_node->ifinfo_list, list) {
hlist_del_rcu(&orig_ifinfo->list);
- batadv_orig_ifinfo_free_ref_now(orig_ifinfo);
+ batadv_orig_ifinfo_free_ref(orig_ifinfo);
}
spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_mcast_purge_orig(orig_node);
-
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
- batadv_frag_purge_orig(orig_node, NULL);
-
- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
-
- kfree(orig_node->tt_buff);
- kfree(orig_node);
+ call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
/**
* batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
- * schedule an rcu callback for freeing it
+ * release it
* @orig_node: the orig node to free
*/
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
{
if (atomic_dec_and_test(&orig_node->refcount))
- call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
-}
-
-/**
- * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
- * possibly free it (without rcu callback)
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
-{
- if (atomic_dec_and_test(&orig_node->refcount))
- batadv_orig_node_free_rcu(&orig_node->rcu);
+ batadv_orig_node_release(orig_node);
}
void batadv_originator_free(struct batadv_priv *bat_priv)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index aa4a43696295..28b751ad549c 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -25,7 +25,6 @@ int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index da83982bf974..f77dafc114a6 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -88,6 +88,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
neigh_node = NULL;
spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replace the current
+ * best neighbor the reference counter needs to decrease. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
rcu_assign_pointer(orig_ifinfo->router, neigh_node);
spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_orig_ifinfo_free_ref(orig_ifinfo);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3d64ed20c393..6004c2de7b2a 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -611,6 +611,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->bcast_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
@@ -638,6 +641,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->batman_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index a0b1b861b968..b38a8aa3cce8 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -377,11 +377,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
*/
nf_reset(skb);
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ goto dropped;
+
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+ goto dropped;
+
vhdr = (struct vlan_ethhdr *)skb->data;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -393,8 +399,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
}
/* skb->dev & skb->pkt_type are set here */
- if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
- goto dropped;
skb->protocol = eth_type_trans(skb, soft_iface);
/* should not be necessary anymore as we use skb_pull_rcsum()
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4f2a9d2c56db..ddd62c9af5b4 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -219,20 +219,6 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
return count;
}
-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_orig_list_entry *orig_entry;
-
- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
-
- /* We are in an rcu callback here, therefore we cannot use
- * batadv_orig_node_free_ref() and its call_rcu():
- * An rcu_barrier() wouldn't wait for that to finish
- */
- batadv_orig_node_free_ref_now(orig_entry->orig_node);
- kfree(orig_entry);
-}
-
/**
* batadv_tt_local_size_mod - change the size by v of the local table identified
* by vid
@@ -328,13 +314,25 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
batadv_tt_global_size_mod(orig_node, vid, -1);
}
+/**
+ * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
+ * queue for free after rcu grace period
+ * @orig_entry: tt orig entry to be free'd
+ */
+static void
+batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
+{
+ batadv_orig_node_free_ref(orig_entry->orig_node);
+ kfree_rcu(orig_entry, rcu);
+}
+
static void
batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
{
if (!atomic_dec_and_test(&orig_entry->refcount))
return;
- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
+ batadv_tt_orig_list_entry_release(orig_entry);
}
/**
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a7278f05eafb..289c96d3e47d 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -927,7 +927,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (get_user(opt, (u16 __user *) optval)) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 4322c833e748..8611bc7bdd32 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -520,6 +520,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_sco))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 02c24cf63c34..c72e01cf09d0 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
+/* called with RTNL */
static int br_netdev_switch_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -130,7 +131,6 @@ static int br_netdev_switch_event(struct notifier_block *unused,
struct netdev_switch_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
- rtnl_lock();
p = br_port_get_rtnl(dev);
if (!p)
goto out;
@@ -155,7 +155,6 @@ static int br_netdev_switch_event(struct notifier_block *unused,
}
out:
- rtnl_unlock();
return err;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 4ff77a16956c..3d6c8e222391 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -28,6 +28,8 @@
const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_br_ops);
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -87,6 +89,11 @@ out:
return NETDEV_TX_OK;
}
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -99,6 +106,7 @@ static int br_dev_init(struct net_device *dev)
err = br_vlan_init(br);
if (err)
free_percpu(br->stats);
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8d423bc649b9..f876f707fd9e 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
#include <asm/uaccess.h>
#include "br_private.h"
-/* called with RTNL */
static int get_bridge_ifindices(struct net *net, int *indices, int num)
{
struct net_device *dev;
int i = 0;
- for_each_netdev(net, dev) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
if (i >= num)
break;
if (dev->priv_flags & IFF_EBRIDGE)
indices[i++] = dev->ifindex;
}
+ rcu_read_unlock();
return i;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 7832d07f48f6..ce658abdc2c8 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -128,7 +128,10 @@ static void br_stp_start(struct net_bridge *br)
char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
char *envp[] = { NULL };
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (net_eq(dev_net(br->dev), &init_net))
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ else
+ r = -ENOENT;
spin_lock_bh(&br->lock);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 967080a9f043..84201c21705e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -675,6 +675,8 @@ static void reset_connection(struct ceph_connection *con)
}
con->in_seq = 0;
con->in_seq_acked = 0;
+
+ con->out_skip = 0;
}
/*
@@ -774,6 +776,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
static void con_out_kvec_reset(struct ceph_connection *con)
{
+ BUG_ON(con->out_skip);
+
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
@@ -782,9 +786,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
- int index;
+ int index = con->out_kvec_left;
- index = con->out_kvec_left;
+ BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
@@ -793,6 +797,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+/*
+ * Chop off a kvec from the end. Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+ int off = con->out_kvec_cur - con->out_kvec;
+ int skip = 0;
+
+ if (con->out_kvec_bytes > 0) {
+ skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+ BUG_ON(con->out_kvec_bytes < skip);
+ BUG_ON(!con->out_kvec_left);
+ con->out_kvec_bytes -= skip;
+ con->out_kvec_left--;
+ }
+
+ return skip;
+}
+
#ifdef CONFIG_BLOCK
/*
@@ -1178,6 +1203,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
return new_piece;
}
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+ return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+ sizeof(struct ceph_msg_footer) :
+ sizeof(struct ceph_msg_footer_old);
+}
+
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
BUG_ON(!msg);
@@ -1200,7 +1232,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
- con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
@@ -1228,7 +1259,6 @@ static void prepare_write_message(struct ceph_connection *con)
u32 crc;
con_out_kvec_reset(con);
- con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
@@ -1268,18 +1298,19 @@ static void prepare_write_message(struct ceph_connection *con)
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
- /* fill in crc (except data pages), footer */
+ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = 0;
+ memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
+ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
@@ -1291,6 +1322,7 @@ static void prepare_write_message(struct ceph_connection *con)
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
+ con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
@@ -1485,7 +1517,6 @@ static int write_partial_kvec(struct ceph_connection *con)
}
}
con->out_kvec_left = 0;
- con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1577,6 +1608,7 @@ static int write_partial_skip(struct ceph_connection *con)
{
int ret;
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
@@ -2301,9 +2333,9 @@ static int read_partial_message(struct ceph_connection *con)
ceph_pr_addr(&con->peer_addr.in_addr),
seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
- return 0;
+ return 1;
} else if ((s64)seq - (s64)con->in_seq > 1) {
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
@@ -2333,10 +2365,10 @@ static int read_partial_message(struct ceph_connection *con)
/* skip this message */
dout("alloc_msg said skip message\n");
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
- return 0;
+ return 1;
}
BUG_ON(!con->in_msg);
@@ -2493,13 +2525,13 @@ more:
more_kvec:
/* kvec data queued? */
- if (con->out_skip) {
- ret = write_partial_skip(con);
+ if (con->out_kvec_left) {
+ ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
- if (con->out_kvec_left) {
- ret = write_partial_kvec(con);
+ if (con->out_skip) {
+ ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
@@ -3026,16 +3058,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("%s %p msg %p - was sending\n", __func__, con, msg);
- con->out_msg = NULL;
- if (con->out_kvec_is_msg) {
- con->out_skip = con->out_kvec_bytes;
- con->out_kvec_is_msg = false;
+ BUG_ON(con->out_skip);
+ /* footer */
+ if (con->out_msg_done) {
+ con->out_skip += con_out_kvec_skip(con);
+ } else {
+ BUG_ON(!msg->data_length);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+ con->out_skip += sizeof(msg->footer);
+ else
+ con->out_skip += sizeof(msg->old_footer);
}
+ /* data, middle, front */
+ if (msg->data_length)
+ con->out_skip += msg->cursor.total_resid;
+ if (msg->middle)
+ con->out_skip += con_out_kvec_skip(con);
+ con->out_skip += con_out_kvec_skip(con);
+
+ dout("%s %p msg %p - was sending, will write %d skip %d\n",
+ __func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
-
+ con->out_msg = NULL;
ceph_msg_put(msg);
}
+
mutex_unlock(&con->mutex);
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4a3125836b64..ddc3573894b0 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1192,6 +1192,115 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
}
/*
+ * Encoding order is (new_up_client, new_state, new_weight). Need to
+ * apply in the (new_weight, new_state, new_up_client) order, because
+ * an incremental map may look like e.g.
+ *
+ * new_up_client: { osd=6, addr=... } # set osd_state and addr
+ * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
+ */
+static int decode_new_up_state_weight(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ void *new_up_client;
+ void *new_state;
+ void *new_weight_end;
+ u32 len;
+
+ new_up_client = *p;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
+ ceph_decode_need(p, end, len, e_inval);
+ *p += len;
+
+ new_state = *p;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ len *= sizeof(u32) + sizeof(u8);
+ ceph_decode_need(p, end, len, e_inval);
+ *p += len;
+
+ /* new_weight */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ s32 osd;
+ u32 w;
+
+ ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
+ osd = ceph_decode_32(p);
+ w = ceph_decode_32(p);
+ BUG_ON(osd >= map->max_osd);
+ pr_info("osd%d weight 0x%x %s\n", osd, w,
+ w == CEPH_OSD_IN ? "(in)" :
+ (w == CEPH_OSD_OUT ? "(out)" : ""));
+ map->osd_weight[osd] = w;
+
+ /*
+ * If we are marking in, set the EXISTS, and clear the
+ * AUTOOUT and NEW bits.
+ */
+ if (w) {
+ map->osd_state[osd] |= CEPH_OSD_EXISTS;
+ map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
+ CEPH_OSD_NEW);
+ }
+ }
+ new_weight_end = *p;
+
+ /* new_state (up/down) */
+ *p = new_state;
+ len = ceph_decode_32(p);
+ while (len--) {
+ s32 osd;
+ u8 xorstate;
+ int ret;
+
+ osd = ceph_decode_32(p);
+ xorstate = ceph_decode_8(p);
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ BUG_ON(osd >= map->max_osd);
+ if ((map->osd_state[osd] & CEPH_OSD_UP) &&
+ (xorstate & CEPH_OSD_UP))
+ pr_info("osd%d down\n", osd);
+ if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
+ (xorstate & CEPH_OSD_EXISTS)) {
+ pr_info("osd%d does not exist\n", osd);
+ map->osd_weight[osd] = CEPH_OSD_IN;
+ ret = set_primary_affinity(map, osd,
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
+ if (ret)
+ return ret;
+ memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
+ map->osd_state[osd] = 0;
+ } else {
+ map->osd_state[osd] ^= xorstate;
+ }
+ }
+
+ /* new_up_client */
+ *p = new_up_client;
+ len = ceph_decode_32(p);
+ while (len--) {
+ s32 osd;
+ struct ceph_entity_addr addr;
+
+ osd = ceph_decode_32(p);
+ ceph_decode_copy(p, &addr, sizeof(addr));
+ ceph_decode_addr(&addr);
+ BUG_ON(osd >= map->max_osd);
+ pr_info("osd%d up\n", osd);
+ map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
+ map->osd_addr[osd] = addr;
+ }
+
+ *p = new_weight_end;
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
@@ -1290,49 +1399,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
__remove_pg_pool(&map->pg_pools, pi);
}
- /* new_up */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd;
- struct ceph_entity_addr addr;
- ceph_decode_32_safe(p, end, osd, e_inval);
- ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
- ceph_decode_addr(&addr);
- pr_info("osd%d up\n", osd);
- BUG_ON(osd >= map->max_osd);
- map->osd_state[osd] |= CEPH_OSD_UP;
- map->osd_addr[osd] = addr;
- }
-
- /* new_state */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd;
- u8 xorstate;
- ceph_decode_32_safe(p, end, osd, e_inval);
- xorstate = **(u8 **)p;
- (*p)++; /* clean flag */
- if (xorstate == 0)
- xorstate = CEPH_OSD_UP;
- if (xorstate & CEPH_OSD_UP)
- pr_info("osd%d down\n", osd);
- if (osd < map->max_osd)
- map->osd_state[osd] ^= xorstate;
- }
-
- /* new_weight */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd, off;
- ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
- osd = ceph_decode_32(p);
- off = ceph_decode_32(p);
- pr_info("osd%d weight 0x%x %s\n", osd, off,
- off == CEPH_OSD_IN ? "(in)" :
- (off == CEPH_OSD_OUT ? "(out)" : ""));
- if (osd < map->max_osd)
- map->osd_weight[osd] = off;
- }
+ /* new_up_client, new_state, new_weight */
+ err = decode_new_up_state_weight(p, end, map);
+ if (err)
+ goto bad;
/* new_pg_temp */
err = decode_new_pg_temp(p, end, map);
diff --git a/net/core/dev.c b/net/core/dev.c
index a42b232805a5..185a3398c651 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2479,6 +2479,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
*
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
+ *
+ * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
*/
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
@@ -2493,6 +2495,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
return ERR_PTR(err);
}
+ BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
+ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
SKB_GSO_CB(skb)->encap_level = 0;
diff --git a/net/core/dst.c b/net/core/dst.c
index f8db4032d45a..540066cb33ef 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -282,10 +282,11 @@ void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
+ unsigned short nocache = dst->flags & DST_NOCACHE;
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
+ if (!newrefcnt && unlikely(nocache))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 0fa2613b5e35..238bb3f9c51d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -775,6 +775,11 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
if (ftest->k == 0)
return -EINVAL;
break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ if (ftest->k >= 32)
+ return -EINVAL;
+ break;
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
case BPF_ST:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2c35c02a931e..3556791fdc6e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -113,7 +113,6 @@ ip:
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
- __be32 flow_label;
ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -130,8 +129,9 @@ ipv6:
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
- flow_label = ip6_flowlabel(iph);
- if (flow_label) {
+ if (skb && ip6_flowlabel(iph)) {
+ __be32 flow_label = ip6_flowlabel(iph);
+
/* Awesome, IPv6 packet has a flow label so we can
* use that to represent the ports without any
* further dissection.
@@ -233,6 +233,13 @@ ipv6:
return false;
proto = eth->h_proto;
nhoff += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ hlen = nhoff;
}
goto again;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe95cb704aaa..e9c4b51525de 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -884,7 +884,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
- + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(IFNAMSIZ); /* IFLA_PHYS_PORT_NAME */
+
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1070,14 +1072,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
if (1) {
- struct rtnl_link_ifmap map = {
- .mem_start = dev->mem_start,
- .mem_end = dev->mem_end,
- .base_addr = dev->base_addr,
- .irq = dev->irq,
- .dma = dev->dma,
- .port = dev->if_port,
- };
+ struct rtnl_link_ifmap map;
+
+ memset(&map, 0, sizeof(map));
+ map.mem_start = dev->mem_start;
+ map.mem_end = dev->mem_end;
+ map.base_addr = dev->base_addr;
+ map.irq = dev->irq;
+ map.dma = dev->dma;
+ map.port = dev->if_port;
+
if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
goto nla_put_failure;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 8a1741b14302..dce0acb929f1 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fplp = fpl;
fpl->count = 0;
fpl->max = SCM_MAX_FD;
+ fpl->user = NULL;
}
fpp = &fpl->fp[fpl->count];
@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
*fpp++ = file;
fpl->count++;
}
+
+ if (!fpl->user)
+ fpl->user = get_uid(current_user());
+
return num;
}
@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
+ free_uid(fpl->user);
kfree(fpl);
}
}
@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
new_fpl->max = new_fpl->count;
+ new_fpl->user = get_uid(fpl->user);
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 075d2e78c87e..c9793c6c5005 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,8 @@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_max_skb_frags);
/**
* skb_panic - private function for out-of-line support
@@ -3661,7 +3663,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
- if (sk->sk_protocol == IPPROTO_TCP)
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
serr->ee.ee_data -= sk->sk_tskey;
}
@@ -4200,7 +4203,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
+ 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index dc30dc5bb1b8..47fc8bb3b946 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -421,8 +421,6 @@ static void sock_warn_obsolete_bsdism(const char *name)
}
}
-#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
if (sk->sk_flags & flags) {
@@ -861,7 +859,8 @@ set_rcvbuf:
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
- if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM) {
if (sk->sk_state != TCP_ESTABLISHED) {
ret = -EINVAL;
break;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 95b6139d710c..a6beb7b6ae55 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,7 @@ static int zero = 0;
static int one = 1;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
+static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "max_skb_frags",
+ .data = &sysctl_max_skb_frags,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &max_skb_frags,
+ },
{ }
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 30addee2dd03..838f524cf11a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
@@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
timeo = DCCP_TIMEWAIT_LEN;
inet_twsk_schedule(tw, timeo);
+ /* Linkage updates. */
+ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 754484b3cd0e..2783c538ec19 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 03227ffd19ce..76d3bf70c31a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1036,10 +1036,13 @@ source_ok:
if (!fld.daddr) {
fld.daddr = fld.saddr;
- err = -EADDRNOTAVAIL;
if (dev_out)
dev_put(dev_out);
+ err = -EINVAL;
dev_out = init_net.loopback_dev;
+ if (!dev_out->dn_ptr)
+ goto out;
+ err = -EADDRNOTAVAIL;
dev_hold(dev_out);
if (!fld.daddr) {
fld.daddr =
@@ -1112,6 +1115,8 @@ source_ok:
if (dev_out == NULL)
goto out;
dn_db = rcu_dereference_raw(dev_out->dn_ptr);
+ if (!dn_db)
+ goto e_inval;
/* Possible improvement - check all devices for local addr */
if (dn_dev_islocal(dev_out, fld.daddr)) {
dev_put(dev_out);
@@ -1153,6 +1158,8 @@ select_source:
dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
+ if (!dev_out->dn_ptr)
+ goto e_inval;
fld.flowidn_oif = dev_out->ifindex;
if (res.fi)
dn_fib_info_put(res.fi);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a5aa54ea6533..0cc98b135b8f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -259,6 +259,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 419d23c53ec7..a57056d87a43 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
ASSERT_RTNL();
+ if (in_dev->dead)
+ goto no_promotions;
+
/* 1. Deleting primary ifaddr forces deletion all secondaries
* unless alias promotion is set
**/
@@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
fib_del_ifaddr(ifa, ifa1);
}
+no_promotions:
/* 2. Unlink it */
*ifap = ifa1->ifa_next;
@@ -1839,7 +1843,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e6e6eb..513b6aabc5b7 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -861,7 +861,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
if (!prim) {
- pr_warn("%s: bug: prim == NULL\n", __func__);
+ /* if the device has been deleted, we don't perform
+ * address promotion
+ */
+ if (!in_dev->dead)
+ pr_warn("%s: bug: prim == NULL\n", __func__);
return;
}
if (iprim && iprim != prim) {
@@ -876,6 +880,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
subnet = 1;
}
+ if (in_dev->dead)
+ goto no_promotions;
+
/* Deletion is more complicated than add.
* We should take care of not to delete too much :-)
*
@@ -951,6 +958,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
}
}
+no_promotions:
if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
if (subnet && ifa->ifa_prefixlen < 31) {
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 34968cd5c146..4b67937692c9 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -24,6 +24,7 @@ struct fou {
u16 type;
struct udp_offload udp_offloads;
struct list_head list;
+ struct rcu_head rcu;
};
#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
@@ -421,7 +422,7 @@ static void fou_release(struct fou *fou)
list_del(&fou->list);
udp_tunnel_sock_release(sock);
- kfree(fou);
+ kfree_rcu(fou, rcu);
}
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a3a697f5ffba..218abf9fb1ed 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -353,9 +353,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 00ec8d5d7e7e..bb96c1c4edd6 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -153,13 +153,15 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
/*
* Step 2: Hash TW into tcp ehash chain.
* Notes :
- * - tw_refcnt is set to 3 because :
+ * - tw_refcnt is set to 4 because :
* - We have one reference from bhash chain.
* - We have one reference from ehash chain.
+ * - We have one reference from timer.
+ * - One reference for ourself (our caller will release it).
* We can use atomic_set() because prior spin_lock()/spin_unlock()
* committed into memory all tw fields.
*/
- atomic_set(&tw->tw_refcnt, 1 + 1 + 1);
+ atomic_set(&tw->tw_refcnt, 4);
inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
@@ -243,7 +245,7 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL(inet_twsk_deschedule);
-void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
+void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
{
/* timeout := RTO * 3.5
*
@@ -271,12 +273,14 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
*/
tw->tw_kill = timeo <= 4*HZ;
- if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) {
- atomic_inc(&tw->tw_refcnt);
+ if (!rearm) {
+ BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo));
atomic_inc(&tw->tw_dr->tw_count);
+ } else {
+ mod_timer_pending(&tw->tw_timer, jiffies + timeo);
}
}
-EXPORT_SYMBOL_GPL(inet_twsk_schedule);
+EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
struct inet_timewait_death_row *twdr, int family)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c65b93a7b711..51573f8a39bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -235,6 +235,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
* from host network stack.
*/
features = netif_skb_features(skb);
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
@@ -893,7 +894,7 @@ static int __ip_append_data(struct sock *sk,
if (((length > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6ddde89996f4..b6c7bdea4853 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+ /* Our caller is responsible for freeing ipc->opt */
err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
err < 40 ? err : 40);
if (err)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 626d9e56a6bd..35080a708b59 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -652,6 +652,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -749,7 +751,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst_link_failure(skb);
} else
tunnel->err_count = 0;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a61200754f4b..ebf5821caefb 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -354,18 +354,20 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
/* All zeroes == unconditional rule. */
-static inline bool unconditional(const struct arpt_arp *arp)
+static inline bool unconditional(const struct arpt_entry *e)
{
static const struct arpt_arp uncond;
- return memcmp(arp, &uncond, sizeof(uncond)) == 0;
+ return e->target_offset == sizeof(struct arpt_entry) &&
+ memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
}
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
static int mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -397,11 +399,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct arpt_entry) &&
+ if ((unconditional(e) &&
(strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < 0 && unconditional(&e->arp)) ||
- visited) {
+ t->verdict < 0) || visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
@@ -434,6 +435,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct arpt_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -453,9 +456,16 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct arpt_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct arpt_entry *)
(entry0 + newpos);
@@ -469,25 +479,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
return 1;
}
-static inline int check_entry(const struct arpt_entry *e, const char *name)
-{
- const struct xt_entry_target *t;
-
- if (!arp_checkentry(&e->arp)) {
- duprintf("arp_tables: arp check failed %p %s.\n", e, name);
- return -EINVAL;
- }
-
- if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
- return -EINVAL;
-
- t = arpt_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static inline int check_target(struct arpt_entry *e, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
@@ -517,10 +508,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
struct xt_target *target;
int ret;
- ret = check_entry(e, name);
- if (ret)
- return ret;
-
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
@@ -546,7 +533,7 @@ static bool check_underflow(const struct arpt_entry *e)
const struct xt_entry_target *t;
unsigned int verdict;
- if (!unconditional(&e->arp))
+ if (!unconditional(e))
return false;
t = arpt_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -565,9 +552,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
unsigned int valid_hooks)
{
unsigned int h;
+ int err;
if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
@@ -579,6 +568,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
return -EINVAL;
}
+ if (!arp_checkentry(&e->arp))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
+ if (err)
+ return err;
+
/* Check hooks & underflows */
for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
if (!(valid_hooks & (1 << h)))
@@ -587,9 +584,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
if (!check_underflow(e)) {
- pr_err("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ pr_debug("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
return -EINVAL;
}
newinfo->underflow[h] = underflows[h];
@@ -623,6 +620,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
struct arpt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -636,6 +634,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
@@ -646,20 +647,21 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- break;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
- if (ret != 0)
- return ret;
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -670,19 +672,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
- duprintf("Looping hook\n");
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
}
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -709,6 +712,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void get_counters(const struct xt_table_info *t,
@@ -1118,56 +1124,18 @@ static int do_add_counters(struct net *net, const void __user *user,
unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- const char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
struct arpt_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, NFPROTO_ARP, name);
+ t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1175,7 +1143,7 @@ static int do_add_counters(struct net *net, const void __user *user,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1201,6 +1169,18 @@ static int do_add_counters(struct net *net, const void __user *user,
}
#ifdef CONFIG_COMPAT
+struct compat_arpt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_ARP_NUMHOOKS];
+ u32 underflow[NF_ARP_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters;
+ struct compat_arpt_entry entries[0];
+};
+
static inline void compat_release_entry(struct compat_arpt_entry *e)
{
struct xt_entry_target *t;
@@ -1209,24 +1189,22 @@ static inline void compat_release_entry(struct compat_arpt_entry *e)
module_put(t->u.kernel.target->me);
}
-static inline int
+static int
check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1238,8 +1216,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct arpt_entry *)e, name);
+ if (!arp_checkentry(&e->arp))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (ret)
return ret;
@@ -1263,17 +1244,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
if (ret)
goto release_target;
- /* Check hooks & underflows */
- for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
release_target:
@@ -1282,18 +1252,17 @@ out:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
- ret = 0;
origsize = *size;
de = (struct arpt_entry *)*dstptr;
memcpy(de, e, sizeof(struct arpt_entry));
@@ -1314,144 +1283,81 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
}
-static int translate_compat_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info **pinfo,
+static int translate_compat_table(struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_arpt_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_arpt_entry *iter0;
- struct arpt_entry *iter1;
+ struct arpt_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
- xt_compat_init_offsets(NFPROTO_ARP, number);
+ xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone */
+
xt_compat_flush_offsets(NFPROTO_ARP);
xt_compat_unlock(NFPROTO_ARP);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = check_target(iter1, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
*pinfo = newinfo;
*pentry0 = entry1;
@@ -1460,31 +1366,18 @@ static int translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(NFPROTO_ARP);
- xt_compat_unlock(NFPROTO_ARP);
- goto out;
}
-struct compat_arpt_replace {
- char name[XT_TABLE_MAXNAMELEN];
- u32 valid_hooks;
- u32 num_entries;
- u32 size;
- u32 hook_entry[NF_ARP_NUMHOOKS];
- u32 underflow[NF_ARP_NUMHOOKS];
- u32 num_counters;
- compat_uptr_t counters;
- struct compat_arpt_entry entries[0];
-};
-
static int compat_do_replace(struct net *net, void __user *user,
unsigned int len)
{
@@ -1518,10 +1411,7 @@ static int compat_do_replace(struct net *net, void __user *user,
goto free_newinfo;
}
- ret = translate_compat_table(tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2d0e265fef6e..8e729cba1e59 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset)
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
-static inline bool unconditional(const struct ipt_ip *ip)
+static inline bool unconditional(const struct ipt_entry *e)
{
static const struct ipt_ip uncond;
- return memcmp(ip, &uncond, sizeof(uncond)) == 0;
+ return e->target_offset == sizeof(struct ipt_entry) &&
+ memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
#undef FWINV
}
@@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
} else if (s == e) {
(*rulenum)++;
- if (s->target_offset == sizeof(struct ipt_entry) &&
+ if (unconditional(s) &&
strcmp(t->target.u.kernel.target->name,
XT_STANDARD_TARGET) == 0 &&
- t->verdict < 0 &&
- unconditional(&s->ip)) {
+ t->verdict < 0) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
? comments[NF_IP_TRACE_COMMENT_POLICY]
@@ -442,7 +442,8 @@ ipt_do_table(struct sk_buff *skb,
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -471,11 +472,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct ipt_entry) &&
+ if ((unconditional(e) &&
(strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < 0 && unconditional(&e->ip)) ||
- visited) {
+ t->verdict < 0) || visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
@@ -516,6 +516,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ipt_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -534,9 +536,16 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct ipt_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ipt_entry *)
(entry0 + newpos);
@@ -564,27 +573,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
}
static int
-check_entry(const struct ipt_entry *e, const char *name)
-{
- const struct xt_entry_target *t;
-
- if (!ip_checkentry(&e->ip)) {
- duprintf("ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ipt_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
-static int
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
@@ -661,10 +649,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- ret = check_entry(e, name);
- if (ret)
- return ret;
-
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -708,7 +692,7 @@ static bool check_underflow(const struct ipt_entry *e)
const struct xt_entry_target *t;
unsigned int verdict;
- if (!unconditional(&e->ip))
+ if (!unconditional(e))
return false;
t = ipt_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -728,9 +712,11 @@ check_entry_size_and_hooks(struct ipt_entry *e,
unsigned int valid_hooks)
{
unsigned int h;
+ int err;
if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
@@ -742,6 +728,14 @@ check_entry_size_and_hooks(struct ipt_entry *e,
return -EINVAL;
}
+ if (!ip_checkentry(&e->ip))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
+ if (err)
+ return err;
+
/* Check hooks & underflows */
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if (!(valid_hooks & (1 << h)))
@@ -750,9 +744,9 @@ check_entry_size_and_hooks(struct ipt_entry *e,
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
if (!check_underflow(e)) {
- pr_err("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ pr_debug("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
return -EINVAL;
}
newinfo->underflow[h] = underflows[h];
@@ -793,6 +787,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
struct ipt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -806,6 +801,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -815,17 +813,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -836,17 +837,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -873,6 +877,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
@@ -1306,56 +1313,18 @@ do_add_counters(struct net *net, const void __user *user,
unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- const char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
struct ipt_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, AF_INET, name);
+ t = xt_find_table_lock(net, AF_INET, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1363,7 +1332,7 @@ do_add_counters(struct net *net, const void __user *user,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1442,7 +1411,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
int *size)
@@ -1478,21 +1446,19 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1504,8 +1470,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ipt_entry *)e, name);
+ if (!ip_checkentry(&e->ip))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1513,8 +1482,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ip, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, &e->ip, e->comefrom,
+ &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1537,17 +1506,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1561,19 +1519,18 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ipt_entry *)*dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
@@ -1582,198 +1539,104 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
*dstptr += sizeof(struct ipt_entry);
*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
+
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if ((unsigned char *)de - base < newinfo->hook_entry[h])
newinfo->hook_entry[h] -= origsize - *size;
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int
-compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
-{
- struct xt_entry_match *ematch;
- struct xt_mtchk_param mtpar;
- unsigned int j;
- int ret = 0;
-
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ip;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV4;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ipt_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ipt_entry *iter0;
- struct ipt_entry *iter1;
+ struct ipt_replace repl;
unsigned int size;
int ret;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
- xt_compat_init_offsets(AF_INET, number);
+ xt_compat_init_offsets(AF_INET, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone.
+ * entry1/newinfo contains a 64bit ruleset that looks exactly as
+ * generated by 64bit userspace.
+ *
+ * Call standard translate_table() to validate all hook_entrys,
+ * underflows, check for loops, etc.
+ */
xt_compat_flush_offsets(AF_INET);
xt_compat_unlock(AF_INET);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
*pinfo = newinfo;
*pentry0 = entry1;
@@ -1782,17 +1645,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET);
- xt_compat_unlock(AF_INET);
- goto out;
}
static int
@@ -1829,10 +1691,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c6eb42100e9a..ea91058b5f6f 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
struct netdev_notifier_info info;
- netdev_notifier_info_init(&info, dev);
+ /* The masq_dev_notifier will catch the case of the device going
+ * down. So if the inetdev is dead and being destroyed we have
+ * no work to do. Otherwise this is an individual address removal
+ * and we have to perform the flush.
+ */
+ if (idev->dead)
+ return NOTIFY_DONE;
+
+ netdev_notifier_info_init(&info, idev->dev);
return masq_device_event(this, event, &info);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 05ff44b758df..f6ee0d561aab 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -745,8 +745,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 561cd4b8fc6e..c77aac75759d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -543,8 +543,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
goto out;
+ }
if (ipc.opt)
free = 1;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f45f2a12f37b..eb1d9839a257 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -125,6 +125,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
+static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
/*
* Interface to generic destination cache.
*/
@@ -753,7 +754,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
- 0, 0);
+ 0, jiffies + ip_rt_gc_timeout);
}
if (kill_route)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1538,6 +1539,36 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1592,11 +1623,20 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe)
+ if (fnhe) {
rth = rcu_dereference(fnhe->fnhe_rth_input);
- else
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
+ }
+ }
+
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+rt_cache:
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -1936,6 +1976,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
*/
if (fi && res->prefixlen < 4)
fi = NULL;
+ } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
+ (orig_oif != dev_out->ifindex)) {
+ /* For local routes that require a particular output interface
+ * we do not want to cache the result. Caching the result
+ * causes incorrect behaviour when there are multiple source
+ * addresses on the interface, the end result being that if the
+ * intended recipient is waiting on that interface for the
+ * packet he won't receive it because it will be delivered on
+ * the loopback interface and the IP_PKTINFO ipi_ifindex will
+ * be set to the loopback interface as well.
+ */
+ fi = NULL;
}
fnhe = NULL;
@@ -1945,19 +1997,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
- if (fnhe)
+ if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- else {
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
+ rth = rcu_dereference(*prth);
+ if (rth && rth->dst.expires &&
+ time_after(jiffies, rth->dst.expires)) {
+ ip_del_fnhe(nh, fl4->daddr);
+ fnhe = NULL;
+ } else {
+ goto rt_cache;
}
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
+
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
+
+rt_cache:
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
@@ -2504,7 +2566,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
}
#ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bb2ce74f6004..19d385a0f02d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <asm/unaligned.h>
#include <net/busy_poll.h>
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -921,7 +922,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+ if (!can_coalesce && i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1187,7 +1188,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i == MAX_SKB_FRAGS || !sg) {
+ if (i == sysctl_max_skb_frags || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -2603,6 +2604,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
+ u64 rate64;
u32 rate;
memset(info, 0, sizeof(*info));
@@ -2665,15 +2667,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
rate = READ_ONCE(sk->sk_pacing_rate);
- info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_pacing_rate);
rate = READ_ONCE(sk->sk_max_pacing_rate);
- info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ put_unaligned(rate64, &info->tcpi_max_pacing_rate);
do {
start = u64_stats_fetch_begin_irq(&tp->syncp);
- info->tcpi_bytes_acked = tp->bytes_acked;
- info->tcpi_bytes_received = tp->bytes_received;
+ put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
+ put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 06d3d665a9fd..3ce0b861cc0e 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -151,6 +151,27 @@ static void bictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
+static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+ if (event == CA_EVENT_TX_START) {
+ struct bictcp *ca = inet_csk_ca(sk);
+ u32 now = tcp_time_stamp;
+ s32 delta;
+
+ delta = now - tcp_sk(sk)->lsndtime;
+
+ /* We were application limited (idle) for a while.
+ * Shift epoch_start to keep cwnd growth to cubic curve.
+ */
+ if (ca->epoch_start && delta > 0) {
+ ca->epoch_start += delta;
+ if (after(ca->epoch_start, now))
+ ca->epoch_start = now;
+ }
+ return;
+ }
+}
+
/* calculate the cubic root of x using a table lookup followed by one
* Newton-Raphson iteration.
* Avg err ~= 0.195%
@@ -450,6 +471,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
.undo_cwnd = bictcp_undo_cwnd,
+ .cwnd_event = bictcp_cwnd_event,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
.name = "cubic",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 87463c814896..36b93adfd7da 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 100;
+int sysctl_tcp_challenge_ack_limit = 1000;
int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
@@ -3343,6 +3343,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
return flag;
}
+static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ u32 *last_oow_ack_time)
+{
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ NET_INC_STATS(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+ }
+
+ *last_oow_ack_time = tcp_time_stamp;
+
+ return false; /* not rate-limited: go ahead, send dupack now! */
+}
+
/* Return true if we're currently rate-limiting out-of-window ACKs and
* thus shouldn't send a dupack right now. We rate-limit dupacks in
* response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
@@ -3356,21 +3373,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
/* Data packets without SYNs are not likely part of an ACK loop. */
if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
!tcp_hdr(skb)->syn)
- goto not_rate_limited;
-
- if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
- NET_INC_STATS_BH(net, mib_idx);
- return true; /* rate-limited: don't send yet! */
- }
- }
-
- *last_oow_ack_time = tcp_time_stamp;
+ return false;
-not_rate_limited:
- return false; /* not rate-limited: go ahead, send dupack now! */
+ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
}
/* RFC 5961 7 [ACK Throttling] */
@@ -3380,21 +3385,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
- u32 now;
+ u32 count, now;
/* First check our per-socket dupack rate limit. */
- if (tcp_oow_rate_limited(sock_net(sk), skb,
- LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
- &tp->last_oow_ack_time))
+ if (__tcp_oow_rate_limited(sock_net(sk),
+ LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+ &tp->last_oow_ack_time))
return;
- /* Then check the check host-wide RFC 5961 rate limit. */
+ /* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
+ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+
challenge_timestamp = now;
- challenge_count = 0;
+ WRITE_ONCE(challenge_count, half +
+ prandom_u32_max(sysctl_tcp_challenge_ack_limit));
}
- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+ count = READ_ONCE(challenge_count);
+ if (count > 0) {
+ WRITE_ONCE(challenge_count, count - 1);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 88203e755af8..13b92d595138 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -705,7 +705,8 @@ release_sk1:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
@@ -720,7 +721,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
];
} rep;
struct ip_reply_arg arg;
- struct net *net = dev_net(skb_dst(skb)->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -782,7 +782,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(sock_net(sk), skb,
+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@@ -801,8 +802,10 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
+ tcp_v4_send_ack(sock_net(sk), skb, seq,
tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
tcp_time_stamp,
req->ts_recent,
@@ -1509,7 +1512,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
if (likely(sk->sk_rx_dst))
skb_dst_drop(skb);
else
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
@@ -1714,8 +1717,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
- dst_hold(dst);
+ if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index a51d63a43e33..9c840c5c6047 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -566,7 +566,7 @@ reset:
*/
if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt /= 8 * USEC_PER_MSEC;
+ crtt /= 8 * USEC_PER_SEC / HZ;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 17e7339ee5ca..fec2907b85e8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -163,9 +163,9 @@ kill_with_rst:
if (tcp_death_row.sysctl_tw_recycle &&
tcptw->tw_ts_recent_stamp &&
tcp_tw_remember_stamp(tw))
- inet_twsk_schedule(tw, tw->tw_timeout);
+ inet_twsk_reschedule(tw, tw->tw_timeout);
else
- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
@@ -203,7 +203,7 @@ kill:
return TCP_TW_SUCCESS;
}
}
- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -253,7 +253,7 @@ kill:
* Do not reschedule in the last case.
*/
if (paws_reject || th->ack)
- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
@@ -324,9 +324,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
} while (0);
#endif
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
-
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
timeo = rto;
@@ -340,6 +337,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
inet_twsk_schedule(tw, timeo);
+ /* Linkage updates. */
+ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 986440b24978..ae66c8426ad0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2627,8 +2627,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
skb_headroom(skb) >= 0xFFFF)) {
- struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
- GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ skb_mstamp_get(&skb->skb_mstamp);
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
} else {
@@ -3143,7 +3145,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int syn_loss = 0, space, err = 0, copied;
+ int syn_loss = 0, space, err = 0;
unsigned long last_syn_loss = 0;
struct sk_buff *syn_data;
@@ -3181,17 +3183,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
- copied = copy_from_iter(skb_put(syn_data, space), space,
- &fo->data->msg_iter);
- if (unlikely(!copied)) {
- kfree_skb(syn_data);
- goto fallback;
- }
- if (copied != space) {
- skb_trim(syn_data, copied);
- space = copied;
+ if (space) {
+ int copied = copy_from_iter(skb_put(syn_data, space), space,
+ &fo->data->msg_iter);
+ if (unlikely(!copied)) {
+ kfree_skb(syn_data);
+ goto fallback;
+ }
+ if (copied != space) {
+ skb_trim(syn_data, copied);
+ space = copied;
+ }
}
-
/* No more data pending in inet_wait_for_connect() */
if (space == fo->size)
fo->data = NULL;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 17d35662930d..3e6a472e6b88 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return tp->snd_cwnd - reduction;
+ return max_t(int, tp->snd_cwnd - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b8c5ba7d5f7..031752efe1ab 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -963,8 +963,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc,
sk->sk_family == AF_INET6);
- if (err)
+ if (unlikely(err)) {
+ kfree(ipc.opt);
return err;
+ }
if (ipc.opt)
free = 1;
connected = 0;
@@ -1977,10 +1979,14 @@ void udp_v4_early_demux(struct sk_buff *skb)
if (!in_dev)
return;
- ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
- iph->protocol);
- if (!ours)
- return;
+ /* we are supposed to accept bcast packets */
+ if (skb->pkt_type == PACKET_MULTICAST) {
+ ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+ iph->protocol);
+ if (!ours)
+ return;
+ }
+
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
} else if (skb->pkt_type == PACKET_HOST) {
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6bb98cc193c9..7b534ac04056 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -90,6 +90,8 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
uh->source = src_port;
uh->len = htons(skb->len);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
udp_set_csum(nocheck, skb, src, dst, skb->len);
return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index bff69746e05f..78526087126d 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -230,7 +230,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm4_dst_ops = {
+static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
@@ -244,7 +244,7 @@ static struct dst_ops xfrm4_dst_ops = {
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
- .dst_ops = &xfrm4_dst_ops,
+ .dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.decode_session = _decode_session4,
@@ -266,7 +266,7 @@ static struct ctl_table xfrm4_policy_table[] = {
{ }
};
-static int __net_init xfrm4_net_init(struct net *net)
+static int __net_init xfrm4_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -294,7 +294,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm4_net_exit(struct net *net)
+static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -306,12 +306,44 @@ static void __net_exit xfrm4_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm4_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm4_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm4_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
+ sizeof(xfrm4_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm4_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm4_net_exit(struct net *net)
+{
+ xfrm4_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+}
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
};
-#endif
static void __init xfrm4_policy_init(void)
{
@@ -320,13 +352,9 @@ static void __init xfrm4_policy_init(void)
void __init xfrm4_init(void)
{
- dst_entries_init(&xfrm4_dst_ops);
-
xfrm4_state_init();
xfrm4_policy_init();
xfrm4_protocol_init();
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
-#endif
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd3aa6148dd1..f555f4fc1d62 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_from_local = 0,
+ .accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -236,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_from_local = 0,
+ .accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -343,6 +345,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
setup_timer(&ndev->rs_timer, addrconf_rs_timer,
(unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+
+ if (ndev->cnf.stable_secret.initialized)
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ else
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64;
+
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -561,7 +569,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@ -2384,7 +2392,7 @@ ok:
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (in6_dev->cnf.optimistic_dad &&
!net->ipv6.devconf_all->forwarding && sllao)
- addr_flags = IFA_F_OPTIMISTIC;
+ addr_flags |= IFA_F_OPTIMISTIC;
#endif
/* Do not allow to create too much of autoconfigured
@@ -3415,6 +3423,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
+ bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@ -3460,7 +3469,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ notify = true;
}
}
@@ -3468,6 +3477,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
out:
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
+ if (notify)
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -4559,6 +4570,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
@@ -5261,13 +5273,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
goto out;
}
- if (!write) {
- err = snprintf(str, sizeof(str), "%pI6",
- &secret->secret);
- if (err >= sizeof(str)) {
- err = -EIO;
- goto out;
- }
+ err = snprintf(str, sizeof(str), "%pI6", &secret->secret);
+ if (err >= sizeof(str)) {
+ err = -EIO;
+ goto out;
}
err = proc_dostring(&lctl, write, buffer, lenp, ppos);
@@ -5455,6 +5464,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_hop_limit",
+ .data = &ipv6_devconf.accept_ra_min_hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 882124ebb438..a8f6986dcbe5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && ip6addrlbl_hold(p))
+ if (p && !ip6addrlbl_hold(p))
p = NULL;
lseq = ip6addrlbl_table.seq;
rcu_read_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2d044d2a2ccf..bad62fa5e70f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 13ca4cf5616f..8e6cb3f14326 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,6 +162,9 @@ ipv4_connected:
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e59da5..9508a20fbf61 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
*fragoff = _frag_off;
return hp->nexthdr;
}
- return -ENOENT;
+ if (!found)
+ return -ENOENT;
+ if (fragoff)
+ *fragoff = _frag_off;
+ break;
}
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index d491125011c4..db939e4ac68a 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference(*sflp)) != NULL;
+ (sfl = rcu_dereference_protected(*sflp,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = rcu_dereference(sfl->next);
+ *sflp = sfl->next;
spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
kfree_rcu(sfl, rcu);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 69f4f689f06a..b1311da5d7b8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -783,6 +783,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
__u32 mtu;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
@@ -1553,13 +1555,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return -EEXIST;
} else {
t = nt;
-
- ip6gre_tunnel_unlink(ign, t);
- ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6gre_tunnel_link(ign, t);
- netdev_state_change(dev);
}
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bc09cb97b840..06bf4010d3ed 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -376,6 +376,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
+ if (unlikely(skb->sk))
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -885,6 +888,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct rt6_info *rt;
#endif
int err;
+ int flags = 0;
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
@@ -916,10 +920,13 @@ static int ip6_dst_lookup_tail(struct sock *sk,
dst_release(*dst);
*dst = NULL;
}
+
+ if (fl6->flowi6_oif)
+ flags |= RT6_LOOKUP_F_IFACE;
}
if (!*dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output_flags(net, sk, fl6, flags);
err = (*dst)->error;
if (err)
@@ -1329,7 +1336,7 @@ emsgsize:
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen,
transhdrlen, mtu, flags, rt);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5cafd92c2312..c7c2c33aa4af 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -284,12 +284,12 @@ static int ip6_tnl_create2(struct net_device *dev)
t = netdev_priv(dev);
+ dev->rtnl_link_ops = &ip6_link_ops;
err = register_netdevice(dev);
if (err < 0)
goto out;
strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &ip6_link_ops;
dev_hold(dev);
ip6_tnl_link(ip6n, t);
@@ -1124,6 +1124,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
tproto = ACCESS_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 41e3b5ee8d0b..9a63110b6548 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
return NULL;
skb->priority = TC_PRIO_CONTROL;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 96f153c0846b..abb0bdda759a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1225,18 +1225,16 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt)
rt6_set_expires(rt, jiffies + (HZ * lifetime));
- if (ra_msg->icmph.icmp6_hop_limit) {
- /* Only set hop_limit on the interface if it is higher than
- * the current hop_limit.
- */
- if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) {
+ if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
+ ra_msg->icmph.icmp6_hop_limit) {
+ if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (rt)
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
} else {
- ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n");
+ ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
}
- if (rt)
- dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
- ra_msg->icmph.icmp6_hop_limit);
}
skip_defrtr:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62f5b0d0bc9b..98e99fa833f1 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset)
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
-static inline bool unconditional(const struct ip6t_ip6 *ipv6)
+static inline bool unconditional(const struct ip6t_entry *e)
{
static const struct ip6t_ip6 uncond;
- return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
+ return e->target_offset == sizeof(struct ip6t_entry) &&
+ memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0;
}
static inline const struct xt_entry_target *
@@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
} else if (s == e) {
(*rulenum)++;
- if (s->target_offset == sizeof(struct ip6t_entry) &&
+ if (unconditional(s) &&
strcmp(t->target.u.kernel.target->name,
XT_STANDARD_TARGET) == 0 &&
- t->verdict < 0 &&
- unconditional(&s->ipv6)) {
+ t->verdict < 0) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
? comments[NF_IP6_TRACE_COMMENT_POLICY]
@@ -455,7 +455,8 @@ ip6t_do_table(struct sk_buff *skb,
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -484,11 +485,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct ip6t_entry) &&
+ if ((unconditional(e) &&
(strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < 0 &&
- unconditional(&e->ipv6)) || visited) {
+ t->verdict < 0) || visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
@@ -529,6 +529,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ip6t_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -547,9 +549,16 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct ip6t_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ip6t_entry *)
(entry0 + newpos);
@@ -576,27 +585,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
module_put(par.match->me);
}
-static int
-check_entry(const struct ip6t_entry *e, const char *name)
-{
- const struct xt_entry_target *t;
-
- if (!ip6_checkentry(&e->ipv6)) {
- duprintf("ip_tables: ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ip6t_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ip6t_ip6 *ipv6 = par->entryinfo;
@@ -675,10 +663,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- ret = check_entry(e, name);
- if (ret)
- return ret;
-
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -722,7 +706,7 @@ static bool check_underflow(const struct ip6t_entry *e)
const struct xt_entry_target *t;
unsigned int verdict;
- if (!unconditional(&e->ipv6))
+ if (!unconditional(e))
return false;
t = ip6t_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -742,9 +726,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
unsigned int valid_hooks)
{
unsigned int h;
+ int err;
if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
- (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
@@ -756,6 +742,14 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
return -EINVAL;
}
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
+ if (err)
+ return err;
+
/* Check hooks & underflows */
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if (!(valid_hooks & (1 << h)))
@@ -764,9 +758,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
if (!check_underflow(e)) {
- pr_err("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ pr_debug("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
return -EINVAL;
}
newinfo->underflow[h] = underflows[h];
@@ -806,6 +800,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -819,6 +814,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -828,17 +826,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -849,17 +850,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -886,6 +890,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
@@ -1319,56 +1326,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
const void *loc_cpu_entry;
struct ip6t_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
-
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
- t = xt_find_table_lock(net, AF_INET6, name);
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
+ t = xt_find_table_lock(net, AF_INET6, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1377,7 +1345,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1457,7 +1425,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ip6t_ip6 *ipv6,
unsigned int hookmask,
int *size)
@@ -1493,21 +1460,19 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
- (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1519,8 +1484,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ip6t_entry *)e, name);
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1528,8 +1496,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ipv6, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, &e->ipv6, e->comefrom,
+ &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1552,17 +1520,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1576,18 +1533,17 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct ip6t_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ip6t_entry *)*dstptr;
memcpy(de, e, sizeof(struct ip6t_entry));
@@ -1596,11 +1552,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
*dstptr += sizeof(struct ip6t_entry);
*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ip6t_get_target(e);
xt_compat_target_from_user(t, dstptr, size);
@@ -1612,181 +1566,82 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int compat_check_entry(struct ip6t_entry *e, struct net *net,
- const char *name)
-{
- unsigned int j;
- int ret = 0;
- struct xt_mtchk_param mtpar;
- struct xt_entry_match *ematch;
-
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ipv6;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV6;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ip6t_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ip6t_entry *iter0;
- struct ip6t_entry *iter1;
+ struct ip6t_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, number);
+ xt_compat_init_offsets(AF_INET6, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone. */
xt_compat_flush_offsets(AF_INET6);
xt_compat_unlock(AF_INET6);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
*pinfo = newinfo;
*pentry0 = entry1;
@@ -1795,17 +1650,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET6);
- xt_compat_unlock(AF_INET6);
- goto out;
}
static int
@@ -1842,10 +1696,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f371fefa7fdc..fe70bd6a7516 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1030,11 +1030,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6, int flags)
{
- int flags = 0;
-
fl6->flowi6_iif = LOOPBACK_IFINDEX;
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
@@ -1047,7 +1045,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cfb27f56c62f..ac6c40d08ac5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
+ if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
if (rt->rt6i_node)
@@ -466,8 +465,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
skb_set_queue_mapping(skb, queue_mapping);
+ rcu_read_lock();
err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -1690,7 +1691,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7333f3575fc5..1173557ea551 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -834,8 +834,8 @@ start_lookup:
flush_stack(stack, count, skb, count - 1);
} else {
if (!inner_flushed)
- UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
+ UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
consume_skb(skb);
}
return 0;
@@ -913,11 +913,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
+ /* a return value > 0 means to resubmit the input */
if (ret > 0)
- return -ret;
+ return ret;
return 0;
}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 901ef6f8addc..5266ad2d6419 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -24,7 +24,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
- IP6_ECN_set_ce(inner_iph);
+ IP6_ECN_set_ce(skb, inner_iph);
}
/* Add encapsulation header.
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f337a908a76a..4fb94f6ee15b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -289,7 +289,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm6_dst_ops = {
+static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
@@ -303,7 +303,7 @@ static struct dst_ops xfrm6_dst_ops = {
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
- .dst_ops = &xfrm6_dst_ops,
+ .dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
@@ -336,7 +336,7 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static int __net_init xfrm6_net_init(struct net *net)
+static int __net_init xfrm6_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -364,7 +364,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm6_net_exit(struct net *net)
+static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -376,24 +376,52 @@ static void __net_exit xfrm6_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm6_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm6_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
+ sizeof(xfrm6_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm6_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ xfrm6_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+}
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
};
-#endif
int __init xfrm6_init(void)
{
int ret;
- dst_entries_init(&xfrm6_dst_ops);
-
ret = xfrm6_policy_init();
- if (ret) {
- dst_entries_destroy(&xfrm6_dst_ops);
+ if (ret)
goto out;
- }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
@@ -402,9 +430,7 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
-#endif
out:
return ret;
out_state:
@@ -416,11 +442,8 @@ out_policy:
void xfrm6_fini(void)
{
-#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
-#endif
xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
- dst_entries_destroy(&xfrm6_dst_ops);
}
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ee0ea25c8e7a..eca46d3d3ff3 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
}
/* Check if we have opened a local TSAP */
- if (!self->tsap)
- irda_open_tsap(self, LSAP_ANY, addr->sir_name);
+ if (!self->tsap) {
+ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
+ if (err)
+ goto out;
+ }
/* Move to connecting socket, start sending Connect Requests */
sock->state = SS_CONNECTING;
@@ -1086,6 +1089,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
struct irda_sock *self;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (net != &init_net)
return -EAFNOSUPPORT;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6daa52a18d40..123f6f9f854c 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -709,6 +709,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!addr || addr->sa_family != AF_IUCV)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_iucv))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != IUCV_OPEN) {
err = -EBADFD;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 79649937ec71..44ee0683b14b 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
@@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+ ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0ce9da948ad7..36f8fa223a78 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
@@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+ ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 9e13c2ff8789..fe92a08b3cd5 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family,
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family,
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, session, cmd);
- if (ret >= 0)
- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ /* We don't care if no one is listening */
+ if (ret == -ESRCH)
+ ret = 0;
+ return ret;
+ }
nlmsg_free(msg);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 17a8dff06090..c58f242c00f1 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
struct llc_pktinfo info;
+ memset(&info, 0, sizeof(info));
info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
llc_pdu_decode_dsap(skb, &info.lpi_sap);
llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 5c564a68fb50..d71edcbd0c58 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -289,7 +289,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
}
/* prepare A-MPDU MLME for Rx aggregation */
- tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+ tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
if (!tid_agg_rx)
goto end;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f06d42267306..89ed0206882c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -907,7 +907,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
/* free all potentially still buffered bcast frames */
local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
- skb_queue_purge(&sdata->u.ap.ps.bc_buf);
+ ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
mutex_lock(&local->mtx);
ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a9c9d961f039..6f1f3bdddea2 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -7,6 +7,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1479,14 +1480,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
- num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
- &ifibss->chandef,
- channels,
- ARRAY_SIZE(channels));
scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
- ieee80211_request_ibss_scan(sdata, ifibss->ssid,
- ifibss->ssid_len, channels, num,
- scan_width);
+
+ if (ifibss->fixed_channel) {
+ num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
+ &ifibss->chandef,
+ channels,
+ ARRAY_SIZE(channels));
+ ieee80211_request_ibss_scan(sdata, ifibss->ssid,
+ ifibss->ssid_len, channels,
+ num, scan_width);
+ } else {
+ ieee80211_request_ibss_scan(sdata, ifibss->ssid,
+ ifibss->ssid_len, NULL,
+ 0, scan_width);
+ }
} else {
int interval = IEEE80211_SCAN_INTERVAL;
@@ -1727,7 +1735,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
continue;
sdata->u.ibss.last_scan_completed = jiffies;
- ieee80211_queue_work(&local->hw, &sdata->work);
}
mutex_unlock(&local->iflist_mtx);
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c0a9187bc3a9..cdf8609a6240 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -90,7 +90,7 @@ struct ieee80211_fragment_entry {
unsigned int last_frag;
unsigned int extra_len;
struct sk_buff_head skb_list;
- int ccmp; /* Whether fragments were encrypted with CCMP */
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
};
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 84cef600c573..6e89ab8eac44 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -980,7 +980,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.txq) {
struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+ spin_lock_bh(&txqi->queue.lock);
ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
+ spin_unlock_bh(&txqi->queue.lock);
+
atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 817098add1d6..c5af4e3d4497 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta)
del_timer_sync(&sta->plink_timer);
}
+ /* make sure no readers can access nexthop sta from here on */
+ mesh_path_flush_by_nexthop(sta);
+ synchronize_net();
+
if (changed)
ieee80211_mbss_info_change_notify(sdata, changed);
}
@@ -1299,17 +1303,6 @@ out:
sdata_unlock(sdata);
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- ieee80211_sdata_running(sdata))
- ieee80211_queue_work(&local->hw, &sdata->work);
- rcu_read_unlock();
-}
void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 50c8473cf9dc..472bdc73e950 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -358,14 +358,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
void ieee80211s_stop(void);
#else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
{ return false; }
static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a93906103f8b..844825829992 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4002,8 +4002,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
if (!(flags & IEEE80211_HW_CONNECTION_MONITOR))
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.monitor_work);
- /* and do all the other regular work too */
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1034c8..b54f398cda5d 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -711,7 +711,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
* computing cur_tp
*/
tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 7430a1df2ab1..1ec889dc2e46 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -691,7 +691,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
- ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+ ieee80211_start_tx_ba_session(pubsta, tid, 0);
}
static void
@@ -1328,7 +1328,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
prob = mi->groups[i].rates[j].prob_ewma;
/* convert tp_avg from pkt per second in kbps */
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
+ tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
return tp_avg;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5793f75c5ffd..3073164a6fcf 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1725,7 +1725,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
entry->seq = seq;
entry->rx_queue = rx_queue;
entry->last_frag = frag;
- entry->ccmp = 0;
+ entry->check_sequential_pn = false;
entry->extra_len = 0;
return entry;
@@ -1821,15 +1821,27 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rx->seqno_idx, &(rx->skb));
if (rx->key &&
(rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) &&
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
ieee80211_has_protected(fc)) {
int queue = rx->security_idx;
- /* Store CCMP PN so that we can verify that the next
- * fragment has a sequential PN value. */
- entry->ccmp = 1;
+
+ /* Store CCMP/GCMP PN so that we can verify that the
+ * next fragment has a sequential PN value.
+ */
+ entry->check_sequential_pn = true;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
+ BUILD_BUG_ON(offsetof(struct ieee80211_key,
+ u.ccmp.rx_pn) !=
+ offsetof(struct ieee80211_key,
+ u.gcmp.rx_pn));
+ BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
+ sizeof(rx->key->u.gcmp.rx_pn[queue]));
+ BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
+ IEEE80211_GCMP_PN_LEN);
}
return RX_QUEUED;
}
@@ -1844,15 +1856,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- /* Verify that MPDUs within one MSDU have sequential PN values.
- * (IEEE 802.11i, 8.3.3.4.5) */
- if (entry->ccmp) {
+ /* "The receiver shall discard MSDUs and MMPDUs whose constituent
+ * MPDU PN values are not incrementing in steps of 1."
+ * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
+ * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
+ */
+ if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
int queue;
+
if (!rx->key ||
(rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256))
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
return RX_DROP_UNUSABLE;
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -2209,7 +2227,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u16 q, hdrlen;
+ u16 ac, q, hdrlen;
hdr = (struct ieee80211_hdr *) skb->data;
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2279,7 +2297,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- q = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
@@ -3359,6 +3378,7 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
return false;
/* ignore action frames to TDLS-peers */
if (ieee80211_is_action(hdr->frame_control) &&
+ !is_broadcast_ether_addr(bssid) &&
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 7bb6a9383f58..ee9351affa5b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -310,6 +310,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
bool was_scanning = local->scanning;
struct cfg80211_scan_request *scan_req;
struct ieee80211_sub_if_data *scan_sdata;
+ struct ieee80211_sub_if_data *sdata;
lockdep_assert_held(&local->mtx);
@@ -369,7 +370,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
ieee80211_mlme_notify_scan_completed(local);
ieee80211_ibss_notify_scan_completed(local);
- ieee80211_mesh_notify_scan_completed(local);
+
+ /* Requeue all the work that might have been ignored while
+ * the scan was in progress; if there was none this will
+ * just be a no-op for the particular interface.
+ */
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ }
+
if (was_scanning)
ieee80211_start_next_roc(local);
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2880f2ae99ab..bcdbda289d75 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -252,11 +252,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
}
/* Caller must hold local->sta_mtx */
-static void sta_info_hash_add(struct ieee80211_local *local,
- struct sta_info *sta)
+static int sta_info_hash_add(struct ieee80211_local *local,
+ struct sta_info *sta)
{
- rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
- sta_rht_params);
+ return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
+ sta_rht_params);
}
static void sta_deliver_ps_frames(struct work_struct *wk)
@@ -472,11 +472,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
{
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct station_info sinfo;
+ struct station_info *sinfo;
int err = 0;
lockdep_assert_held(&local->sta_mtx);
+ sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
+ if (!sinfo) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
/* check if STA exists already */
if (sta_info_get_bss(sdata, sta->sta.addr)) {
err = -EEXIST;
@@ -491,7 +497,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
/* make the station visible */
- sta_info_hash_add(local, sta);
+ err = sta_info_hash_add(local, sta);
+ if (err)
+ goto out_drop_sta;
list_add_tail_rcu(&sta->list, &local->sta_list);
@@ -508,10 +516,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
ieee80211_sta_debugfs_add(sta);
rate_control_add_sta_debugfs(sta);
- memset(&sinfo, 0, sizeof(sinfo));
- sinfo.filled = 0;
- sinfo.generation = local->sta_generation;
- cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
+ sinfo->generation = local->sta_generation;
+ cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
+ kfree(sinfo);
sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr);
@@ -526,6 +533,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
out_remove:
sta_info_hash_del(local, sta);
list_del_rcu(&sta->list);
+ out_drop_sta:
local->num_sta--;
synchronize_net();
__cleanup_single_sta(sta);
@@ -873,7 +881,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
{
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct station_info sinfo = {};
+ struct station_info *sinfo;
int ret;
/*
@@ -911,8 +919,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
- sta_set_sinfo(sta, &sinfo);
- cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
+ sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+ if (sinfo)
+ sta_set_sinfo(sta, sinfo);
+ cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
+ kfree(sinfo);
rate_control_remove_sta_debugfs(sta);
ieee80211_sta_debugfs_remove(sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5787f15a3a12..8dbdbaea70fd 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -355,7 +355,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
skb = skb_dequeue(&ps->bc_buf);
if (skb) {
purged++;
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
}
total += skb_queue_len(&ps->bc_buf);
}
@@ -438,7 +438,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
ps_dbg(tx->sdata,
"BC TX buffer full - dropping the oldest frame\n");
- dev_kfree_skb(skb_dequeue(&ps->bc_buf));
+ ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
} else
tx->local->total_ps_buffered++;
@@ -3247,7 +3247,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
break;
- dev_kfree_skb_any(skb);
+ ieee80211_free_txskb(hw, skb);
}
info = IEEE80211_SKB_CB(skb);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 38fbc194b9cb..a26bd6532829 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1689,15 +1689,34 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
cp = pp->conn_in_get(af, skb, &iph, 0);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs &&
- is_new_conn(skb, &iph) && cp &&
- ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight))) ||
- unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
- if (!atomic_read(&cp->n_control))
- ip_vs_conn_expire_now(cp);
- __ip_vs_conn_put(cp);
- cp = NULL;
+ if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ bool uses_ct = false, resched = false;
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) {
+ resched = true;
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ if (!atomic_read(&cp->n_control)) {
+ resched = true;
+ } else {
+ /* Do not reschedule controlling connection
+ * that uses conntrack while it is still
+ * referenced by controlled connection(s).
+ */
+ resched = !uses_ct;
+ }
+ }
+
+ if (resched) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (uses_ct)
+ return NF_DROP;
+ cp = NULL;
+ }
}
if (unlikely(!cp) && !iph.fragoffs) {
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index bed5f7042529..bb318e4623a3 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
- if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+ if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
return -EINVAL;
/* N.B: pe_data is only set on success,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 13fad8668f83..bc3f791845aa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1735,6 +1735,7 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
+ static atomic64_t unique_id;
int ret = -ENOMEM;
int cpu;
@@ -1758,7 +1759,8 @@ int nf_conntrack_init_net(struct net *net)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+ net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
+ (u64)atomic64_inc_return(&unique_id));
if (!net->ct.slabname)
goto err_slabname;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 51a459c3c649..703fc9ba6f20 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -418,6 +418,47 @@ int xt_check_match(struct xt_mtchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_match);
+/** xt_check_entry_match - check that matches end before start of target
+ *
+ * @match: beginning of xt_entry_match
+ * @target: beginning of this rules target (alleged end of matches)
+ * @alignment: alignment requirement of match structures
+ *
+ * Validates that all matches add up to the beginning of the target,
+ * and that each match covers at least the base structure size.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int xt_check_entry_match(const char *match, const char *target,
+ const size_t alignment)
+{
+ const struct xt_entry_match *pos;
+ int length = target - match;
+
+ if (length == 0) /* no matches */
+ return 0;
+
+ pos = (struct xt_entry_match *)match;
+ do {
+ if ((unsigned long)pos % alignment)
+ return -EINVAL;
+
+ if (length < (int)sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size < sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size > length)
+ return -EINVAL;
+
+ length -= pos->u.match_size;
+ pos = ((void *)((char *)(pos) + (pos)->u.match_size));
+ } while (length > 0);
+
+ return 0;
+}
+
#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
@@ -487,13 +528,14 @@ int xt_compat_match_offset(const struct xt_match *match)
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);
-int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
- unsigned int *size)
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
int pad, off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
+ char name[sizeof(m->u.user.name)];
m = *dstptr;
memcpy(m, cm, sizeof(*cm));
@@ -507,10 +549,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
msize += off;
m->u.user.match_size = msize;
+ strlcpy(name, match->name, sizeof(name));
+ module_put(match->me);
+ strncpy(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
- return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
@@ -541,8 +585,175 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+
+/* non-compat version may have padding after verdict */
+struct compat_xt_standard_target {
+ struct compat_xt_entry_target t;
+ compat_uint_t verdict;
+};
+
+int xt_compat_check_entry_offsets(const void *base, const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct compat_xt_entry_target *t;
+ const char *e = base;
+
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ /* compat_xt_entry match has less strict aligment requirements,
+ * otherwise they are identical. In case of padding differences
+ * we need to add compat version of xt_check_entry_match.
+ */
+ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct compat_xt_entry_match));
+}
+EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_COMPAT */
+/**
+ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
+ *
+ * @base: pointer to arp/ip/ip6t_entry
+ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
+ * @target_offset: the arp/ip/ip6_t->target_offset
+ * @next_offset: the arp/ip/ip6_t->next_offset
+ *
+ * validates that target_offset and next_offset are sane and that all
+ * match sizes (if any) align with the target offset.
+ *
+ * This function does not validate the targets or matches themselves, it
+ * only tests that all the offsets and sizes are correct, that all
+ * match structures are aligned, and that the last structure ends where
+ * the target structure begins.
+ *
+ * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ *
+ * The arp/ip/ip6t_entry structure @base must have passed following tests:
+ * - it must point to a valid memory location
+ * - base to base + next_offset must be accessible, i.e. not exceed allocated
+ * length.
+ *
+ * A well-formed entry looks like this:
+ *
+ * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
+ * e->elems[]-----' | |
+ * matchsize | |
+ * matchsize | |
+ * | |
+ * target_offset---------------------------------' |
+ * next_offset---------------------------------------------------'
+ *
+ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
+ * This is where matches (if any) and the target reside.
+ * target_offset: beginning of target.
+ * next_offset: start of the next rule; also: size of this rule.
+ * Since targets have a minimum size, target_offset + minlen <= next_offset.
+ *
+ * Every match stores its size, sum of sizes must not exceed target_offset.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_entry_offsets(const void *base,
+ const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct xt_entry_target *t;
+ const char *e = base;
+
+ /* target start is within the ip/ip6/arpt_entry struct */
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct xt_entry_match));
+}
+EXPORT_SYMBOL(xt_check_entry_offsets);
+
+/**
+ * xt_alloc_entry_offsets - allocate array to store rule head offsets
+ *
+ * @size: number of entries
+ *
+ * Return: NULL or kmalloc'd or vmalloc'd array
+ */
+unsigned int *xt_alloc_entry_offsets(unsigned int size)
+{
+ unsigned int *off;
+
+ off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN);
+
+ if (off)
+ return off;
+
+ if (size < (SIZE_MAX / sizeof(unsigned int)))
+ off = vmalloc(size * sizeof(unsigned int));
+
+ return off;
+}
+EXPORT_SYMBOL(xt_alloc_entry_offsets);
+
+/**
+ * xt_find_jump_offset - check if target is a valid jump offset
+ *
+ * @offsets: array containing all valid rule start offsets of a rule blob
+ * @target: the jump target to search for
+ * @size: entries in @offset
+ */
+bool xt_find_jump_offset(const unsigned int *offsets,
+ unsigned int target, unsigned int size)
+{
+ int m, low = 0, hi = size;
+
+ while (hi > low) {
+ m = (low + hi) / 2u;
+
+ if (offsets[m] > target)
+ hi = m;
+ else if (offsets[m] < target)
+ low = m + 1;
+ else
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(xt_find_jump_offset);
+
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -593,6 +804,80 @@ int xt_check_target(struct xt_tgchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_target);
+/**
+ * xt_copy_counters_from_user - copy counters and metadata from userspace
+ *
+ * @user: src pointer to userspace memory
+ * @len: alleged size of userspace memory
+ * @info: where to store the xt_counters_info metadata
+ * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
+ *
+ * Copies counter meta data from @user and stores it in @info.
+ *
+ * vmallocs memory to hold the counters, then copies the counter data
+ * from @user to the new memory and returns a pointer to it.
+ *
+ * If @compat is true, @info gets converted automatically to the 64bit
+ * representation.
+ *
+ * The metadata associated with the counters is stored in @info.
+ *
+ * Return: returns pointer that caller has to test via IS_ERR().
+ * If IS_ERR is false, caller has to vfree the pointer.
+ */
+void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
+ struct xt_counters_info *info, bool compat)
+{
+ void *mem;
+ u64 size;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ /* structures only differ in size due to alignment */
+ struct compat_xt_counters_info compat_tmp;
+
+ if (len <= sizeof(compat_tmp))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(compat_tmp);
+ if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ info->num_counters = compat_tmp.num_counters;
+ user += sizeof(compat_tmp);
+ } else
+#endif
+ {
+ if (len <= sizeof(*info))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(*info);
+ if (copy_from_user(info, user, sizeof(*info)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ info->name[sizeof(info->name) - 1] = '\0';
+ user += sizeof(*info);
+ }
+
+ size = sizeof(struct xt_counters);
+ size *= info->num_counters;
+
+ if (size != (u64)len)
+ return ERR_PTR(-EINVAL);
+
+ mem = vmalloc(len);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(mem, user, len) == 0)
+ return mem;
+
+ vfree(mem);
+ return ERR_PTR(-EFAULT);
+}
+EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
+
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
@@ -608,6 +893,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
int pad, off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
+ char name[sizeof(t->u.user.name)];
t = *dstptr;
memcpy(t, ct, sizeof(*ct));
@@ -621,6 +907,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
tsize += off;
t->u.user.target_size = tsize;
+ strlcpy(name, target->name, sizeof(name));
+ module_put(target->me);
+ strncpy(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 28cddc85b700..bfa2b6d5b5cf 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -824,7 +824,11 @@ socket_setattr_return:
*/
void netlbl_sock_delattr(struct sock *sk)
{
- cipso_v4_sock_delattr(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
+ cipso_v4_sock_delattr(sk);
+ break;
+ }
}
/**
@@ -987,7 +991,11 @@ req_setattr_return:
*/
void netlbl_req_delattr(struct request_sock *req)
{
- cipso_v4_req_delattr(req);
+ switch (req->rsk_ops->family) {
+ case AF_INET:
+ cipso_v4_req_delattr(req);
+ break;
+ }
}
/**
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0d6038c87bef..dbc32b19c574 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1290,7 +1290,7 @@ static int netlink_release(struct socket *sock)
skb_queue_purge(&sk->sk_write_queue);
- if (nlk->portid) {
+ if (nlk->portid && nlk->bound) {
struct netlink_notify n = {
.net = sock_net(sk),
.protocol = sk->sk_protocol,
@@ -2683,6 +2683,7 @@ static int netlink_dump(struct sock *sk)
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
+ struct module *module;
int len, err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2762,9 +2763,11 @@ static int netlink_dump(struct sock *sk)
cb->done(cb);
nlk->cb_running = false;
+ module = cb->module;
+ skb = cb->skb;
mutex_unlock(nlk->cb_mutex);
- module_put(cb->module);
- consume_skb(cb->skb);
+ module_put(module);
+ consume_skb(skb);
return 0;
errout_skb:
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b491c1c296fe..9920f7502f6d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -441,7 +441,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
- set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+ set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
true);
memcpy(&flow_key->ipv6.addr.src, masked,
sizeof(flow_key->ipv6.addr.src));
@@ -463,7 +463,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
NULL, &flags)
!= NEXTHDR_ROUTING);
- set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+ set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
recalc_csum);
memcpy(&flow_key->ipv6.addr.dst, masked,
sizeof(flow_key->ipv6.addr.dst));
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 27e14962b504..b3fe02a2339e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -337,12 +337,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
unsigned short gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
- struct ovs_skb_cb ovs_cb;
int err;
- ovs_cb = *OVS_CB(skb);
+ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
- *OVS_CB(skb) = ovs_cb;
if (IS_ERR(segs))
return PTR_ERR(segs);
if (segs == NULL)
@@ -360,7 +358,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
/* Queue all of the segments. */
skb = segs;
do {
- *OVS_CB(skb) = ovs_cb;
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ebc39e66d704..a3654d929814 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1699,6 +1699,10 @@ retry:
goto retry;
}
+ if (!dev_validate_header(dev, skb->data, len)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
err = -EMSGSIZE;
@@ -2109,18 +2113,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
-static bool ll_header_truncated(const struct net_device *dev, int len)
-{
- /* net device doesn't like empty head */
- if (unlikely(len <= dev->hard_header_len)) {
- net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n",
- current->comm, len, dev->hard_header_len);
- return true;
- }
-
- return false;
-}
-
static void tpacket_set_protocol(const struct net_device *dev,
struct sk_buff *skb)
{
@@ -2203,19 +2195,19 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
if (unlikely(err < 0))
return -EINVAL;
} else if (dev->hard_header_len) {
- if (ll_header_truncated(dev, tp_len))
- return -EINVAL;
+ int hdrlen = min_t(int, dev->hard_header_len, tp_len);
skb_push(skb, dev->hard_header_len);
- err = skb_store_bits(skb, 0, data,
- dev->hard_header_len);
+ err = skb_store_bits(skb, 0, data, hdrlen);
if (unlikely(err))
return err;
+ if (!dev_validate_header(dev, skb->data, hdrlen))
+ return -EINVAL;
if (!skb->protocol)
tpacket_set_protocol(dev, skb);
- data += dev->hard_header_len;
- to_write -= dev->hard_header_len;
+ data += hdrlen;
+ to_write -= hdrlen;
}
offset = offset_in_page(data);
@@ -2538,9 +2530,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
if (unlikely(offset < 0))
goto out_free;
- } else {
- if (ll_header_truncated(dev, len))
- goto out_free;
}
/* Returns -EFAULT on error */
@@ -2548,6 +2537,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (err)
goto out_free;
+ if (sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
@@ -3212,6 +3207,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
i->ifindex = mreq->mr_ifindex;
i->alen = mreq->mr_alen;
memcpy(i->addr, mreq->mr_address, i->alen);
+ memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
i->count = 1;
i->next = po->mclist;
po->mclist = i;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 32ab87d34828..11d0b29ce4b8 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
struct sockaddr_pn sa;
u16 len;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
/* check we have at least a full Phonet header */
if (!pskb_pull(skb, sizeof(struct phonethdr)))
goto out;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index fa7cd792791c..a97bb7332607 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1081,17 +1081,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
return res;
}
-static bool rfkill_readable(struct rfkill_data *data)
-{
- bool r;
-
- mutex_lock(&data->mtx);
- r = !list_empty(&data->events);
- mutex_unlock(&data->mtx);
-
- return r;
-}
-
static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1108,8 +1097,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
goto out;
}
mutex_unlock(&data->mtx);
+ /* since we re-check and it just compares pointers,
+ * using !list_empty() without locking isn't a problem
+ */
ret = wait_event_interruptible(data->read_wait,
- rfkill_readable(data));
+ !list_empty(&data->events));
mutex_lock(&data->mtx);
if (ret)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d4b6f3682c14..c244a49ae4ac 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return 0;
}
-void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
+ unsigned int len)
{
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
int drops;
- if (n == 0)
+ if (n == 0 && len == 0)
return;
drops = max_t(int, n, 0);
rcu_read_lock();
@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.backlog -= len;
__qdisc_qstats_drop(sch, drops);
}
rcu_read_unlock();
}
-EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
+EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
@@ -950,7 +952,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
}
lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
if (!netif_is_multiqueue(dev))
- sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+ sch->flags |= TCQ_F_ONETXQUEUE;
}
sch->handle = handle;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beeb75f80fdb..f6e7a60012b1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new->reshape_fail = cbq_reshape_fail;
#endif
}
- sch_tree_lock(sch);
- *old = cl->q;
- cl->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->q);
return 0;
}
@@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- unsigned int qlen;
+ unsigned int qlen, backlog;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
@@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
qlen = cl->q->q.qlen;
+ backlog = cl->q->qstats.backlog;
qdisc_reset(cl->q);
- qdisc_tree_decrease_qlen(cl->q, qlen);
+ qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
if (cl->next_alive)
cbq_deactivate_class(cl);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9045ce..3f6437db9b0f 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
choke_zap_tail_holes(q);
qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch);
- qdisc_tree_decrease_qlen(sch, 1);
--sch->q.qlen;
}
@@ -449,6 +449,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
old = q->tab;
if (old) {
unsigned int oqlen = sch->q.qlen, tail = 0;
+ unsigned dropped = 0;
while (q->head != q->tail) {
struct sk_buff *skb = q->tab[q->head];
@@ -460,11 +461,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ntab[tail++] = skb;
continue;
}
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
q->tail = tail;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 7a0bdb16ac92..9a9068d00833 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->stats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
q->stats.drop_count = 0;
+ q->stats.drop_len = 0;
}
if (skb)
qdisc_bstats_update(sch, skb);
@@ -115,7 +116,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
{
struct codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CODEL_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -149,10 +150,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 338706092c27..e599803caa1e 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
static void drr_purge_queue(struct drr_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
@@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- drr_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 66700a6116aa..7288dda2a7fb 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -67,13 +67,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- *old = p->q;
- p->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &p->q);
return 0;
}
@@ -262,6 +256,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -284,6 +279,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
return NULL;
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
index = skb->tc_index & (p->indices - 1);
@@ -399,6 +395,7 @@ static void dsmark_reset(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
qdisc_reset(p->q);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f377702d4b91..4816778566d3 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -659,6 +659,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
struct fq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_MAX + 1];
int err, drop_count = 0;
+ unsigned drop_len = 0;
u32 fq_log;
if (!opt)
@@ -733,10 +734,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb)
break;
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
drop_count++;
}
- qdisc_tree_decrease_qlen(sch, drop_count);
+ qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
sch_tree_unlock(sch);
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9291598b5aad..96971c7ab228 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -173,7 +173,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- unsigned int idx;
+ unsigned int idx, prev_backlog;
struct fq_codel_flow *flow;
int uninitialized_var(ret);
@@ -201,6 +201,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet
* from this flow.
@@ -209,7 +210,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -239,6 +240,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
+ unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -257,6 +259,7 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
+ prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
dequeue);
@@ -274,12 +277,14 @@ begin:
}
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->cstats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+ q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
}
return skb;
}
@@ -347,11 +352,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_codel_dequeue(sch);
+ q->cstats.drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
q->cstats.drop_count++;
}
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b453270be3fd..9821e6d641bb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
if (validate)
skb = validate_xmit_skb_list(skb, dev);
- if (skb) {
+ if (likely(skb)) {
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
+ } else {
+ spin_lock(root_lock);
+ return qdisc_qlen(q);
}
spin_lock(root_lock);
@@ -666,8 +669,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
kfree((char *) qdisc - qdisc->padded);
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index e6c7416d0332..d3e21dac8b40 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static void
@@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- hfsc_purge_queue(sch, cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aabfe250..792c6f330f77 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -390,6 +390,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct hhf_sched_data *q = qdisc_priv(sch);
enum wdrr_bucket_idx idx;
struct wdrr_bucket *bucket;
+ unsigned int prev_backlog;
idx = hhf_classify(skb, sch);
@@ -417,6 +418,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet from this
* bucket.
@@ -425,7 +427,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this. */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -535,7 +537,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
{
struct hhf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HHF_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, prev_backlog;
int err;
u64 non_hh_quantum;
u32 new_quantum = q->quantum;
@@ -585,12 +587,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
}
qlen = sch->q.qlen;
+ prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
kfree_skb(skb);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
+ prev_backlog - sch->qstats.backlog);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f1acb0f60dc3..ccff00640713 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
htb_activate(q, cl);
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
ok:
qdisc_bstats_update(sch, skb);
qdisc_unthrottled(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
}
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
unsigned int len;
if (cl->un.leaf.q->ops->drop &&
(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
}
cl->prio_activity = 0;
cl->cmode = HTB_CAN_SEND;
-
}
}
qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
for (i = 0; i < TC_HTB_NUMPRIO; i++)
@@ -1165,14 +1168,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
cl->common.classid)) == NULL)
return -ENOBUFS;
- sch_tree_lock(sch);
- *old = cl->un.leaf.q;
- cl->un.leaf.q = new;
- if (*old != NULL) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->un.leaf.q);
return 0;
}
@@ -1274,7 +1270,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- unsigned int qlen;
struct Qdisc *new_q = NULL;
int last_child = 0;
@@ -1294,9 +1289,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
if (!cl->level) {
- qlen = cl->un.leaf.q->q.qlen;
+ unsigned int qlen = cl->un.leaf.q->q.qlen;
+ unsigned int backlog = cl->un.leaf.q->qstats.backlog;
+
qdisc_reset(cl->un.leaf.q);
- qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
}
/* delete from hash and active; remainder in destroy_class */
@@ -1430,10 +1427,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
+ unsigned int backlog = parent->un.leaf.q->qstats.backlog;
/* turn parent into inner node */
qdisc_reset(parent->un.leaf.q);
- qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
qdisc_destroy(parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 42dd218871e0..23437d62a8db 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
if (q->queues[i] != &noop_qdisc) {
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen,
+ child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 956ead2cab9a..80124c1edbba 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
sch->q.qlen++;
}
+/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
+ * when we statistically choose to corrupt one, we instead segment it, returning
+ * the first packet to be corrupted, and re-enqueue the remaining frames
+ */
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sk_buff *segs;
+ netdev_features_t features = netif_skb_features(skb);
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+ if (IS_ERR_OR_NULL(segs)) {
+ qdisc_reshape_fail(skb, sch);
+ return NULL;
+ }
+ consume_skb(skb);
+ return segs;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
struct sk_buff *skb2;
+ struct sk_buff *segs = NULL;
+ unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
+ int nb = 0;
int count = 1;
+ int rc = NET_XMIT_SUCCESS;
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* do it now in software before we mangle it.
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (skb_is_gso(skb)) {
+ segs = netem_segment(skb, sch);
+ if (!segs)
+ return NET_XMIT_DROP;
+ } else {
+ segs = skb;
+ }
+
+ skb = segs;
+ segs = segs->next;
+
if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb)))
- return qdisc_drop(skb, sch);
+ skb_checksum_help(skb))) {
+ rc = qdisc_drop(skb, sch);
+ goto finish_segs;
+ }
skb->data[prandom_u32() % skb_headlen(skb)] ^=
1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.requeues++;
}
+finish_segs:
+ if (segs) {
+ while (segs) {
+ skb2 = segs->next;
+ segs->next = NULL;
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ last_len = segs->len;
+ rc = qdisc_enqueue(segs, sch);
+ if (rc != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(rc))
+ qdisc_qstats_drop(sch);
+ } else {
+ nb++;
+ len += last_len;
+ }
+ segs = skb2;
+ }
+ sch->q.qlen += nb;
+ if (nb > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+ }
return NET_XMIT_SUCCESS;
}
@@ -598,7 +655,8 @@ deliver:
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
qdisc_qstats_drop(sch);
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1,
+ qdisc_pkt_len(skb));
}
}
goto tfifo_dequeue;
@@ -1037,15 +1095,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{
struct netem_sched_data *q = qdisc_priv(sch);
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- if (*old) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
{
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_PIE_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8e5cd34aaa74..e671b1a4e815 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
if (child != &noop_qdisc) {
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3ec7e88a43ca..e2b8fd47008b 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -221,9 +221,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
static void qfq_purge_queue(struct qfq_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
@@ -619,11 +620,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- qfq_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
q->flags = ctl->flags;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd82630d..e1d634e3c255 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -518,7 +518,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
@@ -614,12 +615,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140beda5..4417fb25166f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -369,7 +369,7 @@ static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned int hash;
+ unsigned int hash, dropped;
sfq_index x, qlen;
struct sfq_slot *slot;
int uninitialized_var(ret);
@@ -484,7 +484,7 @@ enqueue:
return NET_XMIT_SUCCESS;
qlen = slot->qlen;
- sfq_drop(sch);
+ dropped = sfq_drop(sch);
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
@@ -492,7 +492,7 @@ enqueue:
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, dropped);
return NET_XMIT_SUCCESS;
}
@@ -560,6 +560,7 @@ static void sfq_rehash(struct Qdisc *sch)
struct sfq_slot *slot;
struct sk_buff_head list;
int dropped = 0;
+ unsigned int drop_len = 0;
__skb_queue_head_init(&list);
@@ -588,6 +589,7 @@ static void sfq_rehash(struct Qdisc *sch)
if (x >= SFQ_MAX_FLOWS) {
drop:
qdisc_qstats_backlog_dec(sch, skb);
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
dropped++;
continue;
@@ -617,7 +619,7 @@ drop:
}
}
sch->q.qlen -= dropped;
- qdisc_tree_decrease_qlen(sch, dropped);
+ qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
static void sfq_perturbation(unsigned long arg)
@@ -641,7 +643,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
struct red_parms *p = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -690,8 +692,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit)
- sfq_drop(sch);
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ dropped += sfq_drop(sch);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
+ unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
int ret, nb;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
nskb = segs->next;
segs->next = NULL;
qdisc_skb_cb(segs)->pkt_len = segs->len;
+ len += segs->len;
ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
}
sch->q.qlen += nb;
if (nb > 1)
- qdisc_tree_decrease_qlen(sch, 1 - nb);
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
consume_skb(skb);
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0e4198ee2370..18361cbfc882 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -519,6 +519,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
}
return 0;
}
+ if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+ return 0;
if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
return 0;
/* If this is a linklocal address, compare the scope_id. */
@@ -634,6 +636,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
+ struct ipv6_txoptions *opt;
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot);
if (!newsk)
@@ -653,6 +656,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+
/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
* and getpeername().
*/
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e13c3c3ea4ac..9d134ab3351f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -60,6 +60,8 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
/* Global data structures. */
struct sctp_globals sctp_globals __read_mostly;
@@ -1332,6 +1334,8 @@ static __init int sctp_init(void)
unsigned long limit;
int max_share;
int order;
+ int num_entries;
+ int max_entry_order;
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
@@ -1384,14 +1388,24 @@ static __init int sctp_init(void)
/* Size and allocate the association hash table.
* The methodology is similar to that of the tcp hash tables.
+ * Though not identical. Start by getting a goal size
*/
if (totalram_pages >= (128 * 1024))
goal = totalram_pages >> (22 - PAGE_SHIFT);
else
goal = totalram_pages >> (24 - PAGE_SHIFT);
- for (order = 0; (1UL << order) < goal; order++)
- ;
+ /* Then compute the page order for said goal */
+ order = get_order(goal);
+
+ /* Now compute the required page order for the maximum sized table we
+ * want to create
+ */
+ max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+ sizeof(struct sctp_bind_hashbucket));
+
+ /* Limit the page order by that maximum hash table size */
+ order = min(order, max_entry_order);
do {
sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
@@ -1425,27 +1439,42 @@ static __init int sctp_init(void)
INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
}
- /* Allocate and initialize the SCTP port hash table. */
+ /* Allocate and initialize the SCTP port hash table.
+ * Note that order is initalized to start at the max sized
+ * table we want to support. If we can't get that many pages
+ * reduce the order and try again
+ */
do {
- sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
- sizeof(struct sctp_bind_hashbucket);
- if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
- continue;
sctp_port_hashtable = (struct sctp_bind_hashbucket *)
__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
} while (!sctp_port_hashtable && --order > 0);
+
if (!sctp_port_hashtable) {
pr_err("Failed bind hash alloc\n");
status = -ENOMEM;
goto err_bhash_alloc;
}
+
+ /* Now compute the number of entries that will fit in the
+ * port hash space we allocated
+ */
+ num_entries = (1UL << order) * PAGE_SIZE /
+ sizeof(struct sctp_bind_hashbucket);
+
+ /* And finish by rounding it down to the nearest power of two
+ * this wastes some memory of course, but its needed because
+ * the hash function operates based on the assumption that
+ * that the number of entries is a power of two
+ */
+ sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
for (i = 0; i < sctp_port_hashsize; i++) {
spin_lock_init(&sctp_port_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
}
- pr_info("Hash tables configured (established %d bind %d)\n",
- sctp_assoc_hashsize, sctp_port_hashsize);
+ pr_info("Hash tables configured (established %d bind %d/%d)\n",
+ sctp_assoc_hashsize, sctp_port_hashsize, num_entries);
sctp_sysctl_register();
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 06320c8c1c86..83a07d468644 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
/* Set an expiration time for the cookie. */
cookie->c.expiration = ktime_add(asoc->cookie_life,
- ktime_get());
+ ktime_get_real());
/* Copy the peer's init packet. */
memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1780,7 +1780,7 @@ no_hmac:
if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
kt = skb_get_ktime(skb);
else
- kt = ktime_get();
+ kt = ktime_get_real();
if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
/*
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fef2acdf4a2e..ecae5561b912 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
int error;
struct sctp_transport *transport = (struct sctp_transport *) peer;
struct sctp_association *asoc = transport->asoc;
- struct net *net = sock_net(asoc->base.sk);
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
/* Check whether a task is in the sock. */
- bh_lock_sock(asoc->base.sk);
- if (sock_owned_by_user(asoc->base.sk)) {
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
pr_debug("%s: sock is busy\n", __func__);
/* Try again later. */
@@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
transport, GFP_ATOMIC);
if (error)
- asoc->base.sk->sk_err = -error;
+ sk->sk_err = -error;
out_unlock:
- bh_unlock_sock(asoc->base.sk);
+ bh_unlock_sock(sk);
sctp_transport_put(transport);
}
@@ -285,11 +286,12 @@ out_unlock:
static void sctp_generate_timeout_event(struct sctp_association *asoc,
sctp_event_timeout_t timeout_type)
{
- struct net *net = sock_net(asoc->base.sk);
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
int error = 0;
- bh_lock_sock(asoc->base.sk);
- if (sock_owned_by_user(asoc->base.sk)) {
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
pr_debug("%s: sock is busy: timer %d\n", __func__,
timeout_type);
@@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc,
(void *)timeout_type, GFP_ATOMIC);
if (error)
- asoc->base.sk->sk_err = -error;
+ sk->sk_err = -error;
out_unlock:
- bh_unlock_sock(asoc->base.sk);
+ bh_unlock_sock(sk);
sctp_association_put(asoc);
}
@@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data)
int error = 0;
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
- struct net *net = sock_net(asoc->base.sk);
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
- bh_lock_sock(asoc->base.sk);
- if (sock_owned_by_user(asoc->base.sk)) {
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
pr_debug("%s: sock is busy\n", __func__);
/* Try again later. */
@@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data)
transport, GFP_ATOMIC);
if (error)
- asoc->base.sk->sk_err = -error;
+ sk->sk_err = -error;
out_unlock:
- bh_unlock_sock(asoc->base.sk);
+ bh_unlock_sock(sk);
sctp_transport_put(transport);
}
@@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data)
{
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
- struct net *net = sock_net(asoc->base.sk);
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
- bh_lock_sock(asoc->base.sk);
- if (sock_owned_by_user(asoc->base.sk)) {
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
pr_debug("%s: sock is busy\n", __func__);
/* Try again later. */
@@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
out_unlock:
- bh_unlock_sock(asoc->base.sk);
+ bh_unlock_sock(sk);
sctp_association_put(asoc);
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 3ee27b7704ff..e6bb98e583fb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 66d796075050..3c5833058b03 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1513,8 +1513,7 @@ static void sctp_close(struct sock *sk, long timeout)
struct sctp_chunk *chunk;
chunk = sctp_make_abort_user(asoc, NULL, 0);
- if (chunk)
- sctp_primitive_ABORT(net, asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
} else
sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
@@ -5556,6 +5555,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
+ int i;
if (!ep->auth_enable)
return -EACCES;
@@ -5573,8 +5573,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
return -EFAULT;
if (put_user(num_idents, &p->shmac_num_idents))
return -EFAULT;
- if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
- return -EFAULT;
+ for (i = 0; i < num_idents; i++) {
+ __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
+
+ if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
+ return -EFAULT;
+ }
return 0;
}
@@ -6654,6 +6658,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -6677,6 +6682,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
if (cmsgs->sinfo->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_SACK_IMMEDIATELY |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
@@ -7175,6 +7181,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
+ newsk->sk_tsflags = sk->sk_tsflags;
newsk->sk_no_check_tx = sk->sk_no_check_tx;
newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
@@ -7207,6 +7214,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
+
+ if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ net_enable_timestamp();
}
static inline void sctp_copy_descendant(struct sock *sk_to,
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 26d50c565f54..3e0fc5127225 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
struct ctl_table tbl;
bool changed = false;
char *none = "none";
- char tmp[8];
+ char tmp[8] = {0};
int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
diff --git a/net/socket.c b/net/socket.c
index 884e32997698..e66e4f357506 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1705,6 +1705,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
+ msg.msg_iocb = NULL;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
@@ -2246,31 +2247,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
break;
}
-out_put:
- fput_light(sock->file, fput_needed);
-
if (err == 0)
- return datagrams;
+ goto out_put;
- if (datagrams != 0) {
+ if (datagrams == 0) {
+ datagrams = err;
+ goto out_put;
+ }
+
+ /*
+ * We may return less entries than requested (vlen) if the
+ * sock is non block and there aren't enough datagrams...
+ */
+ if (err != -EAGAIN) {
/*
- * We may return less entries than requested (vlen) if the
- * sock is non block and there aren't enough datagrams...
+ * ... or if recvmsg returns an error after we
+ * received some datagrams, where we record the
+ * error to return on the next call or if the
+ * app asks about it using getsockopt(SO_ERROR).
*/
- if (err != -EAGAIN) {
- /*
- * ... or if recvmsg returns an error after we
- * received some datagrams, where we record the
- * error to return on the next call or if the
- * app asks about it using getsockopt(SO_ERROR).
- */
- sock->sk->sk_err = -err;
- }
-
- return datagrams;
+ sock->sk->sk_err = -err;
}
+out_put:
+ fput_light(sock->file, fput_needed);
- return err;
+ return datagrams;
}
SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1095be9c80ab..4605dc73def6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -857,8 +857,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
goto out;
if (svc_getnl(&buf->head[0]) != seq)
goto out;
- /* trim off the mic at the end before returning */
- xdr_buf_trim(buf, mic.len + 4);
+ /* trim off the mic and padding at the end before returning */
+ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2928afffbb81..9ec709b9707c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1175,14 +1175,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
}
crq->q.reader = 0;
- crq->item = cache_get(h);
crq->buf = buf;
crq->len = 0;
crq->readers = 0;
spin_lock(&queue_lock);
- if (test_bit(CACHE_PENDING, &h->flags))
+ if (test_bit(CACHE_PENDING, &h->flags)) {
+ crq->item = cache_get(h);
list_add_tail(&crq->q.list, &detail->queue);
- else
+ } else
/* Lost a race, no longer PENDING, so don't enqueue */
ret = -EAGAIN;
spin_unlock(&queue_lock);
@@ -1218,7 +1218,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
if (bp[0] == '\\' && bp[1] == 'x') {
/* HEX STRING */
bp += 2;
- while (len < bufsize) {
+ while (len < bufsize - 1) {
int h, l;
h = hex_to_bin(bp[0]);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e6ce1517367f..16e831dcfde0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -442,7 +442,7 @@ out_no_rpciod:
return ERR_PTR(err);
}
-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt)
{
struct rpc_clnt *clnt = NULL;
@@ -474,7 +474,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
return clnt;
}
-EXPORT_SYMBOL_GPL(rpc_create_xprt);
/**
* rpc_create - create an RPC client and transport with one call
@@ -500,6 +499,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
};
char servername[48];
+ if (args->bc_xprt) {
+ WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+ xprt = args->bc_xprt->xpt_bc_xprt;
+ if (xprt) {
+ xprt_get(xprt);
+ return rpc_create_xprt(args, xprt);
+ }
+ }
+
if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 055453d48668..a8dbe8001e46 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -15,6 +15,7 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
@@ -64,7 +65,6 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state)
}
EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update);
-static DEFINE_MUTEX(netdev_switch_mutex);
static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain);
/**
@@ -79,9 +79,9 @@ int register_netdev_switch_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&netdev_switch_mutex);
+ rtnl_lock();
err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb);
- mutex_unlock(&netdev_switch_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(register_netdev_switch_notifier);
@@ -97,9 +97,9 @@ int unregister_netdev_switch_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&netdev_switch_mutex);
+ rtnl_lock();
err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb);
- mutex_unlock(&netdev_switch_mutex);
+ rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
@@ -113,16 +113,17 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
* Call all network notifier blocks. This should be called by driver
* when it needs to propagate hardware event.
* Return values are same as for atomic_notifier_call_chain().
+ * rtnl_lock must be held.
*/
int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev,
struct netdev_switch_notifier_info *info)
{
int err;
+ ASSERT_RTNL();
+
info->dev = dev;
- mutex_lock(&netdev_switch_mutex);
err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info);
- mutex_unlock(&netdev_switch_mutex);
return err;
}
EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index ce9121e8e990..e5ec86dd8dc1 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -712,7 +712,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
goto out;
tipc_tlv_sprintf(msg->rep, "%-10u %s",
- nla_get_u32(publ[TIPC_NLA_PUBL_REF]),
+ nla_get_u32(publ[TIPC_NLA_PUBL_KEY]),
scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]);
out:
tipc_tlv_sprintf(msg->rep, "\n");
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 20cc6df07157..d41d424b9913 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2804,6 +2804,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (!attrs[TIPC_NLA_SOCK])
+ return -EINVAL;
+
err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
attrs[TIPC_NLA_SOCK],
tipc_nl_sock_policy);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 1c147c869c2e..948f316019d7 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -302,11 +302,10 @@ static void subscr_conn_msg_event(struct net *net, int conid,
struct tipc_net *tn = net_generic(net, tipc_net_id);
spin_lock_bh(&subscriber->lock);
- subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub);
- if (sub)
- tipc_nametbl_subscribe(sub);
- else
+ if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub))
tipc_conn_terminate(tn->topsrv, subscriber->conid);
+ else
+ tipc_nametbl_subscribe(sub);
spin_unlock_bh(&subscriber->lock);
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1975fd8d1c10..03da879008d7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -935,32 +935,20 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+ struct path *res)
{
- struct dentry *dentry;
- struct path path;
- int err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- return err;
+ int err;
- /*
- * All right, let's create it.
- */
- err = security_path_mknod(&path, dentry, mode, 0);
+ err = security_path_mknod(path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
if (!err) {
- res->mnt = mntget(path.mnt);
+ res->mnt = mntget(path->mnt);
res->dentry = dget(dentry);
}
}
- done_path_create(&path, dentry);
+
return err;
}
@@ -971,10 +959,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- int err;
+ int err, name_err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path;
+ struct dentry *dentry;
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -990,14 +980,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ name_err = 0;
+ dentry = NULL;
+ if (sun_path[0]) {
+ /* Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+ if (IS_ERR(dentry)) {
+ /* delay report until after 'already bound' check */
+ name_err = PTR_ERR(dentry);
+ dentry = NULL;
+ }
+ }
+
err = mutex_lock_interruptible(&u->readlock);
if (err)
- goto out;
+ goto out_path;
err = -EINVAL;
if (u->addr)
goto out_up;
+ if (name_err) {
+ err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+ goto out_up;
+ }
+
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
@@ -1008,11 +1018,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr->hash = hash ^ sk->sk_type;
atomic_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- struct path path;
+ if (dentry) {
+ struct path u_path;
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
+ err = unix_mknod(dentry, &path, mode, &u_path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
@@ -1020,9 +1030,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
- u->path = path;
+ u->path = u_path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
@@ -1045,6 +1055,10 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->readlock);
+out_path:
+ if (dentry)
+ done_path_create(&path, dentry);
+
out:
return err;
}
@@ -1464,7 +1478,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->fp[i]);
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}
static void unix_destruct_scm(struct sk_buff *skb)
@@ -1481,6 +1495,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
#define MAX_RECURSION_LEVEL 4
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1489,6 +1518,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
unsigned char max_level = 0;
int unix_sock_count = 0;
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
for (i = scm->fp->count - 1; i >= 0; i--) {
struct sock *sk = unix_get_socket(scm->fp->fp[i]);
@@ -1510,10 +1542,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- if (unix_sock_count) {
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- }
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
return max_level;
}
@@ -1698,7 +1728,12 @@ restart_locked:
goto out_unlock;
}
- if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ /* other == sk && unix_peer(other) != sk if
+ * - unix_peer(sk) == NULL, destination address bound to sk
+ * - unix_peer(sk) == sk by time of get but disconnected before lock
+ */
+ if (other != sk &&
+ unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
@@ -2072,14 +2107,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
memset(&scm, 0, sizeof(scm));
- err = mutex_lock_interruptible(&u->readlock);
- if (unlikely(err)) {
- /* recvmsg() in non blocking mode is supposed to return -EAGAIN
- * sk_rcvtimeo is not honored by mutex_lock_interruptible()
- */
- err = noblock ? -EAGAIN : -ERESTARTSYS;
- goto out;
- }
+ mutex_lock(&u->readlock);
if (flags & MSG_PEEK)
skip = sk_peek_offset(sk, flags);
@@ -2120,12 +2148,13 @@ again:
timeo = unix_stream_data_wait(sk, timeo, last);
- if (signal_pending(current)
- || mutex_lock_interruptible(&u->readlock)) {
+ if (signal_pending(current)) {
err = sock_intr_errno(timeo);
+ scm_destroy(&scm);
goto out;
}
+ mutex_lock(&u->readlock);
continue;
unlock:
unix_state_unlock(sk);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64d5287..4d9679701a6d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -220,7 +220,7 @@ done:
return skb->len;
}
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
{
int i;
struct sock *sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a73a226f2d33..6a0d48525fcf 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,15 +116,15 @@ struct sock *unix_get_socket(struct file *filp)
* descriptor if it is for an AF_UNIX socket.
*/
-void unix_inflight(struct file *fp)
+void unix_inflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
-
if (atomic_long_inc_return(&u->inflight) == 1) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
BUG_ON(list_empty(&u->link));
}
unix_tot_inflight++;
- spin_unlock(&unix_gc_lock);
}
+ user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
}
-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct user_struct *user, struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
BUG_ON(list_empty(&u->link));
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
unix_tot_inflight--;
- spin_unlock(&unix_gc_lock);
}
+ user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
}
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2ec86e652a19..e1c69b216db3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
else if (sk->sk_shutdown & RCV_SHUTDOWN)
err = 0;
- if (copied > 0) {
- /* We only do these additional bookkeeping/notification steps
- * if we actually copied something out of the queue pair
- * instead of just peeking ahead.
- */
-
- if (!(flags & MSG_PEEK)) {
- /* If the other side has shutdown for sending and there
- * is nothing more to read, then modify the socket
- * state.
- */
- if (vsk->peer_shutdown & SEND_SHUTDOWN) {
- if (vsock_stream_has_data(vsk) <= 0) {
- sk->sk_state = SS_UNCONNECTED;
- sock_set_flag(sk, SOCK_DONE);
- sk->sk_state_change(sk);
- }
- }
- }
+ if (copied > 0)
err = copied;
- }
out_wait:
finish_wait(sk_sleep(sk), &wait);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 2a0bbd22854b..71e9b84847f3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1138,6 +1138,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
+ wireless_nlevent_flush();
+
return NOTIFY_OK;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 04b6f3f6ee0b..f24138681b80 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12777,7 +12777,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
struct wireless_dev *wdev;
struct cfg80211_beacon_registration *reg, *tmp;
- if (state != NETLINK_URELEASE)
+ if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
return NOTIFY_DONE;
rcu_read_lock();
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1d082e..c753211cb83f 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@ static const int compat_event_type_size[] = {
/* IW event code */
+void wireless_nlevent_flush(void)
+{
+ struct sk_buff *skb;
+ struct net *net;
+
+ ASSERT_RTNL();
+
+ for_each_net(net) {
+ while ((skb = skb_dequeue(&net->wext_nlevents)))
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+ GFP_KERNEL);
+ }
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+ unsigned long state, void *ptr)
+{
+ /*
+ * When a netdev changes state in any way, flush all pending messages
+ * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+ * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+ * or similar - all of which could otherwise happen due to delays from
+ * schedule_work().
+ */
+ wireless_nlevent_flush();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+ .notifier_call = wext_netdev_notifier_call,
+};
+
static int __net_init wext_pernet_init(struct net *net)
{
skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = {
static int __init wireless_nlevent_init(void)
{
- return register_pernet_subsys(&wext_pernet_ops);
+ int err = register_pernet_subsys(&wext_pernet_ops);
+
+ if (err)
+ return err;
+
+ return register_netdevice_notifier(&wext_netdev_notifier);
}
subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work)
{
- struct sk_buff *skb;
- struct net *net;
-
rtnl_lock();
-
- for_each_net(net) {
- while ((skb = skb_dequeue(&net->wext_nlevents)))
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
- GFP_KERNEL);
- }
-
+ wireless_nlevent_flush();
rtnl_unlock();
}
@@ -925,8 +955,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
return private(dev, iwr, cmd, info, handler);
}
/* Old driver API : call driver ioctl handler */
- if (dev->netdev_ops->ndo_do_ioctl)
- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ if (dev->netdev_ops->ndo_do_ioctl) {
+#ifdef CONFIG_COMPAT
+ if (info->flags & IW_REQUEST_FLAG_COMPAT) {
+ int ret = 0;
+ struct iwreq iwr_lcl;
+ struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
+
+ memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
+ iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
+ iwr_lcl.u.data.length = iwp_compat->length;
+ iwr_lcl.u.data.flags = iwp_compat->flags;
+
+ ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
+
+ iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
+ iwp_compat->length = iwr_lcl.u.data.length;
+ iwp_compat->flags = iwr_lcl.u.data.flags;
+
+ return ret;
+ } else
+#endif
+ return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ }
return -EOPNOTSUPP;
}
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 7ecd04c21360..997ff7b2509b 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
memset(&theirs, 0, sizeof(theirs));
memcpy(new, ours, sizeof(*new));
+ memset(dte, 0, sizeof(*dte));
len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
if (len < 0)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b58286ecd156..cbaf52c837f4 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
skb_dst_force(skb);
+ dev_hold(skb->dev);
nexthdr = x->type->input(x, skb);
if (nexthdr == -EINPROGRESS)
return 0;
resume:
+ dev_put(skb->dev);
+
spin_lock(&x->lock);
if (nexthdr <= 0) {
if (nexthdr == -EBADMSG) {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index fbcedbe33190..5097dce5b916 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -153,6 +153,8 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff *segs;
+ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
+ BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
segs = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (IS_ERR(segs))
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 638af0655aaf..4cd2076ff84b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2806,7 +2806,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
- struct net *net;
int err = 0;
if (unlikely(afinfo == NULL))
return -EINVAL;
@@ -2837,26 +2836,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
}
spin_unlock(&xfrm_policy_afinfo_lock);
- rtnl_lock();
- for_each_net(net) {
- struct dst_ops *xfrm_dst_ops;
-
- switch (afinfo->family) {
- case AF_INET:
- xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
- break;
-#endif
- default:
- BUG();
- }
- *xfrm_dst_ops = *afinfo->dst_ops;
- }
- rtnl_unlock();
-
return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2892,22 +2871,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-static void __net_init xfrm_dst_ops_init(struct net *net)
-{
- struct xfrm_policy_afinfo *afinfo;
-
- rcu_read_lock();
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
- if (afinfo)
- net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if IS_ENABLED(CONFIG_IPV6)
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
- if (afinfo)
- net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
-#endif
- rcu_read_unlock();
-}
-
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3056,7 +3019,6 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_policy_init(net);
if (rv < 0)
goto out_policy;
- xfrm_dst_ops_init(net);
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;